diff --git a/Makefile b/Makefile index bfba8fd0..7af3fb9b 100644 --- a/Makefile +++ b/Makefile @@ -151,6 +151,10 @@ ifndef USE_INTERNAL_ZLIB USE_INTERNAL_ZLIB=1 endif +ifndef USE_INTERNAL_JPEG +USE_INTERNAL_JPEG=1 +endif + ifndef USE_LOCAL_HEADERS USE_LOCAL_HEADERS=1 endif @@ -176,7 +180,7 @@ BLIBDIR=$(MOUNT_DIR)/botlib NDIR=$(MOUNT_DIR)/null UIDIR=$(MOUNT_DIR)/ui Q3UIDIR=$(MOUNT_DIR)/q3_ui -JPDIR=$(MOUNT_DIR)/jpeg-6b +JPDIR=$(MOUNT_DIR)/jpeg-8c SPEEXDIR=$(MOUNT_DIR)/libspeex ZDIR=$(MOUNT_DIR)/zlib Q3ASMDIR=$(MOUNT_DIR)/tools/asm @@ -908,6 +912,13 @@ else LIBS += -lz endif +ifeq ($(USE_INTERNAL_JPEG),1) + BASE_CFLAGS += -DUSE_INTERNAL_JPEG + BASE_CFLAGS += -I$(JPDIR) +else + LIBS += -ljpeg +endif + ifdef DEFAULT_BASEDIR BASE_CFLAGS += -DDEFAULT_BASEDIR=\\\"$(DEFAULT_BASEDIR)\\\" endif @@ -1398,43 +1409,6 @@ Q3OBJ = \ $(B)/client/l_precomp.o \ $(B)/client/l_script.o \ $(B)/client/l_struct.o \ - \ - $(B)/client/jcapimin.o \ - $(B)/client/jcapistd.o \ - $(B)/client/jccoefct.o \ - $(B)/client/jccolor.o \ - $(B)/client/jcdctmgr.o \ - $(B)/client/jchuff.o \ - $(B)/client/jcinit.o \ - $(B)/client/jcmainct.o \ - $(B)/client/jcmarker.o \ - $(B)/client/jcmaster.o \ - $(B)/client/jcomapi.o \ - $(B)/client/jcparam.o \ - $(B)/client/jcphuff.o \ - $(B)/client/jcprepct.o \ - $(B)/client/jcsample.o \ - $(B)/client/jdapimin.o \ - $(B)/client/jdapistd.o \ - $(B)/client/jdatasrc.o \ - $(B)/client/jdcoefct.o \ - $(B)/client/jdcolor.o \ - $(B)/client/jddctmgr.o \ - $(B)/client/jdhuff.o \ - $(B)/client/jdinput.o \ - $(B)/client/jdmainct.o \ - $(B)/client/jdmarker.o \ - $(B)/client/jdmaster.o \ - $(B)/client/jdpostct.o \ - $(B)/client/jdsample.o \ - $(B)/client/jdtrans.o \ - $(B)/client/jerror.o \ - $(B)/client/jfdctflt.o \ - $(B)/client/jidctflt.o \ - $(B)/client/jmemmgr.o \ - $(B)/client/jmemnobs.o \ - $(B)/client/jutils.o \ - \ $(B)/client/tr_animation.o \ $(B)/client/tr_backend.o \ $(B)/client/tr_bsp.o \ @@ -1472,6 +1446,56 @@ Q3OBJ = \ $(B)/client/con_log.o \ $(B)/client/sys_main.o +ifneq ($(USE_INTERNAL_JPEG),0) + Q3OBJ += \ + $(B)/client/jaricom.o \ + $(B)/client/jcapimin.o \ + $(B)/client/jcapistd.o \ + $(B)/client/jcarith.o \ + $(B)/client/jccoefct.o \ + $(B)/client/jccolor.o \ + $(B)/client/jcdctmgr.o \ + $(B)/client/jchuff.o \ + $(B)/client/jcinit.o \ + $(B)/client/jcmainct.o \ + $(B)/client/jcmarker.o \ + $(B)/client/jcmaster.o \ + $(B)/client/jcomapi.o \ + $(B)/client/jcparam.o \ + $(B)/client/jcprepct.o \ + $(B)/client/jcsample.o \ + $(B)/client/jctrans.o \ + $(B)/client/jdapimin.o \ + $(B)/client/jdapistd.o \ + $(B)/client/jdarith.o \ + $(B)/client/jdatadst.o \ + $(B)/client/jdatasrc.o \ + $(B)/client/jdcoefct.o \ + $(B)/client/jdcolor.o \ + $(B)/client/jddctmgr.o \ + $(B)/client/jdhuff.o \ + $(B)/client/jdinput.o \ + $(B)/client/jdmainct.o \ + $(B)/client/jdmarker.o \ + $(B)/client/jdmaster.o \ + $(B)/client/jdmerge.o \ + $(B)/client/jdpostct.o \ + $(B)/client/jdsample.o \ + $(B)/client/jdtrans.o \ + $(B)/client/jerror.o \ + $(B)/client/jfdctflt.o \ + $(B)/client/jfdctfst.o \ + $(B)/client/jfdctint.o \ + $(B)/client/jidctflt.o \ + $(B)/client/jidctfst.o \ + $(B)/client/jidctint.o \ + $(B)/client/jmemmgr.o \ + $(B)/client/jmemnobs.o \ + $(B)/client/jquant1.o \ + $(B)/client/jquant2.o \ + $(B)/client/jutils.o +endif + ifeq ($(ARCH),i386) Q3OBJ += \ $(B)/client/snd_mixa.o \ diff --git a/README b/README index 64867684..a2dcaf61 100644 --- a/README +++ b/README @@ -77,23 +77,30 @@ script to match your environment. The following variables may be set, either on the command line or in Makefile.local: - CFLAGS - use this for custom CFLAGS - V - set to show cc command line when building - DEFAULT_BASEDIR - extra path to search for baseq3 and such - BUILD_SERVER - build the 'ioq3ded' server binary - BUILD_CLIENT - build the 'ioquake3' client binary - BUILD_CLIENT_SMP - build the 'ioquake3-smp' client binary - BUILD_GAME_SO - build the game shared libraries - BUILD_GAME_QVM - build the game qvms - BUILD_STANDALONE - build binaries suited for stand-alone games - USE_OPENAL - use OpenAL where available - USE_OPENAL_DLOPEN - link with OpenAL at runtime - USE_CURL - use libcurl for http/ftp download support - USE_CURL_DLOPEN - link with libcurl at runtime - USE_CODEC_VORBIS - enable Ogg Vorbis support - USE_LOCAL_HEADERS - use headers local to ioq3 instead of system ones - COPYDIR - the target installation directory - TEMPDIR - specify user defined directory for temp files + CFLAGS - use this for custom CFLAGS + V - set to show cc command line when building + DEFAULT_BASEDIR - extra path to search for baseq3 and such + BUILD_SERVER - build the 'ioq3ded' server binary + BUILD_CLIENT - build the 'ioquake3' client binary + BUILD_CLIENT_SMP - build the 'ioquake3-smp' client binary + BUILD_GAME_SO - build the game shared libraries + BUILD_GAME_QVM - build the game qvms + BUILD_STANDALONE - build binaries suited for stand-alone games + USE_OPENAL - use OpenAL where available + USE_OPENAL_DLOPEN - link with OpenAL at runtime + USE_CURL - use libcurl for http/ftp download support + USE_CURL_DLOPEN - link with libcurl at runtime + USE_CODEC_VORBIS - enable Ogg Vorbis support + USE_MUMBLE - enable Mumble support + USE_VOIP - enable built-in VoIP support + USE_INTERNAL_SPEEX - build internal speex library instead of dynamically + linking against system libspeex + USE_INTERNAL_ZLIB - build and link against internal zlib + USE_INTERNAL_JPEG - build and link against internal JPEG library + USE_LOCAL_HEADERS - use headers local to ioq3 instead of system ones + DEBUG_CFLAGS - C compiler flags to use for building debug version + COPYDIR - the target installation directory + TEMPDIR - specify user defined directory for temp files The defaults for these variables differ depending on the target platform. diff --git a/code/jpeg-6b/README b/code/jpeg-6b/README deleted file mode 100644 index 86cc2066..00000000 --- a/code/jpeg-6b/README +++ /dev/null @@ -1,385 +0,0 @@ -The Independent JPEG Group's JPEG software -========================================== - -README for release 6b of 27-Mar-1998 -==================================== - -This distribution contains the sixth public release of the Independent JPEG -Group's free JPEG software. You are welcome to redistribute this software and -to use it for any purpose, subject to the conditions under LEGAL ISSUES, below. - -Serious users of this software (particularly those incorporating it into -larger programs) should contact IJG at jpeg-info@uunet.uu.net to be added to -our electronic mailing list. Mailing list members are notified of updates -and have a chance to participate in technical discussions, etc. - -This software is the work of Tom Lane, Philip Gladstone, Jim Boucher, -Lee Crocker, Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, -Guido Vollbeding, Ge' Weijers, and other members of the Independent JPEG -Group. - -IJG is not affiliated with the official ISO JPEG standards committee. - - -DOCUMENTATION ROADMAP -===================== - -This file contains the following sections: - -OVERVIEW General description of JPEG and the IJG software. -LEGAL ISSUES Copyright, lack of warranty, terms of distribution. -REFERENCES Where to learn more about JPEG. -ARCHIVE LOCATIONS Where to find newer versions of this software. -RELATED SOFTWARE Other stuff you should get. -FILE FORMAT WARS Software *not* to get. -TO DO Plans for future IJG releases. - -Other documentation files in the distribution are: - -User documentation: - install.doc How to configure and install the IJG software. - usage.doc Usage instructions for cjpeg, djpeg, jpegtran, - rdjpgcom, and wrjpgcom. - *.1 Unix-style man pages for programs (same info as usage.doc). - wizard.doc Advanced usage instructions for JPEG wizards only. - change.log Version-to-version change highlights. -Programmer and internal documentation: - libjpeg.doc How to use the JPEG library in your own programs. - example.c Sample code for calling the JPEG library. - structure.doc Overview of the JPEG library's internal structure. - filelist.doc Road map of IJG files. - coderules.doc Coding style rules --- please read if you contribute code. - -Please read at least the files install.doc and usage.doc. Useful information -can also be found in the JPEG FAQ (Frequently Asked Questions) article. See -ARCHIVE LOCATIONS below to find out where to obtain the FAQ article. - -If you want to understand how the JPEG code works, we suggest reading one or -more of the REFERENCES, then looking at the documentation files (in roughly -the order listed) before diving into the code. - - -OVERVIEW -======== - -This package contains C software to implement JPEG image compression and -decompression. JPEG (pronounced "jay-peg") is a standardized compression -method for full-color and gray-scale images. JPEG is intended for compressing -"real-world" scenes; line drawings, cartoons and other non-realistic images -are not its strong suit. JPEG is lossy, meaning that the output image is not -exactly identical to the input image. Hence you must not use JPEG if you -have to have identical output bits. However, on typical photographic images, -very good compression levels can be obtained with no visible change, and -remarkably high compression levels are possible if you can tolerate a -low-quality image. For more details, see the references, or just experiment -with various compression settings. - -This software implements JPEG baseline, extended-sequential, and progressive -compression processes. Provision is made for supporting all variants of these -processes, although some uncommon parameter settings aren't implemented yet. -For legal reasons, we are not distributing code for the arithmetic-coding -variants of JPEG; see LEGAL ISSUES. We have made no provision for supporting -the hierarchical or lossless processes defined in the standard. - -We provide a set of library routines for reading and writing JPEG image files, -plus two sample applications "cjpeg" and "djpeg", which use the library to -perform conversion between JPEG and some other popular image file formats. -The library is intended to be reused in other applications. - -In order to support file conversion and viewing software, we have included -considerable functionality beyond the bare JPEG coding/decoding capability; -for example, the color quantization modules are not strictly part of JPEG -decoding, but they are essential for output to colormapped file formats or -colormapped displays. These extra functions can be compiled out of the -library if not required for a particular application. We have also included -"jpegtran", a utility for lossless transcoding between different JPEG -processes, and "rdjpgcom" and "wrjpgcom", two simple applications for -inserting and extracting textual comments in JFIF files. - -The emphasis in designing this software has been on achieving portability and -flexibility, while also making it fast enough to be useful. In particular, -the software is not intended to be read as a tutorial on JPEG. (See the -REFERENCES section for introductory material.) Rather, it is intended to -be reliable, portable, industrial-strength code. We do not claim to have -achieved that goal in every aspect of the software, but we strive for it. - -We welcome the use of this software as a component of commercial products. -No royalty is required, but we do ask for an acknowledgement in product -documentation, as described under LEGAL ISSUES. - - -LEGAL ISSUES -============ - -In plain English: - -1. We don't promise that this software works. (But if you find any bugs, - please let us know!) -2. You can use this software for whatever you want. You don't have to pay us. -3. You may not pretend that you wrote this software. If you use it in a - program, you must acknowledge somewhere in your documentation that - you've used the IJG code. - -In legalese: - -The authors make NO WARRANTY or representation, either express or implied, -with respect to this software, its quality, accuracy, merchantability, or -fitness for a particular purpose. This software is provided "AS IS", and you, -its user, assume the entire risk as to its quality and accuracy. - -This software is copyright (C) 1991-1998, Thomas G. Lane. -All Rights Reserved except as specified below. - -Permission is hereby granted to use, copy, modify, and distribute this -software (or portions thereof) for any purpose, without fee, subject to these -conditions: -(1) If any part of the source code for this software is distributed, then this -README file must be included, with this copyright and no-warranty notice -unaltered; and any additions, deletions, or changes to the original files -must be clearly indicated in accompanying documentation. -(2) If only executable code is distributed, then the accompanying -documentation must state that "this software is based in part on the work of -the Independent JPEG Group". -(3) Permission for use of this software is granted only if the user accepts -full responsibility for any undesirable consequences; the authors accept -NO LIABILITY for damages of any kind. - -These conditions apply to any software derived from or based on the IJG code, -not just to the unmodified library. If you use our work, you ought to -acknowledge us. - -Permission is NOT granted for the use of any IJG author's name or company name -in advertising or publicity relating to this software or products derived from -it. This software may be referred to only as "the Independent JPEG Group's -software". - -We specifically permit and encourage the use of this software as the basis of -commercial products, provided that all warranty or liability claims are -assumed by the product vendor. - - -ansi2knr.c is included in this distribution by permission of L. Peter Deutsch, -sole proprietor of its copyright holder, Aladdin Enterprises of Menlo Park, CA. -ansi2knr.c is NOT covered by the above copyright and conditions, but instead -by the usual distribution terms of the Free Software Foundation; principally, -that you must include source code if you redistribute it. (See the file -ansi2knr.c for full details.) However, since ansi2knr.c is not needed as part -of any program generated from the IJG code, this does not limit you more than -the foregoing paragraphs do. - -The Unix configuration script "configure" was produced with GNU Autoconf. -It is copyright by the Free Software Foundation but is freely distributable. -The same holds for its supporting scripts (config.guess, config.sub, -ltconfig, ltmain.sh). Another support script, install-sh, is copyright -by M.I.T. but is also freely distributable. - -It appears that the arithmetic coding option of the JPEG spec is covered by -patents owned by IBM, AT&T, and Mitsubishi. Hence arithmetic coding cannot -legally be used without obtaining one or more licenses. For this reason, -support for arithmetic coding has been removed from the free JPEG software. -(Since arithmetic coding provides only a marginal gain over the unpatented -Huffman mode, it is unlikely that very many implementations will support it.) -So far as we are aware, there are no patent restrictions on the remaining -code. - -The IJG distribution formerly included code to read and write GIF files. -To avoid entanglement with the Unisys LZW patent, GIF reading support has -been removed altogether, and the GIF writer has been simplified to produce -"uncompressed GIFs". This technique does not use the LZW algorithm; the -resulting GIF files are larger than usual, but are readable by all standard -GIF decoders. - -We are required to state that - "The Graphics Interchange Format(c) is the Copyright property of - CompuServe Incorporated. GIF(sm) is a Service Mark property of - CompuServe Incorporated." - - -REFERENCES -========== - -We highly recommend reading one or more of these references before trying to -understand the innards of the JPEG software. - -The best short technical introduction to the JPEG compression algorithm is - Wallace, Gregory K. "The JPEG Still Picture Compression Standard", - Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44. -(Adjacent articles in that issue discuss MPEG motion picture compression, -applications of JPEG, and related topics.) If you don't have the CACM issue -handy, a PostScript file containing a revised version of Wallace's article is -available at ftp://ftp.uu.net/graphics/jpeg/wallace.ps.gz. The file (actually -a preprint for an article that appeared in IEEE Trans. Consumer Electronics) -omits the sample images that appeared in CACM, but it includes corrections -and some added material. Note: the Wallace article is copyright ACM and IEEE, -and it may not be used for commercial purposes. - -A somewhat less technical, more leisurely introduction to JPEG can be found in -"The Data Compression Book" by Mark Nelson and Jean-loup Gailly, published by -M&T Books (New York), 2nd ed. 1996, ISBN 1-55851-434-1. This book provides -good explanations and example C code for a multitude of compression methods -including JPEG. It is an excellent source if you are comfortable reading C -code but don't know much about data compression in general. The book's JPEG -sample code is far from industrial-strength, but when you are ready to look -at a full implementation, you've got one here... - -The best full description of JPEG is the textbook "JPEG Still Image Data -Compression Standard" by William B. Pennebaker and Joan L. Mitchell, published -by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1. Price US$59.95, 638 pp. -The book includes the complete text of the ISO JPEG standards (DIS 10918-1 -and draft DIS 10918-2). This is by far the most complete exposition of JPEG -in existence, and we highly recommend it. - -The JPEG standard itself is not available electronically; you must order a -paper copy through ISO or ITU. (Unless you feel a need to own a certified -official copy, we recommend buying the Pennebaker and Mitchell book instead; -it's much cheaper and includes a great deal of useful explanatory material.) -In the USA, copies of the standard may be ordered from ANSI Sales at (212) -642-4900, or from Global Engineering Documents at (800) 854-7179. (ANSI -doesn't take credit card orders, but Global does.) It's not cheap: as of -1992, ANSI was charging $95 for Part 1 and $47 for Part 2, plus 7% -shipping/handling. The standard is divided into two parts, Part 1 being the -actual specification, while Part 2 covers compliance testing methods. Part 1 -is titled "Digital Compression and Coding of Continuous-tone Still Images, -Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS -10918-1, ITU-T T.81. Part 2 is titled "Digital Compression and Coding of -Continuous-tone Still Images, Part 2: Compliance testing" and has document -numbers ISO/IEC IS 10918-2, ITU-T T.83. - -Some extensions to the original JPEG standard are defined in JPEG Part 3, -a newer ISO standard numbered ISO/IEC IS 10918-3 and ITU-T T.84. IJG -currently does not support any Part 3 extensions. - -The JPEG standard does not specify all details of an interchangeable file -format. For the omitted details we follow the "JFIF" conventions, revision -1.02. A copy of the JFIF spec is available from: - Literature Department - C-Cube Microsystems, Inc. - 1778 McCarthy Blvd. - Milpitas, CA 95035 - phone (408) 944-6300, fax (408) 944-6314 -A PostScript version of this document is available by FTP at -ftp://ftp.uu.net/graphics/jpeg/jfif.ps.gz. There is also a plain text -version at ftp://ftp.uu.net/graphics/jpeg/jfif.txt.gz, but it is missing -the figures. - -The TIFF 6.0 file format specification can be obtained by FTP from -ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation scheme -found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems. -IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6). -Instead, we recommend the JPEG design proposed by TIFF Technical Note #2 -(Compression tag 7). Copies of this Note can be obtained from ftp.sgi.com or -from ftp://ftp.uu.net/graphics/jpeg/. It is expected that the next revision -of the TIFF spec will replace the 6.0 JPEG design with the Note's design. -Although IJG's own code does not support TIFF/JPEG, the free libtiff library -uses our library to implement TIFF/JPEG per the Note. libtiff is available -from ftp://ftp.sgi.com/graphics/tiff/. - - -ARCHIVE LOCATIONS -================= - -The "official" archive site for this software is ftp.uu.net (Internet -address 192.48.96.9). The most recent released version can always be found -there in directory graphics/jpeg. This particular version will be archived -as ftp://ftp.uu.net/graphics/jpeg/jpegsrc.v6b.tar.gz. If you don't have -direct Internet access, UUNET's archives are also available via UUCP; contact -help@uunet.uu.net for information on retrieving files that way. - -Numerous Internet sites maintain copies of the UUNET files. However, only -ftp.uu.net is guaranteed to have the latest official version. - -You can also obtain this software in DOS-compatible "zip" archive format from -the SimTel archives (ftp://ftp.simtel.net/pub/simtelnet/msdos/graphics/), or -on CompuServe in the Graphics Support forum (GO CIS:GRAPHSUP), library 12 -"JPEG Tools". Again, these versions may sometimes lag behind the ftp.uu.net -release. - -The JPEG FAQ (Frequently Asked Questions) article is a useful source of -general information about JPEG. It is updated constantly and therefore is -not included in this distribution. The FAQ is posted every two weeks to -Usenet newsgroups comp.graphics.misc, news.answers, and other groups. -It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/ -and other news.answers archive sites, including the official news.answers -archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/. -If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu -with body - send usenet/news.answers/jpeg-faq/part1 - send usenet/news.answers/jpeg-faq/part2 - - -RELATED SOFTWARE -================ - -Numerous viewing and image manipulation programs now support JPEG. (Quite a -few of them use this library to do so.) The JPEG FAQ described above lists -some of the more popular free and shareware viewers, and tells where to -obtain them on Internet. - -If you are on a Unix machine, we highly recommend Jef Poskanzer's free -PBMPLUS software, which provides many useful operations on PPM-format image -files. In particular, it can convert PPM images to and from a wide range of -other formats, thus making cjpeg/djpeg considerably more useful. The latest -version is distributed by the NetPBM group, and is available from numerous -sites, notably ftp://wuarchive.wustl.edu/graphics/graphics/packages/NetPBM/. -Unfortunately PBMPLUS/NETPBM is not nearly as portable as the IJG software is; -you are likely to have difficulty making it work on any non-Unix machine. - -A different free JPEG implementation, written by the PVRG group at Stanford, -is available from ftp://havefun.stanford.edu/pub/jpeg/. This program -is designed for research and experimentation rather than production use; -it is slower, harder to use, and less portable than the IJG code, but it -is easier to read and modify. Also, the PVRG code supports lossless JPEG, -which we do not. (On the other hand, it doesn't do progressive JPEG.) - - -FILE FORMAT WARS -================ - -Some JPEG programs produce files that are not compatible with our library. -The root of the problem is that the ISO JPEG committee failed to specify a -concrete file format. Some vendors "filled in the blanks" on their own, -creating proprietary formats that no one else could read. (For example, none -of the early commercial JPEG implementations for the Macintosh were able to -exchange compressed files.) - -The file format we have adopted is called JFIF (see REFERENCES). This format -has been agreed to by a number of major commercial JPEG vendors, and it has -become the de facto standard. JFIF is a minimal or "low end" representation. -We recommend the use of TIFF/JPEG (TIFF revision 6.0 as modified by TIFF -Technical Note #2) for "high end" applications that need to record a lot of -additional data about an image. TIFF/JPEG is fairly new and not yet widely -supported, unfortunately. - -The upcoming JPEG Part 3 standard defines a file format called SPIFF. -SPIFF is interoperable with JFIF, in the sense that most JFIF decoders should -be able to read the most common variant of SPIFF. SPIFF has some technical -advantages over JFIF, but its major claim to fame is simply that it is an -official standard rather than an informal one. At this point it is unclear -whether SPIFF will supersede JFIF or whether JFIF will remain the de-facto -standard. IJG intends to support SPIFF once the standard is frozen, but we -have not decided whether it should become our default output format or not. -(In any case, our decoder will remain capable of reading JFIF indefinitely.) - -Various proprietary file formats incorporating JPEG compression also exist. -We have little or no sympathy for the existence of these formats. Indeed, -one of the original reasons for developing this free software was to help -force convergence on common, open format standards for JPEG files. Don't -use a proprietary file format! - - -TO DO -===== - -The major thrust for v7 will probably be improvement of visual quality. -The current method for scaling the quantization tables is known not to be -very good at low Q values. We also intend to investigate block boundary -smoothing, "poor man's variable quantization", and other means of improving -quality-vs-file-size performance without sacrificing compatibility. - -In future versions, we are considering supporting some of the upcoming JPEG -Part 3 extensions --- principally, variable quantization and the SPIFF file -format. - -As always, speeding things up is of great interest. - -Please send bug reports, offers of help, etc. to jpeg-info@uunet.uu.net. diff --git a/code/jpeg-6b/ioq3-changes.diff b/code/jpeg-6b/ioq3-changes.diff deleted file mode 100644 index 1ce424bc..00000000 --- a/code/jpeg-6b/ioq3-changes.diff +++ /dev/null @@ -1,916 +0,0 @@ -diff -u -w /home/tma/sources/jpeg-6b/jcdctmgr.c ./jcdctmgr.c ---- /home/tma/sources/jpeg-6b/jcdctmgr.c 1996-01-13 19:15:12.000000000 +0000 -+++ ./jcdctmgr.c 2008-08-22 00:07:09.000000000 +0100 -@@ -57,7 +57,6 @@ - int ci, qtblno, i; - jpeg_component_info *compptr; - JQUANT_TBL * qtbl; -- DCTELEM * dtbl; - - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { -@@ -168,6 +167,8 @@ - } - - -+/* code/jpeg-6b/jcdctmgr.c:184: warning: ‘forward_DCT’ defined but not used */ -+#if 0 - /* - * Perform forward DCT on one or more blocks of a component. - * -@@ -262,6 +263,7 @@ - } - } - } -+#endif - - - #ifdef DCT_FLOAT_SUPPORTED -diff -u -w /home/tma/sources/jpeg-6b/jcmainct.c ./jcmainct.c ---- /home/tma/sources/jpeg-6b/jcmainct.c 1996-01-06 23:24:59.000000000 +0000 -+++ ./jcmainct.c 2008-08-22 00:10:21.000000000 +0100 -@@ -68,32 +68,32 @@ - METHODDEF(void) - start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode) - { -- my_main_ptr main = (my_main_ptr) cinfo->main; -+ my_main_ptr jmain = (my_main_ptr) cinfo->main; - - /* Do nothing in raw-data mode. */ - if (cinfo->raw_data_in) - return; - -- main->cur_iMCU_row = 0; /* initialize counters */ -- main->rowgroup_ctr = 0; -- main->suspended = FALSE; -- main->pass_mode = pass_mode; /* save mode for use by process_data */ -+ jmain->cur_iMCU_row = 0; /* initialize counters */ -+ jmain->rowgroup_ctr = 0; -+ jmain->suspended = FALSE; -+ jmain->pass_mode = pass_mode; /* save mode for use by process_data */ - - switch (pass_mode) { - case JBUF_PASS_THRU: - #ifdef FULL_MAIN_BUFFER_SUPPORTED -- if (main->whole_image[0] != NULL) -+ if (jmain->whole_image[0] != NULL) - ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); - #endif -- main->pub.process_data = process_data_simple_main; -+ jmain->pub.process_data = process_data_simple_main; - break; - #ifdef FULL_MAIN_BUFFER_SUPPORTED - case JBUF_SAVE_SOURCE: - case JBUF_CRANK_DEST: - case JBUF_SAVE_AND_PASS: -- if (main->whole_image[0] == NULL) -+ if (jmain->whole_image[0] == NULL) - ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); -- main->pub.process_data = process_data_buffer_main; -+ jmain->pub.process_data = process_data_buffer_main; - break; - #endif - default: -@@ -114,46 +114,46 @@ - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail) - { -- my_main_ptr main = (my_main_ptr) cinfo->main; -+ my_main_ptr jmain = (my_main_ptr) cinfo->main; - -- while (main->cur_iMCU_row < cinfo->total_iMCU_rows) { -- /* Read input data if we haven't filled the main buffer yet */ -- if (main->rowgroup_ctr < DCTSIZE) -+ while (jmain->cur_iMCU_row < cinfo->total_iMCU_rows) { -+ /* Read input data if we haven't filled the jmain buffer yet */ -+ if (jmain->rowgroup_ctr < DCTSIZE) - (*cinfo->prep->pre_process_data) (cinfo, - input_buf, in_row_ctr, in_rows_avail, -- main->buffer, &main->rowgroup_ctr, -+ jmain->buffer, &jmain->rowgroup_ctr, - (JDIMENSION) DCTSIZE); - - /* If we don't have a full iMCU row buffered, return to application for - * more data. Note that preprocessor will always pad to fill the iMCU row - * at the bottom of the image. - */ -- if (main->rowgroup_ctr != DCTSIZE) -+ if (jmain->rowgroup_ctr != DCTSIZE) - return; - - /* Send the completed row to the compressor */ -- if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) { -+ if (! (*cinfo->coef->compress_data) (cinfo, jmain->buffer)) { - /* If compressor did not consume the whole row, then we must need to - * suspend processing and return to the application. In this situation - * we pretend we didn't yet consume the last input row; otherwise, if - * it happened to be the last row of the image, the application would - * think we were done. - */ -- if (! main->suspended) { -+ if (! jmain->suspended) { - (*in_row_ctr)--; -- main->suspended = TRUE; -+ jmain->suspended = TRUE; - } - return; - } - /* We did finish the row. Undo our little suspension hack if a previous -- * call suspended; then mark the main buffer empty. -+ * call suspended; then mark the jmain buffer empty. - */ -- if (main->suspended) { -+ if (jmain->suspended) { - (*in_row_ctr)++; -- main->suspended = FALSE; -+ jmain->suspended = FALSE; - } -- main->rowgroup_ctr = 0; -- main->cur_iMCU_row++; -+ jmain->rowgroup_ctr = 0; -+ jmain->cur_iMCU_row++; - } - } - -@@ -170,25 +170,25 @@ - JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, - JDIMENSION in_rows_avail) - { -- my_main_ptr main = (my_main_ptr) cinfo->main; -+ my_main_ptr jmain = (my_main_ptr) cinfo->main; - int ci; - jpeg_component_info *compptr; -- boolean writing = (main->pass_mode != JBUF_CRANK_DEST); -+ boolean writing = (jmain->pass_mode != JBUF_CRANK_DEST); - -- while (main->cur_iMCU_row < cinfo->total_iMCU_rows) { -+ while (jmain->cur_iMCU_row < cinfo->total_iMCU_rows) { - /* Realign the virtual buffers if at the start of an iMCU row. */ -- if (main->rowgroup_ctr == 0) { -+ if (jmain->rowgroup_ctr == 0) { - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { -- main->buffer[ci] = (*cinfo->mem->access_virt_sarray) -- ((j_common_ptr) cinfo, main->whole_image[ci], -- main->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE), -+ jmain->buffer[ci] = (*cinfo->mem->access_virt_sarray) -+ ((j_common_ptr) cinfo, jmain->whole_image[ci], -+ jmain->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE), - (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing); - } - /* In a read pass, pretend we just read some source data. */ - if (! writing) { - *in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE; -- main->rowgroup_ctr = DCTSIZE; -+ jmain->rowgroup_ctr = DCTSIZE; - } - } - -@@ -197,40 +197,40 @@ - if (writing) { - (*cinfo->prep->pre_process_data) (cinfo, - input_buf, in_row_ctr, in_rows_avail, -- main->buffer, &main->rowgroup_ctr, -+ jmain->buffer, &jmain->rowgroup_ctr, - (JDIMENSION) DCTSIZE); - /* Return to application if we need more data to fill the iMCU row. */ -- if (main->rowgroup_ctr < DCTSIZE) -+ if (jmain->rowgroup_ctr < DCTSIZE) - return; - } - - /* Emit data, unless this is a sink-only pass. */ -- if (main->pass_mode != JBUF_SAVE_SOURCE) { -- if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) { -+ if (jmain->pass_mode != JBUF_SAVE_SOURCE) { -+ if (! (*cinfo->coef->compress_data) (cinfo, jmain->buffer)) { - /* If compressor did not consume the whole row, then we must need to - * suspend processing and return to the application. In this situation - * we pretend we didn't yet consume the last input row; otherwise, if - * it happened to be the last row of the image, the application would - * think we were done. - */ -- if (! main->suspended) { -+ if (! jmain->suspended) { - (*in_row_ctr)--; -- main->suspended = TRUE; -+ jmain->suspended = TRUE; - } - return; - } - /* We did finish the row. Undo our little suspension hack if a previous -- * call suspended; then mark the main buffer empty. -+ * call suspended; then mark the jmain buffer empty. - */ -- if (main->suspended) { -+ if (jmain->suspended) { - (*in_row_ctr)++; -- main->suspended = FALSE; -+ jmain->suspended = FALSE; - } - } - - /* If get here, we are done with this iMCU row. Mark buffer empty. */ -- main->rowgroup_ctr = 0; -- main->cur_iMCU_row++; -+ jmain->rowgroup_ctr = 0; -+ jmain->cur_iMCU_row++; - } - } - -@@ -238,21 +238,21 @@ - - - /* -- * Initialize main buffer controller. -+ * Initialize jmain buffer controller. - */ - - GLOBAL(void) - jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer) - { -- my_main_ptr main; -+ my_main_ptr jmain; - int ci; - jpeg_component_info *compptr; - -- main = (my_main_ptr) -+ jmain = (my_main_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_main_controller)); -- cinfo->main = (struct jpeg_c_main_controller *) main; -- main->pub.start_pass = start_pass_main; -+ cinfo->main = (struct jpeg_c_main_controller *) jmain; -+ jmain->pub.start_pass = start_pass_main; - - /* We don't need to create a buffer in raw-data mode. */ - if (cinfo->raw_data_in) -@@ -267,7 +267,7 @@ - /* Note we pad the bottom to a multiple of the iMCU height */ - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { -- main->whole_image[ci] = (*cinfo->mem->request_virt_sarray) -+ jmain->whole_image[ci] = (*cinfo->mem->request_virt_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - compptr->width_in_blocks * DCTSIZE, - (JDIMENSION) jround_up((long) compptr->height_in_blocks, -@@ -279,12 +279,12 @@ - #endif - } else { - #ifdef FULL_MAIN_BUFFER_SUPPORTED -- main->whole_image[0] = NULL; /* flag for no virtual arrays */ -+ jmain->whole_image[0] = NULL; /* flag for no virtual arrays */ - #endif - /* Allocate a strip buffer for each component */ - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { -- main->buffer[ci] = (*cinfo->mem->alloc_sarray) -+ jmain->buffer[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - compptr->width_in_blocks * DCTSIZE, - (JDIMENSION) (compptr->v_samp_factor * DCTSIZE)); -diff -u -w /home/tma/sources/jpeg-6b/jdatasrc.c ./jdatasrc.c ---- /home/tma/sources/jpeg-6b/jdatasrc.c 1996-01-06 23:26:42.000000000 +0000 -+++ ./jdatasrc.c 2008-08-22 00:00:59.000000000 +0100 -@@ -19,13 +19,17 @@ - #include "jpeglib.h" - #include "jerror.h" - -+#ifndef MIN -+#define MIN(a, b) ((a)<(b)?(a):(b)) -+#endif - - /* Expanded data source object for stdio input */ - - typedef struct { - struct jpeg_source_mgr pub; /* public fields */ - -- FILE * infile; /* source stream */ -+ unsigned char *inbuf; /* source stream */ -+ size_t inbufbytes; - JOCTET * buffer; /* start of buffer */ - boolean start_of_file; /* have we gotten any data yet? */ - } my_source_mgr; -@@ -90,18 +94,19 @@ - fill_input_buffer (j_decompress_ptr cinfo) - { - my_src_ptr src = (my_src_ptr) cinfo->src; -- size_t nbytes; -- -- nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE); -+ size_t nbytes = MIN(src->inbufbytes, INPUT_BUF_SIZE); - - if (nbytes <= 0) { -- if (src->start_of_file) /* Treat empty input file as fatal error */ -- ERREXIT(cinfo, JERR_INPUT_EMPTY); - WARNMS(cinfo, JWRN_JPEG_EOF); - /* Insert a fake EOI marker */ - src->buffer[0] = (JOCTET) 0xFF; - src->buffer[1] = (JOCTET) JPEG_EOI; - nbytes = 2; -+ } else { -+ memcpy( src->buffer, src->inbuf, nbytes); -+ -+ src->inbuf += nbytes; -+ src->inbufbytes -= nbytes; - } - - src->pub.next_input_byte = src->buffer; -@@ -179,7 +184,7 @@ - */ - - GLOBAL(void) --jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile) -+jpeg_mem_src (j_decompress_ptr cinfo, unsigned char *inbuf, size_t size) - { - my_src_ptr src; - -@@ -206,7 +211,8 @@ - src->pub.skip_input_data = skip_input_data; - src->pub.resync_to_restart = jpeg_resync_to_restart; /* use default method */ - src->pub.term_source = term_source; -- src->infile = infile; -+ src->inbuf = inbuf; -+ src->inbufbytes = size; - src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */ - src->pub.next_input_byte = NULL; /* until buffer loaded */ - } -diff -u -w /home/tma/sources/jpeg-6b/jdmainct.c ./jdmainct.c ---- /home/tma/sources/jpeg-6b/jdmainct.c 1996-01-06 23:27:17.000000000 +0000 -+++ ./jdmainct.c 2008-08-22 00:13:48.000000000 +0100 -@@ -159,7 +159,7 @@ - * This is done only once, not once per pass. - */ - { -- my_main_ptr main = (my_main_ptr) cinfo->main; -+ my_main_ptr jmain = (my_main_ptr) cinfo->main; - int ci, rgroup; - int M = cinfo->min_DCT_scaled_size; - jpeg_component_info *compptr; -@@ -168,10 +168,10 @@ - /* Get top-level space for component array pointers. - * We alloc both arrays with one call to save a few cycles. - */ -- main->xbuffer[0] = (JSAMPIMAGE) -+ jmain->xbuffer[0] = (JSAMPIMAGE) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components * 2 * SIZEOF(JSAMPARRAY)); -- main->xbuffer[1] = main->xbuffer[0] + cinfo->num_components; -+ jmain->xbuffer[1] = jmain->xbuffer[0] + cinfo->num_components; - - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { -@@ -184,9 +184,9 @@ - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - 2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW)); - xbuf += rgroup; /* want one row group at negative offsets */ -- main->xbuffer[0][ci] = xbuf; -+ jmain->xbuffer[0][ci] = xbuf; - xbuf += rgroup * (M + 4); -- main->xbuffer[1][ci] = xbuf; -+ jmain->xbuffer[1][ci] = xbuf; - } - } - -@@ -194,13 +194,13 @@ - LOCAL(void) - make_funny_pointers (j_decompress_ptr cinfo) - /* Create the funny pointer lists discussed in the comments above. -- * The actual workspace is already allocated (in main->buffer), -+ * The actual workspace is already allocated (in jmain->buffer), - * and the space for the pointer lists is allocated too. - * This routine just fills in the curiously ordered lists. - * This will be repeated at the beginning of each pass. - */ - { -- my_main_ptr main = (my_main_ptr) cinfo->main; -+ my_main_ptr jmain = (my_main_ptr) cinfo->main; - int ci, i, rgroup; - int M = cinfo->min_DCT_scaled_size; - jpeg_component_info *compptr; -@@ -210,10 +210,10 @@ - ci++, compptr++) { - rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) / - cinfo->min_DCT_scaled_size; /* height of a row group of component */ -- xbuf0 = main->xbuffer[0][ci]; -- xbuf1 = main->xbuffer[1][ci]; -+ xbuf0 = jmain->xbuffer[0][ci]; -+ xbuf1 = jmain->xbuffer[1][ci]; - /* First copy the workspace pointers as-is */ -- buf = main->buffer[ci]; -+ buf = jmain->buffer[ci]; - for (i = 0; i < rgroup * (M + 2); i++) { - xbuf0[i] = xbuf1[i] = buf[i]; - } -@@ -240,7 +240,7 @@ - * This changes the pointer list state from top-of-image to the normal state. - */ - { -- my_main_ptr main = (my_main_ptr) cinfo->main; -+ my_main_ptr jmain = (my_main_ptr) cinfo->main; - int ci, i, rgroup; - int M = cinfo->min_DCT_scaled_size; - jpeg_component_info *compptr; -@@ -250,8 +250,8 @@ - ci++, compptr++) { - rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) / - cinfo->min_DCT_scaled_size; /* height of a row group of component */ -- xbuf0 = main->xbuffer[0][ci]; -- xbuf1 = main->xbuffer[1][ci]; -+ xbuf0 = jmain->xbuffer[0][ci]; -+ xbuf1 = jmain->xbuffer[1][ci]; - for (i = 0; i < rgroup; i++) { - xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i]; - xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i]; -@@ -269,7 +269,7 @@ - * Also sets rowgroups_avail to indicate number of nondummy row groups in row. - */ - { -- my_main_ptr main = (my_main_ptr) cinfo->main; -+ my_main_ptr jmain = (my_main_ptr) cinfo->main; - int ci, i, rgroup, iMCUheight, rows_left; - jpeg_component_info *compptr; - JSAMPARRAY xbuf; -@@ -286,12 +286,12 @@ - * so we need only do it once. - */ - if (ci == 0) { -- main->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1); -+ jmain->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1); - } - /* Duplicate the last real sample row rgroup*2 times; this pads out the - * last partial rowgroup and ensures at least one full rowgroup of context. - */ -- xbuf = main->xbuffer[main->whichptr][ci]; -+ xbuf = jmain->xbuffer[jmain->whichptr][ci]; - for (i = 0; i < rgroup * 2; i++) { - xbuf[rows_left + i] = xbuf[rows_left-1]; - } -@@ -306,27 +306,27 @@ - METHODDEF(void) - start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode) - { -- my_main_ptr main = (my_main_ptr) cinfo->main; -+ my_main_ptr jmain = (my_main_ptr) cinfo->main; - - switch (pass_mode) { - case JBUF_PASS_THRU: - if (cinfo->upsample->need_context_rows) { -- main->pub.process_data = process_data_context_main; -+ jmain->pub.process_data = process_data_context_main; - make_funny_pointers(cinfo); /* Create the xbuffer[] lists */ -- main->whichptr = 0; /* Read first iMCU row into xbuffer[0] */ -- main->context_state = CTX_PREPARE_FOR_IMCU; -- main->iMCU_row_ctr = 0; -+ jmain->whichptr = 0; /* Read first iMCU row into xbuffer[0] */ -+ jmain->context_state = CTX_PREPARE_FOR_IMCU; -+ jmain->iMCU_row_ctr = 0; - } else { - /* Simple case with no context needed */ -- main->pub.process_data = process_data_simple_main; -+ jmain->pub.process_data = process_data_simple_main; - } -- main->buffer_full = FALSE; /* Mark buffer empty */ -- main->rowgroup_ctr = 0; -+ jmain->buffer_full = FALSE; /* Mark buffer empty */ -+ jmain->rowgroup_ctr = 0; - break; - #ifdef QUANT_2PASS_SUPPORTED - case JBUF_CRANK_DEST: - /* For last pass of 2-pass quantization, just crank the postprocessor */ -- main->pub.process_data = process_data_crank_post; -+ jmain->pub.process_data = process_data_crank_post; - break; - #endif - default: -@@ -346,14 +346,14 @@ - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) - { -- my_main_ptr main = (my_main_ptr) cinfo->main; -+ my_main_ptr jmain = (my_main_ptr) cinfo->main; - JDIMENSION rowgroups_avail; - -- /* Read input data if we haven't filled the main buffer yet */ -- if (! main->buffer_full) { -- if (! (*cinfo->coef->decompress_data) (cinfo, main->buffer)) -+ /* Read input data if we haven't filled the jmain buffer yet */ -+ if (! jmain->buffer_full) { -+ if (! (*cinfo->coef->decompress_data) (cinfo, jmain->buffer)) - return; /* suspension forced, can do nothing more */ -- main->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ -+ jmain->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ - } - - /* There are always min_DCT_scaled_size row groups in an iMCU row. */ -@@ -364,14 +364,14 @@ - */ - - /* Feed the postprocessor */ -- (*cinfo->post->post_process_data) (cinfo, main->buffer, -- &main->rowgroup_ctr, rowgroups_avail, -+ (*cinfo->post->post_process_data) (cinfo, jmain->buffer, -+ &jmain->rowgroup_ctr, rowgroups_avail, - output_buf, out_row_ctr, out_rows_avail); - - /* Has postprocessor consumed all the data yet? If so, mark buffer empty */ -- if (main->rowgroup_ctr >= rowgroups_avail) { -- main->buffer_full = FALSE; -- main->rowgroup_ctr = 0; -+ if (jmain->rowgroup_ctr >= rowgroups_avail) { -+ jmain->buffer_full = FALSE; -+ jmain->rowgroup_ctr = 0; - } - } - -@@ -386,15 +386,15 @@ - JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, - JDIMENSION out_rows_avail) - { -- my_main_ptr main = (my_main_ptr) cinfo->main; -+ my_main_ptr jmain = (my_main_ptr) cinfo->main; - -- /* Read input data if we haven't filled the main buffer yet */ -- if (! main->buffer_full) { -+ /* Read input data if we haven't filled the jmain buffer yet */ -+ if (! jmain->buffer_full) { - if (! (*cinfo->coef->decompress_data) (cinfo, -- main->xbuffer[main->whichptr])) -+ jmain->xbuffer[jmain->whichptr])) - return; /* suspension forced, can do nothing more */ -- main->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ -- main->iMCU_row_ctr++; /* count rows received */ -+ jmain->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ -+ jmain->iMCU_row_ctr++; /* count rows received */ - } - - /* Postprocessor typically will not swallow all the input data it is handed -@@ -402,47 +402,47 @@ - * to exit and restart. This switch lets us keep track of how far we got. - * Note that each case falls through to the next on successful completion. - */ -- switch (main->context_state) { -+ switch (jmain->context_state) { - case CTX_POSTPONED_ROW: - /* Call postprocessor using previously set pointers for postponed row */ -- (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr], -- &main->rowgroup_ctr, main->rowgroups_avail, -+ (*cinfo->post->post_process_data) (cinfo, jmain->xbuffer[jmain->whichptr], -+ &jmain->rowgroup_ctr, jmain->rowgroups_avail, - output_buf, out_row_ctr, out_rows_avail); -- if (main->rowgroup_ctr < main->rowgroups_avail) -+ if (jmain->rowgroup_ctr < jmain->rowgroups_avail) - return; /* Need to suspend */ -- main->context_state = CTX_PREPARE_FOR_IMCU; -+ jmain->context_state = CTX_PREPARE_FOR_IMCU; - if (*out_row_ctr >= out_rows_avail) - return; /* Postprocessor exactly filled output buf */ - /*FALLTHROUGH*/ - case CTX_PREPARE_FOR_IMCU: - /* Prepare to process first M-1 row groups of this iMCU row */ -- main->rowgroup_ctr = 0; -- main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size - 1); -+ jmain->rowgroup_ctr = 0; -+ jmain->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size - 1); - /* Check for bottom of image: if so, tweak pointers to "duplicate" - * the last sample row, and adjust rowgroups_avail to ignore padding rows. - */ -- if (main->iMCU_row_ctr == cinfo->total_iMCU_rows) -+ if (jmain->iMCU_row_ctr == cinfo->total_iMCU_rows) - set_bottom_pointers(cinfo); -- main->context_state = CTX_PROCESS_IMCU; -+ jmain->context_state = CTX_PROCESS_IMCU; - /*FALLTHROUGH*/ - case CTX_PROCESS_IMCU: - /* Call postprocessor using previously set pointers */ -- (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr], -- &main->rowgroup_ctr, main->rowgroups_avail, -+ (*cinfo->post->post_process_data) (cinfo, jmain->xbuffer[jmain->whichptr], -+ &jmain->rowgroup_ctr, jmain->rowgroups_avail, - output_buf, out_row_ctr, out_rows_avail); -- if (main->rowgroup_ctr < main->rowgroups_avail) -+ if (jmain->rowgroup_ctr < jmain->rowgroups_avail) - return; /* Need to suspend */ - /* After the first iMCU, change wraparound pointers to normal state */ -- if (main->iMCU_row_ctr == 1) -+ if (jmain->iMCU_row_ctr == 1) - set_wraparound_pointers(cinfo); - /* Prepare to load new iMCU row using other xbuffer list */ -- main->whichptr ^= 1; /* 0=>1 or 1=>0 */ -- main->buffer_full = FALSE; -+ jmain->whichptr ^= 1; /* 0=>1 or 1=>0 */ -+ jmain->buffer_full = FALSE; - /* Still need to process last row group of this iMCU row, */ - /* which is saved at index M+1 of the other xbuffer */ -- main->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_scaled_size + 1); -- main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size + 2); -- main->context_state = CTX_POSTPONED_ROW; -+ jmain->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_scaled_size + 1); -+ jmain->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size + 2); -+ jmain->context_state = CTX_POSTPONED_ROW; - } - } - -@@ -469,21 +469,21 @@ - - - /* -- * Initialize main buffer controller. -+ * Initialize jmain buffer controller. - */ - - GLOBAL(void) - jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer) - { -- my_main_ptr main; -+ my_main_ptr jmain; - int ci, rgroup, ngroups; - jpeg_component_info *compptr; - -- main = (my_main_ptr) -+ jmain = (my_main_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(my_main_controller)); -- cinfo->main = (struct jpeg_d_main_controller *) main; -- main->pub.start_pass = start_pass_main; -+ cinfo->main = (struct jpeg_d_main_controller *) jmain; -+ jmain->pub.start_pass = start_pass_main; - - if (need_full_buffer) /* shouldn't happen */ - ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); -@@ -504,7 +504,7 @@ - ci++, compptr++) { - rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) / - cinfo->min_DCT_scaled_size; /* height of a row group of component */ -- main->buffer[ci] = (*cinfo->mem->alloc_sarray) -+ jmain->buffer[ci] = (*cinfo->mem->alloc_sarray) - ((j_common_ptr) cinfo, JPOOL_IMAGE, - compptr->width_in_blocks * compptr->DCT_scaled_size, - (JDIMENSION) (rgroup * ngroups)); -diff -u -w /home/tma/sources/jpeg-6b/jerror.c ./jerror.c ---- /home/tma/sources/jpeg-6b/jerror.c 1998-02-22 01:03:15.000000000 +0000 -+++ ./jerror.c 2008-08-21 23:58:36.000000000 +0100 -@@ -18,6 +18,8 @@ - * These routines are used by both the compression and decompression code. - */ - -+#include "../renderer/tr_local.h" -+ - /* this is not a core library module, so it doesn't define JPEG_INTERNALS */ - #include "jinclude.h" - #include "jpeglib.h" -@@ -69,13 +71,15 @@ - METHODDEF(void) - error_exit (j_common_ptr cinfo) - { -- /* Always display the message */ -- (*cinfo->err->output_message) (cinfo); -+ char buffer[JMSG_LENGTH_MAX]; -+ -+ /* Create the message */ -+ (*cinfo->err->format_message) (cinfo, buffer); - - /* Let the memory manager delete any temp files before we die */ - jpeg_destroy(cinfo); - -- exit(EXIT_FAILURE); -+ ri.Error( ERR_FATAL, "%s\n", buffer ); - } - - -@@ -108,7 +112,7 @@ - MB_OK | MB_ICONERROR); - #else - /* Send it to stderr, adding a newline */ -- fprintf(stderr, "%s\n", buffer); -+ ri.Printf(PRINT_ALL, "%s\n", buffer); - #endif - } - -diff -u -w /home/tma/sources/jpeg-6b/jinclude.h ./jinclude.h ---- /home/tma/sources/jpeg-6b/jinclude.h 1994-04-01 21:29:31.000000000 +0100 -+++ ./jinclude.h 2008-08-21 23:58:36.000000000 +0100 -@@ -15,9 +15,34 @@ - */ - - -+#ifdef _MSC_VER -+ -+#pragma warning(disable : 4018) // signed/unsigned mismatch -+#pragma warning(disable : 4032) -+#pragma warning(disable : 4051) -+#pragma warning(disable : 4057) // slightly different base types -+#pragma warning(disable : 4100) // unreferenced formal parameter -+#pragma warning(disable : 4115) -+#pragma warning(disable : 4125) // decimal digit terminates octal escape sequence -+#pragma warning(disable : 4127) // conditional expression is constant -+#pragma warning(disable : 4136) -+#pragma warning(disable : 4152) // nonstandard extension, function/data pointer conversion in expression -+#pragma warning(disable : 4201) -+#pragma warning(disable : 4214) -+#pragma warning(disable : 4244) -+#pragma warning(disable : 4305) // truncation from const double to float -+#pragma warning(disable : 4310) // cast truncates constant value -+#pragma warning(disable: 4505) // unreferenced local function has been removed -+#pragma warning(disable : 4514) -+#pragma warning(disable : 4702) // unreachable code -+#pragma warning(disable : 4711) // selected for automatic inline expansion -+#pragma warning(disable : 4220) // varargs matches remaining parameters -+#pragma warning(disable : 4761) // integral size mismatch -+#endif -+ - /* Include auto-config file to find out which system include files we need. */ - --#include "jconfig.h" /* auto configuration options */ -+#include "../jpeg-6b/jconfig.h" /* auto configuration options */ - #define JCONFIG_INCLUDED /* so that jpeglib.h doesn't do it again */ - - /* -diff -u -w /home/tma/sources/jpeg-6b/jmemnobs.c ./jmemnobs.c ---- /home/tma/sources/jpeg-6b/jmemnobs.c 1996-01-06 23:31:18.000000000 +0000 -+++ ./jmemnobs.c 2008-08-21 23:58:36.000000000 +0100 -@@ -8,39 +8,35 @@ - * This file provides a really simple implementation of the system- - * dependent portion of the JPEG memory manager. This implementation - * assumes that no backing-store files are needed: all required space -- * can be obtained from malloc(). -+ * can be obtained from ri.Malloc(). - * This is very portable in the sense that it'll compile on almost anything, - * but you'd better have lots of main memory (or virtual memory) if you want - * to process big images. - * Note that the max_memory_to_use option is ignored by this implementation. - */ - -+#include "../renderer/tr_local.h" -+ - #define JPEG_INTERNALS - #include "jinclude.h" - #include "jpeglib.h" - #include "jmemsys.h" /* import the system-dependent declarations */ - --#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ --extern void * malloc JPP((size_t size)); --extern void free JPP((void *ptr)); --#endif -- -- - /* - * Memory allocation and freeing are controlled by the regular library -- * routines malloc() and free(). -+ * routines ri.Malloc() and ri.Free(). - */ - - GLOBAL(void *) - jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject) - { -- return (void *) malloc(sizeofobject); -+ return (void *) ri.Malloc(sizeofobject); - } - - GLOBAL(void) - jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject) - { -- free(object); -+ ri.Free(object); - } - - -@@ -54,13 +50,13 @@ - GLOBAL(void FAR *) - jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject) - { -- return (void FAR *) malloc(sizeofobject); -+ return (void FAR *) ri.Malloc(sizeofobject); - } - - GLOBAL(void) - jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject) - { -- free(object); -+ ri.Free(object); - } - - -diff -u -w /home/tma/sources/jpeg-6b/jmorecfg.h ./jmorecfg.h ---- /home/tma/sources/jpeg-6b/jmorecfg.h 1997-08-10 00:58:56.000000000 +0100 -+++ ./jmorecfg.h 2008-08-21 23:58:36.000000000 +0100 -@@ -157,7 +157,8 @@ - - /* INT32 must hold at least signed 32-bit values. */ - --#ifndef XMD_H /* X11/xmd.h correctly defines INT32 */ -+/* MinGW basetsd.h defines INT32 - don't redefine it */ -+#if !(defined __MINGW32__ && defined _BASETSD_H) - typedef long INT32; - #endif - -@@ -210,8 +211,10 @@ - */ - - #ifdef NEED_FAR_POINTERS -+#undef FAR - #define FAR far - #else -+#undef FAR - #define FAR - #endif - -@@ -223,9 +226,7 @@ - * Defining HAVE_BOOLEAN before including jpeglib.h should make it work. - */ - --#ifndef HAVE_BOOLEAN --typedef int boolean; --#endif -+typedef unsigned char boolean; - #ifndef FALSE /* in case these macros already exist */ - #define FALSE 0 /* values of boolean */ - #endif -@@ -260,8 +261,8 @@ - - /* Capability options common to encoder and decoder: */ - --#define DCT_ISLOW_SUPPORTED /* slow but accurate integer algorithm */ --#define DCT_IFAST_SUPPORTED /* faster, less accurate integer method */ -+#undef DCT_ISLOW_SUPPORTED /* slow but accurate integer algorithm */ -+#undef DCT_IFAST_SUPPORTED /* faster, less accurate integer method */ - #define DCT_FLOAT_SUPPORTED /* floating-point: accurate, fast on fast HW */ - - /* Encoder capability options: */ -@@ -283,15 +284,15 @@ - /* Decoder capability options: */ - - #undef D_ARITH_CODING_SUPPORTED /* Arithmetic coding back end? */ --#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */ --#define D_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ --#define SAVE_MARKERS_SUPPORTED /* jpeg_save_markers() needed? */ --#define BLOCK_SMOOTHING_SUPPORTED /* Block smoothing? (Progressive only) */ --#define IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */ -+#undef D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */ -+#undef D_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ -+#undef SAVE_MARKERS_SUPPORTED /* jpeg_save_markers() needed? */ -+#undef BLOCK_SMOOTHING_SUPPORTED /* Block smoothing? (Progressive only) */ -+#undef IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */ - #undef UPSAMPLE_SCALING_SUPPORTED /* Output rescaling at upsample stage? */ --#define UPSAMPLE_MERGING_SUPPORTED /* Fast path for sloppy upsampling? */ --#define QUANT_1PASS_SUPPORTED /* 1-pass color quantization? */ --#define QUANT_2PASS_SUPPORTED /* 2-pass color quantization? */ -+#undef UPSAMPLE_MERGING_SUPPORTED /* Fast path for sloppy upsampling? */ -+#undef QUANT_1PASS_SUPPORTED /* 1-pass color quantization? */ -+#undef QUANT_2PASS_SUPPORTED /* 2-pass color quantization? */ - - /* more capability options later, no doubt */ - -@@ -314,7 +315,7 @@ - #define RGB_RED 0 /* Offset of Red in an RGB scanline element */ - #define RGB_GREEN 1 /* Offset of Green */ - #define RGB_BLUE 2 /* Offset of Blue */ --#define RGB_PIXELSIZE 3 /* JSAMPLEs per RGB scanline element */ -+#define RGB_PIXELSIZE 4 /* JSAMPLEs per RGB scanline element */ - - - /* Definitions for speed-related optimizations. */ -diff -u -w /home/tma/sources/jpeg-6b/jpeglib.h ./jpeglib.h ---- /home/tma/sources/jpeg-6b/jpeglib.h 1998-02-21 19:48:14.000000000 +0000 -+++ ./jpeglib.h 2008-08-22 00:01:58.000000000 +0100 -@@ -21,9 +21,9 @@ - */ - - #ifndef JCONFIG_INCLUDED /* in case jinclude.h already did */ --#include "jconfig.h" /* widely used configuration options */ -+#include "../jpeg-6b/jconfig.h" /* widely used configuration options */ - #endif --#include "jmorecfg.h" /* seldom changed options */ -+#include "../jpeg-6b/jmorecfg.h" /* seldom changed options */ - - - /* Version ID for the JPEG library. -@@ -835,7 +835,7 @@ - #define jpeg_destroy_compress jDestCompress - #define jpeg_destroy_decompress jDestDecompress - #define jpeg_stdio_dest jStdDest --#define jpeg_stdio_src jStdSrc -+#define jpeg_mem_src jMemSrc - #define jpeg_set_defaults jSetDefaults - #define jpeg_set_colorspace jSetColorspace - #define jpeg_default_colorspace jDefColorspace -@@ -908,7 +908,7 @@ - /* Standard data source and destination managers: stdio streams. */ - /* Caller is responsible for opening the file before and closing after. */ - EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile)); --EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile)); -+EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo, unsigned char *inbuf, size_t size)); - - /* Default parameter setup for compression */ - EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo)); -@@ -1089,8 +1089,8 @@ - */ - - #ifdef JPEG_INTERNALS --#include "jpegint.h" /* fetch private declarations */ --#include "jerror.h" /* fetch error codes too */ -+#include "../jpeg-6b/jpegint.h" /* fetch private declarations */ -+#include "../jpeg-6b/jerror.h" /* fetch error codes too */ - #endif - - #endif /* JPEGLIB_H */ diff --git a/code/jpeg-6b/jchuff.c b/code/jpeg-6b/jchuff.c deleted file mode 100644 index f2352505..00000000 --- a/code/jpeg-6b/jchuff.c +++ /dev/null @@ -1,909 +0,0 @@ -/* - * jchuff.c - * - * Copyright (C) 1991-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains Huffman entropy encoding routines. - * - * Much of the complexity here has to do with supporting output suspension. - * If the data destination module demands suspension, we want to be able to - * back up to the start of the current MCU. To do this, we copy state - * variables into local working storage, and update them back to the - * permanent JPEG objects only upon successful completion of an MCU. - */ - -#define JPEG_INTERNALS -#include "jinclude.h" -#include "jpeglib.h" -#include "jchuff.h" /* Declarations shared with jcphuff.c */ - - -/* Expanded entropy encoder object for Huffman encoding. - * - * The savable_state subrecord contains fields that change within an MCU, - * but must not be updated permanently until we complete the MCU. - */ - -typedef struct { - INT32 put_buffer; /* current bit-accumulation buffer */ - int put_bits; /* # of bits now in it */ - int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ -} savable_state; - -/* This macro is to work around compilers with missing or broken - * structure assignment. You'll need to fix this code if you have - * such a compiler and you change MAX_COMPS_IN_SCAN. - */ - -#ifndef NO_STRUCT_ASSIGN -#define ASSIGN_STATE(dest,src) ((dest) = (src)) -#else -#if MAX_COMPS_IN_SCAN == 4 -#define ASSIGN_STATE(dest,src) \ - ((dest).put_buffer = (src).put_buffer, \ - (dest).put_bits = (src).put_bits, \ - (dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) -#endif -#endif - - -typedef struct { - struct jpeg_entropy_encoder pub; /* public fields */ - - savable_state saved; /* Bit buffer & DC state at start of MCU */ - - /* These fields are NOT loaded into local working state. */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ - int next_restart_num; /* next restart number to write (0-7) */ - - /* Pointers to derived tables (these workspaces have image lifespan) */ - c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS]; - c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS]; - -#ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */ - long * dc_count_ptrs[NUM_HUFF_TBLS]; - long * ac_count_ptrs[NUM_HUFF_TBLS]; -#endif -} huff_entropy_encoder; - -typedef huff_entropy_encoder * huff_entropy_ptr; - -/* Working state while writing an MCU. - * This struct contains all the fields that are needed by subroutines. - */ - -typedef struct { - JOCTET * next_output_byte; /* => next byte to write in buffer */ - size_t free_in_buffer; /* # of byte spaces remaining in buffer */ - savable_state cur; /* Current bit buffer & DC state */ - j_compress_ptr cinfo; /* dump_buffer needs access to this */ -} working_state; - - -/* Forward declarations */ -METHODDEF(boolean) encode_mcu_huff JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(void) finish_pass_huff JPP((j_compress_ptr cinfo)); -#ifdef ENTROPY_OPT_SUPPORTED -METHODDEF(boolean) encode_mcu_gather JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(void) finish_pass_gather JPP((j_compress_ptr cinfo)); -#endif - - -/* - * Initialize for a Huffman-compressed scan. - * If gather_statistics is TRUE, we do not output anything during the scan, - * just count the Huffman symbols used and generate Huffman code tables. - */ - -METHODDEF(void) -start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics) -{ - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; - int ci, dctbl, actbl; - jpeg_component_info * compptr; - - if (gather_statistics) { -#ifdef ENTROPY_OPT_SUPPORTED - entropy->pub.encode_mcu = encode_mcu_gather; - entropy->pub.finish_pass = finish_pass_gather; -#else - ERREXIT(cinfo, JERR_NOT_COMPILED); -#endif - } else { - entropy->pub.encode_mcu = encode_mcu_huff; - entropy->pub.finish_pass = finish_pass_huff; - } - - for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - dctbl = compptr->dc_tbl_no; - actbl = compptr->ac_tbl_no; - if (gather_statistics) { -#ifdef ENTROPY_OPT_SUPPORTED - /* Check for invalid table indexes */ - /* (make_c_derived_tbl does this in the other path) */ - if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS) - ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl); - if (actbl < 0 || actbl >= NUM_HUFF_TBLS) - ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl); - /* Allocate and zero the statistics tables */ - /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ - if (entropy->dc_count_ptrs[dctbl] == NULL) - entropy->dc_count_ptrs[dctbl] = (long *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - 257 * SIZEOF(long)); - MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * SIZEOF(long)); - if (entropy->ac_count_ptrs[actbl] == NULL) - entropy->ac_count_ptrs[actbl] = (long *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - 257 * SIZEOF(long)); - MEMZERO(entropy->ac_count_ptrs[actbl], 257 * SIZEOF(long)); -#endif - } else { - /* Compute derived values for Huffman tables */ - /* We may do this more than once for a table, but it's not expensive */ - jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl, - & entropy->dc_derived_tbls[dctbl]); - jpeg_make_c_derived_tbl(cinfo, FALSE, actbl, - & entropy->ac_derived_tbls[actbl]); - } - /* Initialize DC predictions to 0 */ - entropy->saved.last_dc_val[ci] = 0; - } - - /* Initialize bit buffer to empty */ - entropy->saved.put_buffer = 0; - entropy->saved.put_bits = 0; - - /* Initialize restart stuff */ - entropy->restarts_to_go = cinfo->restart_interval; - entropy->next_restart_num = 0; -} - - -/* - * Compute the derived values for a Huffman table. - * This routine also performs some validation checks on the table. - * - * Note this is also used by jcphuff.c. - */ - -GLOBAL(void) -jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, - c_derived_tbl ** pdtbl) -{ - JHUFF_TBL *htbl; - c_derived_tbl *dtbl; - int p, i, l, lastp, si, maxsymbol; - char huffsize[257]; - unsigned int huffcode[257]; - unsigned int code; - - /* Note that huffsize[] and huffcode[] are filled in code-length order, - * paralleling the order of the symbols themselves in htbl->huffval[]. - */ - - /* Find the input Huffman table */ - if (tblno < 0 || tblno >= NUM_HUFF_TBLS) - ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); - htbl = - isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno]; - if (htbl == NULL) - ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); - - /* Allocate a workspace if we haven't already done so. */ - if (*pdtbl == NULL) - *pdtbl = (c_derived_tbl *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(c_derived_tbl)); - dtbl = *pdtbl; - - /* Figure C.1: make table of Huffman code length for each symbol */ - - p = 0; - for (l = 1; l <= 16; l++) { - i = (int) htbl->bits[l]; - if (i < 0 || p + i > 256) /* protect against table overrun */ - ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); - while (i--) - huffsize[p++] = (char) l; - } - huffsize[p] = 0; - lastp = p; - - /* Figure C.2: generate the codes themselves */ - /* We also validate that the counts represent a legal Huffman code tree. */ - - code = 0; - si = huffsize[0]; - p = 0; - while (huffsize[p]) { - while (((int) huffsize[p]) == si) { - huffcode[p++] = code; - code++; - } - /* code is now 1 more than the last code used for codelength si; but - * it must still fit in si bits, since no code is allowed to be all ones. - */ - if (((INT32) code) >= (((INT32) 1) << si)) - ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); - code <<= 1; - si++; - } - - /* Figure C.3: generate encoding tables */ - /* These are code and size indexed by symbol value */ - - /* Set all codeless symbols to have code length 0; - * this lets us detect duplicate VAL entries here, and later - * allows emit_bits to detect any attempt to emit such symbols. - */ - MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi)); - - /* This is also a convenient place to check for out-of-range - * and duplicated VAL entries. We allow 0..255 for AC symbols - * but only 0..15 for DC. (We could constrain them further - * based on data depth and mode, but this seems enough.) - */ - maxsymbol = isDC ? 15 : 255; - - for (p = 0; p < lastp; p++) { - i = htbl->huffval[p]; - if (i < 0 || i > maxsymbol || dtbl->ehufsi[i]) - ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); - dtbl->ehufco[i] = huffcode[p]; - dtbl->ehufsi[i] = huffsize[p]; - } -} - - -/* Outputting bytes to the file */ - -/* Emit a byte, taking 'action' if must suspend. */ -#define emit_byte(state,val,action) \ - { *(state)->next_output_byte++ = (JOCTET) (val); \ - if (--(state)->free_in_buffer == 0) \ - if (! dump_buffer(state)) \ - { action; } } - - -LOCAL(boolean) -dump_buffer (working_state * state) -/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */ -{ - struct jpeg_destination_mgr * dest = state->cinfo->dest; - - if (! (*dest->empty_output_buffer) (state->cinfo)) - return FALSE; - /* After a successful buffer dump, must reset buffer pointers */ - state->next_output_byte = dest->next_output_byte; - state->free_in_buffer = dest->free_in_buffer; - return TRUE; -} - - -/* Outputting bits to the file */ - -/* Only the right 24 bits of put_buffer are used; the valid bits are - * left-justified in this part. At most 16 bits can be passed to emit_bits - * in one call, and we never retain more than 7 bits in put_buffer - * between calls, so 24 bits are sufficient. - */ - -INLINE -LOCAL(boolean) -emit_bits (working_state * state, unsigned int code, int size) -/* Emit some bits; return TRUE if successful, FALSE if must suspend */ -{ - /* This routine is heavily used, so it's worth coding tightly. */ - register INT32 put_buffer = (INT32) code; - register int put_bits = state->cur.put_bits; - - /* if size is 0, caller used an invalid Huffman table entry */ - if (size == 0) - ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE); - - put_buffer &= (((INT32) 1)<cur.put_buffer; /* and merge with old buffer contents */ - - while (put_bits >= 8) { - int c = (int) ((put_buffer >> 16) & 0xFF); - - emit_byte(state, c, return FALSE); - if (c == 0xFF) { /* need to stuff a zero byte? */ - emit_byte(state, 0, return FALSE); - } - put_buffer <<= 8; - put_bits -= 8; - } - - state->cur.put_buffer = put_buffer; /* update state variables */ - state->cur.put_bits = put_bits; - - return TRUE; -} - - -LOCAL(boolean) -flush_bits (working_state * state) -{ - if (! emit_bits(state, 0x7F, 7)) /* fill any partial byte with ones */ - return FALSE; - state->cur.put_buffer = 0; /* and reset bit-buffer to empty */ - state->cur.put_bits = 0; - return TRUE; -} - - -/* Encode a single block's worth of coefficients */ - -LOCAL(boolean) -encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val, - c_derived_tbl *dctbl, c_derived_tbl *actbl) -{ - register int temp, temp2; - register int nbits; - register int k, r, i; - - /* Encode the DC coefficient difference per section F.1.2.1 */ - - temp = temp2 = block[0] - last_dc_val; - - if (temp < 0) { - temp = -temp; /* temp is abs value of input */ - /* For a negative input, want temp2 = bitwise complement of abs(input) */ - /* This code assumes we are on a two's complement machine */ - temp2--; - } - - /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = 0; - while (temp) { - nbits++; - temp >>= 1; - } - /* Check for out-of-range coefficient values. - * Since we're encoding a difference, the range limit is twice as much. - */ - if (nbits > MAX_COEF_BITS+1) - ERREXIT(state->cinfo, JERR_BAD_DCT_COEF); - - /* Emit the Huffman-coded symbol for the number of bits */ - if (! emit_bits(state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits])) - return FALSE; - - /* Emit that number of bits of the value, if positive, */ - /* or the complement of its magnitude, if negative. */ - if (nbits) /* emit_bits rejects calls with size 0 */ - if (! emit_bits(state, (unsigned int) temp2, nbits)) - return FALSE; - - /* Encode the AC coefficients per section F.1.2.2 */ - - r = 0; /* r = run length of zeros */ - - for (k = 1; k < DCTSIZE2; k++) { - if ((temp = block[jpeg_natural_order[k]]) == 0) { - r++; - } else { - /* if run length > 15, must emit special run-length-16 codes (0xF0) */ - while (r > 15) { - if (! emit_bits(state, actbl->ehufco[0xF0], actbl->ehufsi[0xF0])) - return FALSE; - r -= 16; - } - - temp2 = temp; - if (temp < 0) { - temp = -temp; /* temp is abs value of input */ - /* This code assumes we are on a two's complement machine */ - temp2--; - } - - /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = 1; /* there must be at least one 1 bit */ - while ((temp >>= 1)) - nbits++; - /* Check for out-of-range coefficient values */ - if (nbits > MAX_COEF_BITS) - ERREXIT(state->cinfo, JERR_BAD_DCT_COEF); - - /* Emit Huffman symbol for run length / number of bits */ - i = (r << 4) + nbits; - if (! emit_bits(state, actbl->ehufco[i], actbl->ehufsi[i])) - return FALSE; - - /* Emit that number of bits of the value, if positive, */ - /* or the complement of its magnitude, if negative. */ - if (! emit_bits(state, (unsigned int) temp2, nbits)) - return FALSE; - - r = 0; - } - } - - /* If the last coef(s) were zero, emit an end-of-block code */ - if (r > 0) - if (! emit_bits(state, actbl->ehufco[0], actbl->ehufsi[0])) - return FALSE; - - return TRUE; -} - - -/* - * Emit a restart marker & resynchronize predictions. - */ - -LOCAL(boolean) -emit_restart (working_state * state, int restart_num) -{ - int ci; - - if (! flush_bits(state)) - return FALSE; - - emit_byte(state, 0xFF, return FALSE); - emit_byte(state, JPEG_RST0 + restart_num, return FALSE); - - /* Re-initialize DC predictions to 0 */ - for (ci = 0; ci < state->cinfo->comps_in_scan; ci++) - state->cur.last_dc_val[ci] = 0; - - /* The restart counter is not updated until we successfully write the MCU. */ - - return TRUE; -} - - -/* - * Encode and output one MCU's worth of Huffman-compressed coefficients. - */ - -METHODDEF(boolean) -encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data) -{ - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; - working_state state; - int blkn, ci; - jpeg_component_info * compptr; - - /* Load up working state */ - state.next_output_byte = cinfo->dest->next_output_byte; - state.free_in_buffer = cinfo->dest->free_in_buffer; - ASSIGN_STATE(state.cur, entropy->saved); - state.cinfo = cinfo; - - /* Emit restart marker if needed */ - if (cinfo->restart_interval) { - if (entropy->restarts_to_go == 0) - if (! emit_restart(&state, entropy->next_restart_num)) - return FALSE; - } - - /* Encode the MCU data blocks */ - for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - ci = cinfo->MCU_membership[blkn]; - compptr = cinfo->cur_comp_info[ci]; - if (! encode_one_block(&state, - MCU_data[blkn][0], state.cur.last_dc_val[ci], - entropy->dc_derived_tbls[compptr->dc_tbl_no], - entropy->ac_derived_tbls[compptr->ac_tbl_no])) - return FALSE; - /* Update last_dc_val */ - state.cur.last_dc_val[ci] = MCU_data[blkn][0][0]; - } - - /* Completed MCU, so update state */ - cinfo->dest->next_output_byte = state.next_output_byte; - cinfo->dest->free_in_buffer = state.free_in_buffer; - ASSIGN_STATE(entropy->saved, state.cur); - - /* Update restart-interval state too */ - if (cinfo->restart_interval) { - if (entropy->restarts_to_go == 0) { - entropy->restarts_to_go = cinfo->restart_interval; - entropy->next_restart_num++; - entropy->next_restart_num &= 7; - } - entropy->restarts_to_go--; - } - - return TRUE; -} - - -/* - * Finish up at the end of a Huffman-compressed scan. - */ - -METHODDEF(void) -finish_pass_huff (j_compress_ptr cinfo) -{ - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; - working_state state; - - /* Load up working state ... flush_bits needs it */ - state.next_output_byte = cinfo->dest->next_output_byte; - state.free_in_buffer = cinfo->dest->free_in_buffer; - ASSIGN_STATE(state.cur, entropy->saved); - state.cinfo = cinfo; - - /* Flush out the last data */ - if (! flush_bits(&state)) - ERREXIT(cinfo, JERR_CANT_SUSPEND); - - /* Update state */ - cinfo->dest->next_output_byte = state.next_output_byte; - cinfo->dest->free_in_buffer = state.free_in_buffer; - ASSIGN_STATE(entropy->saved, state.cur); -} - - -/* - * Huffman coding optimization. - * - * We first scan the supplied data and count the number of uses of each symbol - * that is to be Huffman-coded. (This process MUST agree with the code above.) - * Then we build a Huffman coding tree for the observed counts. - * Symbols which are not needed at all for the particular image are not - * assigned any code, which saves space in the DHT marker as well as in - * the compressed data. - */ - -#ifdef ENTROPY_OPT_SUPPORTED - - -/* Process a single block's worth of coefficients */ - -LOCAL(void) -htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val, - long dc_counts[], long ac_counts[]) -{ - register int temp; - register int nbits; - register int k, r; - - /* Encode the DC coefficient difference per section F.1.2.1 */ - - temp = block[0] - last_dc_val; - if (temp < 0) - temp = -temp; - - /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = 0; - while (temp) { - nbits++; - temp >>= 1; - } - /* Check for out-of-range coefficient values. - * Since we're encoding a difference, the range limit is twice as much. - */ - if (nbits > MAX_COEF_BITS+1) - ERREXIT(cinfo, JERR_BAD_DCT_COEF); - - /* Count the Huffman symbol for the number of bits */ - dc_counts[nbits]++; - - /* Encode the AC coefficients per section F.1.2.2 */ - - r = 0; /* r = run length of zeros */ - - for (k = 1; k < DCTSIZE2; k++) { - if ((temp = block[jpeg_natural_order[k]]) == 0) { - r++; - } else { - /* if run length > 15, must emit special run-length-16 codes (0xF0) */ - while (r > 15) { - ac_counts[0xF0]++; - r -= 16; - } - - /* Find the number of bits needed for the magnitude of the coefficient */ - if (temp < 0) - temp = -temp; - - /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = 1; /* there must be at least one 1 bit */ - while ((temp >>= 1)) - nbits++; - /* Check for out-of-range coefficient values */ - if (nbits > MAX_COEF_BITS) - ERREXIT(cinfo, JERR_BAD_DCT_COEF); - - /* Count Huffman symbol for run length / number of bits */ - ac_counts[(r << 4) + nbits]++; - - r = 0; - } - } - - /* If the last coef(s) were zero, emit an end-of-block code */ - if (r > 0) - ac_counts[0]++; -} - - -/* - * Trial-encode one MCU's worth of Huffman-compressed coefficients. - * No data is actually output, so no suspension return is possible. - */ - -METHODDEF(boolean) -encode_mcu_gather (j_compress_ptr cinfo, JBLOCKROW *MCU_data) -{ - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; - int blkn, ci; - jpeg_component_info * compptr; - - /* Take care of restart intervals if needed */ - if (cinfo->restart_interval) { - if (entropy->restarts_to_go == 0) { - /* Re-initialize DC predictions to 0 */ - for (ci = 0; ci < cinfo->comps_in_scan; ci++) - entropy->saved.last_dc_val[ci] = 0; - /* Update restart state */ - entropy->restarts_to_go = cinfo->restart_interval; - } - entropy->restarts_to_go--; - } - - for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - ci = cinfo->MCU_membership[blkn]; - compptr = cinfo->cur_comp_info[ci]; - htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci], - entropy->dc_count_ptrs[compptr->dc_tbl_no], - entropy->ac_count_ptrs[compptr->ac_tbl_no]); - entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0]; - } - - return TRUE; -} - - -/* - * Generate the best Huffman code table for the given counts, fill htbl. - * Note this is also used by jcphuff.c. - * - * The JPEG standard requires that no symbol be assigned a codeword of all - * one bits (so that padding bits added at the end of a compressed segment - * can't look like a valid code). Because of the canonical ordering of - * codewords, this just means that there must be an unused slot in the - * longest codeword length category. Section K.2 of the JPEG spec suggests - * reserving such a slot by pretending that symbol 256 is a valid symbol - * with count 1. In theory that's not optimal; giving it count zero but - * including it in the symbol set anyway should give a better Huffman code. - * But the theoretically better code actually seems to come out worse in - * practice, because it produces more all-ones bytes (which incur stuffed - * zero bytes in the final file). In any case the difference is tiny. - * - * The JPEG standard requires Huffman codes to be no more than 16 bits long. - * If some symbols have a very small but nonzero probability, the Huffman tree - * must be adjusted to meet the code length restriction. We currently use - * the adjustment method suggested in JPEG section K.2. This method is *not* - * optimal; it may not choose the best possible limited-length code. But - * typically only very-low-frequency symbols will be given less-than-optimal - * lengths, so the code is almost optimal. Experimental comparisons against - * an optimal limited-length-code algorithm indicate that the difference is - * microscopic --- usually less than a hundredth of a percent of total size. - * So the extra complexity of an optimal algorithm doesn't seem worthwhile. - */ - -GLOBAL(void) -jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]) -{ -#define MAX_CLEN 32 /* assumed maximum initial code length */ - UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */ - int codesize[257]; /* codesize[k] = code length of symbol k */ - int others[257]; /* next symbol in current branch of tree */ - int c1, c2; - int p, i, j; - long v; - - /* This algorithm is explained in section K.2 of the JPEG standard */ - - MEMZERO(bits, SIZEOF(bits)); - MEMZERO(codesize, SIZEOF(codesize)); - for (i = 0; i < 257; i++) - others[i] = -1; /* init links to empty */ - - freq[256] = 1; /* make sure 256 has a nonzero count */ - /* Including the pseudo-symbol 256 in the Huffman procedure guarantees - * that no real symbol is given code-value of all ones, because 256 - * will be placed last in the largest codeword category. - */ - - /* Huffman's basic algorithm to assign optimal code lengths to symbols */ - - for (;;) { - /* Find the smallest nonzero frequency, set c1 = its symbol */ - /* In case of ties, take the larger symbol number */ - c1 = -1; - v = 1000000000L; - for (i = 0; i <= 256; i++) { - if (freq[i] && freq[i] <= v) { - v = freq[i]; - c1 = i; - } - } - - /* Find the next smallest nonzero frequency, set c2 = its symbol */ - /* In case of ties, take the larger symbol number */ - c2 = -1; - v = 1000000000L; - for (i = 0; i <= 256; i++) { - if (freq[i] && freq[i] <= v && i != c1) { - v = freq[i]; - c2 = i; - } - } - - /* Done if we've merged everything into one frequency */ - if (c2 < 0) - break; - - /* Else merge the two counts/trees */ - freq[c1] += freq[c2]; - freq[c2] = 0; - - /* Increment the codesize of everything in c1's tree branch */ - codesize[c1]++; - while (others[c1] >= 0) { - c1 = others[c1]; - codesize[c1]++; - } - - others[c1] = c2; /* chain c2 onto c1's tree branch */ - - /* Increment the codesize of everything in c2's tree branch */ - codesize[c2]++; - while (others[c2] >= 0) { - c2 = others[c2]; - codesize[c2]++; - } - } - - /* Now count the number of symbols of each code length */ - for (i = 0; i <= 256; i++) { - if (codesize[i]) { - /* The JPEG standard seems to think that this can't happen, */ - /* but I'm paranoid... */ - if (codesize[i] > MAX_CLEN) - ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW); - - bits[codesize[i]]++; - } - } - - /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure - * Huffman procedure assigned any such lengths, we must adjust the coding. - * Here is what the JPEG spec says about how this next bit works: - * Since symbols are paired for the longest Huffman code, the symbols are - * removed from this length category two at a time. The prefix for the pair - * (which is one bit shorter) is allocated to one of the pair; then, - * skipping the BITS entry for that prefix length, a code word from the next - * shortest nonzero BITS entry is converted into a prefix for two code words - * one bit longer. - */ - - for (i = MAX_CLEN; i > 16; i--) { - while (bits[i] > 0) { - j = i - 2; /* find length of new prefix to be used */ - while (bits[j] == 0) - j--; - - bits[i] -= 2; /* remove two symbols */ - bits[i-1]++; /* one goes in this length */ - bits[j+1] += 2; /* two new symbols in this length */ - bits[j]--; /* symbol of this length is now a prefix */ - } - } - - /* Remove the count for the pseudo-symbol 256 from the largest codelength */ - while (bits[i] == 0) /* find largest codelength still in use */ - i--; - bits[i]--; - - /* Return final symbol counts (only for lengths 0..16) */ - MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits)); - - /* Return a list of the symbols sorted by code length */ - /* It's not real clear to me why we don't need to consider the codelength - * changes made above, but the JPEG spec seems to think this works. - */ - p = 0; - for (i = 1; i <= MAX_CLEN; i++) { - for (j = 0; j <= 255; j++) { - if (codesize[j] == i) { - htbl->huffval[p] = (UINT8) j; - p++; - } - } - } - - /* Set sent_table FALSE so updated table will be written to JPEG file. */ - htbl->sent_table = FALSE; -} - - -/* - * Finish up a statistics-gathering pass and create the new Huffman tables. - */ - -METHODDEF(void) -finish_pass_gather (j_compress_ptr cinfo) -{ - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; - int ci, dctbl, actbl; - jpeg_component_info * compptr; - JHUFF_TBL **htblptr; - boolean did_dc[NUM_HUFF_TBLS]; - boolean did_ac[NUM_HUFF_TBLS]; - - /* It's important not to apply jpeg_gen_optimal_table more than once - * per table, because it clobbers the input frequency counts! - */ - MEMZERO(did_dc, SIZEOF(did_dc)); - MEMZERO(did_ac, SIZEOF(did_ac)); - - for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - dctbl = compptr->dc_tbl_no; - actbl = compptr->ac_tbl_no; - if (! did_dc[dctbl]) { - htblptr = & cinfo->dc_huff_tbl_ptrs[dctbl]; - if (*htblptr == NULL) - *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); - jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]); - did_dc[dctbl] = TRUE; - } - if (! did_ac[actbl]) { - htblptr = & cinfo->ac_huff_tbl_ptrs[actbl]; - if (*htblptr == NULL) - *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); - jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]); - did_ac[actbl] = TRUE; - } - } -} - - -#endif /* ENTROPY_OPT_SUPPORTED */ - - -/* - * Module initialization routine for Huffman entropy encoding. - */ - -GLOBAL(void) -jinit_huff_encoder (j_compress_ptr cinfo) -{ - huff_entropy_ptr entropy; - int i; - - entropy = (huff_entropy_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(huff_entropy_encoder)); - cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; - entropy->pub.start_pass = start_pass_huff; - - /* Mark tables unallocated */ - for (i = 0; i < NUM_HUFF_TBLS; i++) { - entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL; -#ifdef ENTROPY_OPT_SUPPORTED - entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL; -#endif - } -} diff --git a/code/jpeg-6b/jchuff.h b/code/jpeg-6b/jchuff.h deleted file mode 100644 index a9599fc1..00000000 --- a/code/jpeg-6b/jchuff.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * jchuff.h - * - * Copyright (C) 1991-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains declarations for Huffman entropy encoding routines - * that are shared between the sequential encoder (jchuff.c) and the - * progressive encoder (jcphuff.c). No other modules need to see these. - */ - -/* The legal range of a DCT coefficient is - * -1024 .. +1023 for 8-bit data; - * -16384 .. +16383 for 12-bit data. - * Hence the magnitude should always fit in 10 or 14 bits respectively. - */ - -#if BITS_IN_JSAMPLE == 8 -#define MAX_COEF_BITS 10 -#else -#define MAX_COEF_BITS 14 -#endif - -/* Derived data constructed for each Huffman table */ - -typedef struct { - unsigned int ehufco[256]; /* code for each symbol */ - char ehufsi[256]; /* length of code for each symbol */ - /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */ -} c_derived_tbl; - -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_make_c_derived_tbl jMkCDerived -#define jpeg_gen_optimal_table jGenOptTbl -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - -/* Expand a Huffman table definition into the derived format */ -EXTERN(void) jpeg_make_c_derived_tbl - JPP((j_compress_ptr cinfo, boolean isDC, int tblno, - c_derived_tbl ** pdtbl)); - -/* Generate an optimal table definition given the specified counts */ -EXTERN(void) jpeg_gen_optimal_table - JPP((j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[])); diff --git a/code/jpeg-6b/jconfig.h b/code/jpeg-6b/jconfig.h deleted file mode 100644 index 361586ee..00000000 --- a/code/jpeg-6b/jconfig.h +++ /dev/null @@ -1,40 +0,0 @@ -/* jconfig.wat --- jconfig.h for Watcom C/C++ on MS-DOS or OS/2. */ -/* see jconfig.doc for explanations */ - -#define HAVE_PROTOTYPES -#define HAVE_UNSIGNED_CHAR -#define HAVE_UNSIGNED_SHORT -/* #define void char */ -/* #define const */ -#define CHAR_IS_UNSIGNED -#define HAVE_STDDEF_H -#undef NEED_BSD_STRINGS -#undef NEED_SYS_TYPES_H -#undef NEED_FAR_POINTERS /* Watcom uses flat 32-bit addressing */ -#undef NEED_SHORT_EXTERNAL_NAMES -#undef INCOMPLETE_TYPES_BROKEN - -#define JDCT_DEFAULT JDCT_FLOAT -#define JDCT_FASTEST JDCT_FLOAT - -#ifdef JPEG_INTERNALS - -#undef RIGHT_SHIFT_IS_UNSIGNED - -#endif /* JPEG_INTERNALS */ - -#ifdef JPEG_CJPEG_DJPEG - -#define BMP_SUPPORTED /* BMP image file format */ -#define GIF_SUPPORTED /* GIF image file format */ -#define PPM_SUPPORTED /* PBMPLUS PPM/PGM image file format */ -#undef RLE_SUPPORTED /* Utah RLE image file format */ -#define TARGA_SUPPORTED /* Targa image file format */ - -#undef TWO_FILE_COMMANDLINE /* optional */ -#define USE_SETMODE /* Needed to make one-file style work in Watcom */ -#undef NEED_SIGNAL_CATCHER /* Define this if you use jmemname.c */ -#undef DONT_USE_B_MODE -#undef PROGRESS_REPORT /* optional */ - -#endif /* JPEG_CJPEG_DJPEG */ diff --git a/code/jpeg-6b/jcphuff.c b/code/jpeg-6b/jcphuff.c deleted file mode 100644 index 07f9178b..00000000 --- a/code/jpeg-6b/jcphuff.c +++ /dev/null @@ -1,833 +0,0 @@ -/* - * jcphuff.c - * - * Copyright (C) 1995-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains Huffman entropy encoding routines for progressive JPEG. - * - * We do not support output suspension in this module, since the library - * currently does not allow multiple-scan files to be written with output - * suspension. - */ - -#define JPEG_INTERNALS -#include "jinclude.h" -#include "jpeglib.h" -#include "jchuff.h" /* Declarations shared with jchuff.c */ - -#ifdef C_PROGRESSIVE_SUPPORTED - -/* Expanded entropy encoder object for progressive Huffman encoding. */ - -typedef struct { - struct jpeg_entropy_encoder pub; /* public fields */ - - /* Mode flag: TRUE for optimization, FALSE for actual data output */ - boolean gather_statistics; - - /* Bit-level coding status. - * next_output_byte/free_in_buffer are local copies of cinfo->dest fields. - */ - JOCTET * next_output_byte; /* => next byte to write in buffer */ - size_t free_in_buffer; /* # of byte spaces remaining in buffer */ - INT32 put_buffer; /* current bit-accumulation buffer */ - int put_bits; /* # of bits now in it */ - j_compress_ptr cinfo; /* link to cinfo (needed for dump_buffer) */ - - /* Coding status for DC components */ - int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ - - /* Coding status for AC components */ - int ac_tbl_no; /* the table number of the single component */ - unsigned int EOBRUN; /* run length of EOBs */ - unsigned int BE; /* # of buffered correction bits before MCU */ - char * bit_buffer; /* buffer for correction bits (1 per char) */ - /* packing correction bits tightly would save some space but cost time... */ - - unsigned int restarts_to_go; /* MCUs left in this restart interval */ - int next_restart_num; /* next restart number to write (0-7) */ - - /* Pointers to derived tables (these workspaces have image lifespan). - * Since any one scan codes only DC or only AC, we only need one set - * of tables, not one for DC and one for AC. - */ - c_derived_tbl * derived_tbls[NUM_HUFF_TBLS]; - - /* Statistics tables for optimization; again, one set is enough */ - long * count_ptrs[NUM_HUFF_TBLS]; -} phuff_entropy_encoder; - -typedef phuff_entropy_encoder * phuff_entropy_ptr; - -/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit - * buffer can hold. Larger sizes may slightly improve compression, but - * 1000 is already well into the realm of overkill. - * The minimum safe size is 64 bits. - */ - -#define MAX_CORR_BITS 1000 /* Max # of correction bits I can buffer */ - -/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32. - * We assume that int right shift is unsigned if INT32 right shift is, - * which should be safe. - */ - -#ifdef RIGHT_SHIFT_IS_UNSIGNED -#define ISHIFT_TEMPS int ishift_temp; -#define IRIGHT_SHIFT(x,shft) \ - ((ishift_temp = (x)) < 0 ? \ - (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \ - (ishift_temp >> (shft))) -#else -#define ISHIFT_TEMPS -#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) -#endif - -/* Forward declarations */ -METHODDEF(boolean) encode_mcu_DC_first JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) encode_mcu_AC_first JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) encode_mcu_DC_refine JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) encode_mcu_AC_refine JPP((j_compress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(void) finish_pass_phuff JPP((j_compress_ptr cinfo)); -METHODDEF(void) finish_pass_gather_phuff JPP((j_compress_ptr cinfo)); - - -/* - * Initialize for a Huffman-compressed scan using progressive JPEG. - */ - -METHODDEF(void) -start_pass_phuff (j_compress_ptr cinfo, boolean gather_statistics) -{ - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - boolean is_DC_band; - int ci, tbl; - jpeg_component_info * compptr; - - entropy->cinfo = cinfo; - entropy->gather_statistics = gather_statistics; - - is_DC_band = (cinfo->Ss == 0); - - /* We assume jcmaster.c already validated the scan parameters. */ - - /* Select execution routines */ - if (cinfo->Ah == 0) { - if (is_DC_band) - entropy->pub.encode_mcu = encode_mcu_DC_first; - else - entropy->pub.encode_mcu = encode_mcu_AC_first; - } else { - if (is_DC_band) - entropy->pub.encode_mcu = encode_mcu_DC_refine; - else { - entropy->pub.encode_mcu = encode_mcu_AC_refine; - /* AC refinement needs a correction bit buffer */ - if (entropy->bit_buffer == NULL) - entropy->bit_buffer = (char *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - MAX_CORR_BITS * SIZEOF(char)); - } - } - if (gather_statistics) - entropy->pub.finish_pass = finish_pass_gather_phuff; - else - entropy->pub.finish_pass = finish_pass_phuff; - - /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1 - * for AC coefficients. - */ - for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - /* Initialize DC predictions to 0 */ - entropy->last_dc_val[ci] = 0; - /* Get table index */ - if (is_DC_band) { - if (cinfo->Ah != 0) /* DC refinement needs no table */ - continue; - tbl = compptr->dc_tbl_no; - } else { - entropy->ac_tbl_no = tbl = compptr->ac_tbl_no; - } - if (gather_statistics) { - /* Check for invalid table index */ - /* (make_c_derived_tbl does this in the other path) */ - if (tbl < 0 || tbl >= NUM_HUFF_TBLS) - ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl); - /* Allocate and zero the statistics tables */ - /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ - if (entropy->count_ptrs[tbl] == NULL) - entropy->count_ptrs[tbl] = (long *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - 257 * SIZEOF(long)); - MEMZERO(entropy->count_ptrs[tbl], 257 * SIZEOF(long)); - } else { - /* Compute derived values for Huffman table */ - /* We may do this more than once for a table, but it's not expensive */ - jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl, - & entropy->derived_tbls[tbl]); - } - } - - /* Initialize AC stuff */ - entropy->EOBRUN = 0; - entropy->BE = 0; - - /* Initialize bit buffer to empty */ - entropy->put_buffer = 0; - entropy->put_bits = 0; - - /* Initialize restart stuff */ - entropy->restarts_to_go = cinfo->restart_interval; - entropy->next_restart_num = 0; -} - - -/* Outputting bytes to the file. - * NB: these must be called only when actually outputting, - * that is, entropy->gather_statistics == FALSE. - */ - -/* Emit a byte */ -#define emit_byte(entropy,val) \ - { *(entropy)->next_output_byte++ = (JOCTET) (val); \ - if (--(entropy)->free_in_buffer == 0) \ - dump_buffer(entropy); } - - -LOCAL(void) -dump_buffer (phuff_entropy_ptr entropy) -/* Empty the output buffer; we do not support suspension in this module. */ -{ - struct jpeg_destination_mgr * dest = entropy->cinfo->dest; - - if (! (*dest->empty_output_buffer) (entropy->cinfo)) - ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND); - /* After a successful buffer dump, must reset buffer pointers */ - entropy->next_output_byte = dest->next_output_byte; - entropy->free_in_buffer = dest->free_in_buffer; -} - - -/* Outputting bits to the file */ - -/* Only the right 24 bits of put_buffer are used; the valid bits are - * left-justified in this part. At most 16 bits can be passed to emit_bits - * in one call, and we never retain more than 7 bits in put_buffer - * between calls, so 24 bits are sufficient. - */ - -INLINE -LOCAL(void) -emit_bits (phuff_entropy_ptr entropy, unsigned int code, int size) -/* Emit some bits, unless we are in gather mode */ -{ - /* This routine is heavily used, so it's worth coding tightly. */ - register INT32 put_buffer = (INT32) code; - register int put_bits = entropy->put_bits; - - /* if size is 0, caller used an invalid Huffman table entry */ - if (size == 0) - ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); - - if (entropy->gather_statistics) - return; /* do nothing if we're only getting stats */ - - put_buffer &= (((INT32) 1)<put_buffer; /* and merge with old buffer contents */ - - while (put_bits >= 8) { - int c = (int) ((put_buffer >> 16) & 0xFF); - - emit_byte(entropy, c); - if (c == 0xFF) { /* need to stuff a zero byte? */ - emit_byte(entropy, 0); - } - put_buffer <<= 8; - put_bits -= 8; - } - - entropy->put_buffer = put_buffer; /* update variables */ - entropy->put_bits = put_bits; -} - - -LOCAL(void) -flush_bits (phuff_entropy_ptr entropy) -{ - emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */ - entropy->put_buffer = 0; /* and reset bit-buffer to empty */ - entropy->put_bits = 0; -} - - -/* - * Emit (or just count) a Huffman symbol. - */ - -INLINE -LOCAL(void) -emit_symbol (phuff_entropy_ptr entropy, int tbl_no, int symbol) -{ - if (entropy->gather_statistics) - entropy->count_ptrs[tbl_no][symbol]++; - else { - c_derived_tbl * tbl = entropy->derived_tbls[tbl_no]; - emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]); - } -} - - -/* - * Emit bits from a correction bit buffer. - */ - -LOCAL(void) -emit_buffered_bits (phuff_entropy_ptr entropy, char * bufstart, - unsigned int nbits) -{ - if (entropy->gather_statistics) - return; /* no real work */ - - while (nbits > 0) { - emit_bits(entropy, (unsigned int) (*bufstart), 1); - bufstart++; - nbits--; - } -} - - -/* - * Emit any pending EOBRUN symbol. - */ - -LOCAL(void) -emit_eobrun (phuff_entropy_ptr entropy) -{ - register int temp, nbits; - - if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */ - temp = entropy->EOBRUN; - nbits = 0; - while ((temp >>= 1)) - nbits++; - /* safety check: shouldn't happen given limited correction-bit buffer */ - if (nbits > 14) - ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); - - emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4); - if (nbits) - emit_bits(entropy, entropy->EOBRUN, nbits); - - entropy->EOBRUN = 0; - - /* Emit any buffered correction bits */ - emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE); - entropy->BE = 0; - } -} - - -/* - * Emit a restart marker & resynchronize predictions. - */ - -LOCAL(void) -emit_restart (phuff_entropy_ptr entropy, int restart_num) -{ - int ci; - - emit_eobrun(entropy); - - if (! entropy->gather_statistics) { - flush_bits(entropy); - emit_byte(entropy, 0xFF); - emit_byte(entropy, JPEG_RST0 + restart_num); - } - - if (entropy->cinfo->Ss == 0) { - /* Re-initialize DC predictions to 0 */ - for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++) - entropy->last_dc_val[ci] = 0; - } else { - /* Re-initialize all AC-related fields to 0 */ - entropy->EOBRUN = 0; - entropy->BE = 0; - } -} - - -/* - * MCU encoding for DC initial scan (either spectral selection, - * or first pass of successive approximation). - */ - -METHODDEF(boolean) -encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) -{ - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - register int temp, temp2; - register int nbits; - int blkn, ci; - int Al = cinfo->Al; - JBLOCKROW block; - jpeg_component_info * compptr; - ISHIFT_TEMPS - - entropy->next_output_byte = cinfo->dest->next_output_byte; - entropy->free_in_buffer = cinfo->dest->free_in_buffer; - - /* Emit restart marker if needed */ - if (cinfo->restart_interval) - if (entropy->restarts_to_go == 0) - emit_restart(entropy, entropy->next_restart_num); - - /* Encode the MCU data blocks */ - for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - block = MCU_data[blkn]; - ci = cinfo->MCU_membership[blkn]; - compptr = cinfo->cur_comp_info[ci]; - - /* Compute the DC value after the required point transform by Al. - * This is simply an arithmetic right shift. - */ - temp2 = IRIGHT_SHIFT((int) ((*block)[0]), Al); - - /* DC differences are figured on the point-transformed values. */ - temp = temp2 - entropy->last_dc_val[ci]; - entropy->last_dc_val[ci] = temp2; - - /* Encode the DC coefficient difference per section G.1.2.1 */ - temp2 = temp; - if (temp < 0) { - temp = -temp; /* temp is abs value of input */ - /* For a negative input, want temp2 = bitwise complement of abs(input) */ - /* This code assumes we are on a two's complement machine */ - temp2--; - } - - /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = 0; - while (temp) { - nbits++; - temp >>= 1; - } - /* Check for out-of-range coefficient values. - * Since we're encoding a difference, the range limit is twice as much. - */ - if (nbits > MAX_COEF_BITS+1) - ERREXIT(cinfo, JERR_BAD_DCT_COEF); - - /* Count/emit the Huffman-coded symbol for the number of bits */ - emit_symbol(entropy, compptr->dc_tbl_no, nbits); - - /* Emit that number of bits of the value, if positive, */ - /* or the complement of its magnitude, if negative. */ - if (nbits) /* emit_bits rejects calls with size 0 */ - emit_bits(entropy, (unsigned int) temp2, nbits); - } - - cinfo->dest->next_output_byte = entropy->next_output_byte; - cinfo->dest->free_in_buffer = entropy->free_in_buffer; - - /* Update restart-interval state too */ - if (cinfo->restart_interval) { - if (entropy->restarts_to_go == 0) { - entropy->restarts_to_go = cinfo->restart_interval; - entropy->next_restart_num++; - entropy->next_restart_num &= 7; - } - entropy->restarts_to_go--; - } - - return TRUE; -} - - -/* - * MCU encoding for AC initial scan (either spectral selection, - * or first pass of successive approximation). - */ - -METHODDEF(boolean) -encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) -{ - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - register int temp, temp2; - register int nbits; - register int r, k; - int Se = cinfo->Se; - int Al = cinfo->Al; - JBLOCKROW block; - - entropy->next_output_byte = cinfo->dest->next_output_byte; - entropy->free_in_buffer = cinfo->dest->free_in_buffer; - - /* Emit restart marker if needed */ - if (cinfo->restart_interval) - if (entropy->restarts_to_go == 0) - emit_restart(entropy, entropy->next_restart_num); - - /* Encode the MCU data block */ - block = MCU_data[0]; - - /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */ - - r = 0; /* r = run length of zeros */ - - for (k = cinfo->Ss; k <= Se; k++) { - if ((temp = (*block)[jpeg_natural_order[k]]) == 0) { - r++; - continue; - } - /* We must apply the point transform by Al. For AC coefficients this - * is an integer division with rounding towards 0. To do this portably - * in C, we shift after obtaining the absolute value; so the code is - * interwoven with finding the abs value (temp) and output bits (temp2). - */ - if (temp < 0) { - temp = -temp; /* temp is abs value of input */ - temp >>= Al; /* apply the point transform */ - /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ - temp2 = ~temp; - } else { - temp >>= Al; /* apply the point transform */ - temp2 = temp; - } - /* Watch out for case that nonzero coef is zero after point transform */ - if (temp == 0) { - r++; - continue; - } - - /* Emit any pending EOBRUN */ - if (entropy->EOBRUN > 0) - emit_eobrun(entropy); - /* if run length > 15, must emit special run-length-16 codes (0xF0) */ - while (r > 15) { - emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); - r -= 16; - } - - /* Find the number of bits needed for the magnitude of the coefficient */ - nbits = 1; /* there must be at least one 1 bit */ - while ((temp >>= 1)) - nbits++; - /* Check for out-of-range coefficient values */ - if (nbits > MAX_COEF_BITS) - ERREXIT(cinfo, JERR_BAD_DCT_COEF); - - /* Count/emit Huffman symbol for run length / number of bits */ - emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); - - /* Emit that number of bits of the value, if positive, */ - /* or the complement of its magnitude, if negative. */ - emit_bits(entropy, (unsigned int) temp2, nbits); - - r = 0; /* reset zero run length */ - } - - if (r > 0) { /* If there are trailing zeroes, */ - entropy->EOBRUN++; /* count an EOB */ - if (entropy->EOBRUN == 0x7FFF) - emit_eobrun(entropy); /* force it out to avoid overflow */ - } - - cinfo->dest->next_output_byte = entropy->next_output_byte; - cinfo->dest->free_in_buffer = entropy->free_in_buffer; - - /* Update restart-interval state too */ - if (cinfo->restart_interval) { - if (entropy->restarts_to_go == 0) { - entropy->restarts_to_go = cinfo->restart_interval; - entropy->next_restart_num++; - entropy->next_restart_num &= 7; - } - entropy->restarts_to_go--; - } - - return TRUE; -} - - -/* - * MCU encoding for DC successive approximation refinement scan. - * Note: we assume such scans can be multi-component, although the spec - * is not very clear on the point. - */ - -METHODDEF(boolean) -encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) -{ - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - register int temp; - int blkn; - int Al = cinfo->Al; - JBLOCKROW block; - - entropy->next_output_byte = cinfo->dest->next_output_byte; - entropy->free_in_buffer = cinfo->dest->free_in_buffer; - - /* Emit restart marker if needed */ - if (cinfo->restart_interval) - if (entropy->restarts_to_go == 0) - emit_restart(entropy, entropy->next_restart_num); - - /* Encode the MCU data blocks */ - for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - block = MCU_data[blkn]; - - /* We simply emit the Al'th bit of the DC coefficient value. */ - temp = (*block)[0]; - emit_bits(entropy, (unsigned int) (temp >> Al), 1); - } - - cinfo->dest->next_output_byte = entropy->next_output_byte; - cinfo->dest->free_in_buffer = entropy->free_in_buffer; - - /* Update restart-interval state too */ - if (cinfo->restart_interval) { - if (entropy->restarts_to_go == 0) { - entropy->restarts_to_go = cinfo->restart_interval; - entropy->next_restart_num++; - entropy->next_restart_num &= 7; - } - entropy->restarts_to_go--; - } - - return TRUE; -} - - -/* - * MCU encoding for AC successive approximation refinement scan. - */ - -METHODDEF(boolean) -encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) -{ - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - register int temp; - register int r, k; - int EOB; - char *BR_buffer; - unsigned int BR; - int Se = cinfo->Se; - int Al = cinfo->Al; - JBLOCKROW block; - int absvalues[DCTSIZE2]; - - entropy->next_output_byte = cinfo->dest->next_output_byte; - entropy->free_in_buffer = cinfo->dest->free_in_buffer; - - /* Emit restart marker if needed */ - if (cinfo->restart_interval) - if (entropy->restarts_to_go == 0) - emit_restart(entropy, entropy->next_restart_num); - - /* Encode the MCU data block */ - block = MCU_data[0]; - - /* It is convenient to make a pre-pass to determine the transformed - * coefficients' absolute values and the EOB position. - */ - EOB = 0; - for (k = cinfo->Ss; k <= Se; k++) { - temp = (*block)[jpeg_natural_order[k]]; - /* We must apply the point transform by Al. For AC coefficients this - * is an integer division with rounding towards 0. To do this portably - * in C, we shift after obtaining the absolute value. - */ - if (temp < 0) - temp = -temp; /* temp is abs value of input */ - temp >>= Al; /* apply the point transform */ - absvalues[k] = temp; /* save abs value for main pass */ - if (temp == 1) - EOB = k; /* EOB = index of last newly-nonzero coef */ - } - - /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */ - - r = 0; /* r = run length of zeros */ - BR = 0; /* BR = count of buffered bits added now */ - BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */ - - for (k = cinfo->Ss; k <= Se; k++) { - if ((temp = absvalues[k]) == 0) { - r++; - continue; - } - - /* Emit any required ZRLs, but not if they can be folded into EOB */ - while (r > 15 && k <= EOB) { - /* emit any pending EOBRUN and the BE correction bits */ - emit_eobrun(entropy); - /* Emit ZRL */ - emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); - r -= 16; - /* Emit buffered correction bits that must be associated with ZRL */ - emit_buffered_bits(entropy, BR_buffer, BR); - BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ - BR = 0; - } - - /* If the coef was previously nonzero, it only needs a correction bit. - * NOTE: a straight translation of the spec's figure G.7 would suggest - * that we also need to test r > 15. But if r > 15, we can only get here - * if k > EOB, which implies that this coefficient is not 1. - */ - if (temp > 1) { - /* The correction bit is the next bit of the absolute value. */ - BR_buffer[BR++] = (char) (temp & 1); - continue; - } - - /* Emit any pending EOBRUN and the BE correction bits */ - emit_eobrun(entropy); - - /* Count/emit Huffman symbol for run length / number of bits */ - emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); - - /* Emit output bit for newly-nonzero coef */ - temp = ((*block)[jpeg_natural_order[k]] < 0) ? 0 : 1; - emit_bits(entropy, (unsigned int) temp, 1); - - /* Emit buffered correction bits that must be associated with this code */ - emit_buffered_bits(entropy, BR_buffer, BR); - BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ - BR = 0; - r = 0; /* reset zero run length */ - } - - if (r > 0 || BR > 0) { /* If there are trailing zeroes, */ - entropy->EOBRUN++; /* count an EOB */ - entropy->BE += BR; /* concat my correction bits to older ones */ - /* We force out the EOB if we risk either: - * 1. overflow of the EOB counter; - * 2. overflow of the correction bit buffer during the next MCU. - */ - if (entropy->EOBRUN == 0x7FFF || entropy->BE > (MAX_CORR_BITS-DCTSIZE2+1)) - emit_eobrun(entropy); - } - - cinfo->dest->next_output_byte = entropy->next_output_byte; - cinfo->dest->free_in_buffer = entropy->free_in_buffer; - - /* Update restart-interval state too */ - if (cinfo->restart_interval) { - if (entropy->restarts_to_go == 0) { - entropy->restarts_to_go = cinfo->restart_interval; - entropy->next_restart_num++; - entropy->next_restart_num &= 7; - } - entropy->restarts_to_go--; - } - - return TRUE; -} - - -/* - * Finish up at the end of a Huffman-compressed progressive scan. - */ - -METHODDEF(void) -finish_pass_phuff (j_compress_ptr cinfo) -{ - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - - entropy->next_output_byte = cinfo->dest->next_output_byte; - entropy->free_in_buffer = cinfo->dest->free_in_buffer; - - /* Flush out any buffered data */ - emit_eobrun(entropy); - flush_bits(entropy); - - cinfo->dest->next_output_byte = entropy->next_output_byte; - cinfo->dest->free_in_buffer = entropy->free_in_buffer; -} - - -/* - * Finish up a statistics-gathering pass and create the new Huffman tables. - */ - -METHODDEF(void) -finish_pass_gather_phuff (j_compress_ptr cinfo) -{ - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - boolean is_DC_band; - int ci, tbl; - jpeg_component_info * compptr; - JHUFF_TBL **htblptr; - boolean did[NUM_HUFF_TBLS]; - - /* Flush out buffered data (all we care about is counting the EOB symbol) */ - emit_eobrun(entropy); - - is_DC_band = (cinfo->Ss == 0); - - /* It's important not to apply jpeg_gen_optimal_table more than once - * per table, because it clobbers the input frequency counts! - */ - MEMZERO(did, SIZEOF(did)); - - for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - if (is_DC_band) { - if (cinfo->Ah != 0) /* DC refinement needs no table */ - continue; - tbl = compptr->dc_tbl_no; - } else { - tbl = compptr->ac_tbl_no; - } - if (! did[tbl]) { - if (is_DC_band) - htblptr = & cinfo->dc_huff_tbl_ptrs[tbl]; - else - htblptr = & cinfo->ac_huff_tbl_ptrs[tbl]; - if (*htblptr == NULL) - *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); - jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]); - did[tbl] = TRUE; - } - } -} - - -/* - * Module initialization routine for progressive Huffman entropy encoding. - */ - -GLOBAL(void) -jinit_phuff_encoder (j_compress_ptr cinfo) -{ - phuff_entropy_ptr entropy; - int i; - - entropy = (phuff_entropy_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(phuff_entropy_encoder)); - cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; - entropy->pub.start_pass = start_pass_phuff; - - /* Mark tables unallocated */ - for (i = 0; i < NUM_HUFF_TBLS; i++) { - entropy->derived_tbls[i] = NULL; - entropy->count_ptrs[i] = NULL; - } - entropy->bit_buffer = NULL; /* needed only in AC refinement scan */ -} - -#endif /* C_PROGRESSIVE_SUPPORTED */ diff --git a/code/jpeg-6b/jdct.h b/code/jpeg-6b/jdct.h deleted file mode 100644 index 04192a26..00000000 --- a/code/jpeg-6b/jdct.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * jdct.h - * - * Copyright (C) 1994-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This include file contains common declarations for the forward and - * inverse DCT modules. These declarations are private to the DCT managers - * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms. - * The individual DCT algorithms are kept in separate files to ease - * machine-dependent tuning (e.g., assembly coding). - */ - - -/* - * A forward DCT routine is given a pointer to a work area of type DCTELEM[]; - * the DCT is to be performed in-place in that buffer. Type DCTELEM is int - * for 8-bit samples, INT32 for 12-bit samples. (NOTE: Floating-point DCT - * implementations use an array of type FAST_FLOAT, instead.) - * The DCT inputs are expected to be signed (range +-CENTERJSAMPLE). - * The DCT outputs are returned scaled up by a factor of 8; they therefore - * have a range of +-8K for 8-bit data, +-128K for 12-bit data. This - * convention improves accuracy in integer implementations and saves some - * work in floating-point ones. - * Quantization of the output coefficients is done by jcdctmgr.c. - */ - -#if BITS_IN_JSAMPLE == 8 -typedef int DCTELEM; /* 16 or 32 bits is fine */ -#else -typedef INT32 DCTELEM; /* must have 32 bits */ -#endif - -typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data)); -typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data)); - - -/* - * An inverse DCT routine is given a pointer to the input JBLOCK and a pointer - * to an output sample array. The routine must dequantize the input data as - * well as perform the IDCT; for dequantization, it uses the multiplier table - * pointed to by compptr->dct_table. The output data is to be placed into the - * sample array starting at a specified column. (Any row offset needed will - * be applied to the array pointer before it is passed to the IDCT code.) - * Note that the number of samples emitted by the IDCT routine is - * DCT_scaled_size * DCT_scaled_size. - */ - -/* typedef inverse_DCT_method_ptr is declared in jpegint.h */ - -/* - * Each IDCT routine has its own ideas about the best dct_table element type. - */ - -typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */ -#if BITS_IN_JSAMPLE == 8 -typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */ -#define IFAST_SCALE_BITS 2 /* fractional bits in scale factors */ -#else -typedef INT32 IFAST_MULT_TYPE; /* need 32 bits for scaled quantizers */ -#define IFAST_SCALE_BITS 13 /* fractional bits in scale factors */ -#endif -typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */ - - -/* - * Each IDCT routine is responsible for range-limiting its results and - * converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could - * be quite far out of range if the input data is corrupt, so a bulletproof - * range-limiting step is required. We use a mask-and-table-lookup method - * to do the combined operations quickly. See the comments with - * prepare_range_limit_table (in jdmaster.c) for more info. - */ - -#define IDCT_range_limit(cinfo) ((cinfo)->sample_range_limit + CENTERJSAMPLE) - -#define RANGE_MASK (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */ - - -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_fdct_islow jFDislow -#define jpeg_fdct_ifast jFDifast -#define jpeg_fdct_float jFDfloat -#define jpeg_idct_islow jRDislow -#define jpeg_idct_ifast jRDifast -#define jpeg_idct_float jRDfloat -#define jpeg_idct_4x4 jRD4x4 -#define jpeg_idct_2x2 jRD2x2 -#define jpeg_idct_1x1 jRD1x1 -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - -/* Extern declarations for the forward and inverse DCT routines. */ - -EXTERN(void) jpeg_fdct_islow JPP((DCTELEM * data)); -EXTERN(void) jpeg_fdct_ifast JPP((DCTELEM * data)); -EXTERN(void) jpeg_fdct_float JPP((FAST_FLOAT * data)); - -EXTERN(void) jpeg_idct_islow - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); -EXTERN(void) jpeg_idct_ifast - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); -EXTERN(void) jpeg_idct_float - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); -EXTERN(void) jpeg_idct_4x4 - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); -EXTERN(void) jpeg_idct_2x2 - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); -EXTERN(void) jpeg_idct_1x1 - JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); - - -/* - * Macros for handling fixed-point arithmetic; these are used by many - * but not all of the DCT/IDCT modules. - * - * All values are expected to be of type INT32. - * Fractional constants are scaled left by CONST_BITS bits. - * CONST_BITS is defined within each module using these macros, - * and may differ from one module to the next. - */ - -#define ONE ((INT32) 1) -#define CONST_SCALE (ONE << CONST_BITS) - -/* Convert a positive real constant to an integer scaled by CONST_SCALE. - * Caution: some C compilers fail to reduce "FIX(constant)" at compile time, - * thus causing a lot of useless floating-point operations at run time. - */ - -#define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5)) - -/* Descale and correctly round an INT32 value that's scaled by N bits. - * We assume RIGHT_SHIFT rounds towards minus infinity, so adding - * the fudge factor is correct for either sign of X. - */ - -#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) - -/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. - * This macro is used only when the two inputs will actually be no more than - * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a - * full 32x32 multiply. This provides a useful speedup on many machines. - * Unfortunately there is no way to specify a 16x16->32 multiply portably - * in C, but some C compilers will do the right thing if you provide the - * correct combination of casts. - */ - -#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ -#define MULTIPLY16C16(var,const) (((INT16) (var)) * ((INT16) (const))) -#endif -#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ -#define MULTIPLY16C16(var,const) (((INT16) (var)) * ((INT32) (const))) -#endif - -#ifndef MULTIPLY16C16 /* default definition */ -#define MULTIPLY16C16(var,const) ((var) * (const)) -#endif - -/* Same except both inputs are variables. */ - -#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ -#define MULTIPLY16V16(var1,var2) (((INT16) (var1)) * ((INT16) (var2))) -#endif - -#ifndef MULTIPLY16V16 /* default definition */ -#define MULTIPLY16V16(var1,var2) ((var1) * (var2)) -#endif diff --git a/code/jpeg-6b/jdhuff.c b/code/jpeg-6b/jdhuff.c deleted file mode 100644 index b5ba39f7..00000000 --- a/code/jpeg-6b/jdhuff.c +++ /dev/null @@ -1,651 +0,0 @@ -/* - * jdhuff.c - * - * Copyright (C) 1991-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains Huffman entropy decoding routines. - * - * Much of the complexity here has to do with supporting input suspension. - * If the data source module demands suspension, we want to be able to back - * up to the start of the current MCU. To do this, we copy state variables - * into local working storage, and update them back to the permanent - * storage only upon successful completion of an MCU. - */ - -#define JPEG_INTERNALS -#include "jinclude.h" -#include "jpeglib.h" -#include "jdhuff.h" /* Declarations shared with jdphuff.c */ - - -/* - * Expanded entropy decoder object for Huffman decoding. - * - * The savable_state subrecord contains fields that change within an MCU, - * but must not be updated permanently until we complete the MCU. - */ - -typedef struct { - int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ -} savable_state; - -/* This macro is to work around compilers with missing or broken - * structure assignment. You'll need to fix this code if you have - * such a compiler and you change MAX_COMPS_IN_SCAN. - */ - -#ifndef NO_STRUCT_ASSIGN -#define ASSIGN_STATE(dest,src) ((dest) = (src)) -#else -#if MAX_COMPS_IN_SCAN == 4 -#define ASSIGN_STATE(dest,src) \ - ((dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) -#endif -#endif - - -typedef struct { - struct jpeg_entropy_decoder pub; /* public fields */ - - /* These fields are loaded into local variables at start of each MCU. - * In case of suspension, we exit WITHOUT updating them. - */ - bitread_perm_state bitstate; /* Bit buffer at start of MCU */ - savable_state saved; /* Other state at start of MCU */ - - /* These fields are NOT loaded into local working state. */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ - - /* Pointers to derived tables (these workspaces have image lifespan) */ - d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS]; - d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS]; - - /* Precalculated info set up by start_pass for use in decode_mcu: */ - - /* Pointers to derived tables to be used for each block within an MCU */ - d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU]; - d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU]; - /* Whether we care about the DC and AC coefficient values for each block */ - boolean dc_needed[D_MAX_BLOCKS_IN_MCU]; - boolean ac_needed[D_MAX_BLOCKS_IN_MCU]; -} huff_entropy_decoder; - -typedef huff_entropy_decoder * huff_entropy_ptr; - - -/* - * Initialize for a Huffman-compressed scan. - */ - -METHODDEF(void) -start_pass_huff_decoder (j_decompress_ptr cinfo) -{ - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; - int ci, blkn, dctbl, actbl; - jpeg_component_info * compptr; - - /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG. - * This ought to be an error condition, but we make it a warning because - * there are some baseline files out there with all zeroes in these bytes. - */ - if (cinfo->Ss != 0 || cinfo->Se != DCTSIZE2-1 || - cinfo->Ah != 0 || cinfo->Al != 0) - WARNMS(cinfo, JWRN_NOT_SEQUENTIAL); - - for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - dctbl = compptr->dc_tbl_no; - actbl = compptr->ac_tbl_no; - /* Compute derived values for Huffman tables */ - /* We may do this more than once for a table, but it's not expensive */ - jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl, - & entropy->dc_derived_tbls[dctbl]); - jpeg_make_d_derived_tbl(cinfo, FALSE, actbl, - & entropy->ac_derived_tbls[actbl]); - /* Initialize DC predictions to 0 */ - entropy->saved.last_dc_val[ci] = 0; - } - - /* Precalculate decoding info for each block in an MCU of this scan */ - for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - ci = cinfo->MCU_membership[blkn]; - compptr = cinfo->cur_comp_info[ci]; - /* Precalculate which table to use for each block */ - entropy->dc_cur_tbls[blkn] = entropy->dc_derived_tbls[compptr->dc_tbl_no]; - entropy->ac_cur_tbls[blkn] = entropy->ac_derived_tbls[compptr->ac_tbl_no]; - /* Decide whether we really care about the coefficient values */ - if (compptr->component_needed) { - entropy->dc_needed[blkn] = TRUE; - /* we don't need the ACs if producing a 1/8th-size image */ - entropy->ac_needed[blkn] = (compptr->DCT_scaled_size > 1); - } else { - entropy->dc_needed[blkn] = entropy->ac_needed[blkn] = FALSE; - } - } - - /* Initialize bitread state variables */ - entropy->bitstate.bits_left = 0; - entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */ - entropy->pub.insufficient_data = FALSE; - - /* Initialize restart counter */ - entropy->restarts_to_go = cinfo->restart_interval; -} - - -/* - * Compute the derived values for a Huffman table. - * This routine also performs some validation checks on the table. - * - * Note this is also used by jdphuff.c. - */ - -GLOBAL(void) -jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, - d_derived_tbl ** pdtbl) -{ - JHUFF_TBL *htbl; - d_derived_tbl *dtbl; - int p, i, l, si, numsymbols; - int lookbits, ctr; - char huffsize[257]; - unsigned int huffcode[257]; - unsigned int code; - - /* Note that huffsize[] and huffcode[] are filled in code-length order, - * paralleling the order of the symbols themselves in htbl->huffval[]. - */ - - /* Find the input Huffman table */ - if (tblno < 0 || tblno >= NUM_HUFF_TBLS) - ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); - htbl = - isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno]; - if (htbl == NULL) - ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); - - /* Allocate a workspace if we haven't already done so. */ - if (*pdtbl == NULL) - *pdtbl = (d_derived_tbl *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(d_derived_tbl)); - dtbl = *pdtbl; - dtbl->pub = htbl; /* fill in back link */ - - /* Figure C.1: make table of Huffman code length for each symbol */ - - p = 0; - for (l = 1; l <= 16; l++) { - i = (int) htbl->bits[l]; - if (i < 0 || p + i > 256) /* protect against table overrun */ - ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); - while (i--) - huffsize[p++] = (char) l; - } - huffsize[p] = 0; - numsymbols = p; - - /* Figure C.2: generate the codes themselves */ - /* We also validate that the counts represent a legal Huffman code tree. */ - - code = 0; - si = huffsize[0]; - p = 0; - while (huffsize[p]) { - while (((int) huffsize[p]) == si) { - huffcode[p++] = code; - code++; - } - /* code is now 1 more than the last code used for codelength si; but - * it must still fit in si bits, since no code is allowed to be all ones. - */ - if (((INT32) code) >= (((INT32) 1) << si)) - ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); - code <<= 1; - si++; - } - - /* Figure F.15: generate decoding tables for bit-sequential decoding */ - - p = 0; - for (l = 1; l <= 16; l++) { - if (htbl->bits[l]) { - /* valoffset[l] = huffval[] index of 1st symbol of code length l, - * minus the minimum code of length l - */ - dtbl->valoffset[l] = (INT32) p - (INT32) huffcode[p]; - p += htbl->bits[l]; - dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */ - } else { - dtbl->maxcode[l] = -1; /* -1 if no codes of this length */ - } - } - dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */ - - /* Compute lookahead tables to speed up decoding. - * First we set all the table entries to 0, indicating "too long"; - * then we iterate through the Huffman codes that are short enough and - * fill in all the entries that correspond to bit sequences starting - * with that code. - */ - - MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits)); - - p = 0; - for (l = 1; l <= HUFF_LOOKAHEAD; l++) { - for (i = 1; i <= (int) htbl->bits[l]; i++, p++) { - /* l = current code's length, p = its index in huffcode[] & huffval[]. */ - /* Generate left-justified code followed by all possible bit sequences */ - lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l); - for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) { - dtbl->look_nbits[lookbits] = l; - dtbl->look_sym[lookbits] = htbl->huffval[p]; - lookbits++; - } - } - } - - /* Validate symbols as being reasonable. - * For AC tables, we make no check, but accept all byte values 0..255. - * For DC tables, we require the symbols to be in range 0..15. - * (Tighter bounds could be applied depending on the data depth and mode, - * but this is sufficient to ensure safe decoding.) - */ - if (isDC) { - for (i = 0; i < numsymbols; i++) { - int sym = htbl->huffval[i]; - if (sym < 0 || sym > 15) - ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); - } - } -} - - -/* - * Out-of-line code for bit fetching (shared with jdphuff.c). - * See jdhuff.h for info about usage. - * Note: current values of get_buffer and bits_left are passed as parameters, - * but are returned in the corresponding fields of the state struct. - * - * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width - * of get_buffer to be used. (On machines with wider words, an even larger - * buffer could be used.) However, on some machines 32-bit shifts are - * quite slow and take time proportional to the number of places shifted. - * (This is true with most PC compilers, for instance.) In this case it may - * be a win to set MIN_GET_BITS to the minimum value of 15. This reduces the - * average shift distance at the cost of more calls to jpeg_fill_bit_buffer. - */ - -#ifdef SLOW_SHIFT_32 -#define MIN_GET_BITS 15 /* minimum allowable value */ -#else -#define MIN_GET_BITS (BIT_BUF_SIZE-7) -#endif - - -GLOBAL(boolean) -jpeg_fill_bit_buffer (bitread_working_state * state, - register bit_buf_type get_buffer, register int bits_left, - int nbits) -/* Load up the bit buffer to a depth of at least nbits */ -{ - /* Copy heavily used state fields into locals (hopefully registers) */ - register const JOCTET * next_input_byte = state->next_input_byte; - register size_t bytes_in_buffer = state->bytes_in_buffer; - j_decompress_ptr cinfo = state->cinfo; - - /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */ - /* (It is assumed that no request will be for more than that many bits.) */ - /* We fail to do so only if we hit a marker or are forced to suspend. */ - - if (cinfo->unread_marker == 0) { /* cannot advance past a marker */ - while (bits_left < MIN_GET_BITS) { - register int c; - - /* Attempt to read a byte */ - if (bytes_in_buffer == 0) { - if (! (*cinfo->src->fill_input_buffer) (cinfo)) - return FALSE; - next_input_byte = cinfo->src->next_input_byte; - bytes_in_buffer = cinfo->src->bytes_in_buffer; - } - bytes_in_buffer--; - c = GETJOCTET(*next_input_byte++); - - /* If it's 0xFF, check and discard stuffed zero byte */ - if (c == 0xFF) { - /* Loop here to discard any padding FF's on terminating marker, - * so that we can save a valid unread_marker value. NOTE: we will - * accept multiple FF's followed by a 0 as meaning a single FF data - * byte. This data pattern is not valid according to the standard. - */ - do { - if (bytes_in_buffer == 0) { - if (! (*cinfo->src->fill_input_buffer) (cinfo)) - return FALSE; - next_input_byte = cinfo->src->next_input_byte; - bytes_in_buffer = cinfo->src->bytes_in_buffer; - } - bytes_in_buffer--; - c = GETJOCTET(*next_input_byte++); - } while (c == 0xFF); - - if (c == 0) { - /* Found FF/00, which represents an FF data byte */ - c = 0xFF; - } else { - /* Oops, it's actually a marker indicating end of compressed data. - * Save the marker code for later use. - * Fine point: it might appear that we should save the marker into - * bitread working state, not straight into permanent state. But - * once we have hit a marker, we cannot need to suspend within the - * current MCU, because we will read no more bytes from the data - * source. So it is OK to update permanent state right away. - */ - cinfo->unread_marker = c; - /* See if we need to insert some fake zero bits. */ - goto no_more_bytes; - } - } - - /* OK, load c into get_buffer */ - get_buffer = (get_buffer << 8) | c; - bits_left += 8; - } /* end while */ - } else { - no_more_bytes: - /* We get here if we've read the marker that terminates the compressed - * data segment. There should be enough bits in the buffer register - * to satisfy the request; if so, no problem. - */ - if (nbits > bits_left) { - /* Uh-oh. Report corrupted data to user and stuff zeroes into - * the data stream, so that we can produce some kind of image. - * We use a nonvolatile flag to ensure that only one warning message - * appears per data segment. - */ - if (! cinfo->entropy->insufficient_data) { - WARNMS(cinfo, JWRN_HIT_MARKER); - cinfo->entropy->insufficient_data = TRUE; - } - /* Fill the buffer with zero bits */ - get_buffer <<= MIN_GET_BITS - bits_left; - bits_left = MIN_GET_BITS; - } - } - - /* Unload the local registers */ - state->next_input_byte = next_input_byte; - state->bytes_in_buffer = bytes_in_buffer; - state->get_buffer = get_buffer; - state->bits_left = bits_left; - - return TRUE; -} - - -/* - * Out-of-line code for Huffman code decoding. - * See jdhuff.h for info about usage. - */ - -GLOBAL(int) -jpeg_huff_decode (bitread_working_state * state, - register bit_buf_type get_buffer, register int bits_left, - d_derived_tbl * htbl, int min_bits) -{ - register int l = min_bits; - register INT32 code; - - /* HUFF_DECODE has determined that the code is at least min_bits */ - /* bits long, so fetch that many bits in one swoop. */ - - CHECK_BIT_BUFFER(*state, l, return -1); - code = GET_BITS(l); - - /* Collect the rest of the Huffman code one bit at a time. */ - /* This is per Figure F.16 in the JPEG spec. */ - - while (code > htbl->maxcode[l]) { - code <<= 1; - CHECK_BIT_BUFFER(*state, 1, return -1); - code |= GET_BITS(1); - l++; - } - - /* Unload the local registers */ - state->get_buffer = get_buffer; - state->bits_left = bits_left; - - /* With garbage input we may reach the sentinel value l = 17. */ - - if (l > 16) { - WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE); - return 0; /* fake a zero as the safest result */ - } - - return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ]; -} - - -/* - * Figure F.12: extend sign bit. - * On some machines, a shift and add will be faster than a table lookup. - */ - -#ifdef AVOID_TABLES - -#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x)) - -#else - -#define HUFF_EXTEND(x,s) ((x) < extend_test[s] ? (x) + extend_offset[s] : (x)) - -static const int extend_test[16] = /* entry n is 2**(n-1) */ - { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, - 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 }; - -static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */ - { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, - ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, - ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, - ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 }; - -#endif /* AVOID_TABLES */ - - -/* - * Check for a restart marker & resynchronize decoder. - * Returns FALSE if must suspend. - */ - -LOCAL(boolean) -process_restart (j_decompress_ptr cinfo) -{ - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; - int ci; - - /* Throw away any unused bits remaining in bit buffer; */ - /* include any full bytes in next_marker's count of discarded bytes */ - cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8; - entropy->bitstate.bits_left = 0; - - /* Advance past the RSTn marker */ - if (! (*cinfo->marker->read_restart_marker) (cinfo)) - return FALSE; - - /* Re-initialize DC predictions to 0 */ - for (ci = 0; ci < cinfo->comps_in_scan; ci++) - entropy->saved.last_dc_val[ci] = 0; - - /* Reset restart counter */ - entropy->restarts_to_go = cinfo->restart_interval; - - /* Reset out-of-data flag, unless read_restart_marker left us smack up - * against a marker. In that case we will end up treating the next data - * segment as empty, and we can avoid producing bogus output pixels by - * leaving the flag set. - */ - if (cinfo->unread_marker == 0) - entropy->pub.insufficient_data = FALSE; - - return TRUE; -} - - -/* - * Decode and return one MCU's worth of Huffman-compressed coefficients. - * The coefficients are reordered from zigzag order into natural array order, - * but are not dequantized. - * - * The i'th block of the MCU is stored into the block pointed to by - * MCU_data[i]. WE ASSUME THIS AREA HAS BEEN ZEROED BY THE CALLER. - * (Wholesale zeroing is usually a little faster than retail...) - * - * Returns FALSE if data source requested suspension. In that case no - * changes have been made to permanent state. (Exception: some output - * coefficients may already have been assigned. This is harmless for - * this module, since we'll just re-assign them on the next call.) - */ - -METHODDEF(boolean) -decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ - huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; - int blkn; - BITREAD_STATE_VARS; - savable_state state; - - /* Process restart marker if needed; may have to suspend */ - if (cinfo->restart_interval) { - if (entropy->restarts_to_go == 0) - if (! process_restart(cinfo)) - return FALSE; - } - - /* If we've run out of data, just leave the MCU set to zeroes. - * This way, we return uniform gray for the remainder of the segment. - */ - if (! entropy->pub.insufficient_data) { - - /* Load up working state */ - BITREAD_LOAD_STATE(cinfo,entropy->bitstate); - ASSIGN_STATE(state, entropy->saved); - - /* Outer loop handles each block in the MCU */ - - for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - JBLOCKROW block = MCU_data[blkn]; - d_derived_tbl * dctbl = entropy->dc_cur_tbls[blkn]; - d_derived_tbl * actbl = entropy->ac_cur_tbls[blkn]; - register int s, k, r; - - /* Decode a single block's worth of coefficients */ - - /* Section F.2.2.1: decode the DC coefficient difference */ - HUFF_DECODE(s, br_state, dctbl, return FALSE, label1); - if (s) { - CHECK_BIT_BUFFER(br_state, s, return FALSE); - r = GET_BITS(s); - s = HUFF_EXTEND(r, s); - } - - if (entropy->dc_needed[blkn]) { - /* Convert DC difference to actual value, update last_dc_val */ - int ci = cinfo->MCU_membership[blkn]; - s += state.last_dc_val[ci]; - state.last_dc_val[ci] = s; - /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */ - (*block)[0] = (JCOEF) s; - } - - if (entropy->ac_needed[blkn]) { - - /* Section F.2.2.2: decode the AC coefficients */ - /* Since zeroes are skipped, output area must be cleared beforehand */ - for (k = 1; k < DCTSIZE2; k++) { - HUFF_DECODE(s, br_state, actbl, return FALSE, label2); - - r = s >> 4; - s &= 15; - - if (s) { - k += r; - CHECK_BIT_BUFFER(br_state, s, return FALSE); - r = GET_BITS(s); - s = HUFF_EXTEND(r, s); - /* Output coefficient in natural (dezigzagged) order. - * Note: the extra entries in jpeg_natural_order[] will save us - * if k >= DCTSIZE2, which could happen if the data is corrupted. - */ - (*block)[jpeg_natural_order[k]] = (JCOEF) s; - } else { - if (r != 15) - break; - k += 15; - } - } - - } else { - - /* Section F.2.2.2: decode the AC coefficients */ - /* In this path we just discard the values */ - for (k = 1; k < DCTSIZE2; k++) { - HUFF_DECODE(s, br_state, actbl, return FALSE, label3); - - r = s >> 4; - s &= 15; - - if (s) { - k += r; - CHECK_BIT_BUFFER(br_state, s, return FALSE); - DROP_BITS(s); - } else { - if (r != 15) - break; - k += 15; - } - } - - } - } - - /* Completed MCU, so update state */ - BITREAD_SAVE_STATE(cinfo,entropy->bitstate); - ASSIGN_STATE(entropy->saved, state); - } - - /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; - - return TRUE; -} - - -/* - * Module initialization routine for Huffman entropy decoding. - */ - -GLOBAL(void) -jinit_huff_decoder (j_decompress_ptr cinfo) -{ - huff_entropy_ptr entropy; - int i; - - entropy = (huff_entropy_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(huff_entropy_decoder)); - cinfo->entropy = (struct jpeg_entropy_decoder *) entropy; - entropy->pub.start_pass = start_pass_huff_decoder; - entropy->pub.decode_mcu = decode_mcu; - - /* Mark tables unallocated */ - for (i = 0; i < NUM_HUFF_TBLS; i++) { - entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL; - } -} diff --git a/code/jpeg-6b/jdhuff.h b/code/jpeg-6b/jdhuff.h deleted file mode 100644 index ae19b6ca..00000000 --- a/code/jpeg-6b/jdhuff.h +++ /dev/null @@ -1,201 +0,0 @@ -/* - * jdhuff.h - * - * Copyright (C) 1991-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains declarations for Huffman entropy decoding routines - * that are shared between the sequential decoder (jdhuff.c) and the - * progressive decoder (jdphuff.c). No other modules need to see these. - */ - -/* Short forms of external names for systems with brain-damaged linkers. */ - -#ifdef NEED_SHORT_EXTERNAL_NAMES -#define jpeg_make_d_derived_tbl jMkDDerived -#define jpeg_fill_bit_buffer jFilBitBuf -#define jpeg_huff_decode jHufDecode -#endif /* NEED_SHORT_EXTERNAL_NAMES */ - - -/* Derived data constructed for each Huffman table */ - -#define HUFF_LOOKAHEAD 8 /* # of bits of lookahead */ - -typedef struct { - /* Basic tables: (element [0] of each array is unused) */ - INT32 maxcode[18]; /* largest code of length k (-1 if none) */ - /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */ - INT32 valoffset[17]; /* huffval[] offset for codes of length k */ - /* valoffset[k] = huffval[] index of 1st symbol of code length k, less - * the smallest code of length k; so given a code of length k, the - * corresponding symbol is huffval[code + valoffset[k]] - */ - - /* Link to public Huffman table (needed only in jpeg_huff_decode) */ - JHUFF_TBL *pub; - - /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of - * the input data stream. If the next Huffman code is no more - * than HUFF_LOOKAHEAD bits long, we can obtain its length and - * the corresponding symbol directly from these tables. - */ - int look_nbits[1< 32 bits on your machine, and shifting/masking longs is - * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE - * appropriately should be a win. Unfortunately we can't define the size - * with something like #define BIT_BUF_SIZE (sizeof(bit_buf_type)*8) - * because not all machines measure sizeof in 8-bit bytes. - */ - -typedef struct { /* Bitreading state saved across MCUs */ - bit_buf_type get_buffer; /* current bit-extraction buffer */ - int bits_left; /* # of unused bits in it */ -} bitread_perm_state; - -typedef struct { /* Bitreading working state within an MCU */ - /* Current data source location */ - /* We need a copy, rather than munging the original, in case of suspension */ - const JOCTET * next_input_byte; /* => next byte to read from source */ - size_t bytes_in_buffer; /* # of bytes remaining in source buffer */ - /* Bit input buffer --- note these values are kept in register variables, - * not in this struct, inside the inner loops. - */ - bit_buf_type get_buffer; /* current bit-extraction buffer */ - int bits_left; /* # of unused bits in it */ - /* Pointer needed by jpeg_fill_bit_buffer. */ - j_decompress_ptr cinfo; /* back link to decompress master record */ -} bitread_working_state; - -/* Macros to declare and load/save bitread local variables. */ -#define BITREAD_STATE_VARS \ - register bit_buf_type get_buffer; \ - register int bits_left; \ - bitread_working_state br_state - -#define BITREAD_LOAD_STATE(cinfop,permstate) \ - br_state.cinfo = cinfop; \ - br_state.next_input_byte = cinfop->src->next_input_byte; \ - br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \ - get_buffer = permstate.get_buffer; \ - bits_left = permstate.bits_left; - -#define BITREAD_SAVE_STATE(cinfop,permstate) \ - cinfop->src->next_input_byte = br_state.next_input_byte; \ - cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \ - permstate.get_buffer = get_buffer; \ - permstate.bits_left = bits_left - -/* - * These macros provide the in-line portion of bit fetching. - * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer - * before using GET_BITS, PEEK_BITS, or DROP_BITS. - * The variables get_buffer and bits_left are assumed to be locals, - * but the state struct might not be (jpeg_huff_decode needs this). - * CHECK_BIT_BUFFER(state,n,action); - * Ensure there are N bits in get_buffer; if suspend, take action. - * val = GET_BITS(n); - * Fetch next N bits. - * val = PEEK_BITS(n); - * Fetch next N bits without removing them from the buffer. - * DROP_BITS(n); - * Discard next N bits. - * The value N should be a simple variable, not an expression, because it - * is evaluated multiple times. - */ - -#define CHECK_BIT_BUFFER(state,nbits,action) \ - { if (bits_left < (nbits)) { \ - if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits)) \ - { action; } \ - get_buffer = (state).get_buffer; bits_left = (state).bits_left; } } - -#define GET_BITS(nbits) \ - (((int) (get_buffer >> (bits_left -= (nbits)))) & ((1<<(nbits))-1)) - -#define PEEK_BITS(nbits) \ - (((int) (get_buffer >> (bits_left - (nbits)))) & ((1<<(nbits))-1)) - -#define DROP_BITS(nbits) \ - (bits_left -= (nbits)) - -/* Load up the bit buffer to a depth of at least nbits */ -EXTERN(boolean) jpeg_fill_bit_buffer - JPP((bitread_working_state * state, register bit_buf_type get_buffer, - register int bits_left, int nbits)); - - -/* - * Code for extracting next Huffman-coded symbol from input bit stream. - * Again, this is time-critical and we make the main paths be macros. - * - * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits - * without looping. Usually, more than 95% of the Huffman codes will be 8 - * or fewer bits long. The few overlength codes are handled with a loop, - * which need not be inline code. - * - * Notes about the HUFF_DECODE macro: - * 1. Near the end of the data segment, we may fail to get enough bits - * for a lookahead. In that case, we do it the hard way. - * 2. If the lookahead table contains no entry, the next code must be - * more than HUFF_LOOKAHEAD bits long. - * 3. jpeg_huff_decode returns -1 if forced to suspend. - */ - -#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \ -{ register int nb, look; \ - if (bits_left < HUFF_LOOKAHEAD) { \ - if (! jpeg_fill_bit_buffer(&state,get_buffer,bits_left, 0)) {failaction;} \ - get_buffer = state.get_buffer; bits_left = state.bits_left; \ - if (bits_left < HUFF_LOOKAHEAD) { \ - nb = 1; goto slowlabel; \ - } \ - } \ - look = PEEK_BITS(HUFF_LOOKAHEAD); \ - if ((nb = htbl->look_nbits[look]) != 0) { \ - DROP_BITS(nb); \ - result = htbl->look_sym[look]; \ - } else { \ - nb = HUFF_LOOKAHEAD+1; \ -slowlabel: \ - if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \ - { failaction; } \ - get_buffer = state.get_buffer; bits_left = state.bits_left; \ - } \ -} - -/* Out-of-line case for Huffman code fetching */ -EXTERN(int) jpeg_huff_decode - JPP((bitread_working_state * state, register bit_buf_type get_buffer, - register int bits_left, d_derived_tbl * htbl, int min_bits)); diff --git a/code/jpeg-6b/jdinput.c b/code/jpeg-6b/jdinput.c deleted file mode 100644 index 0c2ac8f1..00000000 --- a/code/jpeg-6b/jdinput.c +++ /dev/null @@ -1,381 +0,0 @@ -/* - * jdinput.c - * - * Copyright (C) 1991-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains input control logic for the JPEG decompressor. - * These routines are concerned with controlling the decompressor's input - * processing (marker reading and coefficient decoding). The actual input - * reading is done in jdmarker.c, jdhuff.c, and jdphuff.c. - */ - -#define JPEG_INTERNALS -#include "jinclude.h" -#include "jpeglib.h" - - -/* Private state */ - -typedef struct { - struct jpeg_input_controller pub; /* public fields */ - - boolean inheaders; /* TRUE until first SOS is reached */ -} my_input_controller; - -typedef my_input_controller * my_inputctl_ptr; - - -/* Forward declarations */ -METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo)); - - -/* - * Routines to calculate various quantities related to the size of the image. - */ - -LOCAL(void) -initial_setup (j_decompress_ptr cinfo) -/* Called once, when first SOS marker is reached */ -{ - int ci; - jpeg_component_info *compptr; - - /* Make sure image isn't bigger than I can handle */ - if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION || - (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION) - ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION); - - /* For now, precision must match compiled-in value... */ - if (cinfo->data_precision != BITS_IN_JSAMPLE) - ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision); - - /* Check that number of components won't exceed internal array sizes */ - if (cinfo->num_components > MAX_COMPONENTS) - ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components, - MAX_COMPONENTS); - - /* Compute maximum sampling factors; check factor validity */ - cinfo->max_h_samp_factor = 1; - cinfo->max_v_samp_factor = 1; - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR || - compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR) - ERREXIT(cinfo, JERR_BAD_SAMPLING); - cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor, - compptr->h_samp_factor); - cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor, - compptr->v_samp_factor); - } - - /* We initialize DCT_scaled_size and min_DCT_scaled_size to DCTSIZE. - * In the full decompressor, this will be overridden by jdmaster.c; - * but in the transcoder, jdmaster.c is not used, so we must do it here. - */ - cinfo->min_DCT_scaled_size = DCTSIZE; - - /* Compute dimensions of components */ - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - compptr->DCT_scaled_size = DCTSIZE; - /* Size in DCT blocks */ - compptr->width_in_blocks = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, - (long) (cinfo->max_h_samp_factor * DCTSIZE)); - compptr->height_in_blocks = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, - (long) (cinfo->max_v_samp_factor * DCTSIZE)); - /* downsampled_width and downsampled_height will also be overridden by - * jdmaster.c if we are doing full decompression. The transcoder library - * doesn't use these values, but the calling application might. - */ - /* Size in samples */ - compptr->downsampled_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, - (long) cinfo->max_h_samp_factor); - compptr->downsampled_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, - (long) cinfo->max_v_samp_factor); - /* Mark component needed, until color conversion says otherwise */ - compptr->component_needed = TRUE; - /* Mark no quantization table yet saved for component */ - compptr->quant_table = NULL; - } - - /* Compute number of fully interleaved MCU rows. */ - cinfo->total_iMCU_rows = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); - - /* Decide whether file contains multiple scans */ - if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode) - cinfo->inputctl->has_multiple_scans = TRUE; - else - cinfo->inputctl->has_multiple_scans = FALSE; -} - - -LOCAL(void) -per_scan_setup (j_decompress_ptr cinfo) -/* Do computations that are needed before processing a JPEG scan */ -/* cinfo->comps_in_scan and cinfo->cur_comp_info[] were set from SOS marker */ -{ - int ci, mcublks, tmp; - jpeg_component_info *compptr; - - if (cinfo->comps_in_scan == 1) { - - /* Noninterleaved (single-component) scan */ - compptr = cinfo->cur_comp_info[0]; - - /* Overall image size in MCUs */ - cinfo->MCUs_per_row = compptr->width_in_blocks; - cinfo->MCU_rows_in_scan = compptr->height_in_blocks; - - /* For noninterleaved scan, always one block per MCU */ - compptr->MCU_width = 1; - compptr->MCU_height = 1; - compptr->MCU_blocks = 1; - compptr->MCU_sample_width = compptr->DCT_scaled_size; - compptr->last_col_width = 1; - /* For noninterleaved scans, it is convenient to define last_row_height - * as the number of block rows present in the last iMCU row. - */ - tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor); - if (tmp == 0) tmp = compptr->v_samp_factor; - compptr->last_row_height = tmp; - - /* Prepare array describing MCU composition */ - cinfo->blocks_in_MCU = 1; - cinfo->MCU_membership[0] = 0; - - } else { - - /* Interleaved (multi-component) scan */ - if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN) - ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan, - MAX_COMPS_IN_SCAN); - - /* Overall image size in MCUs */ - cinfo->MCUs_per_row = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, - (long) (cinfo->max_h_samp_factor*DCTSIZE)); - cinfo->MCU_rows_in_scan = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); - - cinfo->blocks_in_MCU = 0; - - for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - /* Sampling factors give # of blocks of component in each MCU */ - compptr->MCU_width = compptr->h_samp_factor; - compptr->MCU_height = compptr->v_samp_factor; - compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height; - compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_scaled_size; - /* Figure number of non-dummy blocks in last MCU column & row */ - tmp = (int) (compptr->width_in_blocks % compptr->MCU_width); - if (tmp == 0) tmp = compptr->MCU_width; - compptr->last_col_width = tmp; - tmp = (int) (compptr->height_in_blocks % compptr->MCU_height); - if (tmp == 0) tmp = compptr->MCU_height; - compptr->last_row_height = tmp; - /* Prepare array describing MCU composition */ - mcublks = compptr->MCU_blocks; - if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU) - ERREXIT(cinfo, JERR_BAD_MCU_SIZE); - while (mcublks-- > 0) { - cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci; - } - } - - } -} - - -/* - * Save away a copy of the Q-table referenced by each component present - * in the current scan, unless already saved during a prior scan. - * - * In a multiple-scan JPEG file, the encoder could assign different components - * the same Q-table slot number, but change table definitions between scans - * so that each component uses a different Q-table. (The IJG encoder is not - * currently capable of doing this, but other encoders might.) Since we want - * to be able to dequantize all the components at the end of the file, this - * means that we have to save away the table actually used for each component. - * We do this by copying the table at the start of the first scan containing - * the component. - * The JPEG spec prohibits the encoder from changing the contents of a Q-table - * slot between scans of a component using that slot. If the encoder does so - * anyway, this decoder will simply use the Q-table values that were current - * at the start of the first scan for the component. - * - * The decompressor output side looks only at the saved quant tables, - * not at the current Q-table slots. - */ - -LOCAL(void) -latch_quant_tables (j_decompress_ptr cinfo) -{ - int ci, qtblno; - jpeg_component_info *compptr; - JQUANT_TBL * qtbl; - - for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - /* No work if we already saved Q-table for this component */ - if (compptr->quant_table != NULL) - continue; - /* Make sure specified quantization table is present */ - qtblno = compptr->quant_tbl_no; - if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || - cinfo->quant_tbl_ptrs[qtblno] == NULL) - ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); - /* OK, save away the quantization table */ - qtbl = (JQUANT_TBL *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(JQUANT_TBL)); - MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], SIZEOF(JQUANT_TBL)); - compptr->quant_table = qtbl; - } -} - - -/* - * Initialize the input modules to read a scan of compressed data. - * The first call to this is done by jdmaster.c after initializing - * the entire decompressor (during jpeg_start_decompress). - * Subsequent calls come from consume_markers, below. - */ - -METHODDEF(void) -start_input_pass (j_decompress_ptr cinfo) -{ - per_scan_setup(cinfo); - latch_quant_tables(cinfo); - (*cinfo->entropy->start_pass) (cinfo); - (*cinfo->coef->start_input_pass) (cinfo); - cinfo->inputctl->consume_input = cinfo->coef->consume_data; -} - - -/* - * Finish up after inputting a compressed-data scan. - * This is called by the coefficient controller after it's read all - * the expected data of the scan. - */ - -METHODDEF(void) -finish_input_pass (j_decompress_ptr cinfo) -{ - cinfo->inputctl->consume_input = consume_markers; -} - - -/* - * Read JPEG markers before, between, or after compressed-data scans. - * Change state as necessary when a new scan is reached. - * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI. - * - * The consume_input method pointer points either here or to the - * coefficient controller's consume_data routine, depending on whether - * we are reading a compressed data segment or inter-segment markers. - */ - -METHODDEF(int) -consume_markers (j_decompress_ptr cinfo) -{ - my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl; - int val; - - if (inputctl->pub.eoi_reached) /* After hitting EOI, read no further */ - return JPEG_REACHED_EOI; - - val = (*cinfo->marker->read_markers) (cinfo); - - switch (val) { - case JPEG_REACHED_SOS: /* Found SOS */ - if (inputctl->inheaders) { /* 1st SOS */ - initial_setup(cinfo); - inputctl->inheaders = FALSE; - /* Note: start_input_pass must be called by jdmaster.c - * before any more input can be consumed. jdapimin.c is - * responsible for enforcing this sequencing. - */ - } else { /* 2nd or later SOS marker */ - if (! inputctl->pub.has_multiple_scans) - ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */ - start_input_pass(cinfo); - } - break; - case JPEG_REACHED_EOI: /* Found EOI */ - inputctl->pub.eoi_reached = TRUE; - if (inputctl->inheaders) { /* Tables-only datastream, apparently */ - if (cinfo->marker->saw_SOF) - ERREXIT(cinfo, JERR_SOF_NO_SOS); - } else { - /* Prevent infinite loop in coef ctlr's decompress_data routine - * if user set output_scan_number larger than number of scans. - */ - if (cinfo->output_scan_number > cinfo->input_scan_number) - cinfo->output_scan_number = cinfo->input_scan_number; - } - break; - case JPEG_SUSPENDED: - break; - } - - return val; -} - - -/* - * Reset state to begin a fresh datastream. - */ - -METHODDEF(void) -reset_input_controller (j_decompress_ptr cinfo) -{ - my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl; - - inputctl->pub.consume_input = consume_markers; - inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */ - inputctl->pub.eoi_reached = FALSE; - inputctl->inheaders = TRUE; - /* Reset other modules */ - (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo); - (*cinfo->marker->reset_marker_reader) (cinfo); - /* Reset progression state -- would be cleaner if entropy decoder did this */ - cinfo->coef_bits = NULL; -} - - -/* - * Initialize the input controller module. - * This is called only once, when the decompression object is created. - */ - -GLOBAL(void) -jinit_input_controller (j_decompress_ptr cinfo) -{ - my_inputctl_ptr inputctl; - - /* Create subobject in permanent pool */ - inputctl = (my_inputctl_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, - SIZEOF(my_input_controller)); - cinfo->inputctl = (struct jpeg_input_controller *) inputctl; - /* Initialize method pointers */ - inputctl->pub.consume_input = consume_markers; - inputctl->pub.reset_input_controller = reset_input_controller; - inputctl->pub.start_input_pass = start_input_pass; - inputctl->pub.finish_input_pass = finish_input_pass; - /* Initialize state: can't use reset_input_controller since we don't - * want to try to reset other modules yet. - */ - inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */ - inputctl->pub.eoi_reached = FALSE; - inputctl->inheaders = TRUE; -} diff --git a/code/jpeg-6b/jdphuff.c b/code/jpeg-6b/jdphuff.c deleted file mode 100644 index 22678099..00000000 --- a/code/jpeg-6b/jdphuff.c +++ /dev/null @@ -1,668 +0,0 @@ -/* - * jdphuff.c - * - * Copyright (C) 1995-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains Huffman entropy decoding routines for progressive JPEG. - * - * Much of the complexity here has to do with supporting input suspension. - * If the data source module demands suspension, we want to be able to back - * up to the start of the current MCU. To do this, we copy state variables - * into local working storage, and update them back to the permanent - * storage only upon successful completion of an MCU. - */ - -#define JPEG_INTERNALS -#include "jinclude.h" -#include "jpeglib.h" -#include "jdhuff.h" /* Declarations shared with jdhuff.c */ - - -#ifdef D_PROGRESSIVE_SUPPORTED - -/* - * Expanded entropy decoder object for progressive Huffman decoding. - * - * The savable_state subrecord contains fields that change within an MCU, - * but must not be updated permanently until we complete the MCU. - */ - -typedef struct { - unsigned int EOBRUN; /* remaining EOBs in EOBRUN */ - int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ -} savable_state; - -/* This macro is to work around compilers with missing or broken - * structure assignment. You'll need to fix this code if you have - * such a compiler and you change MAX_COMPS_IN_SCAN. - */ - -#ifndef NO_STRUCT_ASSIGN -#define ASSIGN_STATE(dest,src) ((dest) = (src)) -#else -#if MAX_COMPS_IN_SCAN == 4 -#define ASSIGN_STATE(dest,src) \ - ((dest).EOBRUN = (src).EOBRUN, \ - (dest).last_dc_val[0] = (src).last_dc_val[0], \ - (dest).last_dc_val[1] = (src).last_dc_val[1], \ - (dest).last_dc_val[2] = (src).last_dc_val[2], \ - (dest).last_dc_val[3] = (src).last_dc_val[3]) -#endif -#endif - - -typedef struct { - struct jpeg_entropy_decoder pub; /* public fields */ - - /* These fields are loaded into local variables at start of each MCU. - * In case of suspension, we exit WITHOUT updating them. - */ - bitread_perm_state bitstate; /* Bit buffer at start of MCU */ - savable_state saved; /* Other state at start of MCU */ - - /* These fields are NOT loaded into local working state. */ - unsigned int restarts_to_go; /* MCUs left in this restart interval */ - - /* Pointers to derived tables (these workspaces have image lifespan) */ - d_derived_tbl * derived_tbls[NUM_HUFF_TBLS]; - - d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */ -} phuff_entropy_decoder; - -typedef phuff_entropy_decoder * phuff_entropy_ptr; - -/* Forward declarations */ -METHODDEF(boolean) decode_mcu_DC_first JPP((j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) decode_mcu_AC_first JPP((j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) decode_mcu_DC_refine JPP((j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); -METHODDEF(boolean) decode_mcu_AC_refine JPP((j_decompress_ptr cinfo, - JBLOCKROW *MCU_data)); - - -/* - * Initialize for a Huffman-compressed scan. - */ - -METHODDEF(void) -start_pass_phuff_decoder (j_decompress_ptr cinfo) -{ - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - boolean is_DC_band, bad; - int ci, coefi, tbl; - int *coef_bit_ptr; - jpeg_component_info * compptr; - - is_DC_band = (cinfo->Ss == 0); - - /* Validate scan parameters */ - bad = FALSE; - if (is_DC_band) { - if (cinfo->Se != 0) - bad = TRUE; - } else { - /* need not check Ss/Se < 0 since they came from unsigned bytes */ - if (cinfo->Ss > cinfo->Se || cinfo->Se >= DCTSIZE2) - bad = TRUE; - /* AC scans may have only one component */ - if (cinfo->comps_in_scan != 1) - bad = TRUE; - } - if (cinfo->Ah != 0) { - /* Successive approximation refinement scan: must have Al = Ah-1. */ - if (cinfo->Al != cinfo->Ah-1) - bad = TRUE; - } - if (cinfo->Al > 13) /* need not check for < 0 */ - bad = TRUE; - /* Arguably the maximum Al value should be less than 13 for 8-bit precision, - * but the spec doesn't say so, and we try to be liberal about what we - * accept. Note: large Al values could result in out-of-range DC - * coefficients during early scans, leading to bizarre displays due to - * overflows in the IDCT math. But we won't crash. - */ - if (bad) - ERREXIT4(cinfo, JERR_BAD_PROGRESSION, - cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al); - /* Update progression status, and verify that scan order is legal. - * Note that inter-scan inconsistencies are treated as warnings - * not fatal errors ... not clear if this is right way to behave. - */ - for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - int cindex = cinfo->cur_comp_info[ci]->component_index; - coef_bit_ptr = & cinfo->coef_bits[cindex][0]; - if (!is_DC_band && coef_bit_ptr[0] < 0) /* AC without prior DC scan */ - WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0); - for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) { - int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi]; - if (cinfo->Ah != expected) - WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi); - coef_bit_ptr[coefi] = cinfo->Al; - } - } - - /* Select MCU decoding routine */ - if (cinfo->Ah == 0) { - if (is_DC_band) - entropy->pub.decode_mcu = decode_mcu_DC_first; - else - entropy->pub.decode_mcu = decode_mcu_AC_first; - } else { - if (is_DC_band) - entropy->pub.decode_mcu = decode_mcu_DC_refine; - else - entropy->pub.decode_mcu = decode_mcu_AC_refine; - } - - for (ci = 0; ci < cinfo->comps_in_scan; ci++) { - compptr = cinfo->cur_comp_info[ci]; - /* Make sure requested tables are present, and compute derived tables. - * We may build same derived table more than once, but it's not expensive. - */ - if (is_DC_band) { - if (cinfo->Ah == 0) { /* DC refinement needs no table */ - tbl = compptr->dc_tbl_no; - jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, - & entropy->derived_tbls[tbl]); - } - } else { - tbl = compptr->ac_tbl_no; - jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, - & entropy->derived_tbls[tbl]); - /* remember the single active table */ - entropy->ac_derived_tbl = entropy->derived_tbls[tbl]; - } - /* Initialize DC predictions to 0 */ - entropy->saved.last_dc_val[ci] = 0; - } - - /* Initialize bitread state variables */ - entropy->bitstate.bits_left = 0; - entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */ - entropy->pub.insufficient_data = FALSE; - - /* Initialize private state variables */ - entropy->saved.EOBRUN = 0; - - /* Initialize restart counter */ - entropy->restarts_to_go = cinfo->restart_interval; -} - - -/* - * Figure F.12: extend sign bit. - * On some machines, a shift and add will be faster than a table lookup. - */ - -#ifdef AVOID_TABLES - -#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) + (((-1)<<(s)) + 1) : (x)) - -#else - -#define HUFF_EXTEND(x,s) ((x) < extend_test[s] ? (x) + extend_offset[s] : (x)) - -static const int extend_test[16] = /* entry n is 2**(n-1) */ - { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, - 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 }; - -static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */ - { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, - ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, - ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, - ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 }; - -#endif /* AVOID_TABLES */ - - -/* - * Check for a restart marker & resynchronize decoder. - * Returns FALSE if must suspend. - */ - -LOCAL(boolean) -process_restart (j_decompress_ptr cinfo) -{ - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - int ci; - - /* Throw away any unused bits remaining in bit buffer; */ - /* include any full bytes in next_marker's count of discarded bytes */ - cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8; - entropy->bitstate.bits_left = 0; - - /* Advance past the RSTn marker */ - if (! (*cinfo->marker->read_restart_marker) (cinfo)) - return FALSE; - - /* Re-initialize DC predictions to 0 */ - for (ci = 0; ci < cinfo->comps_in_scan; ci++) - entropy->saved.last_dc_val[ci] = 0; - /* Re-init EOB run count, too */ - entropy->saved.EOBRUN = 0; - - /* Reset restart counter */ - entropy->restarts_to_go = cinfo->restart_interval; - - /* Reset out-of-data flag, unless read_restart_marker left us smack up - * against a marker. In that case we will end up treating the next data - * segment as empty, and we can avoid producing bogus output pixels by - * leaving the flag set. - */ - if (cinfo->unread_marker == 0) - entropy->pub.insufficient_data = FALSE; - - return TRUE; -} - - -/* - * Huffman MCU decoding. - * Each of these routines decodes and returns one MCU's worth of - * Huffman-compressed coefficients. - * The coefficients are reordered from zigzag order into natural array order, - * but are not dequantized. - * - * The i'th block of the MCU is stored into the block pointed to by - * MCU_data[i]. WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER. - * - * We return FALSE if data source requested suspension. In that case no - * changes have been made to permanent state. (Exception: some output - * coefficients may already have been assigned. This is harmless for - * spectral selection, since we'll just re-assign them on the next call. - * Successive approximation AC refinement has to be more careful, however.) - */ - -/* - * MCU decoding for DC initial scan (either spectral selection, - * or first pass of successive approximation). - */ - -METHODDEF(boolean) -decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - int Al = cinfo->Al; - register int s, r; - int blkn, ci; - JBLOCKROW block; - BITREAD_STATE_VARS; - savable_state state; - d_derived_tbl * tbl; - jpeg_component_info * compptr; - - /* Process restart marker if needed; may have to suspend */ - if (cinfo->restart_interval) { - if (entropy->restarts_to_go == 0) - if (! process_restart(cinfo)) - return FALSE; - } - - /* If we've run out of data, just leave the MCU set to zeroes. - * This way, we return uniform gray for the remainder of the segment. - */ - if (! entropy->pub.insufficient_data) { - - /* Load up working state */ - BITREAD_LOAD_STATE(cinfo,entropy->bitstate); - ASSIGN_STATE(state, entropy->saved); - - /* Outer loop handles each block in the MCU */ - - for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - block = MCU_data[blkn]; - ci = cinfo->MCU_membership[blkn]; - compptr = cinfo->cur_comp_info[ci]; - tbl = entropy->derived_tbls[compptr->dc_tbl_no]; - - /* Decode a single block's worth of coefficients */ - - /* Section F.2.2.1: decode the DC coefficient difference */ - HUFF_DECODE(s, br_state, tbl, return FALSE, label1); - if (s) { - CHECK_BIT_BUFFER(br_state, s, return FALSE); - r = GET_BITS(s); - s = HUFF_EXTEND(r, s); - } - - /* Convert DC difference to actual value, update last_dc_val */ - s += state.last_dc_val[ci]; - state.last_dc_val[ci] = s; - /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0) */ - (*block)[0] = (JCOEF) (s << Al); - } - - /* Completed MCU, so update state */ - BITREAD_SAVE_STATE(cinfo,entropy->bitstate); - ASSIGN_STATE(entropy->saved, state); - } - - /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; - - return TRUE; -} - - -/* - * MCU decoding for AC initial scan (either spectral selection, - * or first pass of successive approximation). - */ - -METHODDEF(boolean) -decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - int Se = cinfo->Se; - int Al = cinfo->Al; - register int s, k, r; - unsigned int EOBRUN; - JBLOCKROW block; - BITREAD_STATE_VARS; - d_derived_tbl * tbl; - - /* Process restart marker if needed; may have to suspend */ - if (cinfo->restart_interval) { - if (entropy->restarts_to_go == 0) - if (! process_restart(cinfo)) - return FALSE; - } - - /* If we've run out of data, just leave the MCU set to zeroes. - * This way, we return uniform gray for the remainder of the segment. - */ - if (! entropy->pub.insufficient_data) { - - /* Load up working state. - * We can avoid loading/saving bitread state if in an EOB run. - */ - EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */ - - /* There is always only one block per MCU */ - - if (EOBRUN > 0) /* if it's a band of zeroes... */ - EOBRUN--; /* ...process it now (we do nothing) */ - else { - BITREAD_LOAD_STATE(cinfo,entropy->bitstate); - block = MCU_data[0]; - tbl = entropy->ac_derived_tbl; - - for (k = cinfo->Ss; k <= Se; k++) { - HUFF_DECODE(s, br_state, tbl, return FALSE, label2); - r = s >> 4; - s &= 15; - if (s) { - k += r; - CHECK_BIT_BUFFER(br_state, s, return FALSE); - r = GET_BITS(s); - s = HUFF_EXTEND(r, s); - /* Scale and output coefficient in natural (dezigzagged) order */ - (*block)[jpeg_natural_order[k]] = (JCOEF) (s << Al); - } else { - if (r == 15) { /* ZRL */ - k += 15; /* skip 15 zeroes in band */ - } else { /* EOBr, run length is 2^r + appended bits */ - EOBRUN = 1 << r; - if (r) { /* EOBr, r > 0 */ - CHECK_BIT_BUFFER(br_state, r, return FALSE); - r = GET_BITS(r); - EOBRUN += r; - } - EOBRUN--; /* this band is processed at this moment */ - break; /* force end-of-band */ - } - } - } - - BITREAD_SAVE_STATE(cinfo,entropy->bitstate); - } - - /* Completed MCU, so update state */ - entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */ - } - - /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; - - return TRUE; -} - - -/* - * MCU decoding for DC successive approximation refinement scan. - * Note: we assume such scans can be multi-component, although the spec - * is not very clear on the point. - */ - -METHODDEF(boolean) -decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ - int blkn; - JBLOCKROW block; - BITREAD_STATE_VARS; - - /* Process restart marker if needed; may have to suspend */ - if (cinfo->restart_interval) { - if (entropy->restarts_to_go == 0) - if (! process_restart(cinfo)) - return FALSE; - } - - /* Not worth the cycles to check insufficient_data here, - * since we will not change the data anyway if we read zeroes. - */ - - /* Load up working state */ - BITREAD_LOAD_STATE(cinfo,entropy->bitstate); - - /* Outer loop handles each block in the MCU */ - - for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { - block = MCU_data[blkn]; - - /* Encoded data is simply the next bit of the two's-complement DC value */ - CHECK_BIT_BUFFER(br_state, 1, return FALSE); - if (GET_BITS(1)) - (*block)[0] |= p1; - /* Note: since we use |=, repeating the assignment later is safe */ - } - - /* Completed MCU, so update state */ - BITREAD_SAVE_STATE(cinfo,entropy->bitstate); - - /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; - - return TRUE; -} - - -/* - * MCU decoding for AC successive approximation refinement scan. - */ - -METHODDEF(boolean) -decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) -{ - phuff_entropy_ptr entropy = (phuff_entropy_ptr) cinfo->entropy; - int Se = cinfo->Se; - int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ - int m1 = (-1) << cinfo->Al; /* -1 in the bit position being coded */ - register int s, k, r; - unsigned int EOBRUN; - JBLOCKROW block; - JCOEFPTR thiscoef; - BITREAD_STATE_VARS; - d_derived_tbl * tbl; - int num_newnz; - int newnz_pos[DCTSIZE2]; - - /* Process restart marker if needed; may have to suspend */ - if (cinfo->restart_interval) { - if (entropy->restarts_to_go == 0) - if (! process_restart(cinfo)) - return FALSE; - } - - /* If we've run out of data, don't modify the MCU. - */ - if (! entropy->pub.insufficient_data) { - - /* Load up working state */ - BITREAD_LOAD_STATE(cinfo,entropy->bitstate); - EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */ - - /* There is always only one block per MCU */ - block = MCU_data[0]; - tbl = entropy->ac_derived_tbl; - - /* If we are forced to suspend, we must undo the assignments to any newly - * nonzero coefficients in the block, because otherwise we'd get confused - * next time about which coefficients were already nonzero. - * But we need not undo addition of bits to already-nonzero coefficients; - * instead, we can test the current bit to see if we already did it. - */ - num_newnz = 0; - - /* initialize coefficient loop counter to start of band */ - k = cinfo->Ss; - - if (EOBRUN == 0) { - for (; k <= Se; k++) { - HUFF_DECODE(s, br_state, tbl, goto undoit, label3); - r = s >> 4; - s &= 15; - if (s) { - if (s != 1) /* size of new coef should always be 1 */ - WARNMS(cinfo, JWRN_HUFF_BAD_CODE); - CHECK_BIT_BUFFER(br_state, 1, goto undoit); - if (GET_BITS(1)) - s = p1; /* newly nonzero coef is positive */ - else - s = m1; /* newly nonzero coef is negative */ - } else { - if (r != 15) { - EOBRUN = 1 << r; /* EOBr, run length is 2^r + appended bits */ - if (r) { - CHECK_BIT_BUFFER(br_state, r, goto undoit); - r = GET_BITS(r); - EOBRUN += r; - } - break; /* rest of block is handled by EOB logic */ - } - /* note s = 0 for processing ZRL */ - } - /* Advance over already-nonzero coefs and r still-zero coefs, - * appending correction bits to the nonzeroes. A correction bit is 1 - * if the absolute value of the coefficient must be increased. - */ - do { - thiscoef = *block + jpeg_natural_order[k]; - if (*thiscoef != 0) { - CHECK_BIT_BUFFER(br_state, 1, goto undoit); - if (GET_BITS(1)) { - if ((*thiscoef & p1) == 0) { /* do nothing if already set it */ - if (*thiscoef >= 0) - *thiscoef += p1; - else - *thiscoef += m1; - } - } - } else { - if (--r < 0) - break; /* reached target zero coefficient */ - } - k++; - } while (k <= Se); - if (s) { - int pos = jpeg_natural_order[k]; - /* Output newly nonzero coefficient */ - (*block)[pos] = (JCOEF) s; - /* Remember its position in case we have to suspend */ - newnz_pos[num_newnz++] = pos; - } - } - } - - if (EOBRUN > 0) { - /* Scan any remaining coefficient positions after the end-of-band - * (the last newly nonzero coefficient, if any). Append a correction - * bit to each already-nonzero coefficient. A correction bit is 1 - * if the absolute value of the coefficient must be increased. - */ - for (; k <= Se; k++) { - thiscoef = *block + jpeg_natural_order[k]; - if (*thiscoef != 0) { - CHECK_BIT_BUFFER(br_state, 1, goto undoit); - if (GET_BITS(1)) { - if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */ - if (*thiscoef >= 0) - *thiscoef += p1; - else - *thiscoef += m1; - } - } - } - } - /* Count one block completed in EOB run */ - EOBRUN--; - } - - /* Completed MCU, so update state */ - BITREAD_SAVE_STATE(cinfo,entropy->bitstate); - entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */ - } - - /* Account for restart interval (no-op if not using restarts) */ - entropy->restarts_to_go--; - - return TRUE; - -undoit: - /* Re-zero any output coefficients that we made newly nonzero */ - while (num_newnz > 0) - (*block)[newnz_pos[--num_newnz]] = 0; - - return FALSE; -} - - -/* - * Module initialization routine for progressive Huffman entropy decoding. - */ - -GLOBAL(void) -jinit_phuff_decoder (j_decompress_ptr cinfo) -{ - phuff_entropy_ptr entropy; - int *coef_bit_ptr; - int ci, i; - - entropy = (phuff_entropy_ptr) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - SIZEOF(phuff_entropy_decoder)); - cinfo->entropy = (struct jpeg_entropy_decoder *) entropy; - entropy->pub.start_pass = start_pass_phuff_decoder; - - /* Mark derived tables unallocated */ - for (i = 0; i < NUM_HUFF_TBLS; i++) { - entropy->derived_tbls[i] = NULL; - } - - /* Create progression status table */ - cinfo->coef_bits = (int (*)[DCTSIZE2]) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - cinfo->num_components*DCTSIZE2*SIZEOF(int)); - coef_bit_ptr = & cinfo->coef_bits[0][0]; - for (ci = 0; ci < cinfo->num_components; ci++) - for (i = 0; i < DCTSIZE2; i++) - *coef_bit_ptr++ = -1; -} - -#endif /* D_PROGRESSIVE_SUPPORTED */ diff --git a/code/jpeg-6b/jfdctint.c b/code/jpeg-6b/jfdctint.c deleted file mode 100644 index 0a78b64a..00000000 --- a/code/jpeg-6b/jfdctint.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * jfdctint.c - * - * Copyright (C) 1991-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains a slow-but-accurate integer implementation of the - * forward DCT (Discrete Cosine Transform). - * - * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT - * on each column. Direct algorithms are also available, but they are - * much more complex and seem not to be any faster when reduced to code. - * - * This implementation is based on an algorithm described in - * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT - * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, - * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. - * The primary algorithm described there uses 11 multiplies and 29 adds. - * We use their alternate method with 12 multiplies and 32 adds. - * The advantage of this method is that no data path contains more than one - * multiplication; this allows a very simple and accurate implementation in - * scaled fixed-point arithmetic, with a minimal number of shifts. - */ - -#define JPEG_INTERNALS -#include "jinclude.h" -#include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ - -#ifdef DCT_ISLOW_SUPPORTED - - -/* - * This module is specialized to the case DCTSIZE = 8. - */ - -#if DCTSIZE != 8 - Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ -#endif - - -/* - * The poop on this scaling stuff is as follows: - * - * Each 1-D DCT step produces outputs which are a factor of sqrt(N) - * larger than the true DCT outputs. The final outputs are therefore - * a factor of N larger than desired; since N=8 this can be cured by - * a simple right shift at the end of the algorithm. The advantage of - * this arrangement is that we save two multiplications per 1-D DCT, - * because the y0 and y4 outputs need not be divided by sqrt(N). - * In the IJG code, this factor of 8 is removed by the quantization step - * (in jcdctmgr.c), NOT in this module. - * - * We have to do addition and subtraction of the integer inputs, which - * is no problem, and multiplication by fractional constants, which is - * a problem to do in integer arithmetic. We multiply all the constants - * by CONST_SCALE and convert them to integer constants (thus retaining - * CONST_BITS bits of precision in the constants). After doing a - * multiplication we have to divide the product by CONST_SCALE, with proper - * rounding, to produce the correct output. This division can be done - * cheaply as a right shift of CONST_BITS bits. We postpone shifting - * as long as possible so that partial sums can be added together with - * full fractional precision. - * - * The outputs of the first pass are scaled up by PASS1_BITS bits so that - * they are represented to better-than-integral precision. These outputs - * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word - * with the recommended scaling. (For 12-bit sample data, the intermediate - * array is INT32 anyway.) - * - * To avoid overflow of the 32-bit intermediate results in pass 2, we must - * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis - * shows that the values given below are the most effective. - */ - -#if BITS_IN_JSAMPLE == 8 -#define CONST_BITS 13 -#define PASS1_BITS 2 -#else -#define CONST_BITS 13 -#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ -#endif - -/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus - * causing a lot of useless floating-point operations at run time. - * To get around this we use the following pre-calculated constants. - * If you change CONST_BITS you may want to add appropriate values. - * (With a reasonable C compiler, you can just rely on the FIX() macro...) - */ - -#if CONST_BITS == 13 -#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ -#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ -#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ -#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ -#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ -#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ -#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ -#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ -#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ -#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ -#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ -#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ -#else -#define FIX_0_298631336 FIX(0.298631336) -#define FIX_0_390180644 FIX(0.390180644) -#define FIX_0_541196100 FIX(0.541196100) -#define FIX_0_765366865 FIX(0.765366865) -#define FIX_0_899976223 FIX(0.899976223) -#define FIX_1_175875602 FIX(1.175875602) -#define FIX_1_501321110 FIX(1.501321110) -#define FIX_1_847759065 FIX(1.847759065) -#define FIX_1_961570560 FIX(1.961570560) -#define FIX_2_053119869 FIX(2.053119869) -#define FIX_2_562915447 FIX(2.562915447) -#define FIX_3_072711026 FIX(3.072711026) -#endif - - -/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. - * For 8-bit samples with the recommended scaling, all the variable - * and constant values involved are no more than 16 bits wide, so a - * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. - * For 12-bit samples, a full 32-bit multiplication will be needed. - */ - -#if BITS_IN_JSAMPLE == 8 -#define MULTIPLY(var,const) MULTIPLY16C16(var,const) -#else -#define MULTIPLY(var,const) ((var) * (const)) -#endif - - -/* - * Perform the forward DCT on one block of samples. - */ - -GLOBAL(void) -jpeg_fdct_islow (DCTELEM * data) -{ - INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - INT32 tmp10, tmp11, tmp12, tmp13; - INT32 z1, z2, z3, z4, z5; - DCTELEM *dataptr; - int ctr; - SHIFT_TEMPS - - /* Pass 1: process rows. */ - /* Note results are scaled up by sqrt(8) compared to a true DCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - - dataptr = data; - for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { - tmp0 = dataptr[0] + dataptr[7]; - tmp7 = dataptr[0] - dataptr[7]; - tmp1 = dataptr[1] + dataptr[6]; - tmp6 = dataptr[1] - dataptr[6]; - tmp2 = dataptr[2] + dataptr[5]; - tmp5 = dataptr[2] - dataptr[5]; - tmp3 = dataptr[3] + dataptr[4]; - tmp4 = dataptr[3] - dataptr[4]; - - /* Even part per LL&M figure 1 --- note that published figure is faulty; - * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". - */ - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp1 + tmp2; - tmp12 = tmp1 - tmp2; - - dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS); - dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); - - z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); - dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), - CONST_BITS-PASS1_BITS); - dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), - CONST_BITS-PASS1_BITS); - - /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * cK represents cos(K*pi/16). - * i0..i3 in the paper are tmp4..tmp7 here. - */ - - z1 = tmp4 + tmp7; - z2 = tmp5 + tmp6; - z3 = tmp4 + tmp6; - z4 = tmp5 + tmp7; - z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - - tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ - tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ - tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ - tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ - z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ - z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ - z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ - z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - - z3 += z5; - z4 += z5; - - dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS); - dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS); - dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS); - dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS); - - dataptr += DCTSIZE; /* advance pointer to next row */ - } - - /* Pass 2: process columns. - * We remove the PASS1_BITS scaling, but leave the results scaled up - * by an overall factor of 8. - */ - - dataptr = data; - for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { - tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; - tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; - tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; - tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; - tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; - tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; - tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; - tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; - - /* Even part per LL&M figure 1 --- note that published figure is faulty; - * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". - */ - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp1 + tmp2; - tmp12 = tmp1 - tmp2; - - dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS); - dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS); - - z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); - dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), - CONST_BITS+PASS1_BITS); - - /* Odd part per figure 8 --- note paper omits factor of sqrt(2). - * cK represents cos(K*pi/16). - * i0..i3 in the paper are tmp4..tmp7 here. - */ - - z1 = tmp4 + tmp7; - z2 = tmp5 + tmp6; - z3 = tmp4 + tmp6; - z4 = tmp5 + tmp7; - z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - - tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ - tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ - tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ - tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ - z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ - z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ - z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ - z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - - z3 += z5; - z4 += z5; - - dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, - CONST_BITS+PASS1_BITS); - dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, - CONST_BITS+PASS1_BITS); - - dataptr++; /* advance pointer to next column */ - } -} - -#endif /* DCT_ISLOW_SUPPORTED */ diff --git a/code/jpeg-6b/jidctint.c b/code/jpeg-6b/jidctint.c deleted file mode 100644 index a72b3207..00000000 --- a/code/jpeg-6b/jidctint.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * jidctint.c - * - * Copyright (C) 1991-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains a slow-but-accurate integer implementation of the - * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine - * must also perform dequantization of the input coefficients. - * - * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT - * on each row (or vice versa, but it's more convenient to emit a row at - * a time). Direct algorithms are also available, but they are much more - * complex and seem not to be any faster when reduced to code. - * - * This implementation is based on an algorithm described in - * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT - * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, - * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. - * The primary algorithm described there uses 11 multiplies and 29 adds. - * We use their alternate method with 12 multiplies and 32 adds. - * The advantage of this method is that no data path contains more than one - * multiplication; this allows a very simple and accurate implementation in - * scaled fixed-point arithmetic, with a minimal number of shifts. - */ - -#define JPEG_INTERNALS -#include "jinclude.h" -#include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ - -#ifdef DCT_ISLOW_SUPPORTED - - -/* - * This module is specialized to the case DCTSIZE = 8. - */ - -#if DCTSIZE != 8 - Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ -#endif - - -/* - * The poop on this scaling stuff is as follows: - * - * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) - * larger than the true IDCT outputs. The final outputs are therefore - * a factor of N larger than desired; since N=8 this can be cured by - * a simple right shift at the end of the algorithm. The advantage of - * this arrangement is that we save two multiplications per 1-D IDCT, - * because the y0 and y4 inputs need not be divided by sqrt(N). - * - * We have to do addition and subtraction of the integer inputs, which - * is no problem, and multiplication by fractional constants, which is - * a problem to do in integer arithmetic. We multiply all the constants - * by CONST_SCALE and convert them to integer constants (thus retaining - * CONST_BITS bits of precision in the constants). After doing a - * multiplication we have to divide the product by CONST_SCALE, with proper - * rounding, to produce the correct output. This division can be done - * cheaply as a right shift of CONST_BITS bits. We postpone shifting - * as long as possible so that partial sums can be added together with - * full fractional precision. - * - * The outputs of the first pass are scaled up by PASS1_BITS bits so that - * they are represented to better-than-integral precision. These outputs - * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word - * with the recommended scaling. (To scale up 12-bit sample data further, an - * intermediate INT32 array would be needed.) - * - * To avoid overflow of the 32-bit intermediate results in pass 2, we must - * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis - * shows that the values given below are the most effective. - */ - -#if BITS_IN_JSAMPLE == 8 -#define CONST_BITS 13 -#define PASS1_BITS 2 -#else -#define CONST_BITS 13 -#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ -#endif - -/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus - * causing a lot of useless floating-point operations at run time. - * To get around this we use the following pre-calculated constants. - * If you change CONST_BITS you may want to add appropriate values. - * (With a reasonable C compiler, you can just rely on the FIX() macro...) - */ - -#if CONST_BITS == 13 -#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ -#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ -#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ -#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ -#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ -#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ -#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ -#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ -#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ -#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ -#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ -#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ -#else -#define FIX_0_298631336 FIX(0.298631336) -#define FIX_0_390180644 FIX(0.390180644) -#define FIX_0_541196100 FIX(0.541196100) -#define FIX_0_765366865 FIX(0.765366865) -#define FIX_0_899976223 FIX(0.899976223) -#define FIX_1_175875602 FIX(1.175875602) -#define FIX_1_501321110 FIX(1.501321110) -#define FIX_1_847759065 FIX(1.847759065) -#define FIX_1_961570560 FIX(1.961570560) -#define FIX_2_053119869 FIX(2.053119869) -#define FIX_2_562915447 FIX(2.562915447) -#define FIX_3_072711026 FIX(3.072711026) -#endif - - -/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. - * For 8-bit samples with the recommended scaling, all the variable - * and constant values involved are no more than 16 bits wide, so a - * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. - * For 12-bit samples, a full 32-bit multiplication will be needed. - */ - -#if BITS_IN_JSAMPLE == 8 -#define MULTIPLY(var,const) MULTIPLY16C16(var,const) -#else -#define MULTIPLY(var,const) ((var) * (const)) -#endif - - -/* Dequantize a coefficient by multiplying it by the multiplier-table - * entry; produce an int result. In this module, both inputs and result - * are 16 bits or less, so either int or short multiply will work. - */ - -#define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval)) - - -/* - * Perform dequantization and inverse DCT on one block of coefficients. - */ - -GLOBAL(void) -jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) -{ - INT32 tmp0, tmp1, tmp2, tmp3; - INT32 tmp10, tmp11, tmp12, tmp13; - INT32 z1, z2, z3, z4, z5; - JCOEFPTR inptr; - ISLOW_MULT_TYPE * quantptr; - int * wsptr; - JSAMPROW outptr; - JSAMPLE *range_limit = IDCT_range_limit(cinfo); - int ctr; - int workspace[DCTSIZE2]; /* buffers data between passes */ - SHIFT_TEMPS - - /* Pass 1: process columns from input, store into work array. */ - /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ - /* furthermore, we scale the results by 2**PASS1_BITS. */ - - inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; - wsptr = workspace; - for (ctr = DCTSIZE; ctr > 0; ctr--) { - /* Due to quantization, we will usually find that many of the input - * coefficients are zero, especially the AC terms. We can exploit this - * by short-circuiting the IDCT calculation for any column in which all - * the AC terms are zero. In that case each output is equal to the - * DC coefficient (with scale factor as needed). - * With typical images and quantization tables, half or more of the - * column DCT calculations can be simplified this way. - */ - - if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && - inptr[DCTSIZE*7] == 0) { - /* AC terms all zero */ - int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; - - wsptr[DCTSIZE*0] = dcval; - wsptr[DCTSIZE*1] = dcval; - wsptr[DCTSIZE*2] = dcval; - wsptr[DCTSIZE*3] = dcval; - wsptr[DCTSIZE*4] = dcval; - wsptr[DCTSIZE*5] = dcval; - wsptr[DCTSIZE*6] = dcval; - wsptr[DCTSIZE*7] = dcval; - - inptr++; /* advance pointers to next column */ - quantptr++; - wsptr++; - continue; - } - - /* Even part: reverse the even part of the forward DCT. */ - /* The rotator is sqrt(2)*c(-6). */ - - z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); - - z1 = MULTIPLY(z2 + z3, FIX_0_541196100); - tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); - tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); - - z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); - - tmp0 = (z2 + z3) << CONST_BITS; - tmp1 = (z2 - z3) << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp1 + tmp2; - tmp12 = tmp1 - tmp2; - - /* Odd part per figure 8; the matrix is unitary and hence its - * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. - */ - - tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); - tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - - z1 = tmp0 + tmp3; - z2 = tmp1 + tmp2; - z3 = tmp0 + tmp2; - z4 = tmp1 + tmp3; - z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - - tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ - tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ - tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ - tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ - z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ - z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ - z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ - z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - - z3 += z5; - z4 += z5; - - tmp0 += z1 + z3; - tmp1 += z2 + z4; - tmp2 += z2 + z3; - tmp3 += z1 + z4; - - /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ - - wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); - wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); - - inptr++; /* advance pointers to next column */ - quantptr++; - wsptr++; - } - - /* Pass 2: process rows from work array, store into output array. */ - /* Note that we must descale the results by a factor of 8 == 2**3, */ - /* and also undo the PASS1_BITS scaling. */ - - wsptr = workspace; - for (ctr = 0; ctr < DCTSIZE; ctr++) { - outptr = output_buf[ctr] + output_col; - /* Rows of zeroes can be exploited in the same way as we did with columns. - * However, the column calculation has created many nonzero AC terms, so - * the simplification applies less often (typically 5% to 10% of the time). - * On machines with very fast multiplication, it's possible that the - * test takes more time than it's worth. In that case this section - * may be commented out. - */ - -#ifndef NO_ZERO_ROW_TEST - if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && - wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { - /* AC terms all zero */ - JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; - - outptr[0] = dcval; - outptr[1] = dcval; - outptr[2] = dcval; - outptr[3] = dcval; - outptr[4] = dcval; - outptr[5] = dcval; - outptr[6] = dcval; - outptr[7] = dcval; - - wsptr += DCTSIZE; /* advance pointer to next row */ - continue; - } -#endif - - /* Even part: reverse the even part of the forward DCT. */ - /* The rotator is sqrt(2)*c(-6). */ - - z2 = (INT32) wsptr[2]; - z3 = (INT32) wsptr[6]; - - z1 = MULTIPLY(z2 + z3, FIX_0_541196100); - tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); - tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); - - tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS; - tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp1 + tmp2; - tmp12 = tmp1 - tmp2; - - /* Odd part per figure 8; the matrix is unitary and hence its - * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. - */ - - tmp0 = (INT32) wsptr[7]; - tmp1 = (INT32) wsptr[5]; - tmp2 = (INT32) wsptr[3]; - tmp3 = (INT32) wsptr[1]; - - z1 = tmp0 + tmp3; - z2 = tmp1 + tmp2; - z3 = tmp0 + tmp2; - z4 = tmp1 + tmp3; - z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ - - tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ - tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ - tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ - tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ - z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ - z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ - z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ - z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ - - z3 += z5; - z4 += z5; - - tmp0 += z1 + z3; - tmp1 += z2 + z4; - tmp2 += z2 + z3; - tmp3 += z1 + z4; - - /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ - - outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0, - CONST_BITS+PASS1_BITS+3) - & RANGE_MASK]; - - wsptr += DCTSIZE; /* advance pointer to next row */ - } -} - -#endif /* DCT_ISLOW_SUPPORTED */ diff --git a/code/jpeg-6b/jidctred.c b/code/jpeg-6b/jidctred.c deleted file mode 100644 index 421f3c7c..00000000 --- a/code/jpeg-6b/jidctred.c +++ /dev/null @@ -1,398 +0,0 @@ -/* - * jidctred.c - * - * Copyright (C) 1994-1998, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains inverse-DCT routines that produce reduced-size output: - * either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block. - * - * The implementation is based on the Loeffler, Ligtenberg and Moschytz (LL&M) - * algorithm used in jidctint.c. We simply replace each 8-to-8 1-D IDCT step - * with an 8-to-4 step that produces the four averages of two adjacent outputs - * (or an 8-to-2 step producing two averages of four outputs, for 2x2 output). - * These steps were derived by computing the corresponding values at the end - * of the normal LL&M code, then simplifying as much as possible. - * - * 1x1 is trivial: just take the DC coefficient divided by 8. - * - * See jidctint.c for additional comments. - */ - -#define JPEG_INTERNALS -#include "jinclude.h" -#include "jpeglib.h" -#include "jdct.h" /* Private declarations for DCT subsystem */ - -#ifdef IDCT_SCALING_SUPPORTED - - -/* - * This module is specialized to the case DCTSIZE = 8. - */ - -#if DCTSIZE != 8 - Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ -#endif - - -/* Scaling is the same as in jidctint.c. */ - -#if BITS_IN_JSAMPLE == 8 -#define CONST_BITS 13 -#define PASS1_BITS 2 -#else -#define CONST_BITS 13 -#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ -#endif - -/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus - * causing a lot of useless floating-point operations at run time. - * To get around this we use the following pre-calculated constants. - * If you change CONST_BITS you may want to add appropriate values. - * (With a reasonable C compiler, you can just rely on the FIX() macro...) - */ - -#if CONST_BITS == 13 -#define FIX_0_211164243 ((INT32) 1730) /* FIX(0.211164243) */ -#define FIX_0_509795579 ((INT32) 4176) /* FIX(0.509795579) */ -#define FIX_0_601344887 ((INT32) 4926) /* FIX(0.601344887) */ -#define FIX_0_720959822 ((INT32) 5906) /* FIX(0.720959822) */ -#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ -#define FIX_0_850430095 ((INT32) 6967) /* FIX(0.850430095) */ -#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ -#define FIX_1_061594337 ((INT32) 8697) /* FIX(1.061594337) */ -#define FIX_1_272758580 ((INT32) 10426) /* FIX(1.272758580) */ -#define FIX_1_451774981 ((INT32) 11893) /* FIX(1.451774981) */ -#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ -#define FIX_2_172734803 ((INT32) 17799) /* FIX(2.172734803) */ -#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ -#define FIX_3_624509785 ((INT32) 29692) /* FIX(3.624509785) */ -#else -#define FIX_0_211164243 FIX(0.211164243) -#define FIX_0_509795579 FIX(0.509795579) -#define FIX_0_601344887 FIX(0.601344887) -#define FIX_0_720959822 FIX(0.720959822) -#define FIX_0_765366865 FIX(0.765366865) -#define FIX_0_850430095 FIX(0.850430095) -#define FIX_0_899976223 FIX(0.899976223) -#define FIX_1_061594337 FIX(1.061594337) -#define FIX_1_272758580 FIX(1.272758580) -#define FIX_1_451774981 FIX(1.451774981) -#define FIX_1_847759065 FIX(1.847759065) -#define FIX_2_172734803 FIX(2.172734803) -#define FIX_2_562915447 FIX(2.562915447) -#define FIX_3_624509785 FIX(3.624509785) -#endif - - -/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. - * For 8-bit samples with the recommended scaling, all the variable - * and constant values involved are no more than 16 bits wide, so a - * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. - * For 12-bit samples, a full 32-bit multiplication will be needed. - */ - -#if BITS_IN_JSAMPLE == 8 -#define MULTIPLY(var,const) MULTIPLY16C16(var,const) -#else -#define MULTIPLY(var,const) ((var) * (const)) -#endif - - -/* Dequantize a coefficient by multiplying it by the multiplier-table - * entry; produce an int result. In this module, both inputs and result - * are 16 bits or less, so either int or short multiply will work. - */ - -#define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval)) - - -/* - * Perform dequantization and inverse DCT on one block of coefficients, - * producing a reduced-size 4x4 output block. - */ - -GLOBAL(void) -jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) -{ - INT32 tmp0, tmp2, tmp10, tmp12; - INT32 z1, z2, z3, z4; - JCOEFPTR inptr; - ISLOW_MULT_TYPE * quantptr; - int * wsptr; - JSAMPROW outptr; - JSAMPLE *range_limit = IDCT_range_limit(cinfo); - int ctr; - int workspace[DCTSIZE*4]; /* buffers data between passes */ - SHIFT_TEMPS - - /* Pass 1: process columns from input, store into work array. */ - - inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; - wsptr = workspace; - for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) { - /* Don't bother to process column 4, because second pass won't use it */ - if (ctr == DCTSIZE-4) - continue; - if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && - inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*5] == 0 && - inptr[DCTSIZE*6] == 0 && inptr[DCTSIZE*7] == 0) { - /* AC terms all zero; we need not examine term 4 for 4x4 output */ - int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; - - wsptr[DCTSIZE*0] = dcval; - wsptr[DCTSIZE*1] = dcval; - wsptr[DCTSIZE*2] = dcval; - wsptr[DCTSIZE*3] = dcval; - - continue; - } - - /* Even part */ - - tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - tmp0 <<= (CONST_BITS+1); - - z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); - z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); - - tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, - FIX_0_765366865); - - tmp10 = tmp0 + tmp2; - tmp12 = tmp0 - tmp2; - - /* Odd part */ - - z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); - z2 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - z4 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - - tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */ - + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ - + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ - + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ - - tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */ - + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ - + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ - + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ - - /* Final output stage */ - - wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp2, CONST_BITS-PASS1_BITS+1); - wsptr[DCTSIZE*3] = (int) DESCALE(tmp10 - tmp2, CONST_BITS-PASS1_BITS+1); - wsptr[DCTSIZE*1] = (int) DESCALE(tmp12 + tmp0, CONST_BITS-PASS1_BITS+1); - wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 - tmp0, CONST_BITS-PASS1_BITS+1); - } - - /* Pass 2: process 4 rows from work array, store into output array. */ - - wsptr = workspace; - for (ctr = 0; ctr < 4; ctr++) { - outptr = output_buf[ctr] + output_col; - /* It's not clear whether a zero row test is worthwhile here ... */ - -#ifndef NO_ZERO_ROW_TEST - if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && - wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { - /* AC terms all zero */ - JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; - - outptr[0] = dcval; - outptr[1] = dcval; - outptr[2] = dcval; - outptr[3] = dcval; - - wsptr += DCTSIZE; /* advance pointer to next row */ - continue; - } -#endif - - /* Even part */ - - tmp0 = ((INT32) wsptr[0]) << (CONST_BITS+1); - - tmp2 = MULTIPLY((INT32) wsptr[2], FIX_1_847759065) - + MULTIPLY((INT32) wsptr[6], - FIX_0_765366865); - - tmp10 = tmp0 + tmp2; - tmp12 = tmp0 - tmp2; - - /* Odd part */ - - z1 = (INT32) wsptr[7]; - z2 = (INT32) wsptr[5]; - z3 = (INT32) wsptr[3]; - z4 = (INT32) wsptr[1]; - - tmp0 = MULTIPLY(z1, - FIX_0_211164243) /* sqrt(2) * (c3-c1) */ - + MULTIPLY(z2, FIX_1_451774981) /* sqrt(2) * (c3+c7) */ - + MULTIPLY(z3, - FIX_2_172734803) /* sqrt(2) * (-c1-c5) */ - + MULTIPLY(z4, FIX_1_061594337); /* sqrt(2) * (c5+c7) */ - - tmp2 = MULTIPLY(z1, - FIX_0_509795579) /* sqrt(2) * (c7-c5) */ - + MULTIPLY(z2, - FIX_0_601344887) /* sqrt(2) * (c5-c1) */ - + MULTIPLY(z3, FIX_0_899976223) /* sqrt(2) * (c3-c7) */ - + MULTIPLY(z4, FIX_2_562915447); /* sqrt(2) * (c1+c3) */ - - /* Final output stage */ - - outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp2, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; - outptr[3] = range_limit[(int) DESCALE(tmp10 - tmp2, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; - outptr[1] = range_limit[(int) DESCALE(tmp12 + tmp0, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; - outptr[2] = range_limit[(int) DESCALE(tmp12 - tmp0, - CONST_BITS+PASS1_BITS+3+1) - & RANGE_MASK]; - - wsptr += DCTSIZE; /* advance pointer to next row */ - } -} - - -/* - * Perform dequantization and inverse DCT on one block of coefficients, - * producing a reduced-size 2x2 output block. - */ - -GLOBAL(void) -jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) -{ - INT32 tmp0, tmp10, z1; - JCOEFPTR inptr; - ISLOW_MULT_TYPE * quantptr; - int * wsptr; - JSAMPROW outptr; - JSAMPLE *range_limit = IDCT_range_limit(cinfo); - int ctr; - int workspace[DCTSIZE*2]; /* buffers data between passes */ - SHIFT_TEMPS - - /* Pass 1: process columns from input, store into work array. */ - - inptr = coef_block; - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; - wsptr = workspace; - for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) { - /* Don't bother to process columns 2,4,6 */ - if (ctr == DCTSIZE-2 || ctr == DCTSIZE-4 || ctr == DCTSIZE-6) - continue; - if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*3] == 0 && - inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*7] == 0) { - /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */ - int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; - - wsptr[DCTSIZE*0] = dcval; - wsptr[DCTSIZE*1] = dcval; - - continue; - } - - /* Even part */ - - z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); - tmp10 = z1 << (CONST_BITS+2); - - /* Odd part */ - - z1 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); - tmp0 = MULTIPLY(z1, - FIX_0_720959822); /* sqrt(2) * (c7-c5+c3-c1) */ - z1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); - tmp0 += MULTIPLY(z1, FIX_0_850430095); /* sqrt(2) * (-c1+c3+c5+c7) */ - z1 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); - tmp0 += MULTIPLY(z1, - FIX_1_272758580); /* sqrt(2) * (-c1+c3-c5-c7) */ - z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); - tmp0 += MULTIPLY(z1, FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ - - /* Final output stage */ - - wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp0, CONST_BITS-PASS1_BITS+2); - wsptr[DCTSIZE*1] = (int) DESCALE(tmp10 - tmp0, CONST_BITS-PASS1_BITS+2); - } - - /* Pass 2: process 2 rows from work array, store into output array. */ - - wsptr = workspace; - for (ctr = 0; ctr < 2; ctr++) { - outptr = output_buf[ctr] + output_col; - /* It's not clear whether a zero row test is worthwhile here ... */ - -#ifndef NO_ZERO_ROW_TEST - if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) { - /* AC terms all zero */ - JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) - & RANGE_MASK]; - - outptr[0] = dcval; - outptr[1] = dcval; - - wsptr += DCTSIZE; /* advance pointer to next row */ - continue; - } -#endif - - /* Even part */ - - tmp10 = ((INT32) wsptr[0]) << (CONST_BITS+2); - - /* Odd part */ - - tmp0 = MULTIPLY((INT32) wsptr[7], - FIX_0_720959822) /* sqrt(2) * (c7-c5+c3-c1) */ - + MULTIPLY((INT32) wsptr[5], FIX_0_850430095) /* sqrt(2) * (-c1+c3+c5+c7) */ - + MULTIPLY((INT32) wsptr[3], - FIX_1_272758580) /* sqrt(2) * (-c1+c3-c5-c7) */ - + MULTIPLY((INT32) wsptr[1], FIX_3_624509785); /* sqrt(2) * (c1+c3+c5+c7) */ - - /* Final output stage */ - - outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp0, - CONST_BITS+PASS1_BITS+3+2) - & RANGE_MASK]; - outptr[1] = range_limit[(int) DESCALE(tmp10 - tmp0, - CONST_BITS+PASS1_BITS+3+2) - & RANGE_MASK]; - - wsptr += DCTSIZE; /* advance pointer to next row */ - } -} - - -/* - * Perform dequantization and inverse DCT on one block of coefficients, - * producing a reduced-size 1x1 output block. - */ - -GLOBAL(void) -jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JCOEFPTR coef_block, - JSAMPARRAY output_buf, JDIMENSION output_col) -{ - int dcval; - ISLOW_MULT_TYPE * quantptr; - JSAMPLE *range_limit = IDCT_range_limit(cinfo); - SHIFT_TEMPS - - /* We hardly need an inverse DCT routine for this: just take the - * average pixel value, which is one-eighth of the DC coefficient. - */ - quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; - dcval = DEQUANTIZE(coef_block[0], quantptr[0]); - dcval = (int) DESCALE((INT32) dcval, 3); - - output_buf[0][output_col] = range_limit[dcval & RANGE_MASK]; -} - -#endif /* IDCT_SCALING_SUPPORTED */ diff --git a/code/jpeg-6b/jload.c b/code/jpeg-6b/jload.c deleted file mode 100644 index 29e750b3..00000000 --- a/code/jpeg-6b/jload.c +++ /dev/null @@ -1,145 +0,0 @@ - -#include "../qcommon/q_shared.h" -#include "../qcommon/qcommon.h" - -/* - * Include file for users of JPEG library. - * You will need to have included system headers that define at least - * the typedefs FILE and size_t before you can include jpeglib.h. - * (stdio.h is sufficient on ANSI-conforming systems.) - * You may also wish to include "jerror.h". - */ - -#include "jpeglib.h" - - -int LoadJPG( const char *filename, unsigned char **pic, int *width, int *height ) { - /* This struct contains the JPEG decompression parameters and pointers to - * working space (which is allocated as needed by the JPEG library). - */ - struct jpeg_decompress_struct cinfo; - /* We use our private extension JPEG error handler. - * Note that this struct must live as long as the main JPEG parameter - * struct, to avoid dangling-pointer problems. - */ - /* This struct represents a JPEG error handler. It is declared separately - * because applications often want to supply a specialized error handler - * (see the second half of this file for an example). But here we just - * take the easy way out and use the standard error handler, which will - * print a message on stderr and call exit() if compression fails. - * Note that this struct must live as long as the main JPEG parameter - * struct, to avoid dangling-pointer problems. - */ - struct jpeg_error_mgr jerr; - /* More stuff */ - fileHandle_t infile; /* source file */ - JSAMPARRAY buffer; /* Output row buffer */ - int row_stride; /* physical row width in output buffer */ - unsigned char *out; - - /* In this example we want to open the input file before doing anything else, - * so that the setjmp() error recovery below can assume the file is open. - * VERY IMPORTANT: use "b" option to fopen() if you are on a machine that - * requires it in order to read binary files. - */ - - FS_FOpenFileRead( filename, &infile, qfalse ); - if (infile == 0) { - return 0; - } - - /* Step 1: allocate and initialize JPEG decompression object */ - - /* We have to set up the error handler first, in case the initialization - * step fails. (Unlikely, but it could happen if you are out of memory.) - * This routine fills in the contents of struct jerr, and returns jerr's - * address which we place into the link field in cinfo. - */ - cinfo.err = jpeg_std_error(&jerr); - - /* Now we can initialize the JPEG decompression object. */ - jpeg_create_decompress(&cinfo); - - /* Step 2: specify data source (eg, a file) */ - - jpeg_stdio_src(&cinfo, infile); - - /* Step 3: read file parameters with jpeg_read_header() */ - - (void) jpeg_read_header(&cinfo, TRUE); - /* We can ignore the return value from jpeg_read_header since - * (a) suspension is not possible with the stdio data source, and - * (b) we passed TRUE to reject a tables-only JPEG file as an error. - * See libjpeg.doc for more info. - */ - - /* Step 4: set parameters for decompression */ - - /* In this example, we don't need to change any of the defaults set by - * jpeg_read_header(), so we do nothing here. - */ - - /* Step 5: Start decompressor */ - - (void) jpeg_start_decompress(&cinfo); - /* We can ignore the return value since suspension is not possible - * with the stdio data source. - */ - - /* We may need to do some setup of our own at this point before reading - * the data. After jpeg_start_decompress() we have the correct scaled - * output image dimensions available, as well as the output colormap - * if we asked for color quantization. - * In this example, we need to make an output work buffer of the right size. - */ - /* JSAMPLEs per row in output buffer */ - row_stride = cinfo.output_width * cinfo.output_components; - - out = Z_Malloc(cinfo.output_width*cinfo.output_height*cinfo.output_components); - - *pic = out; - *width = cinfo.output_width; - *height = cinfo.output_height; - - /* Step 6: while (scan lines remain to be read) */ - /* jpeg_read_scanlines(...); */ - - /* Here we use the library's state variable cinfo.output_scanline as the - * loop counter, so that we don't have to keep track ourselves. - */ - while (cinfo.output_scanline < cinfo.output_height) { - /* jpeg_read_scanlines expects an array of pointers to scanlines. - * Here the array is only one element long, but you could ask for - * more than one scanline at a time if that's more convenient. - */ - buffer = (JSAMPARRAY)out+(row_stride*cinfo.output_scanline); - (void) jpeg_read_scanlines(&cinfo, buffer, 1); - } - - /* Step 7: Finish decompression */ - - (void) jpeg_finish_decompress(&cinfo); - /* We can ignore the return value since suspension is not possible - * with the stdio data source. - */ - - /* Step 8: Release JPEG decompression object */ - - /* This is an important step since it will release a good deal of memory. */ - jpeg_destroy_decompress(&cinfo); - - /* After finish_decompress, we can close the input file. - * Here we postpone it until after no more JPEG errors are possible, - * so as to simplify the setjmp error logic above. (Actually, I don't - * think that jpeg_destroy can do an error exit, but why assume anything...) - */ - FS_FCloseFile(infile); - - /* At this point you may want to check to see whether any corrupt-data - * warnings occurred (test whether jerr.pub.num_warnings is nonzero). - */ - - /* And we're done! */ - return 1; -} - diff --git a/code/jpeg-6b/jmemansi.c b/code/jpeg-6b/jmemansi.c deleted file mode 100644 index 2d93e496..00000000 --- a/code/jpeg-6b/jmemansi.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * jmemansi.c - * - * Copyright (C) 1992-1996, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file provides a simple generic implementation of the system- - * dependent portion of the JPEG memory manager. This implementation - * assumes that you have the ANSI-standard library routine tmpfile(). - * Also, the problem of determining the amount of memory available - * is shoved onto the user. - */ - -#define JPEG_INTERNALS -#include "jinclude.h" -#include "jpeglib.h" -#include "jmemsys.h" /* import the system-dependent declarations */ - -#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ -extern void * malloc JPP((size_t size)); -extern void free JPP((void *ptr)); -#endif - -#ifndef SEEK_SET /* pre-ANSI systems may not define this; */ -#define SEEK_SET 0 /* if not, assume 0 is correct */ -#endif - - -/* - * Memory allocation and freeing are controlled by the regular library - * routines malloc() and free(). - */ - -GLOBAL(void *) -jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject) -{ - return (void *) malloc(sizeofobject); -} - -GLOBAL(void) -jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject) -{ - free(object); -} - - -/* - * "Large" objects are treated the same as "small" ones. - * NB: although we include FAR keywords in the routine declarations, - * this file won't actually work in 80x86 small/medium model; at least, - * you probably won't be able to process useful-size images in only 64KB. - */ - -GLOBAL(void FAR *) -jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject) -{ - return (void FAR *) malloc(sizeofobject); -} - -GLOBAL(void) -jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject) -{ - free(object); -} - - -/* - * This routine computes the total memory space available for allocation. - * It's impossible to do this in a portable way; our current solution is - * to make the user tell us (with a default value set at compile time). - * If you can actually get the available space, it's a good idea to subtract - * a slop factor of 5% or so. - */ - -#ifndef DEFAULT_MAX_MEM /* so can override from makefile */ -#define DEFAULT_MAX_MEM 1000000L /* default: one megabyte */ -#endif - -GLOBAL(long) -jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed, - long max_bytes_needed, long already_allocated) -{ - return cinfo->mem->max_memory_to_use - already_allocated; -} - - -/* - * Backing store (temporary file) management. - * Backing store objects are only used when the value returned by - * jpeg_mem_available is less than the total space needed. You can dispense - * with these routines if you have plenty of virtual memory; see jmemnobs.c. - */ - - -METHODDEF(void) -read_backing_store (j_common_ptr cinfo, backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count) -{ - if (fseek(info->temp_file, file_offset, SEEK_SET)) - ERREXIT(cinfo, JERR_TFILE_SEEK); - if (JFREAD(info->temp_file, buffer_address, byte_count) - != (size_t) byte_count) - ERREXIT(cinfo, JERR_TFILE_READ); -} - - -METHODDEF(void) -write_backing_store (j_common_ptr cinfo, backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count) -{ - if (fseek(info->temp_file, file_offset, SEEK_SET)) - ERREXIT(cinfo, JERR_TFILE_SEEK); - if (JFWRITE(info->temp_file, buffer_address, byte_count) - != (size_t) byte_count) - ERREXIT(cinfo, JERR_TFILE_WRITE); -} - - -METHODDEF(void) -close_backing_store (j_common_ptr cinfo, backing_store_ptr info) -{ - fclose(info->temp_file); - /* Since this implementation uses tmpfile() to create the file, - * no explicit file deletion is needed. - */ -} - - -/* - * Initial opening of a backing-store object. - * - * This version uses tmpfile(), which constructs a suitable file name - * behind the scenes. We don't have to use info->temp_name[] at all; - * indeed, we can't even find out the actual name of the temp file. - */ - -GLOBAL(void) -jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info, - long total_bytes_needed) -{ - if ((info->temp_file = tmpfile()) == NULL) - ERREXITS(cinfo, JERR_TFILE_CREATE, ""); - info->read_backing_store = read_backing_store; - info->write_backing_store = write_backing_store; - info->close_backing_store = close_backing_store; -} - - -/* - * These routines take care of any system-dependent initialization and - * cleanup required. - */ - -GLOBAL(long) -jpeg_mem_init (j_common_ptr cinfo) -{ - return DEFAULT_MAX_MEM; /* default for max_memory_to_use */ -} - -GLOBAL(void) -jpeg_mem_term (j_common_ptr cinfo) -{ - /* no work */ -} diff --git a/code/jpeg-6b/jmemdos.c b/code/jpeg-6b/jmemdos.c deleted file mode 100644 index 60b45c69..00000000 --- a/code/jpeg-6b/jmemdos.c +++ /dev/null @@ -1,638 +0,0 @@ -/* - * jmemdos.c - * - * Copyright (C) 1992-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file provides an MS-DOS-compatible implementation of the system- - * dependent portion of the JPEG memory manager. Temporary data can be - * stored in extended or expanded memory as well as in regular DOS files. - * - * If you use this file, you must be sure that NEED_FAR_POINTERS is defined - * if you compile in a small-data memory model; it should NOT be defined if - * you use a large-data memory model. This file is not recommended if you - * are using a flat-memory-space 386 environment such as DJGCC or Watcom C. - * Also, this code will NOT work if struct fields are aligned on greater than - * 2-byte boundaries. - * - * Based on code contributed by Ge' Weijers. - */ - -/* - * If you have both extended and expanded memory, you may want to change the - * order in which they are tried in jopen_backing_store. On a 286 machine - * expanded memory is usually faster, since extended memory access involves - * an expensive protected-mode-and-back switch. On 386 and better, extended - * memory is usually faster. As distributed, the code tries extended memory - * first (what? not everyone has a 386? :-). - * - * You can disable use of extended/expanded memory entirely by altering these - * definitions or overriding them from the Makefile (eg, -DEMS_SUPPORTED=0). - */ - -#ifndef XMS_SUPPORTED -#define XMS_SUPPORTED 1 -#endif -#ifndef EMS_SUPPORTED -#define EMS_SUPPORTED 1 -#endif - - -#define JPEG_INTERNALS -#include "jinclude.h" -#include "jpeglib.h" -#include "jmemsys.h" /* import the system-dependent declarations */ - -#ifndef HAVE_STDLIB_H /* should declare these */ -extern void * malloc JPP((size_t size)); -extern void free JPP((void *ptr)); -extern char * getenv JPP((const char * name)); -#endif - -#ifdef NEED_FAR_POINTERS - -#ifdef __TURBOC__ -/* These definitions work for Borland C (Turbo C) */ -#include /* need farmalloc(), farfree() */ -#define far_malloc(x) farmalloc(x) -#define far_free(x) farfree(x) -#else -/* These definitions work for Microsoft C and compatible compilers */ -#include /* need _fmalloc(), _ffree() */ -#define far_malloc(x) _fmalloc(x) -#define far_free(x) _ffree(x) -#endif - -#else /* not NEED_FAR_POINTERS */ - -#define far_malloc(x) malloc(x) -#define far_free(x) free(x) - -#endif /* NEED_FAR_POINTERS */ - -#ifdef DONT_USE_B_MODE /* define mode parameters for fopen() */ -#define READ_BINARY "r" -#else -#define READ_BINARY "rb" -#endif - -#ifndef USE_MSDOS_MEMMGR /* make sure user got configuration right */ - You forgot to define USE_MSDOS_MEMMGR in jconfig.h. /* deliberate syntax error */ -#endif - -#if MAX_ALLOC_CHUNK >= 65535L /* make sure jconfig.h got this right */ - MAX_ALLOC_CHUNK should be less than 64K. /* deliberate syntax error */ -#endif - - -/* - * Declarations for assembly-language support routines (see jmemdosa.asm). - * - * The functions are declared "far" as are all their pointer arguments; - * this ensures the assembly source code will work regardless of the - * compiler memory model. We assume "short" is 16 bits, "long" is 32. - */ - -typedef void far * XMSDRIVER; /* actually a pointer to code */ -typedef struct { /* registers for calling XMS driver */ - unsigned short ax, dx, bx; - void far * ds_si; - } XMScontext; -typedef struct { /* registers for calling EMS driver */ - unsigned short ax, dx, bx; - void far * ds_si; - } EMScontext; - -extern short far jdos_open JPP((short far * handle, char far * filename)); -extern short far jdos_close JPP((short handle)); -extern short far jdos_seek JPP((short handle, long offset)); -extern short far jdos_read JPP((short handle, void far * buffer, - unsigned short count)); -extern short far jdos_write JPP((short handle, void far * buffer, - unsigned short count)); -extern void far jxms_getdriver JPP((XMSDRIVER far *)); -extern void far jxms_calldriver JPP((XMSDRIVER, XMScontext far *)); -extern short far jems_available JPP((void)); -extern void far jems_calldriver JPP((EMScontext far *)); - - -/* - * Selection of a file name for a temporary file. - * This is highly system-dependent, and you may want to customize it. - */ - -static int next_file_num; /* to distinguish among several temp files */ - -LOCAL(void) -select_file_name (char * fname) -{ - const char * env; - char * ptr; - FILE * tfile; - - /* Keep generating file names till we find one that's not in use */ - for (;;) { - /* Get temp directory name from environment TMP or TEMP variable; - * if none, use "." - */ - if ((env = (const char *) getenv("TMP")) == NULL) - if ((env = (const char *) getenv("TEMP")) == NULL) - env = "."; - if (*env == '\0') /* null string means "." */ - env = "."; - ptr = fname; /* copy name to fname */ - while (*env != '\0') - *ptr++ = *env++; - if (ptr[-1] != '\\' && ptr[-1] != '/') - *ptr++ = '\\'; /* append backslash if not in env variable */ - /* Append a suitable file name */ - next_file_num++; /* advance counter */ - sprintf(ptr, "JPG%03d.TMP", next_file_num); - /* Probe to see if file name is already in use */ - if ((tfile = fopen(fname, READ_BINARY)) == NULL) - break; - fclose(tfile); /* oops, it's there; close tfile & try again */ - } -} - - -/* - * Near-memory allocation and freeing are controlled by the regular library - * routines malloc() and free(). - */ - -GLOBAL(void *) -jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject) -{ - return (void *) malloc(sizeofobject); -} - -GLOBAL(void) -jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject) -{ - free(object); -} - - -/* - * "Large" objects are allocated in far memory, if possible - */ - -GLOBAL(void FAR *) -jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject) -{ - return (void FAR *) far_malloc(sizeofobject); -} - -GLOBAL(void) -jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject) -{ - far_free(object); -} - - -/* - * This routine computes the total memory space available for allocation. - * It's impossible to do this in a portable way; our current solution is - * to make the user tell us (with a default value set at compile time). - * If you can actually get the available space, it's a good idea to subtract - * a slop factor of 5% or so. - */ - -#ifndef DEFAULT_MAX_MEM /* so can override from makefile */ -#define DEFAULT_MAX_MEM 300000L /* for total usage about 450K */ -#endif - -GLOBAL(long) -jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed, - long max_bytes_needed, long already_allocated) -{ - return cinfo->mem->max_memory_to_use - already_allocated; -} - - -/* - * Backing store (temporary file) management. - * Backing store objects are only used when the value returned by - * jpeg_mem_available is less than the total space needed. You can dispense - * with these routines if you have plenty of virtual memory; see jmemnobs.c. - */ - -/* - * For MS-DOS we support three types of backing storage: - * 1. Conventional DOS files. We access these by direct DOS calls rather - * than via the stdio package. This provides a bit better performance, - * but the real reason is that the buffers to be read or written are FAR. - * The stdio library for small-data memory models can't cope with that. - * 2. Extended memory, accessed per the XMS V2.0 specification. - * 3. Expanded memory, accessed per the LIM/EMS 4.0 specification. - * You'll need copies of those specs to make sense of the related code. - * The specs are available by Internet FTP from the SIMTEL archives - * (oak.oakland.edu and its various mirror sites). See files - * pub/msdos/microsoft/xms20.arc and pub/msdos/info/limems41.zip. - */ - - -/* - * Access methods for a DOS file. - */ - - -METHODDEF(void) -read_file_store (j_common_ptr cinfo, backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count) -{ - if (jdos_seek(info->handle.file_handle, file_offset)) - ERREXIT(cinfo, JERR_TFILE_SEEK); - /* Since MAX_ALLOC_CHUNK is less than 64K, byte_count will be too. */ - if (byte_count > 65535L) /* safety check */ - ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK); - if (jdos_read(info->handle.file_handle, buffer_address, - (unsigned short) byte_count)) - ERREXIT(cinfo, JERR_TFILE_READ); -} - - -METHODDEF(void) -write_file_store (j_common_ptr cinfo, backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count) -{ - if (jdos_seek(info->handle.file_handle, file_offset)) - ERREXIT(cinfo, JERR_TFILE_SEEK); - /* Since MAX_ALLOC_CHUNK is less than 64K, byte_count will be too. */ - if (byte_count > 65535L) /* safety check */ - ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK); - if (jdos_write(info->handle.file_handle, buffer_address, - (unsigned short) byte_count)) - ERREXIT(cinfo, JERR_TFILE_WRITE); -} - - -METHODDEF(void) -close_file_store (j_common_ptr cinfo, backing_store_ptr info) -{ - jdos_close(info->handle.file_handle); /* close the file */ - remove(info->temp_name); /* delete the file */ -/* If your system doesn't have remove(), try unlink() instead. - * remove() is the ANSI-standard name for this function, but - * unlink() was more common in pre-ANSI systems. - */ - TRACEMSS(cinfo, 1, JTRC_TFILE_CLOSE, info->temp_name); -} - - -LOCAL(boolean) -open_file_store (j_common_ptr cinfo, backing_store_ptr info, - long total_bytes_needed) -{ - short handle; - - select_file_name(info->temp_name); - if (jdos_open((short far *) & handle, (char far *) info->temp_name)) { - /* might as well exit since jpeg_open_backing_store will fail anyway */ - ERREXITS(cinfo, JERR_TFILE_CREATE, info->temp_name); - return FALSE; - } - info->handle.file_handle = handle; - info->read_backing_store = read_file_store; - info->write_backing_store = write_file_store; - info->close_backing_store = close_file_store; - TRACEMSS(cinfo, 1, JTRC_TFILE_OPEN, info->temp_name); - return TRUE; /* succeeded */ -} - - -/* - * Access methods for extended memory. - */ - -#if XMS_SUPPORTED - -static XMSDRIVER xms_driver; /* saved address of XMS driver */ - -typedef union { /* either long offset or real-mode pointer */ - long offset; - void far * ptr; - } XMSPTR; - -typedef struct { /* XMS move specification structure */ - long length; - XMSH src_handle; - XMSPTR src; - XMSH dst_handle; - XMSPTR dst; - } XMSspec; - -#define ODD(X) (((X) & 1L) != 0) - - -METHODDEF(void) -read_xms_store (j_common_ptr cinfo, backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count) -{ - XMScontext ctx; - XMSspec spec; - char endbuffer[2]; - - /* The XMS driver can't cope with an odd length, so handle the last byte - * specially if byte_count is odd. We don't expect this to be common. - */ - - spec.length = byte_count & (~ 1L); - spec.src_handle = info->handle.xms_handle; - spec.src.offset = file_offset; - spec.dst_handle = 0; - spec.dst.ptr = buffer_address; - - ctx.ds_si = (void far *) & spec; - ctx.ax = 0x0b00; /* EMB move */ - jxms_calldriver(xms_driver, (XMScontext far *) & ctx); - if (ctx.ax != 1) - ERREXIT(cinfo, JERR_XMS_READ); - - if (ODD(byte_count)) { - read_xms_store(cinfo, info, (void FAR *) endbuffer, - file_offset + byte_count - 1L, 2L); - ((char FAR *) buffer_address)[byte_count - 1L] = endbuffer[0]; - } -} - - -METHODDEF(void) -write_xms_store (j_common_ptr cinfo, backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count) -{ - XMScontext ctx; - XMSspec spec; - char endbuffer[2]; - - /* The XMS driver can't cope with an odd length, so handle the last byte - * specially if byte_count is odd. We don't expect this to be common. - */ - - spec.length = byte_count & (~ 1L); - spec.src_handle = 0; - spec.src.ptr = buffer_address; - spec.dst_handle = info->handle.xms_handle; - spec.dst.offset = file_offset; - - ctx.ds_si = (void far *) & spec; - ctx.ax = 0x0b00; /* EMB move */ - jxms_calldriver(xms_driver, (XMScontext far *) & ctx); - if (ctx.ax != 1) - ERREXIT(cinfo, JERR_XMS_WRITE); - - if (ODD(byte_count)) { - read_xms_store(cinfo, info, (void FAR *) endbuffer, - file_offset + byte_count - 1L, 2L); - endbuffer[0] = ((char FAR *) buffer_address)[byte_count - 1L]; - write_xms_store(cinfo, info, (void FAR *) endbuffer, - file_offset + byte_count - 1L, 2L); - } -} - - -METHODDEF(void) -close_xms_store (j_common_ptr cinfo, backing_store_ptr info) -{ - XMScontext ctx; - - ctx.dx = info->handle.xms_handle; - ctx.ax = 0x0a00; - jxms_calldriver(xms_driver, (XMScontext far *) & ctx); - TRACEMS1(cinfo, 1, JTRC_XMS_CLOSE, info->handle.xms_handle); - /* we ignore any error return from the driver */ -} - - -LOCAL(boolean) -open_xms_store (j_common_ptr cinfo, backing_store_ptr info, - long total_bytes_needed) -{ - XMScontext ctx; - - /* Get address of XMS driver */ - jxms_getdriver((XMSDRIVER far *) & xms_driver); - if (xms_driver == NULL) - return FALSE; /* no driver to be had */ - - /* Get version number, must be >= 2.00 */ - ctx.ax = 0x0000; - jxms_calldriver(xms_driver, (XMScontext far *) & ctx); - if (ctx.ax < (unsigned short) 0x0200) - return FALSE; - - /* Try to get space (expressed in kilobytes) */ - ctx.dx = (unsigned short) ((total_bytes_needed + 1023L) >> 10); - ctx.ax = 0x0900; - jxms_calldriver(xms_driver, (XMScontext far *) & ctx); - if (ctx.ax != 1) - return FALSE; - - /* Succeeded, save the handle and away we go */ - info->handle.xms_handle = ctx.dx; - info->read_backing_store = read_xms_store; - info->write_backing_store = write_xms_store; - info->close_backing_store = close_xms_store; - TRACEMS1(cinfo, 1, JTRC_XMS_OPEN, ctx.dx); - return TRUE; /* succeeded */ -} - -#endif /* XMS_SUPPORTED */ - - -/* - * Access methods for expanded memory. - */ - -#if EMS_SUPPORTED - -/* The EMS move specification structure requires word and long fields aligned - * at odd byte boundaries. Some compilers will align struct fields at even - * byte boundaries. While it's usually possible to force byte alignment, - * that causes an overall performance penalty and may pose problems in merging - * JPEG into a larger application. Instead we accept some rather dirty code - * here. Note this code would fail if the hardware did not allow odd-byte - * word & long accesses, but all 80x86 CPUs do. - */ - -typedef void far * EMSPTR; - -typedef union { /* EMS move specification structure */ - long length; /* It's easy to access first 4 bytes */ - char bytes[18]; /* Misaligned fields in here! */ - } EMSspec; - -/* Macros for accessing misaligned fields */ -#define FIELD_AT(spec,offset,type) (*((type *) &(spec.bytes[offset]))) -#define SRC_TYPE(spec) FIELD_AT(spec,4,char) -#define SRC_HANDLE(spec) FIELD_AT(spec,5,EMSH) -#define SRC_OFFSET(spec) FIELD_AT(spec,7,unsigned short) -#define SRC_PAGE(spec) FIELD_AT(spec,9,unsigned short) -#define SRC_PTR(spec) FIELD_AT(spec,7,EMSPTR) -#define DST_TYPE(spec) FIELD_AT(spec,11,char) -#define DST_HANDLE(spec) FIELD_AT(spec,12,EMSH) -#define DST_OFFSET(spec) FIELD_AT(spec,14,unsigned short) -#define DST_PAGE(spec) FIELD_AT(spec,16,unsigned short) -#define DST_PTR(spec) FIELD_AT(spec,14,EMSPTR) - -#define EMSPAGESIZE 16384L /* gospel, see the EMS specs */ - -#define HIBYTE(W) (((W) >> 8) & 0xFF) -#define LOBYTE(W) ((W) & 0xFF) - - -METHODDEF(void) -read_ems_store (j_common_ptr cinfo, backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count) -{ - EMScontext ctx; - EMSspec spec; - - spec.length = byte_count; - SRC_TYPE(spec) = 1; - SRC_HANDLE(spec) = info->handle.ems_handle; - SRC_PAGE(spec) = (unsigned short) (file_offset / EMSPAGESIZE); - SRC_OFFSET(spec) = (unsigned short) (file_offset % EMSPAGESIZE); - DST_TYPE(spec) = 0; - DST_HANDLE(spec) = 0; - DST_PTR(spec) = buffer_address; - - ctx.ds_si = (void far *) & spec; - ctx.ax = 0x5700; /* move memory region */ - jems_calldriver((EMScontext far *) & ctx); - if (HIBYTE(ctx.ax) != 0) - ERREXIT(cinfo, JERR_EMS_READ); -} - - -METHODDEF(void) -write_ems_store (j_common_ptr cinfo, backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count) -{ - EMScontext ctx; - EMSspec spec; - - spec.length = byte_count; - SRC_TYPE(spec) = 0; - SRC_HANDLE(spec) = 0; - SRC_PTR(spec) = buffer_address; - DST_TYPE(spec) = 1; - DST_HANDLE(spec) = info->handle.ems_handle; - DST_PAGE(spec) = (unsigned short) (file_offset / EMSPAGESIZE); - DST_OFFSET(spec) = (unsigned short) (file_offset % EMSPAGESIZE); - - ctx.ds_si = (void far *) & spec; - ctx.ax = 0x5700; /* move memory region */ - jems_calldriver((EMScontext far *) & ctx); - if (HIBYTE(ctx.ax) != 0) - ERREXIT(cinfo, JERR_EMS_WRITE); -} - - -METHODDEF(void) -close_ems_store (j_common_ptr cinfo, backing_store_ptr info) -{ - EMScontext ctx; - - ctx.ax = 0x4500; - ctx.dx = info->handle.ems_handle; - jems_calldriver((EMScontext far *) & ctx); - TRACEMS1(cinfo, 1, JTRC_EMS_CLOSE, info->handle.ems_handle); - /* we ignore any error return from the driver */ -} - - -LOCAL(boolean) -open_ems_store (j_common_ptr cinfo, backing_store_ptr info, - long total_bytes_needed) -{ - EMScontext ctx; - - /* Is EMS driver there? */ - if (! jems_available()) - return FALSE; - - /* Get status, make sure EMS is OK */ - ctx.ax = 0x4000; - jems_calldriver((EMScontext far *) & ctx); - if (HIBYTE(ctx.ax) != 0) - return FALSE; - - /* Get version, must be >= 4.0 */ - ctx.ax = 0x4600; - jems_calldriver((EMScontext far *) & ctx); - if (HIBYTE(ctx.ax) != 0 || LOBYTE(ctx.ax) < 0x40) - return FALSE; - - /* Try to allocate requested space */ - ctx.ax = 0x4300; - ctx.bx = (unsigned short) ((total_bytes_needed + EMSPAGESIZE-1L) / EMSPAGESIZE); - jems_calldriver((EMScontext far *) & ctx); - if (HIBYTE(ctx.ax) != 0) - return FALSE; - - /* Succeeded, save the handle and away we go */ - info->handle.ems_handle = ctx.dx; - info->read_backing_store = read_ems_store; - info->write_backing_store = write_ems_store; - info->close_backing_store = close_ems_store; - TRACEMS1(cinfo, 1, JTRC_EMS_OPEN, ctx.dx); - return TRUE; /* succeeded */ -} - -#endif /* EMS_SUPPORTED */ - - -/* - * Initial opening of a backing-store object. - */ - -GLOBAL(void) -jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info, - long total_bytes_needed) -{ - /* Try extended memory, then expanded memory, then regular file. */ -#if XMS_SUPPORTED - if (open_xms_store(cinfo, info, total_bytes_needed)) - return; -#endif -#if EMS_SUPPORTED - if (open_ems_store(cinfo, info, total_bytes_needed)) - return; -#endif - if (open_file_store(cinfo, info, total_bytes_needed)) - return; - ERREXITS(cinfo, JERR_TFILE_CREATE, ""); -} - - -/* - * These routines take care of any system-dependent initialization and - * cleanup required. - */ - -GLOBAL(long) -jpeg_mem_init (j_common_ptr cinfo) -{ - next_file_num = 0; /* initialize temp file name generator */ - return DEFAULT_MAX_MEM; /* default for max_memory_to_use */ -} - -GLOBAL(void) -jpeg_mem_term (j_common_ptr cinfo) -{ - /* Microsoft C, at least in v6.00A, will not successfully reclaim freed - * blocks of size > 32Kbytes unless we give it a kick in the rear, like so: - */ -#ifdef NEED_FHEAPMIN - _fheapmin(); -#endif -} diff --git a/code/jpeg-6b/jmemname.c b/code/jpeg-6b/jmemname.c deleted file mode 100644 index ed96dee1..00000000 --- a/code/jpeg-6b/jmemname.c +++ /dev/null @@ -1,276 +0,0 @@ -/* - * jmemname.c - * - * Copyright (C) 1992-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file provides a generic implementation of the system-dependent - * portion of the JPEG memory manager. This implementation assumes that - * you must explicitly construct a name for each temp file. - * Also, the problem of determining the amount of memory available - * is shoved onto the user. - */ - -#define JPEG_INTERNALS -#include "jinclude.h" -#include "jpeglib.h" -#include "jmemsys.h" /* import the system-dependent declarations */ - -#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ -extern void * malloc JPP((size_t size)); -extern void free JPP((void *ptr)); -#endif - -#ifndef SEEK_SET /* pre-ANSI systems may not define this; */ -#define SEEK_SET 0 /* if not, assume 0 is correct */ -#endif - -#ifdef DONT_USE_B_MODE /* define mode parameters for fopen() */ -#define READ_BINARY "r" -#define RW_BINARY "w+" -#else -#ifdef VMS /* VMS is very nonstandard */ -#define READ_BINARY "rb", "ctx=stm" -#define RW_BINARY "w+b", "ctx=stm" -#else /* standard ANSI-compliant case */ -#define READ_BINARY "rb" -#define RW_BINARY "w+b" -#endif -#endif - - -/* - * Selection of a file name for a temporary file. - * This is system-dependent! - * - * The code as given is suitable for most Unix systems, and it is easily - * modified for most non-Unix systems. Some notes: - * 1. The temp file is created in the directory named by TEMP_DIRECTORY. - * The default value is /usr/tmp, which is the conventional place for - * creating large temp files on Unix. On other systems you'll probably - * want to change the file location. You can do this by editing the - * #define, or (preferred) by defining TEMP_DIRECTORY in jconfig.h. - * - * 2. If you need to change the file name as well as its location, - * you can override the TEMP_FILE_NAME macro. (Note that this is - * actually a printf format string; it must contain %s and %d.) - * Few people should need to do this. - * - * 3. mktemp() is used to ensure that multiple processes running - * simultaneously won't select the same file names. If your system - * doesn't have mktemp(), define NO_MKTEMP to do it the hard way. - * (If you don't have , also define NO_ERRNO_H.) - * - * 4. You probably want to define NEED_SIGNAL_CATCHER so that cjpeg.c/djpeg.c - * will cause the temp files to be removed if you stop the program early. - */ - -#ifndef TEMP_DIRECTORY /* can override from jconfig.h or Makefile */ -#define TEMP_DIRECTORY "/usr/tmp/" /* recommended setting for Unix */ -#endif - -static int next_file_num; /* to distinguish among several temp files */ - -#ifdef NO_MKTEMP - -#ifndef TEMP_FILE_NAME /* can override from jconfig.h or Makefile */ -#define TEMP_FILE_NAME "%sJPG%03d.TMP" -#endif - -#ifndef NO_ERRNO_H -#include /* to define ENOENT */ -#endif - -/* ANSI C specifies that errno is a macro, but on older systems it's more - * likely to be a plain int variable. And not all versions of errno.h - * bother to declare it, so we have to in order to be most portable. Thus: - */ -#ifndef errno -extern int errno; -#endif - - -LOCAL(void) -select_file_name (char * fname) -{ - FILE * tfile; - - /* Keep generating file names till we find one that's not in use */ - for (;;) { - next_file_num++; /* advance counter */ - sprintf(fname, TEMP_FILE_NAME, TEMP_DIRECTORY, next_file_num); - if ((tfile = fopen(fname, READ_BINARY)) == NULL) { - /* fopen could have failed for a reason other than the file not - * being there; for example, file there but unreadable. - * If isn't available, then we cannot test the cause. - */ -#ifdef ENOENT - if (errno != ENOENT) - continue; -#endif - break; - } - fclose(tfile); /* oops, it's there; close tfile & try again */ - } -} - -#else /* ! NO_MKTEMP */ - -/* Note that mktemp() requires the initial filename to end in six X's */ -#ifndef TEMP_FILE_NAME /* can override from jconfig.h or Makefile */ -#define TEMP_FILE_NAME "%sJPG%dXXXXXX" -#endif - -LOCAL(void) -select_file_name (char * fname) -{ - next_file_num++; /* advance counter */ - sprintf(fname, TEMP_FILE_NAME, TEMP_DIRECTORY, next_file_num); - mktemp(fname); /* make sure file name is unique */ - /* mktemp replaces the trailing XXXXXX with a unique string of characters */ -} - -#endif /* NO_MKTEMP */ - - -/* - * Memory allocation and freeing are controlled by the regular library - * routines malloc() and free(). - */ - -GLOBAL(void *) -jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject) -{ - return (void *) malloc(sizeofobject); -} - -GLOBAL(void) -jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject) -{ - free(object); -} - - -/* - * "Large" objects are treated the same as "small" ones. - * NB: although we include FAR keywords in the routine declarations, - * this file won't actually work in 80x86 small/medium model; at least, - * you probably won't be able to process useful-size images in only 64KB. - */ - -GLOBAL(void FAR *) -jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject) -{ - return (void FAR *) malloc(sizeofobject); -} - -GLOBAL(void) -jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject) -{ - free(object); -} - - -/* - * This routine computes the total memory space available for allocation. - * It's impossible to do this in a portable way; our current solution is - * to make the user tell us (with a default value set at compile time). - * If you can actually get the available space, it's a good idea to subtract - * a slop factor of 5% or so. - */ - -#ifndef DEFAULT_MAX_MEM /* so can override from makefile */ -#define DEFAULT_MAX_MEM 1000000L /* default: one megabyte */ -#endif - -GLOBAL(long) -jpeg_mem_available (j_common_ptr cinfo, long min_bytes_needed, - long max_bytes_needed, long already_allocated) -{ - return cinfo->mem->max_memory_to_use - already_allocated; -} - - -/* - * Backing store (temporary file) management. - * Backing store objects are only used when the value returned by - * jpeg_mem_available is less than the total space needed. You can dispense - * with these routines if you have plenty of virtual memory; see jmemnobs.c. - */ - - -METHODDEF(void) -read_backing_store (j_common_ptr cinfo, backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count) -{ - if (fseek(info->temp_file, file_offset, SEEK_SET)) - ERREXIT(cinfo, JERR_TFILE_SEEK); - if (JFREAD(info->temp_file, buffer_address, byte_count) - != (size_t) byte_count) - ERREXIT(cinfo, JERR_TFILE_READ); -} - - -METHODDEF(void) -write_backing_store (j_common_ptr cinfo, backing_store_ptr info, - void FAR * buffer_address, - long file_offset, long byte_count) -{ - if (fseek(info->temp_file, file_offset, SEEK_SET)) - ERREXIT(cinfo, JERR_TFILE_SEEK); - if (JFWRITE(info->temp_file, buffer_address, byte_count) - != (size_t) byte_count) - ERREXIT(cinfo, JERR_TFILE_WRITE); -} - - -METHODDEF(void) -close_backing_store (j_common_ptr cinfo, backing_store_ptr info) -{ - fclose(info->temp_file); /* close the file */ - unlink(info->temp_name); /* delete the file */ -/* If your system doesn't have unlink(), use remove() instead. - * remove() is the ANSI-standard name for this function, but if - * your system was ANSI you'd be using jmemansi.c, right? - */ - TRACEMSS(cinfo, 1, JTRC_TFILE_CLOSE, info->temp_name); -} - - -/* - * Initial opening of a backing-store object. - */ - -GLOBAL(void) -jpeg_open_backing_store (j_common_ptr cinfo, backing_store_ptr info, - long total_bytes_needed) -{ - select_file_name(info->temp_name); - if ((info->temp_file = fopen(info->temp_name, RW_BINARY)) == NULL) - ERREXITS(cinfo, JERR_TFILE_CREATE, info->temp_name); - info->read_backing_store = read_backing_store; - info->write_backing_store = write_backing_store; - info->close_backing_store = close_backing_store; - TRACEMSS(cinfo, 1, JTRC_TFILE_OPEN, info->temp_name); -} - - -/* - * These routines take care of any system-dependent initialization and - * cleanup required. - */ - -GLOBAL(long) -jpeg_mem_init (j_common_ptr cinfo) -{ - next_file_num = 0; /* initialize temp file name generator */ - return DEFAULT_MAX_MEM; /* default for max_memory_to_use */ -} - -GLOBAL(void) -jpeg_mem_term (j_common_ptr cinfo) -{ - /* no work */ -} diff --git a/code/jpeg-6b/jpegtran.c b/code/jpeg-6b/jpegtran.c deleted file mode 100644 index 20ef111b..00000000 --- a/code/jpeg-6b/jpegtran.c +++ /dev/null @@ -1,504 +0,0 @@ -/* - * jpegtran.c - * - * Copyright (C) 1995-1997, Thomas G. Lane. - * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. - * - * This file contains a command-line user interface for JPEG transcoding. - * It is very similar to cjpeg.c, but provides lossless transcoding between - * different JPEG file formats. It also provides some lossless and sort-of- - * lossless transformations of JPEG data. - */ - -#include "cdjpeg.h" /* Common decls for cjpeg/djpeg applications */ -#include "transupp.h" /* Support routines for jpegtran */ -#include "jversion.h" /* for version message */ - -#ifdef USE_CCOMMAND /* command-line reader for Macintosh */ -#ifdef __MWERKS__ -#include /* Metrowerks needs this */ -#include /* ... and this */ -#endif -#ifdef THINK_C -#include /* Think declares it here */ -#endif -#endif - - -/* - * Argument-parsing code. - * The switch parser is designed to be useful with DOS-style command line - * syntax, ie, intermixed switches and file names, where only the switches - * to the left of a given file name affect processing of that file. - * The main program in this file doesn't actually use this capability... - */ - - -static const char * progname; /* program name for error messages */ -static char * outfilename; /* for -outfile switch */ -static JCOPY_OPTION copyoption; /* -copy switch */ -static jpeg_transform_info transformoption; /* image transformation options */ - - -LOCAL(void) -usage (void) -/* complain about bad command line */ -{ - fprintf(stderr, "usage: %s [switches] ", progname); -#ifdef TWO_FILE_COMMANDLINE - fprintf(stderr, "inputfile outputfile\n"); -#else - fprintf(stderr, "[inputfile]\n"); -#endif - - fprintf(stderr, "Switches (names may be abbreviated):\n"); - fprintf(stderr, " -copy none Copy no extra markers from source file\n"); - fprintf(stderr, " -copy comments Copy only comment markers (default)\n"); - fprintf(stderr, " -copy all Copy all extra markers\n"); -#ifdef ENTROPY_OPT_SUPPORTED - fprintf(stderr, " -optimize Optimize Huffman table (smaller file, but slow compression)\n"); -#endif -#ifdef C_PROGRESSIVE_SUPPORTED - fprintf(stderr, " -progressive Create progressive JPEG file\n"); -#endif -#if TRANSFORMS_SUPPORTED - fprintf(stderr, "Switches for modifying the image:\n"); - fprintf(stderr, " -grayscale Reduce to grayscale (omit color data)\n"); - fprintf(stderr, " -flip [horizontal|vertical] Mirror image (left-right or top-bottom)\n"); - fprintf(stderr, " -rotate [90|180|270] Rotate image (degrees clockwise)\n"); - fprintf(stderr, " -transpose Transpose image\n"); - fprintf(stderr, " -transverse Transverse transpose image\n"); - fprintf(stderr, " -trim Drop non-transformable edge blocks\n"); -#endif /* TRANSFORMS_SUPPORTED */ - fprintf(stderr, "Switches for advanced users:\n"); - fprintf(stderr, " -restart N Set restart interval in rows, or in blocks with B\n"); - fprintf(stderr, " -maxmemory N Maximum memory to use (in kbytes)\n"); - fprintf(stderr, " -outfile name Specify name for output file\n"); - fprintf(stderr, " -verbose or -debug Emit debug output\n"); - fprintf(stderr, "Switches for wizards:\n"); -#ifdef C_ARITH_CODING_SUPPORTED - fprintf(stderr, " -arithmetic Use arithmetic coding\n"); -#endif -#ifdef C_MULTISCAN_FILES_SUPPORTED - fprintf(stderr, " -scans file Create multi-scan JPEG per script file\n"); -#endif - exit(EXIT_FAILURE); -} - - -LOCAL(void) -select_transform (JXFORM_CODE transform) -/* Silly little routine to detect multiple transform options, - * which we can't handle. - */ -{ -#if TRANSFORMS_SUPPORTED - if (transformoption.transform == JXFORM_NONE || - transformoption.transform == transform) { - transformoption.transform = transform; - } else { - fprintf(stderr, "%s: can only do one image transformation at a time\n", - progname); - usage(); - } -#else - fprintf(stderr, "%s: sorry, image transformation was not compiled\n", - progname); - exit(EXIT_FAILURE); -#endif -} - - -LOCAL(int) -parse_switches (j_compress_ptr cinfo, int argc, char **argv, - int last_file_arg_seen, boolean for_real) -/* Parse optional switches. - * Returns argv[] index of first file-name argument (== argc if none). - * Any file names with indexes <= last_file_arg_seen are ignored; - * they have presumably been processed in a previous iteration. - * (Pass 0 for last_file_arg_seen on the first or only iteration.) - * for_real is FALSE on the first (dummy) pass; we may skip any expensive - * processing. - */ -{ - int argn; - char * arg; - boolean simple_progressive; - char * scansarg = NULL; /* saves -scans parm if any */ - - /* Set up default JPEG parameters. */ - simple_progressive = FALSE; - outfilename = NULL; - copyoption = JCOPYOPT_DEFAULT; - transformoption.transform = JXFORM_NONE; - transformoption.trim = FALSE; - transformoption.force_grayscale = FALSE; - cinfo->err->trace_level = 0; - - /* Scan command line options, adjust parameters */ - - for (argn = 1; argn < argc; argn++) { - arg = argv[argn]; - if (*arg != '-') { - /* Not a switch, must be a file name argument */ - if (argn <= last_file_arg_seen) { - outfilename = NULL; /* -outfile applies to just one input file */ - continue; /* ignore this name if previously processed */ - } - break; /* else done parsing switches */ - } - arg++; /* advance past switch marker character */ - - if (keymatch(arg, "arithmetic", 1)) { - /* Use arithmetic coding. */ -#ifdef C_ARITH_CODING_SUPPORTED - cinfo->arith_code = TRUE; -#else - fprintf(stderr, "%s: sorry, arithmetic coding not supported\n", - progname); - exit(EXIT_FAILURE); -#endif - - } else if (keymatch(arg, "copy", 1)) { - /* Select which extra markers to copy. */ - if (++argn >= argc) /* advance to next argument */ - usage(); - if (keymatch(argv[argn], "none", 1)) { - copyoption = JCOPYOPT_NONE; - } else if (keymatch(argv[argn], "comments", 1)) { - copyoption = JCOPYOPT_COMMENTS; - } else if (keymatch(argv[argn], "all", 1)) { - copyoption = JCOPYOPT_ALL; - } else - usage(); - - } else if (keymatch(arg, "debug", 1) || keymatch(arg, "verbose", 1)) { - /* Enable debug printouts. */ - /* On first -d, print version identification */ - static boolean printed_version = FALSE; - - if (! printed_version) { - fprintf(stderr, "Independent JPEG Group's JPEGTRAN, version %s\n%s\n", - JVERSION, JCOPYRIGHT); - printed_version = TRUE; - } - cinfo->err->trace_level++; - - } else if (keymatch(arg, "flip", 1)) { - /* Mirror left-right or top-bottom. */ - if (++argn >= argc) /* advance to next argument */ - usage(); - if (keymatch(argv[argn], "horizontal", 1)) - select_transform(JXFORM_FLIP_H); - else if (keymatch(argv[argn], "vertical", 1)) - select_transform(JXFORM_FLIP_V); - else - usage(); - - } else if (keymatch(arg, "grayscale", 1) || keymatch(arg, "greyscale",1)) { - /* Force to grayscale. */ -#if TRANSFORMS_SUPPORTED - transformoption.force_grayscale = TRUE; -#else - select_transform(JXFORM_NONE); /* force an error */ -#endif - - } else if (keymatch(arg, "maxmemory", 3)) { - /* Maximum memory in Kb (or Mb with 'm'). */ - long lval; - char ch = 'x'; - - if (++argn >= argc) /* advance to next argument */ - usage(); - if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) - usage(); - if (ch == 'm' || ch == 'M') - lval *= 1000L; - cinfo->mem->max_memory_to_use = lval * 1000L; - - } else if (keymatch(arg, "optimize", 1) || keymatch(arg, "optimise", 1)) { - /* Enable entropy parm optimization. */ -#ifdef ENTROPY_OPT_SUPPORTED - cinfo->optimize_coding = TRUE; -#else - fprintf(stderr, "%s: sorry, entropy optimization was not compiled\n", - progname); - exit(EXIT_FAILURE); -#endif - - } else if (keymatch(arg, "outfile", 4)) { - /* Set output file name. */ - if (++argn >= argc) /* advance to next argument */ - usage(); - outfilename = argv[argn]; /* save it away for later use */ - - } else if (keymatch(arg, "progressive", 1)) { - /* Select simple progressive mode. */ -#ifdef C_PROGRESSIVE_SUPPORTED - simple_progressive = TRUE; - /* We must postpone execution until num_components is known. */ -#else - fprintf(stderr, "%s: sorry, progressive output was not compiled\n", - progname); - exit(EXIT_FAILURE); -#endif - - } else if (keymatch(arg, "restart", 1)) { - /* Restart interval in MCU rows (or in MCUs with 'b'). */ - long lval; - char ch = 'x'; - - if (++argn >= argc) /* advance to next argument */ - usage(); - if (sscanf(argv[argn], "%ld%c", &lval, &ch) < 1) - usage(); - if (lval < 0 || lval > 65535L) - usage(); - if (ch == 'b' || ch == 'B') { - cinfo->restart_interval = (unsigned int) lval; - cinfo->restart_in_rows = 0; /* else prior '-restart n' overrides me */ - } else { - cinfo->restart_in_rows = (int) lval; - /* restart_interval will be computed during startup */ - } - - } else if (keymatch(arg, "rotate", 2)) { - /* Rotate 90, 180, or 270 degrees (measured clockwise). */ - if (++argn >= argc) /* advance to next argument */ - usage(); - if (keymatch(argv[argn], "90", 2)) - select_transform(JXFORM_ROT_90); - else if (keymatch(argv[argn], "180", 3)) - select_transform(JXFORM_ROT_180); - else if (keymatch(argv[argn], "270", 3)) - select_transform(JXFORM_ROT_270); - else - usage(); - - } else if (keymatch(arg, "scans", 1)) { - /* Set scan script. */ -#ifdef C_MULTISCAN_FILES_SUPPORTED - if (++argn >= argc) /* advance to next argument */ - usage(); - scansarg = argv[argn]; - /* We must postpone reading the file in case -progressive appears. */ -#else - fprintf(stderr, "%s: sorry, multi-scan output was not compiled\n", - progname); - exit(EXIT_FAILURE); -#endif - - } else if (keymatch(arg, "transpose", 1)) { - /* Transpose (across UL-to-LR axis). */ - select_transform(JXFORM_TRANSPOSE); - - } else if (keymatch(arg, "transverse", 6)) { - /* Transverse transpose (across UR-to-LL axis). */ - select_transform(JXFORM_TRANSVERSE); - - } else if (keymatch(arg, "trim", 3)) { - /* Trim off any partial edge MCUs that the transform can't handle. */ - transformoption.trim = TRUE; - - } else { - usage(); /* bogus switch */ - } - } - - /* Post-switch-scanning cleanup */ - - if (for_real) { - -#ifdef C_PROGRESSIVE_SUPPORTED - if (simple_progressive) /* process -progressive; -scans can override */ - jpeg_simple_progression(cinfo); -#endif - -#ifdef C_MULTISCAN_FILES_SUPPORTED - if (scansarg != NULL) /* process -scans if it was present */ - if (! read_scan_script(cinfo, scansarg)) - usage(); -#endif - } - - return argn; /* return index of next arg (file name) */ -} - - -/* - * The main program. - */ - -int -main (int argc, char **argv) -{ - struct jpeg_decompress_struct srcinfo; - struct jpeg_compress_struct dstinfo; - struct jpeg_error_mgr jsrcerr, jdsterr; -#ifdef PROGRESS_REPORT - struct cdjpeg_progress_mgr progress; -#endif - jvirt_barray_ptr * src_coef_arrays; - jvirt_barray_ptr * dst_coef_arrays; - int file_index; - FILE * input_file; - FILE * output_file; - - /* On Mac, fetch a command line. */ -#ifdef USE_CCOMMAND - argc = ccommand(&argv); -#endif - - progname = argv[0]; - if (progname == NULL || progname[0] == 0) - progname = "jpegtran"; /* in case C library doesn't provide it */ - - /* Initialize the JPEG decompression object with default error handling. */ - srcinfo.err = jpeg_std_error(&jsrcerr); - jpeg_create_decompress(&srcinfo); - /* Initialize the JPEG compression object with default error handling. */ - dstinfo.err = jpeg_std_error(&jdsterr); - jpeg_create_compress(&dstinfo); - - /* Now safe to enable signal catcher. - * Note: we assume only the decompression object will have virtual arrays. - */ -#ifdef NEED_SIGNAL_CATCHER - enable_signal_catcher((j_common_ptr) &srcinfo); -#endif - - /* Scan command line to find file names. - * It is convenient to use just one switch-parsing routine, but the switch - * values read here are mostly ignored; we will rescan the switches after - * opening the input file. Also note that most of the switches affect the - * destination JPEG object, so we parse into that and then copy over what - * needs to affects the source too. - */ - - file_index = parse_switches(&dstinfo, argc, argv, 0, FALSE); - jsrcerr.trace_level = jdsterr.trace_level; - srcinfo.mem->max_memory_to_use = dstinfo.mem->max_memory_to_use; - -#ifdef TWO_FILE_COMMANDLINE - /* Must have either -outfile switch or explicit output file name */ - if (outfilename == NULL) { - if (file_index != argc-2) { - fprintf(stderr, "%s: must name one input and one output file\n", - progname); - usage(); - } - outfilename = argv[file_index+1]; - } else { - if (file_index != argc-1) { - fprintf(stderr, "%s: must name one input and one output file\n", - progname); - usage(); - } - } -#else - /* Unix style: expect zero or one file name */ - if (file_index < argc-1) { - fprintf(stderr, "%s: only one input file\n", progname); - usage(); - } -#endif /* TWO_FILE_COMMANDLINE */ - - /* Open the input file. */ - if (file_index < argc) { - if ((input_file = fopen(argv[file_index], READ_BINARY)) == NULL) { - fprintf(stderr, "%s: can't open %s\n", progname, argv[file_index]); - exit(EXIT_FAILURE); - } - } else { - /* default input file is stdin */ - input_file = read_stdin(); - } - - /* Open the output file. */ - if (outfilename != NULL) { - if ((output_file = fopen(outfilename, WRITE_BINARY)) == NULL) { - fprintf(stderr, "%s: can't open %s\n", progname, outfilename); - exit(EXIT_FAILURE); - } - } else { - /* default output file is stdout */ - output_file = write_stdout(); - } - -#ifdef PROGRESS_REPORT - start_progress_monitor((j_common_ptr) &dstinfo, &progress); -#endif - - /* Specify data source for decompression */ - jpeg_stdio_src(&srcinfo, input_file); - - /* Enable saving of extra markers that we want to copy */ - jcopy_markers_setup(&srcinfo, copyoption); - - /* Read file header */ - (void) jpeg_read_header(&srcinfo, TRUE); - - /* Any space needed by a transform option must be requested before - * jpeg_read_coefficients so that memory allocation will be done right. - */ -#if TRANSFORMS_SUPPORTED - jtransform_request_workspace(&srcinfo, &transformoption); -#endif - - /* Read source file as DCT coefficients */ - src_coef_arrays = jpeg_read_coefficients(&srcinfo); - - /* Initialize destination compression parameters from source values */ - jpeg_copy_critical_parameters(&srcinfo, &dstinfo); - - /* Adjust destination parameters if required by transform options; - * also find out which set of coefficient arrays will hold the output. - */ -#if TRANSFORMS_SUPPORTED - dst_coef_arrays = jtransform_adjust_parameters(&srcinfo, &dstinfo, - src_coef_arrays, - &transformoption); -#else - dst_coef_arrays = src_coef_arrays; -#endif - - /* Adjust default compression parameters by re-parsing the options */ - file_index = parse_switches(&dstinfo, argc, argv, 0, TRUE); - - /* Specify data destination for compression */ - jpeg_stdio_dest(&dstinfo, output_file); - - /* Start compressor (note no image data is actually written here) */ - jpeg_write_coefficients(&dstinfo, dst_coef_arrays); - - /* Copy to the output file any extra markers that we want to preserve */ - jcopy_markers_execute(&srcinfo, &dstinfo, copyoption); - - /* Execute image transformation, if any */ -#if TRANSFORMS_SUPPORTED - jtransform_execute_transformation(&srcinfo, &dstinfo, - src_coef_arrays, - &transformoption); -#endif - - /* Finish compression and release memory */ - jpeg_finish_compress(&dstinfo); - jpeg_destroy_compress(&dstinfo); - (void) jpeg_finish_decompress(&srcinfo); - jpeg_destroy_decompress(&srcinfo); - - /* Close files, if we opened them */ - if (input_file != stdin) - fclose(input_file); - if (output_file != stdout) - fclose(output_file); - -#ifdef PROGRESS_REPORT - end_progress_monitor((j_common_ptr) &dstinfo); -#endif - - /* All done. */ - exit(jsrcerr.num_warnings + jdsterr.num_warnings ?EXIT_WARNING:EXIT_SUCCESS); - return 0; /* suppress no-return-value warnings */ -} diff --git a/code/jpeg-8c/README b/code/jpeg-8c/README new file mode 100644 index 00000000..451265d7 --- /dev/null +++ b/code/jpeg-8c/README @@ -0,0 +1,326 @@ +The Independent JPEG Group's JPEG software +========================================== + +README for release 8c of 16-Jan-2011 +==================================== + +This distribution contains the eighth public release of the Independent JPEG +Group's free JPEG software. You are welcome to redistribute this software and +to use it for any purpose, subject to the conditions under LEGAL ISSUES, below. + +This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone, +Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson, +Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, Ge' Weijers, +and other members of the Independent JPEG Group. + +IJG is not affiliated with the official ISO JPEG standards committee. + + +DOCUMENTATION ROADMAP +===================== + +This file contains the following sections: + +OVERVIEW General description of JPEG and the IJG software. +LEGAL ISSUES Copyright, lack of warranty, terms of distribution. +REFERENCES Where to learn more about JPEG. +ARCHIVE LOCATIONS Where to find newer versions of this software. +ACKNOWLEDGMENTS Special thanks. +FILE FORMAT WARS Software *not* to get. +TO DO Plans for future IJG releases. + +Other documentation files in the distribution are: + +User documentation: + install.txt How to configure and install the IJG software. + usage.txt Usage instructions for cjpeg, djpeg, jpegtran, + rdjpgcom, and wrjpgcom. + *.1 Unix-style man pages for programs (same info as usage.txt). + wizard.txt Advanced usage instructions for JPEG wizards only. + change.log Version-to-version change highlights. +Programmer and internal documentation: + libjpeg.txt How to use the JPEG library in your own programs. + example.c Sample code for calling the JPEG library. + structure.txt Overview of the JPEG library's internal structure. + filelist.txt Road map of IJG files. + coderules.txt Coding style rules --- please read if you contribute code. + +Please read at least the files install.txt and usage.txt. Some information +can also be found in the JPEG FAQ (Frequently Asked Questions) article. See +ARCHIVE LOCATIONS below to find out where to obtain the FAQ article. + +If you want to understand how the JPEG code works, we suggest reading one or +more of the REFERENCES, then looking at the documentation files (in roughly +the order listed) before diving into the code. + + +OVERVIEW +======== + +This package contains C software to implement JPEG image encoding, decoding, +and transcoding. JPEG (pronounced "jay-peg") is a standardized compression +method for full-color and gray-scale images. + +This software implements JPEG baseline, extended-sequential, and progressive +compression processes. Provision is made for supporting all variants of these +processes, although some uncommon parameter settings aren't implemented yet. +We have made no provision for supporting the hierarchical or lossless +processes defined in the standard. + +We provide a set of library routines for reading and writing JPEG image files, +plus two sample applications "cjpeg" and "djpeg", which use the library to +perform conversion between JPEG and some other popular image file formats. +The library is intended to be reused in other applications. + +In order to support file conversion and viewing software, we have included +considerable functionality beyond the bare JPEG coding/decoding capability; +for example, the color quantization modules are not strictly part of JPEG +decoding, but they are essential for output to colormapped file formats or +colormapped displays. These extra functions can be compiled out of the +library if not required for a particular application. + +We have also included "jpegtran", a utility for lossless transcoding between +different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple +applications for inserting and extracting textual comments in JFIF files. + +The emphasis in designing this software has been on achieving portability and +flexibility, while also making it fast enough to be useful. In particular, +the software is not intended to be read as a tutorial on JPEG. (See the +REFERENCES section for introductory material.) Rather, it is intended to +be reliable, portable, industrial-strength code. We do not claim to have +achieved that goal in every aspect of the software, but we strive for it. + +We welcome the use of this software as a component of commercial products. +No royalty is required, but we do ask for an acknowledgement in product +documentation, as described under LEGAL ISSUES. + + +LEGAL ISSUES +============ + +In plain English: + +1. We don't promise that this software works. (But if you find any bugs, + please let us know!) +2. You can use this software for whatever you want. You don't have to pay us. +3. You may not pretend that you wrote this software. If you use it in a + program, you must acknowledge somewhere in your documentation that + you've used the IJG code. + +In legalese: + +The authors make NO WARRANTY or representation, either express or implied, +with respect to this software, its quality, accuracy, merchantability, or +fitness for a particular purpose. This software is provided "AS IS", and you, +its user, assume the entire risk as to its quality and accuracy. + +This software is copyright (C) 1991-2011, Thomas G. Lane, Guido Vollbeding. +All Rights Reserved except as specified below. + +Permission is hereby granted to use, copy, modify, and distribute this +software (or portions thereof) for any purpose, without fee, subject to these +conditions: +(1) If any part of the source code for this software is distributed, then this +README file must be included, with this copyright and no-warranty notice +unaltered; and any additions, deletions, or changes to the original files +must be clearly indicated in accompanying documentation. +(2) If only executable code is distributed, then the accompanying +documentation must state that "this software is based in part on the work of +the Independent JPEG Group". +(3) Permission for use of this software is granted only if the user accepts +full responsibility for any undesirable consequences; the authors accept +NO LIABILITY for damages of any kind. + +These conditions apply to any software derived from or based on the IJG code, +not just to the unmodified library. If you use our work, you ought to +acknowledge us. + +Permission is NOT granted for the use of any IJG author's name or company name +in advertising or publicity relating to this software or products derived from +it. This software may be referred to only as "the Independent JPEG Group's +software". + +We specifically permit and encourage the use of this software as the basis of +commercial products, provided that all warranty or liability claims are +assumed by the product vendor. + + +ansi2knr.c is included in this distribution by permission of L. Peter Deutsch, +sole proprietor of its copyright holder, Aladdin Enterprises of Menlo Park, CA. +ansi2knr.c is NOT covered by the above copyright and conditions, but instead +by the usual distribution terms of the Free Software Foundation; principally, +that you must include source code if you redistribute it. (See the file +ansi2knr.c for full details.) However, since ansi2knr.c is not needed as part +of any program generated from the IJG code, this does not limit you more than +the foregoing paragraphs do. + +The Unix configuration script "configure" was produced with GNU Autoconf. +It is copyright by the Free Software Foundation but is freely distributable. +The same holds for its supporting scripts (config.guess, config.sub, +ltmain.sh). Another support script, install-sh, is copyright by X Consortium +but is also freely distributable. + +The IJG distribution formerly included code to read and write GIF files. +To avoid entanglement with the Unisys LZW patent, GIF reading support has +been removed altogether, and the GIF writer has been simplified to produce +"uncompressed GIFs". This technique does not use the LZW algorithm; the +resulting GIF files are larger than usual, but are readable by all standard +GIF decoders. + +We are required to state that + "The Graphics Interchange Format(c) is the Copyright property of + CompuServe Incorporated. GIF(sm) is a Service Mark property of + CompuServe Incorporated." + + +REFERENCES +========== + +We recommend reading one or more of these references before trying to +understand the innards of the JPEG software. + +The best short technical introduction to the JPEG compression algorithm is + Wallace, Gregory K. "The JPEG Still Picture Compression Standard", + Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44. +(Adjacent articles in that issue discuss MPEG motion picture compression, +applications of JPEG, and related topics.) If you don't have the CACM issue +handy, a PostScript file containing a revised version of Wallace's article is +available at http://www.ijg.org/files/wallace.ps.gz. The file (actually +a preprint for an article that appeared in IEEE Trans. Consumer Electronics) +omits the sample images that appeared in CACM, but it includes corrections +and some added material. Note: the Wallace article is copyright ACM and IEEE, +and it may not be used for commercial purposes. + +A somewhat less technical, more leisurely introduction to JPEG can be found in +"The Data Compression Book" by Mark Nelson and Jean-loup Gailly, published by +M&T Books (New York), 2nd ed. 1996, ISBN 1-55851-434-1. This book provides +good explanations and example C code for a multitude of compression methods +including JPEG. It is an excellent source if you are comfortable reading C +code but don't know much about data compression in general. The book's JPEG +sample code is far from industrial-strength, but when you are ready to look +at a full implementation, you've got one here... + +The best currently available description of JPEG is the textbook "JPEG Still +Image Data Compression Standard" by William B. Pennebaker and Joan L. +Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1. +Price US$59.95, 638 pp. The book includes the complete text of the ISO JPEG +standards (DIS 10918-1 and draft DIS 10918-2). +Although this is by far the most detailed and comprehensive exposition of +JPEG publicly available, we point out that it is still missing an explanation +of the most essential properties and algorithms of the underlying DCT +technology. +If you think that you know about DCT-based JPEG after reading this book, +then you are in delusion. The real fundamentals and corresponding potential +of DCT-based JPEG are not publicly known so far, and that is the reason for +all the mistaken developments taking place in the image coding domain. + +The original JPEG standard is divided into two parts, Part 1 being the actual +specification, while Part 2 covers compliance testing methods. Part 1 is +titled "Digital Compression and Coding of Continuous-tone Still Images, +Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS +10918-1, ITU-T T.81. Part 2 is titled "Digital Compression and Coding of +Continuous-tone Still Images, Part 2: Compliance testing" and has document +numbers ISO/IEC IS 10918-2, ITU-T T.83. +IJG JPEG 8 introduces an implementation of the JPEG SmartScale extension +which is specified in a contributed document at ITU and ISO with title "ITU-T +JPEG-Plus Proposal for Extending ITU-T T.81 for Advanced Image Coding", April +2006, Geneva, Switzerland. The latest version of the document is Revision 3. + +The JPEG standard does not specify all details of an interchangeable file +format. For the omitted details we follow the "JFIF" conventions, revision +1.02. JFIF 1.02 has been adopted as an Ecma International Technical Report +and thus received a formal publication status. It is available as a free +download in PDF format from +http://www.ecma-international.org/publications/techreports/E-TR-098.htm. +A PostScript version of the JFIF document is available at +http://www.ijg.org/files/jfif.ps.gz. There is also a plain text version at +http://www.ijg.org/files/jfif.txt.gz, but it is missing the figures. + +The TIFF 6.0 file format specification can be obtained by FTP from +ftp://ftp.sgi.com/graphics/tiff/TIFF6.ps.gz. The JPEG incorporation scheme +found in the TIFF 6.0 spec of 3-June-92 has a number of serious problems. +IJG does not recommend use of the TIFF 6.0 design (TIFF Compression tag 6). +Instead, we recommend the JPEG design proposed by TIFF Technical Note #2 +(Compression tag 7). Copies of this Note can be obtained from +http://www.ijg.org/files/. It is expected that the next revision +of the TIFF spec will replace the 6.0 JPEG design with the Note's design. +Although IJG's own code does not support TIFF/JPEG, the free libtiff library +uses our library to implement TIFF/JPEG per the Note. + + +ARCHIVE LOCATIONS +================= + +The "official" archive site for this software is www.ijg.org. +The most recent released version can always be found there in +directory "files". This particular version will be archived as +http://www.ijg.org/files/jpegsrc.v8c.tar.gz, and in Windows-compatible +"zip" archive format as http://www.ijg.org/files/jpegsr8c.zip. + +The JPEG FAQ (Frequently Asked Questions) article is a source of some +general information about JPEG. +It is available on the World Wide Web at http://www.faqs.org/faqs/jpeg-faq/ +and other news.answers archive sites, including the official news.answers +archive at rtfm.mit.edu: ftp://rtfm.mit.edu/pub/usenet/news.answers/jpeg-faq/. +If you don't have Web or FTP access, send e-mail to mail-server@rtfm.mit.edu +with body + send usenet/news.answers/jpeg-faq/part1 + send usenet/news.answers/jpeg-faq/part2 + + +ACKNOWLEDGMENTS +=============== + +Thank to Juergen Bruder for providing me with a copy of the common DCT +algorithm article, only to find out that I had come to the same result +in a more direct and comprehensible way with a more generative approach. + +Thank to Istvan Sebestyen and Joan L. Mitchell for inviting me to the +ITU JPEG (Study Group 16) meeting in Geneva, Switzerland. + +Thank to Thomas Wiegand and Gary Sullivan for inviting me to the +Joint Video Team (MPEG & ITU) meeting in Geneva, Switzerland. + +Thank to John Korejwa and Massimo Ballerini for inviting me to +fruitful consultations in Boston, MA and Milan, Italy. + +Thank to Hendrik Elstner, Roland Fassauer, Simone Zuck, Guenther +Maier-Gerber, Walter Stoeber, Fred Schmitz, and Norbert Braunagel +for corresponding business development. + +Thank to Nico Zschach and Dirk Stelling of the technical support team +at the Digital Images company in Halle for providing me with extra +equipment for configuration tests. + +Thank to Richard F. Lyon (then of Foveon Inc.) for fruitful +communication about JPEG configuration in Sigma Photo Pro software. + +Thank to Andrew Finkenstadt for hosting the ijg.org site. + +Last but not least special thank to Thomas G. Lane for the original +design and development of this singular software package. + + +FILE FORMAT WARS +================ + +The ISO JPEG standards committee actually promotes different formats like +"JPEG 2000" or "JPEG XR" which are incompatible with original DCT-based +JPEG and which are based on faulty technologies. IJG therefore does not +and will not support such momentary mistakes (see REFERENCES). +We have little or no sympathy for the promotion of these formats. Indeed, +one of the original reasons for developing this free software was to help +force convergence on common, interoperable format standards for JPEG files. +Don't use an incompatible file format! +(In any case, our decoder will remain capable of reading existing JPEG +image files indefinitely.) + + +TO DO +===== + +Version 8 is the first release of a new generation JPEG standard +to overcome the limitations of the original JPEG specification. +More features are being prepared for coming releases... + +Please send bug reports, offers of help, etc. to jpeg-info@uc.ag. diff --git a/code/jpeg-8c/jaricom.c b/code/jpeg-8c/jaricom.c new file mode 100644 index 00000000..f43e2ea7 --- /dev/null +++ b/code/jpeg-8c/jaricom.c @@ -0,0 +1,153 @@ +/* + * jaricom.c + * + * Developed 1997-2009 by Guido Vollbeding. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains probability estimation tables for common use in + * arithmetic entropy encoding and decoding routines. + * + * This data represents Table D.2 in the JPEG spec (ISO/IEC IS 10918-1 + * and CCITT Recommendation ITU-T T.81) and Table 24 in the JBIG spec + * (ISO/IEC IS 11544 and CCITT Recommendation ITU-T T.82). + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" + +/* The following #define specifies the packing of the four components + * into the compact INT32 representation. + * Note that this formula must match the actual arithmetic encoder + * and decoder implementation. The implementation has to be changed + * if this formula is changed. + * The current organization is leaned on Markus Kuhn's JBIG + * implementation (jbig_tab.c). + */ + +#define V(i,a,b,c,d) (((INT32)a << 16) | ((INT32)c << 8) | ((INT32)d << 7) | b) + +const INT32 jpeg_aritab[113+1] = { +/* + * Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS + */ + V( 0, 0x5a1d, 1, 1, 1 ), + V( 1, 0x2586, 14, 2, 0 ), + V( 2, 0x1114, 16, 3, 0 ), + V( 3, 0x080b, 18, 4, 0 ), + V( 4, 0x03d8, 20, 5, 0 ), + V( 5, 0x01da, 23, 6, 0 ), + V( 6, 0x00e5, 25, 7, 0 ), + V( 7, 0x006f, 28, 8, 0 ), + V( 8, 0x0036, 30, 9, 0 ), + V( 9, 0x001a, 33, 10, 0 ), + V( 10, 0x000d, 35, 11, 0 ), + V( 11, 0x0006, 9, 12, 0 ), + V( 12, 0x0003, 10, 13, 0 ), + V( 13, 0x0001, 12, 13, 0 ), + V( 14, 0x5a7f, 15, 15, 1 ), + V( 15, 0x3f25, 36, 16, 0 ), + V( 16, 0x2cf2, 38, 17, 0 ), + V( 17, 0x207c, 39, 18, 0 ), + V( 18, 0x17b9, 40, 19, 0 ), + V( 19, 0x1182, 42, 20, 0 ), + V( 20, 0x0cef, 43, 21, 0 ), + V( 21, 0x09a1, 45, 22, 0 ), + V( 22, 0x072f, 46, 23, 0 ), + V( 23, 0x055c, 48, 24, 0 ), + V( 24, 0x0406, 49, 25, 0 ), + V( 25, 0x0303, 51, 26, 0 ), + V( 26, 0x0240, 52, 27, 0 ), + V( 27, 0x01b1, 54, 28, 0 ), + V( 28, 0x0144, 56, 29, 0 ), + V( 29, 0x00f5, 57, 30, 0 ), + V( 30, 0x00b7, 59, 31, 0 ), + V( 31, 0x008a, 60, 32, 0 ), + V( 32, 0x0068, 62, 33, 0 ), + V( 33, 0x004e, 63, 34, 0 ), + V( 34, 0x003b, 32, 35, 0 ), + V( 35, 0x002c, 33, 9, 0 ), + V( 36, 0x5ae1, 37, 37, 1 ), + V( 37, 0x484c, 64, 38, 0 ), + V( 38, 0x3a0d, 65, 39, 0 ), + V( 39, 0x2ef1, 67, 40, 0 ), + V( 40, 0x261f, 68, 41, 0 ), + V( 41, 0x1f33, 69, 42, 0 ), + V( 42, 0x19a8, 70, 43, 0 ), + V( 43, 0x1518, 72, 44, 0 ), + V( 44, 0x1177, 73, 45, 0 ), + V( 45, 0x0e74, 74, 46, 0 ), + V( 46, 0x0bfb, 75, 47, 0 ), + V( 47, 0x09f8, 77, 48, 0 ), + V( 48, 0x0861, 78, 49, 0 ), + V( 49, 0x0706, 79, 50, 0 ), + V( 50, 0x05cd, 48, 51, 0 ), + V( 51, 0x04de, 50, 52, 0 ), + V( 52, 0x040f, 50, 53, 0 ), + V( 53, 0x0363, 51, 54, 0 ), + V( 54, 0x02d4, 52, 55, 0 ), + V( 55, 0x025c, 53, 56, 0 ), + V( 56, 0x01f8, 54, 57, 0 ), + V( 57, 0x01a4, 55, 58, 0 ), + V( 58, 0x0160, 56, 59, 0 ), + V( 59, 0x0125, 57, 60, 0 ), + V( 60, 0x00f6, 58, 61, 0 ), + V( 61, 0x00cb, 59, 62, 0 ), + V( 62, 0x00ab, 61, 63, 0 ), + V( 63, 0x008f, 61, 32, 0 ), + V( 64, 0x5b12, 65, 65, 1 ), + V( 65, 0x4d04, 80, 66, 0 ), + V( 66, 0x412c, 81, 67, 0 ), + V( 67, 0x37d8, 82, 68, 0 ), + V( 68, 0x2fe8, 83, 69, 0 ), + V( 69, 0x293c, 84, 70, 0 ), + V( 70, 0x2379, 86, 71, 0 ), + V( 71, 0x1edf, 87, 72, 0 ), + V( 72, 0x1aa9, 87, 73, 0 ), + V( 73, 0x174e, 72, 74, 0 ), + V( 74, 0x1424, 72, 75, 0 ), + V( 75, 0x119c, 74, 76, 0 ), + V( 76, 0x0f6b, 74, 77, 0 ), + V( 77, 0x0d51, 75, 78, 0 ), + V( 78, 0x0bb6, 77, 79, 0 ), + V( 79, 0x0a40, 77, 48, 0 ), + V( 80, 0x5832, 80, 81, 1 ), + V( 81, 0x4d1c, 88, 82, 0 ), + V( 82, 0x438e, 89, 83, 0 ), + V( 83, 0x3bdd, 90, 84, 0 ), + V( 84, 0x34ee, 91, 85, 0 ), + V( 85, 0x2eae, 92, 86, 0 ), + V( 86, 0x299a, 93, 87, 0 ), + V( 87, 0x2516, 86, 71, 0 ), + V( 88, 0x5570, 88, 89, 1 ), + V( 89, 0x4ca9, 95, 90, 0 ), + V( 90, 0x44d9, 96, 91, 0 ), + V( 91, 0x3e22, 97, 92, 0 ), + V( 92, 0x3824, 99, 93, 0 ), + V( 93, 0x32b4, 99, 94, 0 ), + V( 94, 0x2e17, 93, 86, 0 ), + V( 95, 0x56a8, 95, 96, 1 ), + V( 96, 0x4f46, 101, 97, 0 ), + V( 97, 0x47e5, 102, 98, 0 ), + V( 98, 0x41cf, 103, 99, 0 ), + V( 99, 0x3c3d, 104, 100, 0 ), + V( 100, 0x375e, 99, 93, 0 ), + V( 101, 0x5231, 105, 102, 0 ), + V( 102, 0x4c0f, 106, 103, 0 ), + V( 103, 0x4639, 107, 104, 0 ), + V( 104, 0x415e, 103, 99, 0 ), + V( 105, 0x5627, 105, 106, 1 ), + V( 106, 0x50e7, 108, 107, 0 ), + V( 107, 0x4b85, 109, 103, 0 ), + V( 108, 0x5597, 110, 109, 0 ), + V( 109, 0x504f, 111, 107, 0 ), + V( 110, 0x5a10, 110, 111, 1 ), + V( 111, 0x5522, 112, 109, 0 ), + V( 112, 0x59eb, 112, 111, 1 ), +/* + * This last entry is used for fixed probability estimate of 0.5 + * as recommended in Section 10.3 Table 5 of ITU-T Rec. T.851. + */ + V( 113, 0x5a1d, 113, 113, 0 ) +}; diff --git a/code/jpeg-6b/jcapimin.c b/code/jpeg-8c/jcapimin.c similarity index 96% rename from code/jpeg-6b/jcapimin.c rename to code/jpeg-8c/jcapimin.c index 54fb8c58..639ce86f 100644 --- a/code/jpeg-6b/jcapimin.c +++ b/code/jpeg-8c/jcapimin.c @@ -2,6 +2,7 @@ * jcapimin.c * * Copyright (C) 1994-1998, Thomas G. Lane. + * Modified 2003-2010 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -63,14 +64,21 @@ jpeg_CreateCompress (j_compress_ptr cinfo, int version, size_t structsize) cinfo->comp_info = NULL; - for (i = 0; i < NUM_QUANT_TBLS; i++) + for (i = 0; i < NUM_QUANT_TBLS; i++) { cinfo->quant_tbl_ptrs[i] = NULL; + cinfo->q_scale_factor[i] = 100; + } for (i = 0; i < NUM_HUFF_TBLS; i++) { cinfo->dc_huff_tbl_ptrs[i] = NULL; cinfo->ac_huff_tbl_ptrs[i] = NULL; } + /* Must do it here for emit_dqt in case jpeg_write_tables is used */ + cinfo->block_size = DCTSIZE; + cinfo->natural_order = jpeg_natural_order; + cinfo->lim_Se = DCTSIZE2-1; + cinfo->script_space = NULL; cinfo->input_gamma = 1.0; /* in case application forgets */ diff --git a/code/jpeg-6b/jcapistd.c b/code/jpeg-8c/jcapistd.c similarity index 100% rename from code/jpeg-6b/jcapistd.c rename to code/jpeg-8c/jcapistd.c diff --git a/code/jpeg-8c/jcarith.c b/code/jpeg-8c/jcarith.c new file mode 100644 index 00000000..0b7ea55d --- /dev/null +++ b/code/jpeg-8c/jcarith.c @@ -0,0 +1,934 @@ +/* + * jcarith.c + * + * Developed 1997-2009 by Guido Vollbeding. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains portable arithmetic entropy encoding routines for JPEG + * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81). + * + * Both sequential and progressive modes are supported in this single module. + * + * Suspension is not currently supported in this module. + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" + + +/* Expanded entropy encoder object for arithmetic encoding. */ + +typedef struct { + struct jpeg_entropy_encoder pub; /* public fields */ + + INT32 c; /* C register, base of coding interval, layout as in sec. D.1.3 */ + INT32 a; /* A register, normalized size of coding interval */ + INT32 sc; /* counter for stacked 0xFF values which might overflow */ + INT32 zc; /* counter for pending 0x00 output values which might * + * be discarded at the end ("Pacman" termination) */ + int ct; /* bit shift counter, determines when next byte will be written */ + int buffer; /* buffer for most recent output byte != 0xFF */ + + int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ + int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */ + + unsigned int restarts_to_go; /* MCUs left in this restart interval */ + int next_restart_num; /* next restart number to write (0-7) */ + + /* Pointers to statistics areas (these workspaces have image lifespan) */ + unsigned char * dc_stats[NUM_ARITH_TBLS]; + unsigned char * ac_stats[NUM_ARITH_TBLS]; + + /* Statistics bin for coding with fixed probability 0.5 */ + unsigned char fixed_bin[4]; +} arith_entropy_encoder; + +typedef arith_entropy_encoder * arith_entropy_ptr; + +/* The following two definitions specify the allocation chunk size + * for the statistics area. + * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least + * 49 statistics bins for DC, and 245 statistics bins for AC coding. + * + * We use a compact representation with 1 byte per statistics bin, + * thus the numbers directly represent byte sizes. + * This 1 byte per statistics bin contains the meaning of the MPS + * (more probable symbol) in the highest bit (mask 0x80), and the + * index into the probability estimation state machine table + * in the lower bits (mask 0x7F). + */ + +#define DC_STAT_BINS 64 +#define AC_STAT_BINS 256 + +/* NOTE: Uncomment the following #define if you want to use the + * given formula for calculating the AC conditioning parameter Kx + * for spectral selection progressive coding in section G.1.3.2 + * of the spec (Kx = Kmin + SRL (8 + Se - Kmin) 4). + * Although the spec and P&M authors claim that this "has proven + * to give good results for 8 bit precision samples", I'm not + * convinced yet that this is really beneficial. + * Early tests gave only very marginal compression enhancements + * (a few - around 5 or so - bytes even for very large files), + * which would turn out rather negative if we'd suppress the + * DAC (Define Arithmetic Conditioning) marker segments for + * the default parameters in the future. + * Note that currently the marker writing module emits 12-byte + * DAC segments for a full-component scan in a color image. + * This is not worth worrying about IMHO. However, since the + * spec defines the default values to be used if the tables + * are omitted (unlike Huffman tables, which are required + * anyway), one might optimize this behaviour in the future, + * and then it would be disadvantageous to use custom tables if + * they don't provide sufficient gain to exceed the DAC size. + * + * On the other hand, I'd consider it as a reasonable result + * that the conditioning has no significant influence on the + * compression performance. This means that the basic + * statistical model is already rather stable. + * + * Thus, at the moment, we use the default conditioning values + * anyway, and do not use the custom formula. + * +#define CALCULATE_SPECTRAL_CONDITIONING + */ + +/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32. + * We assume that int right shift is unsigned if INT32 right shift is, + * which should be safe. + */ + +#ifdef RIGHT_SHIFT_IS_UNSIGNED +#define ISHIFT_TEMPS int ishift_temp; +#define IRIGHT_SHIFT(x,shft) \ + ((ishift_temp = (x)) < 0 ? \ + (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \ + (ishift_temp >> (shft))) +#else +#define ISHIFT_TEMPS +#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) +#endif + + +LOCAL(void) +emit_byte (int val, j_compress_ptr cinfo) +/* Write next output byte; we do not support suspension in this module. */ +{ + struct jpeg_destination_mgr * dest = cinfo->dest; + + *dest->next_output_byte++ = (JOCTET) val; + if (--dest->free_in_buffer == 0) + if (! (*dest->empty_output_buffer) (cinfo)) + ERREXIT(cinfo, JERR_CANT_SUSPEND); +} + + +/* + * Finish up at the end of an arithmetic-compressed scan. + */ + +METHODDEF(void) +finish_pass (j_compress_ptr cinfo) +{ + arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy; + INT32 temp; + + /* Section D.1.8: Termination of encoding */ + + /* Find the e->c in the coding interval with the largest + * number of trailing zero bits */ + if ((temp = (e->a - 1 + e->c) & 0xFFFF0000L) < e->c) + e->c = temp + 0x8000L; + else + e->c = temp; + /* Send remaining bytes to output */ + e->c <<= e->ct; + if (e->c & 0xF8000000L) { + /* One final overflow has to be handled */ + if (e->buffer >= 0) { + if (e->zc) + do emit_byte(0x00, cinfo); + while (--e->zc); + emit_byte(e->buffer + 1, cinfo); + if (e->buffer + 1 == 0xFF) + emit_byte(0x00, cinfo); + } + e->zc += e->sc; /* carry-over converts stacked 0xFF bytes to 0x00 */ + e->sc = 0; + } else { + if (e->buffer == 0) + ++e->zc; + else if (e->buffer >= 0) { + if (e->zc) + do emit_byte(0x00, cinfo); + while (--e->zc); + emit_byte(e->buffer, cinfo); + } + if (e->sc) { + if (e->zc) + do emit_byte(0x00, cinfo); + while (--e->zc); + do { + emit_byte(0xFF, cinfo); + emit_byte(0x00, cinfo); + } while (--e->sc); + } + } + /* Output final bytes only if they are not 0x00 */ + if (e->c & 0x7FFF800L) { + if (e->zc) /* output final pending zero bytes */ + do emit_byte(0x00, cinfo); + while (--e->zc); + emit_byte((e->c >> 19) & 0xFF, cinfo); + if (((e->c >> 19) & 0xFF) == 0xFF) + emit_byte(0x00, cinfo); + if (e->c & 0x7F800L) { + emit_byte((e->c >> 11) & 0xFF, cinfo); + if (((e->c >> 11) & 0xFF) == 0xFF) + emit_byte(0x00, cinfo); + } + } +} + + +/* + * The core arithmetic encoding routine (common in JPEG and JBIG). + * This needs to go as fast as possible. + * Machine-dependent optimization facilities + * are not utilized in this portable implementation. + * However, this code should be fairly efficient and + * may be a good base for further optimizations anyway. + * + * Parameter 'val' to be encoded may be 0 or 1 (binary decision). + * + * Note: I've added full "Pacman" termination support to the + * byte output routines, which is equivalent to the optional + * Discard_final_zeros procedure (Figure D.15) in the spec. + * Thus, we always produce the shortest possible output + * stream compliant to the spec (no trailing zero bytes, + * except for FF stuffing). + * + * I've also introduced a new scheme for accessing + * the probability estimation state machine table, + * derived from Markus Kuhn's JBIG implementation. + */ + +LOCAL(void) +arith_encode (j_compress_ptr cinfo, unsigned char *st, int val) +{ + register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy; + register unsigned char nl, nm; + register INT32 qe, temp; + register int sv; + + /* Fetch values from our compact representation of Table D.2: + * Qe values and probability estimation state machine + */ + sv = *st; + qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ + nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ + nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ + + /* Encode & estimation procedures per sections D.1.4 & D.1.5 */ + e->a -= qe; + if (val != (sv >> 7)) { + /* Encode the less probable symbol */ + if (e->a >= qe) { + /* If the interval size (qe) for the less probable symbol (LPS) + * is larger than the interval size for the MPS, then exchange + * the two symbols for coding efficiency, otherwise code the LPS + * as usual: */ + e->c += e->a; + e->a = qe; + } + *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ + } else { + /* Encode the more probable symbol */ + if (e->a >= 0x8000L) + return; /* A >= 0x8000 -> ready, no renormalization required */ + if (e->a < qe) { + /* If the interval size (qe) for the less probable symbol (LPS) + * is larger than the interval size for the MPS, then exchange + * the two symbols for coding efficiency: */ + e->c += e->a; + e->a = qe; + } + *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ + } + + /* Renormalization & data output per section D.1.6 */ + do { + e->a <<= 1; + e->c <<= 1; + if (--e->ct == 0) { + /* Another byte is ready for output */ + temp = e->c >> 19; + if (temp > 0xFF) { + /* Handle overflow over all stacked 0xFF bytes */ + if (e->buffer >= 0) { + if (e->zc) + do emit_byte(0x00, cinfo); + while (--e->zc); + emit_byte(e->buffer + 1, cinfo); + if (e->buffer + 1 == 0xFF) + emit_byte(0x00, cinfo); + } + e->zc += e->sc; /* carry-over converts stacked 0xFF bytes to 0x00 */ + e->sc = 0; + /* Note: The 3 spacer bits in the C register guarantee + * that the new buffer byte can't be 0xFF here + * (see page 160 in the P&M JPEG book). */ + e->buffer = temp & 0xFF; /* new output byte, might overflow later */ + } else if (temp == 0xFF) { + ++e->sc; /* stack 0xFF byte (which might overflow later) */ + } else { + /* Output all stacked 0xFF bytes, they will not overflow any more */ + if (e->buffer == 0) + ++e->zc; + else if (e->buffer >= 0) { + if (e->zc) + do emit_byte(0x00, cinfo); + while (--e->zc); + emit_byte(e->buffer, cinfo); + } + if (e->sc) { + if (e->zc) + do emit_byte(0x00, cinfo); + while (--e->zc); + do { + emit_byte(0xFF, cinfo); + emit_byte(0x00, cinfo); + } while (--e->sc); + } + e->buffer = temp & 0xFF; /* new output byte (can still overflow) */ + } + e->c &= 0x7FFFFL; + e->ct += 8; + } + } while (e->a < 0x8000L); +} + + +/* + * Emit a restart marker & resynchronize predictions. + */ + +LOCAL(void) +emit_restart (j_compress_ptr cinfo, int restart_num) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + int ci; + jpeg_component_info * compptr; + + finish_pass(cinfo); + + emit_byte(0xFF, cinfo); + emit_byte(JPEG_RST0 + restart_num, cinfo); + + /* Re-initialize statistics areas */ + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + /* DC needs no table for refinement scan */ + if (cinfo->Ss == 0 && cinfo->Ah == 0) { + MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS); + /* Reset DC predictions to 0 */ + entropy->last_dc_val[ci] = 0; + entropy->dc_context[ci] = 0; + } + /* AC needs no table when not present */ + if (cinfo->Se) { + MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS); + } + } + + /* Reset arithmetic encoding variables */ + entropy->c = 0; + entropy->a = 0x10000L; + entropy->sc = 0; + entropy->zc = 0; + entropy->ct = 11; + entropy->buffer = -1; /* empty */ +} + + +/* + * MCU encoding for DC initial scan (either spectral selection, + * or first pass of successive approximation). + */ + +METHODDEF(boolean) +encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + JBLOCKROW block; + unsigned char *st; + int blkn, ci, tbl; + int v, v2, m; + ISHIFT_TEMPS + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + emit_restart(cinfo, entropy->next_restart_num); + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + /* Encode the MCU data blocks */ + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + block = MCU_data[blkn]; + ci = cinfo->MCU_membership[blkn]; + tbl = cinfo->cur_comp_info[ci]->dc_tbl_no; + + /* Compute the DC value after the required point transform by Al. + * This is simply an arithmetic right shift. + */ + m = IRIGHT_SHIFT((int) ((*block)[0]), cinfo->Al); + + /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */ + + /* Table F.4: Point to statistics bin S0 for DC coefficient coding */ + st = entropy->dc_stats[tbl] + entropy->dc_context[ci]; + + /* Figure F.4: Encode_DC_DIFF */ + if ((v = m - entropy->last_dc_val[ci]) == 0) { + arith_encode(cinfo, st, 0); + entropy->dc_context[ci] = 0; /* zero diff category */ + } else { + entropy->last_dc_val[ci] = m; + arith_encode(cinfo, st, 1); + /* Figure F.6: Encoding nonzero value v */ + /* Figure F.7: Encoding the sign of v */ + if (v > 0) { + arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */ + st += 2; /* Table F.4: SP = S0 + 2 */ + entropy->dc_context[ci] = 4; /* small positive diff category */ + } else { + v = -v; + arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */ + st += 3; /* Table F.4: SN = S0 + 3 */ + entropy->dc_context[ci] = 8; /* small negative diff category */ + } + /* Figure F.8: Encoding the magnitude category of v */ + m = 0; + if (v -= 1) { + arith_encode(cinfo, st, 1); + m = 1; + v2 = v; + st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ + while (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st += 1; + } + } + arith_encode(cinfo, st, 0); + /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ + if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) + entropy->dc_context[ci] = 0; /* zero diff category */ + else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) + entropy->dc_context[ci] += 8; /* large diff category */ + /* Figure F.9: Encoding the magnitude bit pattern of v */ + st += 14; + while (m >>= 1) + arith_encode(cinfo, st, (m & v) ? 1 : 0); + } + } + + return TRUE; +} + + +/* + * MCU encoding for AC initial scan (either spectral selection, + * or first pass of successive approximation). + */ + +METHODDEF(boolean) +encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + JBLOCKROW block; + unsigned char *st; + int tbl, k, ke; + int v, v2, m; + const int * natural_order; + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + emit_restart(cinfo, entropy->next_restart_num); + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + natural_order = cinfo->natural_order; + + /* Encode the MCU data block */ + block = MCU_data[0]; + tbl = cinfo->cur_comp_info[0]->ac_tbl_no; + + /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */ + + /* Establish EOB (end-of-block) index */ + for (ke = cinfo->Se; ke > 0; ke--) + /* We must apply the point transform by Al. For AC coefficients this + * is an integer division with rounding towards 0. To do this portably + * in C, we shift after obtaining the absolute value. + */ + if ((v = (*block)[natural_order[ke]]) >= 0) { + if (v >>= cinfo->Al) break; + } else { + v = -v; + if (v >>= cinfo->Al) break; + } + + /* Figure F.5: Encode_AC_Coefficients */ + for (k = cinfo->Ss; k <= ke; k++) { + st = entropy->ac_stats[tbl] + 3 * (k - 1); + arith_encode(cinfo, st, 0); /* EOB decision */ + for (;;) { + if ((v = (*block)[natural_order[k]]) >= 0) { + if (v >>= cinfo->Al) { + arith_encode(cinfo, st + 1, 1); + arith_encode(cinfo, entropy->fixed_bin, 0); + break; + } + } else { + v = -v; + if (v >>= cinfo->Al) { + arith_encode(cinfo, st + 1, 1); + arith_encode(cinfo, entropy->fixed_bin, 1); + break; + } + } + arith_encode(cinfo, st + 1, 0); st += 3; k++; + } + st += 2; + /* Figure F.8: Encoding the magnitude category of v */ + m = 0; + if (v -= 1) { + arith_encode(cinfo, st, 1); + m = 1; + v2 = v; + if (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st = entropy->ac_stats[tbl] + + (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); + while (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st += 1; + } + } + } + arith_encode(cinfo, st, 0); + /* Figure F.9: Encoding the magnitude bit pattern of v */ + st += 14; + while (m >>= 1) + arith_encode(cinfo, st, (m & v) ? 1 : 0); + } + /* Encode EOB decision only if k <= cinfo->Se */ + if (k <= cinfo->Se) { + st = entropy->ac_stats[tbl] + 3 * (k - 1); + arith_encode(cinfo, st, 1); + } + + return TRUE; +} + + +/* + * MCU encoding for DC successive approximation refinement scan. + */ + +METHODDEF(boolean) +encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + unsigned char *st; + int Al, blkn; + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + emit_restart(cinfo, entropy->next_restart_num); + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + st = entropy->fixed_bin; /* use fixed probability estimation */ + Al = cinfo->Al; + + /* Encode the MCU data blocks */ + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + /* We simply emit the Al'th bit of the DC coefficient value. */ + arith_encode(cinfo, st, (MCU_data[blkn][0][0] >> Al) & 1); + } + + return TRUE; +} + + +/* + * MCU encoding for AC successive approximation refinement scan. + */ + +METHODDEF(boolean) +encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + JBLOCKROW block; + unsigned char *st; + int tbl, k, ke, kex; + int v; + const int * natural_order; + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + emit_restart(cinfo, entropy->next_restart_num); + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + natural_order = cinfo->natural_order; + + /* Encode the MCU data block */ + block = MCU_data[0]; + tbl = cinfo->cur_comp_info[0]->ac_tbl_no; + + /* Section G.1.3.3: Encoding of AC coefficients */ + + /* Establish EOB (end-of-block) index */ + for (ke = cinfo->Se; ke > 0; ke--) + /* We must apply the point transform by Al. For AC coefficients this + * is an integer division with rounding towards 0. To do this portably + * in C, we shift after obtaining the absolute value. + */ + if ((v = (*block)[natural_order[ke]]) >= 0) { + if (v >>= cinfo->Al) break; + } else { + v = -v; + if (v >>= cinfo->Al) break; + } + + /* Establish EOBx (previous stage end-of-block) index */ + for (kex = ke; kex > 0; kex--) + if ((v = (*block)[natural_order[kex]]) >= 0) { + if (v >>= cinfo->Ah) break; + } else { + v = -v; + if (v >>= cinfo->Ah) break; + } + + /* Figure G.10: Encode_AC_Coefficients_SA */ + for (k = cinfo->Ss; k <= ke; k++) { + st = entropy->ac_stats[tbl] + 3 * (k - 1); + if (k > kex) + arith_encode(cinfo, st, 0); /* EOB decision */ + for (;;) { + if ((v = (*block)[natural_order[k]]) >= 0) { + if (v >>= cinfo->Al) { + if (v >> 1) /* previously nonzero coef */ + arith_encode(cinfo, st + 2, (v & 1)); + else { /* newly nonzero coef */ + arith_encode(cinfo, st + 1, 1); + arith_encode(cinfo, entropy->fixed_bin, 0); + } + break; + } + } else { + v = -v; + if (v >>= cinfo->Al) { + if (v >> 1) /* previously nonzero coef */ + arith_encode(cinfo, st + 2, (v & 1)); + else { /* newly nonzero coef */ + arith_encode(cinfo, st + 1, 1); + arith_encode(cinfo, entropy->fixed_bin, 1); + } + break; + } + } + arith_encode(cinfo, st + 1, 0); st += 3; k++; + } + } + /* Encode EOB decision only if k <= cinfo->Se */ + if (k <= cinfo->Se) { + st = entropy->ac_stats[tbl] + 3 * (k - 1); + arith_encode(cinfo, st, 1); + } + + return TRUE; +} + + +/* + * Encode and output one MCU's worth of arithmetic-compressed coefficients. + */ + +METHODDEF(boolean) +encode_mcu (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + jpeg_component_info * compptr; + JBLOCKROW block; + unsigned char *st; + int blkn, ci, tbl, k, ke; + int v, v2, m; + const int * natural_order; + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + emit_restart(cinfo, entropy->next_restart_num); + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + natural_order = cinfo->natural_order; + + /* Encode the MCU data blocks */ + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + block = MCU_data[blkn]; + ci = cinfo->MCU_membership[blkn]; + compptr = cinfo->cur_comp_info[ci]; + + /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */ + + tbl = compptr->dc_tbl_no; + + /* Table F.4: Point to statistics bin S0 for DC coefficient coding */ + st = entropy->dc_stats[tbl] + entropy->dc_context[ci]; + + /* Figure F.4: Encode_DC_DIFF */ + if ((v = (*block)[0] - entropy->last_dc_val[ci]) == 0) { + arith_encode(cinfo, st, 0); + entropy->dc_context[ci] = 0; /* zero diff category */ + } else { + entropy->last_dc_val[ci] = (*block)[0]; + arith_encode(cinfo, st, 1); + /* Figure F.6: Encoding nonzero value v */ + /* Figure F.7: Encoding the sign of v */ + if (v > 0) { + arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */ + st += 2; /* Table F.4: SP = S0 + 2 */ + entropy->dc_context[ci] = 4; /* small positive diff category */ + } else { + v = -v; + arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */ + st += 3; /* Table F.4: SN = S0 + 3 */ + entropy->dc_context[ci] = 8; /* small negative diff category */ + } + /* Figure F.8: Encoding the magnitude category of v */ + m = 0; + if (v -= 1) { + arith_encode(cinfo, st, 1); + m = 1; + v2 = v; + st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ + while (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st += 1; + } + } + arith_encode(cinfo, st, 0); + /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ + if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) + entropy->dc_context[ci] = 0; /* zero diff category */ + else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) + entropy->dc_context[ci] += 8; /* large diff category */ + /* Figure F.9: Encoding the magnitude bit pattern of v */ + st += 14; + while (m >>= 1) + arith_encode(cinfo, st, (m & v) ? 1 : 0); + } + + /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */ + + tbl = compptr->ac_tbl_no; + + /* Establish EOB (end-of-block) index */ + for (ke = cinfo->lim_Se; ke > 0; ke--) + if ((*block)[natural_order[ke]]) break; + + /* Figure F.5: Encode_AC_Coefficients */ + for (k = 1; k <= ke; k++) { + st = entropy->ac_stats[tbl] + 3 * (k - 1); + arith_encode(cinfo, st, 0); /* EOB decision */ + while ((v = (*block)[natural_order[k]]) == 0) { + arith_encode(cinfo, st + 1, 0); st += 3; k++; + } + arith_encode(cinfo, st + 1, 1); + /* Figure F.6: Encoding nonzero value v */ + /* Figure F.7: Encoding the sign of v */ + if (v > 0) { + arith_encode(cinfo, entropy->fixed_bin, 0); + } else { + v = -v; + arith_encode(cinfo, entropy->fixed_bin, 1); + } + st += 2; + /* Figure F.8: Encoding the magnitude category of v */ + m = 0; + if (v -= 1) { + arith_encode(cinfo, st, 1); + m = 1; + v2 = v; + if (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st = entropy->ac_stats[tbl] + + (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); + while (v2 >>= 1) { + arith_encode(cinfo, st, 1); + m <<= 1; + st += 1; + } + } + } + arith_encode(cinfo, st, 0); + /* Figure F.9: Encoding the magnitude bit pattern of v */ + st += 14; + while (m >>= 1) + arith_encode(cinfo, st, (m & v) ? 1 : 0); + } + /* Encode EOB decision only if k <= cinfo->lim_Se */ + if (k <= cinfo->lim_Se) { + st = entropy->ac_stats[tbl] + 3 * (k - 1); + arith_encode(cinfo, st, 1); + } + } + + return TRUE; +} + + +/* + * Initialize for an arithmetic-compressed scan. + */ + +METHODDEF(void) +start_pass (j_compress_ptr cinfo, boolean gather_statistics) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + int ci, tbl; + jpeg_component_info * compptr; + + if (gather_statistics) + /* Make sure to avoid that in the master control logic! + * We are fully adaptive here and need no extra + * statistics gathering pass! + */ + ERREXIT(cinfo, JERR_NOT_COMPILED); + + /* We assume jcmaster.c already validated the progressive scan parameters. */ + + /* Select execution routines */ + if (cinfo->progressive_mode) { + if (cinfo->Ah == 0) { + if (cinfo->Ss == 0) + entropy->pub.encode_mcu = encode_mcu_DC_first; + else + entropy->pub.encode_mcu = encode_mcu_AC_first; + } else { + if (cinfo->Ss == 0) + entropy->pub.encode_mcu = encode_mcu_DC_refine; + else + entropy->pub.encode_mcu = encode_mcu_AC_refine; + } + } else + entropy->pub.encode_mcu = encode_mcu; + + /* Allocate & initialize requested statistics areas */ + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + /* DC needs no table for refinement scan */ + if (cinfo->Ss == 0 && cinfo->Ah == 0) { + tbl = compptr->dc_tbl_no; + if (tbl < 0 || tbl >= NUM_ARITH_TBLS) + ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); + if (entropy->dc_stats[tbl] == NULL) + entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS); + MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS); + /* Initialize DC predictions to 0 */ + entropy->last_dc_val[ci] = 0; + entropy->dc_context[ci] = 0; + } + /* AC needs no table when not present */ + if (cinfo->Se) { + tbl = compptr->ac_tbl_no; + if (tbl < 0 || tbl >= NUM_ARITH_TBLS) + ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); + if (entropy->ac_stats[tbl] == NULL) + entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS); + MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS); +#ifdef CALCULATE_SPECTRAL_CONDITIONING + if (cinfo->progressive_mode) + /* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */ + cinfo->arith_ac_K[tbl] = cinfo->Ss + ((8 + cinfo->Se - cinfo->Ss) >> 4); +#endif + } + } + + /* Initialize arithmetic encoding variables */ + entropy->c = 0; + entropy->a = 0x10000L; + entropy->sc = 0; + entropy->zc = 0; + entropy->ct = 11; + entropy->buffer = -1; /* empty */ + + /* Initialize restart stuff */ + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num = 0; +} + + +/* + * Module initialization routine for arithmetic entropy encoding. + */ + +GLOBAL(void) +jinit_arith_encoder (j_compress_ptr cinfo) +{ + arith_entropy_ptr entropy; + int i; + + entropy = (arith_entropy_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + SIZEOF(arith_entropy_encoder)); + cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; + entropy->pub.start_pass = start_pass; + entropy->pub.finish_pass = finish_pass; + + /* Mark tables unallocated */ + for (i = 0; i < NUM_ARITH_TBLS; i++) { + entropy->dc_stats[i] = NULL; + entropy->ac_stats[i] = NULL; + } + + /* Initialize index for fixed probability estimation */ + entropy->fixed_bin[0] = 113; +} diff --git a/code/jpeg-6b/jccoefct.c b/code/jpeg-8c/jccoefct.c similarity index 96% rename from code/jpeg-6b/jccoefct.c rename to code/jpeg-8c/jccoefct.c index 1963ddb6..d775313b 100644 --- a/code/jpeg-6b/jccoefct.c +++ b/code/jpeg-8c/jccoefct.c @@ -149,6 +149,7 @@ compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf) int blkn, bi, ci, yindex, yoffset, blockcnt; JDIMENSION ypos, xpos; jpeg_component_info *compptr; + forward_DCT_ptr forward_DCT; /* Loop to write as much as one whole iMCU row */ for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row; @@ -167,17 +168,19 @@ compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf) blkn = 0; for (ci = 0; ci < cinfo->comps_in_scan; ci++) { compptr = cinfo->cur_comp_info[ci]; + forward_DCT = cinfo->fdct->forward_DCT[compptr->component_index]; blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width : compptr->last_col_width; xpos = MCU_col_num * compptr->MCU_sample_width; - ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */ + ypos = yoffset * compptr->DCT_v_scaled_size; + /* ypos == (yoffset+yindex) * DCTSIZE */ for (yindex = 0; yindex < compptr->MCU_height; yindex++) { if (coef->iMCU_row_num < last_iMCU_row || yoffset+yindex < compptr->last_row_height) { - (*cinfo->fdct->forward_DCT) (cinfo, compptr, - input_buf[compptr->component_index], - coef->MCU_buffer[blkn], - ypos, xpos, (JDIMENSION) blockcnt); + (*forward_DCT) (cinfo, compptr, + input_buf[compptr->component_index], + coef->MCU_buffer[blkn], + ypos, xpos, (JDIMENSION) blockcnt); if (blockcnt < compptr->MCU_width) { /* Create some dummy blocks at the right edge of the image. */ jzero_far((void FAR *) coef->MCU_buffer[blkn + blockcnt], @@ -195,7 +198,7 @@ compress_data (j_compress_ptr cinfo, JSAMPIMAGE input_buf) } } blkn += compptr->MCU_width; - ypos += DCTSIZE; + ypos += compptr->DCT_v_scaled_size; } } /* Try to write the MCU. In event of a suspension failure, we will @@ -252,6 +255,7 @@ compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) jpeg_component_info *compptr; JBLOCKARRAY buffer; JBLOCKROW thisblockrow, lastblockrow; + forward_DCT_ptr forward_DCT; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { @@ -274,15 +278,15 @@ compress_first_pass (j_compress_ptr cinfo, JSAMPIMAGE input_buf) ndummy = (int) (blocks_across % h_samp_factor); if (ndummy > 0) ndummy = h_samp_factor - ndummy; + forward_DCT = cinfo->fdct->forward_DCT[ci]; /* Perform DCT for all non-dummy blocks in this iMCU row. Each call * on forward_DCT processes a complete horizontal row of DCT blocks. */ for (block_row = 0; block_row < block_rows; block_row++) { thisblockrow = buffer[block_row]; - (*cinfo->fdct->forward_DCT) (cinfo, compptr, - input_buf[ci], thisblockrow, - (JDIMENSION) (block_row * DCTSIZE), - (JDIMENSION) 0, blocks_across); + (*forward_DCT) (cinfo, compptr, input_buf[ci], thisblockrow, + (JDIMENSION) (block_row * compptr->DCT_v_scaled_size), + (JDIMENSION) 0, blocks_across); if (ndummy > 0) { /* Create dummy blocks at the right edge of the image. */ thisblockrow += blocks_across; /* => first dummy block */ diff --git a/code/jpeg-6b/jccolor.c b/code/jpeg-8c/jccolor.c similarity index 100% rename from code/jpeg-6b/jccolor.c rename to code/jpeg-8c/jccolor.c diff --git a/code/jpeg-6b/jcdctmgr.c b/code/jpeg-8c/jcdctmgr.c similarity index 60% rename from code/jpeg-6b/jcdctmgr.c rename to code/jpeg-8c/jcdctmgr.c index 94d5cda4..0bbdbb68 100644 --- a/code/jpeg-6b/jcdctmgr.c +++ b/code/jpeg-8c/jcdctmgr.c @@ -23,7 +23,7 @@ typedef struct { struct jpeg_forward_dct pub; /* public fields */ /* Pointer to the DCT routine actually in use */ - forward_DCT_method_ptr do_dct; + forward_DCT_method_ptr do_dct[MAX_COMPONENTS]; /* The actual post-DCT divisors --- not identical to the quant table * entries, because of scaling (especially for an unnormalized DCT). @@ -33,7 +33,7 @@ typedef struct { #ifdef DCT_FLOAT_SUPPORTED /* Same as above for the floating-point case. */ - float_DCT_method_ptr do_float_dct; + float_DCT_method_ptr do_float_dct[MAX_COMPONENTS]; FAST_FLOAT * float_divisors[NUM_QUANT_TBLS]; #endif } my_fdct_controller; @@ -41,134 +41,18 @@ typedef struct { typedef my_fdct_controller * my_fdct_ptr; -/* - * Initialize for a processing pass. - * Verify that all referenced Q-tables are present, and set up - * the divisor table for each one. - * In the current implementation, DCT of all components is done during - * the first pass, even if only some components will be output in the - * first scan. Hence all components should be examined here. +/* The current scaled-DCT routines require ISLOW-style divisor tables, + * so be sure to compile that code if either ISLOW or SCALING is requested. */ - -METHODDEF(void) -start_pass_fdctmgr (j_compress_ptr cinfo) -{ - my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; - int ci, qtblno, i; - jpeg_component_info *compptr; - JQUANT_TBL * qtbl; - - for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; - ci++, compptr++) { - qtblno = compptr->quant_tbl_no; - /* Make sure specified quantization table is present */ - if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || - cinfo->quant_tbl_ptrs[qtblno] == NULL) - ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); - qtbl = cinfo->quant_tbl_ptrs[qtblno]; - /* Compute divisors for this quant table */ - /* We may do this more than once for same table, but it's not a big deal */ - switch (cinfo->dct_method) { #ifdef DCT_ISLOW_SUPPORTED - case JDCT_ISLOW: - /* For LL&M IDCT method, divisors are equal to raw quantization - * coefficients multiplied by 8 (to counteract scaling). - */ - if (fdct->divisors[qtblno] == NULL) { - fdct->divisors[qtblno] = (DCTELEM *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - DCTSIZE2 * SIZEOF(DCTELEM)); - } - dtbl = fdct->divisors[qtblno]; - for (i = 0; i < DCTSIZE2; i++) { - dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3; - } - break; +#define PROVIDE_ISLOW_TABLES +#else +#ifdef DCT_SCALING_SUPPORTED +#define PROVIDE_ISLOW_TABLES #endif -#ifdef DCT_IFAST_SUPPORTED - case JDCT_IFAST: - { - /* For AA&N IDCT method, divisors are equal to quantization - * coefficients scaled by scalefactor[row]*scalefactor[col], where - * scalefactor[0] = 1 - * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 - * We apply a further scale factor of 8. - */ -#define CONST_BITS 14 - static const INT16 aanscales[DCTSIZE2] = { - /* precomputed values scaled up by 14 bits */ - 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, - 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, - 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, - 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, - 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, - 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, - 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, - 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 - }; - SHIFT_TEMPS - - if (fdct->divisors[qtblno] == NULL) { - fdct->divisors[qtblno] = (DCTELEM *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - DCTSIZE2 * SIZEOF(DCTELEM)); - } - dtbl = fdct->divisors[qtblno]; - for (i = 0; i < DCTSIZE2; i++) { - dtbl[i] = (DCTELEM) - DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i], - (INT32) aanscales[i]), - CONST_BITS-3); - } - } - break; #endif -#ifdef DCT_FLOAT_SUPPORTED - case JDCT_FLOAT: - { - /* For float AA&N IDCT method, divisors are equal to quantization - * coefficients scaled by scalefactor[row]*scalefactor[col], where - * scalefactor[0] = 1 - * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 - * We apply a further scale factor of 8. - * What's actually stored is 1/divisor so that the inner loop can - * use a multiplication rather than a division. - */ - FAST_FLOAT * fdtbl; - int row, col; - static const double aanscalefactor[DCTSIZE] = { - 1.0, 1.387039845, 1.306562965, 1.175875602, - 1.0, 0.785694958, 0.541196100, 0.275899379 - }; - - if (fdct->float_divisors[qtblno] == NULL) { - fdct->float_divisors[qtblno] = (FAST_FLOAT *) - (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, - DCTSIZE2 * SIZEOF(FAST_FLOAT)); - } - fdtbl = fdct->float_divisors[qtblno]; - i = 0; - for (row = 0; row < DCTSIZE; row++) { - for (col = 0; col < DCTSIZE; col++) { - fdtbl[i] = (FAST_FLOAT) - (1.0 / (((double) qtbl->quantval[i] * - aanscalefactor[row] * aanscalefactor[col] * 8.0))); - i++; - } - } - } - break; -#endif - default: - ERREXIT(cinfo, JERR_NOT_COMPILED); - break; - } - } -} -/* code/jpeg-6b/jcdctmgr.c:184: warning: ‘forward_DCT’ defined but not used */ -#if 0 /* * Perform forward DCT on one or more blocks of a component. * @@ -186,43 +70,16 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, { /* This routine is heavily used, so it's worth coding it tightly. */ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; - forward_DCT_method_ptr do_dct = fdct->do_dct; + forward_DCT_method_ptr do_dct = fdct->do_dct[compptr->component_index]; DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no]; DCTELEM workspace[DCTSIZE2]; /* work area for FDCT subroutine */ JDIMENSION bi; sample_data += start_row; /* fold in the vertical offset once */ - for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { - /* Load data into workspace, applying unsigned->signed conversion */ - { register DCTELEM *workspaceptr; - register JSAMPROW elemptr; - register int elemr; - - workspaceptr = workspace; - for (elemr = 0; elemr < DCTSIZE; elemr++) { - elemptr = sample_data[elemr] + start_col; -#if DCTSIZE == 8 /* unroll the inner loop */ - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; -#else - { register int elemc; - for (elemc = DCTSIZE; elemc > 0; elemc--) { - *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; - } - } -#endif - } - } - + for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) { /* Perform the DCT */ - (*do_dct) (workspace); + (*do_dct) (workspace, sample_data, start_col); /* Quantize/descale the coefficients, and store into coef_blocks[] */ { register DCTELEM temp, qval; @@ -263,7 +120,6 @@ forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, } } } -#endif #ifdef DCT_FLOAT_SUPPORTED @@ -277,44 +133,16 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, { /* This routine is heavily used, so it's worth coding it tightly. */ my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; - float_DCT_method_ptr do_dct = fdct->do_float_dct; + float_DCT_method_ptr do_dct = fdct->do_float_dct[compptr->component_index]; FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no]; FAST_FLOAT workspace[DCTSIZE2]; /* work area for FDCT subroutine */ JDIMENSION bi; sample_data += start_row; /* fold in the vertical offset once */ - for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { - /* Load data into workspace, applying unsigned->signed conversion */ - { register FAST_FLOAT *workspaceptr; - register JSAMPROW elemptr; - register int elemr; - - workspaceptr = workspace; - for (elemr = 0; elemr < DCTSIZE; elemr++) { - elemptr = sample_data[elemr] + start_col; -#if DCTSIZE == 8 /* unroll the inner loop */ - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); -#else - { register int elemc; - for (elemc = DCTSIZE; elemc > 0; elemc--) { - *workspaceptr++ = (FAST_FLOAT) - (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); - } - } -#endif - } - } - + for (bi = 0; bi < num_blocks; bi++, start_col += compptr->DCT_h_scaled_size) { /* Perform the DCT */ - (*do_dct) (workspace); + (*do_dct) (workspace, sample_data, start_col); /* Quantize/descale the coefficients, and store into coef_blocks[] */ { register FAST_FLOAT temp; @@ -339,6 +167,295 @@ forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, #endif /* DCT_FLOAT_SUPPORTED */ +/* + * Initialize for a processing pass. + * Verify that all referenced Q-tables are present, and set up + * the divisor table for each one. + * In the current implementation, DCT of all components is done during + * the first pass, even if only some components will be output in the + * first scan. Hence all components should be examined here. + */ + +METHODDEF(void) +start_pass_fdctmgr (j_compress_ptr cinfo) +{ + my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; + int ci, qtblno, i; + jpeg_component_info *compptr; + int method = 0; + JQUANT_TBL * qtbl; + DCTELEM * dtbl; + + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; + ci++, compptr++) { + /* Select the proper DCT routine for this component's scaling */ + switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) { +#ifdef DCT_SCALING_SUPPORTED + case ((1 << 8) + 1): + fdct->do_dct[ci] = jpeg_fdct_1x1; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((2 << 8) + 2): + fdct->do_dct[ci] = jpeg_fdct_2x2; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((3 << 8) + 3): + fdct->do_dct[ci] = jpeg_fdct_3x3; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((4 << 8) + 4): + fdct->do_dct[ci] = jpeg_fdct_4x4; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((5 << 8) + 5): + fdct->do_dct[ci] = jpeg_fdct_5x5; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((6 << 8) + 6): + fdct->do_dct[ci] = jpeg_fdct_6x6; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((7 << 8) + 7): + fdct->do_dct[ci] = jpeg_fdct_7x7; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((9 << 8) + 9): + fdct->do_dct[ci] = jpeg_fdct_9x9; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((10 << 8) + 10): + fdct->do_dct[ci] = jpeg_fdct_10x10; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((11 << 8) + 11): + fdct->do_dct[ci] = jpeg_fdct_11x11; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((12 << 8) + 12): + fdct->do_dct[ci] = jpeg_fdct_12x12; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((13 << 8) + 13): + fdct->do_dct[ci] = jpeg_fdct_13x13; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((14 << 8) + 14): + fdct->do_dct[ci] = jpeg_fdct_14x14; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((15 << 8) + 15): + fdct->do_dct[ci] = jpeg_fdct_15x15; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((16 << 8) + 16): + fdct->do_dct[ci] = jpeg_fdct_16x16; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((16 << 8) + 8): + fdct->do_dct[ci] = jpeg_fdct_16x8; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((14 << 8) + 7): + fdct->do_dct[ci] = jpeg_fdct_14x7; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((12 << 8) + 6): + fdct->do_dct[ci] = jpeg_fdct_12x6; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((10 << 8) + 5): + fdct->do_dct[ci] = jpeg_fdct_10x5; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((8 << 8) + 4): + fdct->do_dct[ci] = jpeg_fdct_8x4; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((6 << 8) + 3): + fdct->do_dct[ci] = jpeg_fdct_6x3; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((4 << 8) + 2): + fdct->do_dct[ci] = jpeg_fdct_4x2; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((2 << 8) + 1): + fdct->do_dct[ci] = jpeg_fdct_2x1; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((8 << 8) + 16): + fdct->do_dct[ci] = jpeg_fdct_8x16; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((7 << 8) + 14): + fdct->do_dct[ci] = jpeg_fdct_7x14; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((6 << 8) + 12): + fdct->do_dct[ci] = jpeg_fdct_6x12; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((5 << 8) + 10): + fdct->do_dct[ci] = jpeg_fdct_5x10; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((4 << 8) + 8): + fdct->do_dct[ci] = jpeg_fdct_4x8; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((3 << 8) + 6): + fdct->do_dct[ci] = jpeg_fdct_3x6; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((2 << 8) + 4): + fdct->do_dct[ci] = jpeg_fdct_2x4; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; + case ((1 << 8) + 2): + fdct->do_dct[ci] = jpeg_fdct_1x2; + method = JDCT_ISLOW; /* jfdctint uses islow-style table */ + break; +#endif + case ((DCTSIZE << 8) + DCTSIZE): + switch (cinfo->dct_method) { +#ifdef DCT_ISLOW_SUPPORTED + case JDCT_ISLOW: + fdct->do_dct[ci] = jpeg_fdct_islow; + method = JDCT_ISLOW; + break; +#endif +#ifdef DCT_IFAST_SUPPORTED + case JDCT_IFAST: + fdct->do_dct[ci] = jpeg_fdct_ifast; + method = JDCT_IFAST; + break; +#endif +#ifdef DCT_FLOAT_SUPPORTED + case JDCT_FLOAT: + fdct->do_float_dct[ci] = jpeg_fdct_float; + method = JDCT_FLOAT; + break; +#endif + default: + ERREXIT(cinfo, JERR_NOT_COMPILED); + break; + } + break; + default: + ERREXIT2(cinfo, JERR_BAD_DCTSIZE, + compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size); + break; + } + qtblno = compptr->quant_tbl_no; + /* Make sure specified quantization table is present */ + if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || + cinfo->quant_tbl_ptrs[qtblno] == NULL) + ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); + qtbl = cinfo->quant_tbl_ptrs[qtblno]; + /* Compute divisors for this quant table */ + /* We may do this more than once for same table, but it's not a big deal */ + switch (method) { +#ifdef PROVIDE_ISLOW_TABLES + case JDCT_ISLOW: + /* For LL&M IDCT method, divisors are equal to raw quantization + * coefficients multiplied by 8 (to counteract scaling). + */ + if (fdct->divisors[qtblno] == NULL) { + fdct->divisors[qtblno] = (DCTELEM *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + DCTSIZE2 * SIZEOF(DCTELEM)); + } + dtbl = fdct->divisors[qtblno]; + for (i = 0; i < DCTSIZE2; i++) { + dtbl[i] = ((DCTELEM) qtbl->quantval[i]) << 3; + } + fdct->pub.forward_DCT[ci] = forward_DCT; + break; +#endif +#ifdef DCT_IFAST_SUPPORTED + case JDCT_IFAST: + { + /* For AA&N IDCT method, divisors are equal to quantization + * coefficients scaled by scalefactor[row]*scalefactor[col], where + * scalefactor[0] = 1 + * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 + * We apply a further scale factor of 8. + */ +#define CONST_BITS 14 + static const INT16 aanscales[DCTSIZE2] = { + /* precomputed values scaled up by 14 bits */ + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, + 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, + 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, + 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, + 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, + 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, + 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 + }; + SHIFT_TEMPS + + if (fdct->divisors[qtblno] == NULL) { + fdct->divisors[qtblno] = (DCTELEM *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + DCTSIZE2 * SIZEOF(DCTELEM)); + } + dtbl = fdct->divisors[qtblno]; + for (i = 0; i < DCTSIZE2; i++) { + dtbl[i] = (DCTELEM) + DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i], + (INT32) aanscales[i]), + CONST_BITS-3); + } + } + fdct->pub.forward_DCT[ci] = forward_DCT; + break; +#endif +#ifdef DCT_FLOAT_SUPPORTED + case JDCT_FLOAT: + { + /* For float AA&N IDCT method, divisors are equal to quantization + * coefficients scaled by scalefactor[row]*scalefactor[col], where + * scalefactor[0] = 1 + * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 + * We apply a further scale factor of 8. + * What's actually stored is 1/divisor so that the inner loop can + * use a multiplication rather than a division. + */ + FAST_FLOAT * fdtbl; + int row, col; + static const double aanscalefactor[DCTSIZE] = { + 1.0, 1.387039845, 1.306562965, 1.175875602, + 1.0, 0.785694958, 0.541196100, 0.275899379 + }; + + if (fdct->float_divisors[qtblno] == NULL) { + fdct->float_divisors[qtblno] = (FAST_FLOAT *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + DCTSIZE2 * SIZEOF(FAST_FLOAT)); + } + fdtbl = fdct->float_divisors[qtblno]; + i = 0; + for (row = 0; row < DCTSIZE; row++) { + for (col = 0; col < DCTSIZE; col++) { + fdtbl[i] = (FAST_FLOAT) + (1.0 / (((double) qtbl->quantval[i] * + aanscalefactor[row] * aanscalefactor[col] * 8.0))); + i++; + } + } + } + fdct->pub.forward_DCT[ci] = forward_DCT_float; + break; +#endif + default: + ERREXIT(cinfo, JERR_NOT_COMPILED); + break; + } + } +} + + /* * Initialize FDCT manager. */ @@ -355,30 +472,6 @@ jinit_forward_dct (j_compress_ptr cinfo) cinfo->fdct = (struct jpeg_forward_dct *) fdct; fdct->pub.start_pass = start_pass_fdctmgr; - switch (cinfo->dct_method) { -#ifdef DCT_ISLOW_SUPPORTED - case JDCT_ISLOW: - fdct->pub.forward_DCT = forward_DCT; - fdct->do_dct = jpeg_fdct_islow; - break; -#endif -#ifdef DCT_IFAST_SUPPORTED - case JDCT_IFAST: - fdct->pub.forward_DCT = forward_DCT; - fdct->do_dct = jpeg_fdct_ifast; - break; -#endif -#ifdef DCT_FLOAT_SUPPORTED - case JDCT_FLOAT: - fdct->pub.forward_DCT = forward_DCT_float; - fdct->do_float_dct = jpeg_fdct_float; - break; -#endif - default: - ERREXIT(cinfo, JERR_NOT_COMPILED); - break; - } - /* Mark divisor tables unallocated */ for (i = 0; i < NUM_QUANT_TBLS; i++) { fdct->divisors[i] = NULL; diff --git a/code/jpeg-8c/jchuff.c b/code/jpeg-8c/jchuff.c new file mode 100644 index 00000000..257d7aa1 --- /dev/null +++ b/code/jpeg-8c/jchuff.c @@ -0,0 +1,1576 @@ +/* + * jchuff.c + * + * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 2006-2009 by Guido Vollbeding. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains Huffman entropy encoding routines. + * Both sequential and progressive modes are supported in this single module. + * + * Much of the complexity here has to do with supporting output suspension. + * If the data destination module demands suspension, we want to be able to + * back up to the start of the current MCU. To do this, we copy state + * variables into local working storage, and update them back to the + * permanent JPEG objects only upon successful completion of an MCU. + * + * We do not support output suspension for the progressive JPEG mode, since + * the library currently does not allow multiple-scan files to be written + * with output suspension. + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" + + +/* The legal range of a DCT coefficient is + * -1024 .. +1023 for 8-bit data; + * -16384 .. +16383 for 12-bit data. + * Hence the magnitude should always fit in 10 or 14 bits respectively. + */ + +#if BITS_IN_JSAMPLE == 8 +#define MAX_COEF_BITS 10 +#else +#define MAX_COEF_BITS 14 +#endif + +/* Derived data constructed for each Huffman table */ + +typedef struct { + unsigned int ehufco[256]; /* code for each symbol */ + char ehufsi[256]; /* length of code for each symbol */ + /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */ +} c_derived_tbl; + + +/* Expanded entropy encoder object for Huffman encoding. + * + * The savable_state subrecord contains fields that change within an MCU, + * but must not be updated permanently until we complete the MCU. + */ + +typedef struct { + INT32 put_buffer; /* current bit-accumulation buffer */ + int put_bits; /* # of bits now in it */ + int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ +} savable_state; + +/* This macro is to work around compilers with missing or broken + * structure assignment. You'll need to fix this code if you have + * such a compiler and you change MAX_COMPS_IN_SCAN. + */ + +#ifndef NO_STRUCT_ASSIGN +#define ASSIGN_STATE(dest,src) ((dest) = (src)) +#else +#if MAX_COMPS_IN_SCAN == 4 +#define ASSIGN_STATE(dest,src) \ + ((dest).put_buffer = (src).put_buffer, \ + (dest).put_bits = (src).put_bits, \ + (dest).last_dc_val[0] = (src).last_dc_val[0], \ + (dest).last_dc_val[1] = (src).last_dc_val[1], \ + (dest).last_dc_val[2] = (src).last_dc_val[2], \ + (dest).last_dc_val[3] = (src).last_dc_val[3]) +#endif +#endif + + +typedef struct { + struct jpeg_entropy_encoder pub; /* public fields */ + + savable_state saved; /* Bit buffer & DC state at start of MCU */ + + /* These fields are NOT loaded into local working state. */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ + int next_restart_num; /* next restart number to write (0-7) */ + + /* Pointers to derived tables (these workspaces have image lifespan) */ + c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS]; + c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS]; + + /* Statistics tables for optimization */ + long * dc_count_ptrs[NUM_HUFF_TBLS]; + long * ac_count_ptrs[NUM_HUFF_TBLS]; + + /* Following fields used only in progressive mode */ + + /* Mode flag: TRUE for optimization, FALSE for actual data output */ + boolean gather_statistics; + + /* next_output_byte/free_in_buffer are local copies of cinfo->dest fields. + */ + JOCTET * next_output_byte; /* => next byte to write in buffer */ + size_t free_in_buffer; /* # of byte spaces remaining in buffer */ + j_compress_ptr cinfo; /* link to cinfo (needed for dump_buffer) */ + + /* Coding status for AC components */ + int ac_tbl_no; /* the table number of the single component */ + unsigned int EOBRUN; /* run length of EOBs */ + unsigned int BE; /* # of buffered correction bits before MCU */ + char * bit_buffer; /* buffer for correction bits (1 per char) */ + /* packing correction bits tightly would save some space but cost time... */ +} huff_entropy_encoder; + +typedef huff_entropy_encoder * huff_entropy_ptr; + +/* Working state while writing an MCU (sequential mode). + * This struct contains all the fields that are needed by subroutines. + */ + +typedef struct { + JOCTET * next_output_byte; /* => next byte to write in buffer */ + size_t free_in_buffer; /* # of byte spaces remaining in buffer */ + savable_state cur; /* Current bit buffer & DC state */ + j_compress_ptr cinfo; /* dump_buffer needs access to this */ +} working_state; + +/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit + * buffer can hold. Larger sizes may slightly improve compression, but + * 1000 is already well into the realm of overkill. + * The minimum safe size is 64 bits. + */ + +#define MAX_CORR_BITS 1000 /* Max # of correction bits I can buffer */ + +/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than INT32. + * We assume that int right shift is unsigned if INT32 right shift is, + * which should be safe. + */ + +#ifdef RIGHT_SHIFT_IS_UNSIGNED +#define ISHIFT_TEMPS int ishift_temp; +#define IRIGHT_SHIFT(x,shft) \ + ((ishift_temp = (x)) < 0 ? \ + (ishift_temp >> (shft)) | ((~0) << (16-(shft))) : \ + (ishift_temp >> (shft))) +#else +#define ISHIFT_TEMPS +#define IRIGHT_SHIFT(x,shft) ((x) >> (shft)) +#endif + + +/* + * Compute the derived values for a Huffman table. + * This routine also performs some validation checks on the table. + */ + +LOCAL(void) +jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, + c_derived_tbl ** pdtbl) +{ + JHUFF_TBL *htbl; + c_derived_tbl *dtbl; + int p, i, l, lastp, si, maxsymbol; + char huffsize[257]; + unsigned int huffcode[257]; + unsigned int code; + + /* Note that huffsize[] and huffcode[] are filled in code-length order, + * paralleling the order of the symbols themselves in htbl->huffval[]. + */ + + /* Find the input Huffman table */ + if (tblno < 0 || tblno >= NUM_HUFF_TBLS) + ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); + htbl = + isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno]; + if (htbl == NULL) + ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); + + /* Allocate a workspace if we haven't already done so. */ + if (*pdtbl == NULL) + *pdtbl = (c_derived_tbl *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + SIZEOF(c_derived_tbl)); + dtbl = *pdtbl; + + /* Figure C.1: make table of Huffman code length for each symbol */ + + p = 0; + for (l = 1; l <= 16; l++) { + i = (int) htbl->bits[l]; + if (i < 0 || p + i > 256) /* protect against table overrun */ + ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); + while (i--) + huffsize[p++] = (char) l; + } + huffsize[p] = 0; + lastp = p; + + /* Figure C.2: generate the codes themselves */ + /* We also validate that the counts represent a legal Huffman code tree. */ + + code = 0; + si = huffsize[0]; + p = 0; + while (huffsize[p]) { + while (((int) huffsize[p]) == si) { + huffcode[p++] = code; + code++; + } + /* code is now 1 more than the last code used for codelength si; but + * it must still fit in si bits, since no code is allowed to be all ones. + */ + if (((INT32) code) >= (((INT32) 1) << si)) + ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); + code <<= 1; + si++; + } + + /* Figure C.3: generate encoding tables */ + /* These are code and size indexed by symbol value */ + + /* Set all codeless symbols to have code length 0; + * this lets us detect duplicate VAL entries here, and later + * allows emit_bits to detect any attempt to emit such symbols. + */ + MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi)); + + /* This is also a convenient place to check for out-of-range + * and duplicated VAL entries. We allow 0..255 for AC symbols + * but only 0..15 for DC. (We could constrain them further + * based on data depth and mode, but this seems enough.) + */ + maxsymbol = isDC ? 15 : 255; + + for (p = 0; p < lastp; p++) { + i = htbl->huffval[p]; + if (i < 0 || i > maxsymbol || dtbl->ehufsi[i]) + ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); + dtbl->ehufco[i] = huffcode[p]; + dtbl->ehufsi[i] = huffsize[p]; + } +} + + +/* Outputting bytes to the file. + * NB: these must be called only when actually outputting, + * that is, entropy->gather_statistics == FALSE. + */ + +/* Emit a byte, taking 'action' if must suspend. */ +#define emit_byte_s(state,val,action) \ + { *(state)->next_output_byte++ = (JOCTET) (val); \ + if (--(state)->free_in_buffer == 0) \ + if (! dump_buffer_s(state)) \ + { action; } } + +/* Emit a byte */ +#define emit_byte_e(entropy,val) \ + { *(entropy)->next_output_byte++ = (JOCTET) (val); \ + if (--(entropy)->free_in_buffer == 0) \ + dump_buffer_e(entropy); } + + +LOCAL(boolean) +dump_buffer_s (working_state * state) +/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */ +{ + struct jpeg_destination_mgr * dest = state->cinfo->dest; + + if (! (*dest->empty_output_buffer) (state->cinfo)) + return FALSE; + /* After a successful buffer dump, must reset buffer pointers */ + state->next_output_byte = dest->next_output_byte; + state->free_in_buffer = dest->free_in_buffer; + return TRUE; +} + + +LOCAL(void) +dump_buffer_e (huff_entropy_ptr entropy) +/* Empty the output buffer; we do not support suspension in this case. */ +{ + struct jpeg_destination_mgr * dest = entropy->cinfo->dest; + + if (! (*dest->empty_output_buffer) (entropy->cinfo)) + ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND); + /* After a successful buffer dump, must reset buffer pointers */ + entropy->next_output_byte = dest->next_output_byte; + entropy->free_in_buffer = dest->free_in_buffer; +} + + +/* Outputting bits to the file */ + +/* Only the right 24 bits of put_buffer are used; the valid bits are + * left-justified in this part. At most 16 bits can be passed to emit_bits + * in one call, and we never retain more than 7 bits in put_buffer + * between calls, so 24 bits are sufficient. + */ + +INLINE +LOCAL(boolean) +emit_bits_s (working_state * state, unsigned int code, int size) +/* Emit some bits; return TRUE if successful, FALSE if must suspend */ +{ + /* This routine is heavily used, so it's worth coding tightly. */ + register INT32 put_buffer = (INT32) code; + register int put_bits = state->cur.put_bits; + + /* if size is 0, caller used an invalid Huffman table entry */ + if (size == 0) + ERREXIT(state->cinfo, JERR_HUFF_MISSING_CODE); + + put_buffer &= (((INT32) 1)<cur.put_buffer; /* and merge with old buffer contents */ + + while (put_bits >= 8) { + int c = (int) ((put_buffer >> 16) & 0xFF); + + emit_byte_s(state, c, return FALSE); + if (c == 0xFF) { /* need to stuff a zero byte? */ + emit_byte_s(state, 0, return FALSE); + } + put_buffer <<= 8; + put_bits -= 8; + } + + state->cur.put_buffer = put_buffer; /* update state variables */ + state->cur.put_bits = put_bits; + + return TRUE; +} + + +INLINE +LOCAL(void) +emit_bits_e (huff_entropy_ptr entropy, unsigned int code, int size) +/* Emit some bits, unless we are in gather mode */ +{ + /* This routine is heavily used, so it's worth coding tightly. */ + register INT32 put_buffer = (INT32) code; + register int put_bits = entropy->saved.put_bits; + + /* if size is 0, caller used an invalid Huffman table entry */ + if (size == 0) + ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); + + if (entropy->gather_statistics) + return; /* do nothing if we're only getting stats */ + + put_buffer &= (((INT32) 1)<saved.put_buffer; + + while (put_bits >= 8) { + int c = (int) ((put_buffer >> 16) & 0xFF); + + emit_byte_e(entropy, c); + if (c == 0xFF) { /* need to stuff a zero byte? */ + emit_byte_e(entropy, 0); + } + put_buffer <<= 8; + put_bits -= 8; + } + + entropy->saved.put_buffer = put_buffer; /* update variables */ + entropy->saved.put_bits = put_bits; +} + + +LOCAL(boolean) +flush_bits_s (working_state * state) +{ + if (! emit_bits_s(state, 0x7F, 7)) /* fill any partial byte with ones */ + return FALSE; + state->cur.put_buffer = 0; /* and reset bit-buffer to empty */ + state->cur.put_bits = 0; + return TRUE; +} + + +LOCAL(void) +flush_bits_e (huff_entropy_ptr entropy) +{ + emit_bits_e(entropy, 0x7F, 7); /* fill any partial byte with ones */ + entropy->saved.put_buffer = 0; /* and reset bit-buffer to empty */ + entropy->saved.put_bits = 0; +} + + +/* + * Emit (or just count) a Huffman symbol. + */ + +INLINE +LOCAL(void) +emit_dc_symbol (huff_entropy_ptr entropy, int tbl_no, int symbol) +{ + if (entropy->gather_statistics) + entropy->dc_count_ptrs[tbl_no][symbol]++; + else { + c_derived_tbl * tbl = entropy->dc_derived_tbls[tbl_no]; + emit_bits_e(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]); + } +} + + +INLINE +LOCAL(void) +emit_ac_symbol (huff_entropy_ptr entropy, int tbl_no, int symbol) +{ + if (entropy->gather_statistics) + entropy->ac_count_ptrs[tbl_no][symbol]++; + else { + c_derived_tbl * tbl = entropy->ac_derived_tbls[tbl_no]; + emit_bits_e(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]); + } +} + + +/* + * Emit bits from a correction bit buffer. + */ + +LOCAL(void) +emit_buffered_bits (huff_entropy_ptr entropy, char * bufstart, + unsigned int nbits) +{ + if (entropy->gather_statistics) + return; /* no real work */ + + while (nbits > 0) { + emit_bits_e(entropy, (unsigned int) (*bufstart), 1); + bufstart++; + nbits--; + } +} + + +/* + * Emit any pending EOBRUN symbol. + */ + +LOCAL(void) +emit_eobrun (huff_entropy_ptr entropy) +{ + register int temp, nbits; + + if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */ + temp = entropy->EOBRUN; + nbits = 0; + while ((temp >>= 1)) + nbits++; + /* safety check: shouldn't happen given limited correction-bit buffer */ + if (nbits > 14) + ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); + + emit_ac_symbol(entropy, entropy->ac_tbl_no, nbits << 4); + if (nbits) + emit_bits_e(entropy, entropy->EOBRUN, nbits); + + entropy->EOBRUN = 0; + + /* Emit any buffered correction bits */ + emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE); + entropy->BE = 0; + } +} + + +/* + * Emit a restart marker & resynchronize predictions. + */ + +LOCAL(boolean) +emit_restart_s (working_state * state, int restart_num) +{ + int ci; + + if (! flush_bits_s(state)) + return FALSE; + + emit_byte_s(state, 0xFF, return FALSE); + emit_byte_s(state, JPEG_RST0 + restart_num, return FALSE); + + /* Re-initialize DC predictions to 0 */ + for (ci = 0; ci < state->cinfo->comps_in_scan; ci++) + state->cur.last_dc_val[ci] = 0; + + /* The restart counter is not updated until we successfully write the MCU. */ + + return TRUE; +} + + +LOCAL(void) +emit_restart_e (huff_entropy_ptr entropy, int restart_num) +{ + int ci; + + emit_eobrun(entropy); + + if (! entropy->gather_statistics) { + flush_bits_e(entropy); + emit_byte_e(entropy, 0xFF); + emit_byte_e(entropy, JPEG_RST0 + restart_num); + } + + if (entropy->cinfo->Ss == 0) { + /* Re-initialize DC predictions to 0 */ + for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++) + entropy->saved.last_dc_val[ci] = 0; + } else { + /* Re-initialize all AC-related fields to 0 */ + entropy->EOBRUN = 0; + entropy->BE = 0; + } +} + + +/* + * MCU encoding for DC initial scan (either spectral selection, + * or first pass of successive approximation). + */ + +METHODDEF(boolean) +encode_mcu_DC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + register int temp, temp2; + register int nbits; + int blkn, ci; + int Al = cinfo->Al; + JBLOCKROW block; + jpeg_component_info * compptr; + ISHIFT_TEMPS + + entropy->next_output_byte = cinfo->dest->next_output_byte; + entropy->free_in_buffer = cinfo->dest->free_in_buffer; + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) + if (entropy->restarts_to_go == 0) + emit_restart_e(entropy, entropy->next_restart_num); + + /* Encode the MCU data blocks */ + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + block = MCU_data[blkn]; + ci = cinfo->MCU_membership[blkn]; + compptr = cinfo->cur_comp_info[ci]; + + /* Compute the DC value after the required point transform by Al. + * This is simply an arithmetic right shift. + */ + temp2 = IRIGHT_SHIFT((int) ((*block)[0]), Al); + + /* DC differences are figured on the point-transformed values. */ + temp = temp2 - entropy->saved.last_dc_val[ci]; + entropy->saved.last_dc_val[ci] = temp2; + + /* Encode the DC coefficient difference per section G.1.2.1 */ + temp2 = temp; + if (temp < 0) { + temp = -temp; /* temp is abs value of input */ + /* For a negative input, want temp2 = bitwise complement of abs(input) */ + /* This code assumes we are on a two's complement machine */ + temp2--; + } + + /* Find the number of bits needed for the magnitude of the coefficient */ + nbits = 0; + while (temp) { + nbits++; + temp >>= 1; + } + /* Check for out-of-range coefficient values. + * Since we're encoding a difference, the range limit is twice as much. + */ + if (nbits > MAX_COEF_BITS+1) + ERREXIT(cinfo, JERR_BAD_DCT_COEF); + + /* Count/emit the Huffman-coded symbol for the number of bits */ + emit_dc_symbol(entropy, compptr->dc_tbl_no, nbits); + + /* Emit that number of bits of the value, if positive, */ + /* or the complement of its magnitude, if negative. */ + if (nbits) /* emit_bits rejects calls with size 0 */ + emit_bits_e(entropy, (unsigned int) temp2, nbits); + } + + cinfo->dest->next_output_byte = entropy->next_output_byte; + cinfo->dest->free_in_buffer = entropy->free_in_buffer; + + /* Update restart-interval state too */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + return TRUE; +} + + +/* + * MCU encoding for AC initial scan (either spectral selection, + * or first pass of successive approximation). + */ + +METHODDEF(boolean) +encode_mcu_AC_first (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + register int temp, temp2; + register int nbits; + register int r, k; + int Se, Al; + const int * natural_order; + JBLOCKROW block; + + entropy->next_output_byte = cinfo->dest->next_output_byte; + entropy->free_in_buffer = cinfo->dest->free_in_buffer; + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) + if (entropy->restarts_to_go == 0) + emit_restart_e(entropy, entropy->next_restart_num); + + Se = cinfo->Se; + Al = cinfo->Al; + natural_order = cinfo->natural_order; + + /* Encode the MCU data block */ + block = MCU_data[0]; + + /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */ + + r = 0; /* r = run length of zeros */ + + for (k = cinfo->Ss; k <= Se; k++) { + if ((temp = (*block)[natural_order[k]]) == 0) { + r++; + continue; + } + /* We must apply the point transform by Al. For AC coefficients this + * is an integer division with rounding towards 0. To do this portably + * in C, we shift after obtaining the absolute value; so the code is + * interwoven with finding the abs value (temp) and output bits (temp2). + */ + if (temp < 0) { + temp = -temp; /* temp is abs value of input */ + temp >>= Al; /* apply the point transform */ + /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ + temp2 = ~temp; + } else { + temp >>= Al; /* apply the point transform */ + temp2 = temp; + } + /* Watch out for case that nonzero coef is zero after point transform */ + if (temp == 0) { + r++; + continue; + } + + /* Emit any pending EOBRUN */ + if (entropy->EOBRUN > 0) + emit_eobrun(entropy); + /* if run length > 15, must emit special run-length-16 codes (0xF0) */ + while (r > 15) { + emit_ac_symbol(entropy, entropy->ac_tbl_no, 0xF0); + r -= 16; + } + + /* Find the number of bits needed for the magnitude of the coefficient */ + nbits = 1; /* there must be at least one 1 bit */ + while ((temp >>= 1)) + nbits++; + /* Check for out-of-range coefficient values */ + if (nbits > MAX_COEF_BITS) + ERREXIT(cinfo, JERR_BAD_DCT_COEF); + + /* Count/emit Huffman symbol for run length / number of bits */ + emit_ac_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); + + /* Emit that number of bits of the value, if positive, */ + /* or the complement of its magnitude, if negative. */ + emit_bits_e(entropy, (unsigned int) temp2, nbits); + + r = 0; /* reset zero run length */ + } + + if (r > 0) { /* If there are trailing zeroes, */ + entropy->EOBRUN++; /* count an EOB */ + if (entropy->EOBRUN == 0x7FFF) + emit_eobrun(entropy); /* force it out to avoid overflow */ + } + + cinfo->dest->next_output_byte = entropy->next_output_byte; + cinfo->dest->free_in_buffer = entropy->free_in_buffer; + + /* Update restart-interval state too */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + return TRUE; +} + + +/* + * MCU encoding for DC successive approximation refinement scan. + * Note: we assume such scans can be multi-component, although the spec + * is not very clear on the point. + */ + +METHODDEF(boolean) +encode_mcu_DC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + register int temp; + int blkn; + int Al = cinfo->Al; + JBLOCKROW block; + + entropy->next_output_byte = cinfo->dest->next_output_byte; + entropy->free_in_buffer = cinfo->dest->free_in_buffer; + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) + if (entropy->restarts_to_go == 0) + emit_restart_e(entropy, entropy->next_restart_num); + + /* Encode the MCU data blocks */ + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + block = MCU_data[blkn]; + + /* We simply emit the Al'th bit of the DC coefficient value. */ + temp = (*block)[0]; + emit_bits_e(entropy, (unsigned int) (temp >> Al), 1); + } + + cinfo->dest->next_output_byte = entropy->next_output_byte; + cinfo->dest->free_in_buffer = entropy->free_in_buffer; + + /* Update restart-interval state too */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + return TRUE; +} + + +/* + * MCU encoding for AC successive approximation refinement scan. + */ + +METHODDEF(boolean) +encode_mcu_AC_refine (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + register int temp; + register int r, k; + int EOB; + char *BR_buffer; + unsigned int BR; + int Se, Al; + const int * natural_order; + JBLOCKROW block; + int absvalues[DCTSIZE2]; + + entropy->next_output_byte = cinfo->dest->next_output_byte; + entropy->free_in_buffer = cinfo->dest->free_in_buffer; + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) + if (entropy->restarts_to_go == 0) + emit_restart_e(entropy, entropy->next_restart_num); + + Se = cinfo->Se; + Al = cinfo->Al; + natural_order = cinfo->natural_order; + + /* Encode the MCU data block */ + block = MCU_data[0]; + + /* It is convenient to make a pre-pass to determine the transformed + * coefficients' absolute values and the EOB position. + */ + EOB = 0; + for (k = cinfo->Ss; k <= Se; k++) { + temp = (*block)[natural_order[k]]; + /* We must apply the point transform by Al. For AC coefficients this + * is an integer division with rounding towards 0. To do this portably + * in C, we shift after obtaining the absolute value. + */ + if (temp < 0) + temp = -temp; /* temp is abs value of input */ + temp >>= Al; /* apply the point transform */ + absvalues[k] = temp; /* save abs value for main pass */ + if (temp == 1) + EOB = k; /* EOB = index of last newly-nonzero coef */ + } + + /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */ + + r = 0; /* r = run length of zeros */ + BR = 0; /* BR = count of buffered bits added now */ + BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */ + + for (k = cinfo->Ss; k <= Se; k++) { + if ((temp = absvalues[k]) == 0) { + r++; + continue; + } + + /* Emit any required ZRLs, but not if they can be folded into EOB */ + while (r > 15 && k <= EOB) { + /* emit any pending EOBRUN and the BE correction bits */ + emit_eobrun(entropy); + /* Emit ZRL */ + emit_ac_symbol(entropy, entropy->ac_tbl_no, 0xF0); + r -= 16; + /* Emit buffered correction bits that must be associated with ZRL */ + emit_buffered_bits(entropy, BR_buffer, BR); + BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ + BR = 0; + } + + /* If the coef was previously nonzero, it only needs a correction bit. + * NOTE: a straight translation of the spec's figure G.7 would suggest + * that we also need to test r > 15. But if r > 15, we can only get here + * if k > EOB, which implies that this coefficient is not 1. + */ + if (temp > 1) { + /* The correction bit is the next bit of the absolute value. */ + BR_buffer[BR++] = (char) (temp & 1); + continue; + } + + /* Emit any pending EOBRUN and the BE correction bits */ + emit_eobrun(entropy); + + /* Count/emit Huffman symbol for run length / number of bits */ + emit_ac_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); + + /* Emit output bit for newly-nonzero coef */ + temp = ((*block)[natural_order[k]] < 0) ? 0 : 1; + emit_bits_e(entropy, (unsigned int) temp, 1); + + /* Emit buffered correction bits that must be associated with this code */ + emit_buffered_bits(entropy, BR_buffer, BR); + BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ + BR = 0; + r = 0; /* reset zero run length */ + } + + if (r > 0 || BR > 0) { /* If there are trailing zeroes, */ + entropy->EOBRUN++; /* count an EOB */ + entropy->BE += BR; /* concat my correction bits to older ones */ + /* We force out the EOB if we risk either: + * 1. overflow of the EOB counter; + * 2. overflow of the correction bit buffer during the next MCU. + */ + if (entropy->EOBRUN == 0x7FFF || entropy->BE > (MAX_CORR_BITS-DCTSIZE2+1)) + emit_eobrun(entropy); + } + + cinfo->dest->next_output_byte = entropy->next_output_byte; + cinfo->dest->free_in_buffer = entropy->free_in_buffer; + + /* Update restart-interval state too */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + return TRUE; +} + + +/* Encode a single block's worth of coefficients */ + +LOCAL(boolean) +encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val, + c_derived_tbl *dctbl, c_derived_tbl *actbl) +{ + register int temp, temp2; + register int nbits; + register int k, r, i; + int Se = state->cinfo->lim_Se; + const int * natural_order = state->cinfo->natural_order; + + /* Encode the DC coefficient difference per section F.1.2.1 */ + + temp = temp2 = block[0] - last_dc_val; + + if (temp < 0) { + temp = -temp; /* temp is abs value of input */ + /* For a negative input, want temp2 = bitwise complement of abs(input) */ + /* This code assumes we are on a two's complement machine */ + temp2--; + } + + /* Find the number of bits needed for the magnitude of the coefficient */ + nbits = 0; + while (temp) { + nbits++; + temp >>= 1; + } + /* Check for out-of-range coefficient values. + * Since we're encoding a difference, the range limit is twice as much. + */ + if (nbits > MAX_COEF_BITS+1) + ERREXIT(state->cinfo, JERR_BAD_DCT_COEF); + + /* Emit the Huffman-coded symbol for the number of bits */ + if (! emit_bits_s(state, dctbl->ehufco[nbits], dctbl->ehufsi[nbits])) + return FALSE; + + /* Emit that number of bits of the value, if positive, */ + /* or the complement of its magnitude, if negative. */ + if (nbits) /* emit_bits rejects calls with size 0 */ + if (! emit_bits_s(state, (unsigned int) temp2, nbits)) + return FALSE; + + /* Encode the AC coefficients per section F.1.2.2 */ + + r = 0; /* r = run length of zeros */ + + for (k = 1; k <= Se; k++) { + if ((temp = block[natural_order[k]]) == 0) { + r++; + } else { + /* if run length > 15, must emit special run-length-16 codes (0xF0) */ + while (r > 15) { + if (! emit_bits_s(state, actbl->ehufco[0xF0], actbl->ehufsi[0xF0])) + return FALSE; + r -= 16; + } + + temp2 = temp; + if (temp < 0) { + temp = -temp; /* temp is abs value of input */ + /* This code assumes we are on a two's complement machine */ + temp2--; + } + + /* Find the number of bits needed for the magnitude of the coefficient */ + nbits = 1; /* there must be at least one 1 bit */ + while ((temp >>= 1)) + nbits++; + /* Check for out-of-range coefficient values */ + if (nbits > MAX_COEF_BITS) + ERREXIT(state->cinfo, JERR_BAD_DCT_COEF); + + /* Emit Huffman symbol for run length / number of bits */ + i = (r << 4) + nbits; + if (! emit_bits_s(state, actbl->ehufco[i], actbl->ehufsi[i])) + return FALSE; + + /* Emit that number of bits of the value, if positive, */ + /* or the complement of its magnitude, if negative. */ + if (! emit_bits_s(state, (unsigned int) temp2, nbits)) + return FALSE; + + r = 0; + } + } + + /* If the last coef(s) were zero, emit an end-of-block code */ + if (r > 0) + if (! emit_bits_s(state, actbl->ehufco[0], actbl->ehufsi[0])) + return FALSE; + + return TRUE; +} + + +/* + * Encode and output one MCU's worth of Huffman-compressed coefficients. + */ + +METHODDEF(boolean) +encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + working_state state; + int blkn, ci; + jpeg_component_info * compptr; + + /* Load up working state */ + state.next_output_byte = cinfo->dest->next_output_byte; + state.free_in_buffer = cinfo->dest->free_in_buffer; + ASSIGN_STATE(state.cur, entropy->saved); + state.cinfo = cinfo; + + /* Emit restart marker if needed */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) + if (! emit_restart_s(&state, entropy->next_restart_num)) + return FALSE; + } + + /* Encode the MCU data blocks */ + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + ci = cinfo->MCU_membership[blkn]; + compptr = cinfo->cur_comp_info[ci]; + if (! encode_one_block(&state, + MCU_data[blkn][0], state.cur.last_dc_val[ci], + entropy->dc_derived_tbls[compptr->dc_tbl_no], + entropy->ac_derived_tbls[compptr->ac_tbl_no])) + return FALSE; + /* Update last_dc_val */ + state.cur.last_dc_val[ci] = MCU_data[blkn][0][0]; + } + + /* Completed MCU, so update state */ + cinfo->dest->next_output_byte = state.next_output_byte; + cinfo->dest->free_in_buffer = state.free_in_buffer; + ASSIGN_STATE(entropy->saved, state.cur); + + /* Update restart-interval state too */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num++; + entropy->next_restart_num &= 7; + } + entropy->restarts_to_go--; + } + + return TRUE; +} + + +/* + * Finish up at the end of a Huffman-compressed scan. + */ + +METHODDEF(void) +finish_pass_huff (j_compress_ptr cinfo) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + working_state state; + + if (cinfo->progressive_mode) { + entropy->next_output_byte = cinfo->dest->next_output_byte; + entropy->free_in_buffer = cinfo->dest->free_in_buffer; + + /* Flush out any buffered data */ + emit_eobrun(entropy); + flush_bits_e(entropy); + + cinfo->dest->next_output_byte = entropy->next_output_byte; + cinfo->dest->free_in_buffer = entropy->free_in_buffer; + } else { + /* Load up working state ... flush_bits needs it */ + state.next_output_byte = cinfo->dest->next_output_byte; + state.free_in_buffer = cinfo->dest->free_in_buffer; + ASSIGN_STATE(state.cur, entropy->saved); + state.cinfo = cinfo; + + /* Flush out the last data */ + if (! flush_bits_s(&state)) + ERREXIT(cinfo, JERR_CANT_SUSPEND); + + /* Update state */ + cinfo->dest->next_output_byte = state.next_output_byte; + cinfo->dest->free_in_buffer = state.free_in_buffer; + ASSIGN_STATE(entropy->saved, state.cur); + } +} + + +/* + * Huffman coding optimization. + * + * We first scan the supplied data and count the number of uses of each symbol + * that is to be Huffman-coded. (This process MUST agree with the code above.) + * Then we build a Huffman coding tree for the observed counts. + * Symbols which are not needed at all for the particular image are not + * assigned any code, which saves space in the DHT marker as well as in + * the compressed data. + */ + + +/* Process a single block's worth of coefficients */ + +LOCAL(void) +htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val, + long dc_counts[], long ac_counts[]) +{ + register int temp; + register int nbits; + register int k, r; + int Se = cinfo->lim_Se; + const int * natural_order = cinfo->natural_order; + + /* Encode the DC coefficient difference per section F.1.2.1 */ + + temp = block[0] - last_dc_val; + if (temp < 0) + temp = -temp; + + /* Find the number of bits needed for the magnitude of the coefficient */ + nbits = 0; + while (temp) { + nbits++; + temp >>= 1; + } + /* Check for out-of-range coefficient values. + * Since we're encoding a difference, the range limit is twice as much. + */ + if (nbits > MAX_COEF_BITS+1) + ERREXIT(cinfo, JERR_BAD_DCT_COEF); + + /* Count the Huffman symbol for the number of bits */ + dc_counts[nbits]++; + + /* Encode the AC coefficients per section F.1.2.2 */ + + r = 0; /* r = run length of zeros */ + + for (k = 1; k <= Se; k++) { + if ((temp = block[natural_order[k]]) == 0) { + r++; + } else { + /* if run length > 15, must emit special run-length-16 codes (0xF0) */ + while (r > 15) { + ac_counts[0xF0]++; + r -= 16; + } + + /* Find the number of bits needed for the magnitude of the coefficient */ + if (temp < 0) + temp = -temp; + + /* Find the number of bits needed for the magnitude of the coefficient */ + nbits = 1; /* there must be at least one 1 bit */ + while ((temp >>= 1)) + nbits++; + /* Check for out-of-range coefficient values */ + if (nbits > MAX_COEF_BITS) + ERREXIT(cinfo, JERR_BAD_DCT_COEF); + + /* Count Huffman symbol for run length / number of bits */ + ac_counts[(r << 4) + nbits]++; + + r = 0; + } + } + + /* If the last coef(s) were zero, emit an end-of-block code */ + if (r > 0) + ac_counts[0]++; +} + + +/* + * Trial-encode one MCU's worth of Huffman-compressed coefficients. + * No data is actually output, so no suspension return is possible. + */ + +METHODDEF(boolean) +encode_mcu_gather (j_compress_ptr cinfo, JBLOCKROW *MCU_data) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + int blkn, ci; + jpeg_component_info * compptr; + + /* Take care of restart intervals if needed */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) { + /* Re-initialize DC predictions to 0 */ + for (ci = 0; ci < cinfo->comps_in_scan; ci++) + entropy->saved.last_dc_val[ci] = 0; + /* Update restart state */ + entropy->restarts_to_go = cinfo->restart_interval; + } + entropy->restarts_to_go--; + } + + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + ci = cinfo->MCU_membership[blkn]; + compptr = cinfo->cur_comp_info[ci]; + htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci], + entropy->dc_count_ptrs[compptr->dc_tbl_no], + entropy->ac_count_ptrs[compptr->ac_tbl_no]); + entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0]; + } + + return TRUE; +} + + +/* + * Generate the best Huffman code table for the given counts, fill htbl. + * + * The JPEG standard requires that no symbol be assigned a codeword of all + * one bits (so that padding bits added at the end of a compressed segment + * can't look like a valid code). Because of the canonical ordering of + * codewords, this just means that there must be an unused slot in the + * longest codeword length category. Section K.2 of the JPEG spec suggests + * reserving such a slot by pretending that symbol 256 is a valid symbol + * with count 1. In theory that's not optimal; giving it count zero but + * including it in the symbol set anyway should give a better Huffman code. + * But the theoretically better code actually seems to come out worse in + * practice, because it produces more all-ones bytes (which incur stuffed + * zero bytes in the final file). In any case the difference is tiny. + * + * The JPEG standard requires Huffman codes to be no more than 16 bits long. + * If some symbols have a very small but nonzero probability, the Huffman tree + * must be adjusted to meet the code length restriction. We currently use + * the adjustment method suggested in JPEG section K.2. This method is *not* + * optimal; it may not choose the best possible limited-length code. But + * typically only very-low-frequency symbols will be given less-than-optimal + * lengths, so the code is almost optimal. Experimental comparisons against + * an optimal limited-length-code algorithm indicate that the difference is + * microscopic --- usually less than a hundredth of a percent of total size. + * So the extra complexity of an optimal algorithm doesn't seem worthwhile. + */ + +LOCAL(void) +jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]) +{ +#define MAX_CLEN 32 /* assumed maximum initial code length */ + UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */ + int codesize[257]; /* codesize[k] = code length of symbol k */ + int others[257]; /* next symbol in current branch of tree */ + int c1, c2; + int p, i, j; + long v; + + /* This algorithm is explained in section K.2 of the JPEG standard */ + + MEMZERO(bits, SIZEOF(bits)); + MEMZERO(codesize, SIZEOF(codesize)); + for (i = 0; i < 257; i++) + others[i] = -1; /* init links to empty */ + + freq[256] = 1; /* make sure 256 has a nonzero count */ + /* Including the pseudo-symbol 256 in the Huffman procedure guarantees + * that no real symbol is given code-value of all ones, because 256 + * will be placed last in the largest codeword category. + */ + + /* Huffman's basic algorithm to assign optimal code lengths to symbols */ + + for (;;) { + /* Find the smallest nonzero frequency, set c1 = its symbol */ + /* In case of ties, take the larger symbol number */ + c1 = -1; + v = 1000000000L; + for (i = 0; i <= 256; i++) { + if (freq[i] && freq[i] <= v) { + v = freq[i]; + c1 = i; + } + } + + /* Find the next smallest nonzero frequency, set c2 = its symbol */ + /* In case of ties, take the larger symbol number */ + c2 = -1; + v = 1000000000L; + for (i = 0; i <= 256; i++) { + if (freq[i] && freq[i] <= v && i != c1) { + v = freq[i]; + c2 = i; + } + } + + /* Done if we've merged everything into one frequency */ + if (c2 < 0) + break; + + /* Else merge the two counts/trees */ + freq[c1] += freq[c2]; + freq[c2] = 0; + + /* Increment the codesize of everything in c1's tree branch */ + codesize[c1]++; + while (others[c1] >= 0) { + c1 = others[c1]; + codesize[c1]++; + } + + others[c1] = c2; /* chain c2 onto c1's tree branch */ + + /* Increment the codesize of everything in c2's tree branch */ + codesize[c2]++; + while (others[c2] >= 0) { + c2 = others[c2]; + codesize[c2]++; + } + } + + /* Now count the number of symbols of each code length */ + for (i = 0; i <= 256; i++) { + if (codesize[i]) { + /* The JPEG standard seems to think that this can't happen, */ + /* but I'm paranoid... */ + if (codesize[i] > MAX_CLEN) + ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW); + + bits[codesize[i]]++; + } + } + + /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure + * Huffman procedure assigned any such lengths, we must adjust the coding. + * Here is what the JPEG spec says about how this next bit works: + * Since symbols are paired for the longest Huffman code, the symbols are + * removed from this length category two at a time. The prefix for the pair + * (which is one bit shorter) is allocated to one of the pair; then, + * skipping the BITS entry for that prefix length, a code word from the next + * shortest nonzero BITS entry is converted into a prefix for two code words + * one bit longer. + */ + + for (i = MAX_CLEN; i > 16; i--) { + while (bits[i] > 0) { + j = i - 2; /* find length of new prefix to be used */ + while (bits[j] == 0) + j--; + + bits[i] -= 2; /* remove two symbols */ + bits[i-1]++; /* one goes in this length */ + bits[j+1] += 2; /* two new symbols in this length */ + bits[j]--; /* symbol of this length is now a prefix */ + } + } + + /* Remove the count for the pseudo-symbol 256 from the largest codelength */ + while (bits[i] == 0) /* find largest codelength still in use */ + i--; + bits[i]--; + + /* Return final symbol counts (only for lengths 0..16) */ + MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits)); + + /* Return a list of the symbols sorted by code length */ + /* It's not real clear to me why we don't need to consider the codelength + * changes made above, but the JPEG spec seems to think this works. + */ + p = 0; + for (i = 1; i <= MAX_CLEN; i++) { + for (j = 0; j <= 255; j++) { + if (codesize[j] == i) { + htbl->huffval[p] = (UINT8) j; + p++; + } + } + } + + /* Set sent_table FALSE so updated table will be written to JPEG file. */ + htbl->sent_table = FALSE; +} + + +/* + * Finish up a statistics-gathering pass and create the new Huffman tables. + */ + +METHODDEF(void) +finish_pass_gather (j_compress_ptr cinfo) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + int ci, tbl; + jpeg_component_info * compptr; + JHUFF_TBL **htblptr; + boolean did_dc[NUM_HUFF_TBLS]; + boolean did_ac[NUM_HUFF_TBLS]; + + /* It's important not to apply jpeg_gen_optimal_table more than once + * per table, because it clobbers the input frequency counts! + */ + if (cinfo->progressive_mode) + /* Flush out buffered data (all we care about is counting the EOB symbol) */ + emit_eobrun(entropy); + + MEMZERO(did_dc, SIZEOF(did_dc)); + MEMZERO(did_ac, SIZEOF(did_ac)); + + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + /* DC needs no table for refinement scan */ + if (cinfo->Ss == 0 && cinfo->Ah == 0) { + tbl = compptr->dc_tbl_no; + if (! did_dc[tbl]) { + htblptr = & cinfo->dc_huff_tbl_ptrs[tbl]; + if (*htblptr == NULL) + *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); + jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[tbl]); + did_dc[tbl] = TRUE; + } + } + /* AC needs no table when not present */ + if (cinfo->Se) { + tbl = compptr->ac_tbl_no; + if (! did_ac[tbl]) { + htblptr = & cinfo->ac_huff_tbl_ptrs[tbl]; + if (*htblptr == NULL) + *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); + jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[tbl]); + did_ac[tbl] = TRUE; + } + } + } +} + + +/* + * Initialize for a Huffman-compressed scan. + * If gather_statistics is TRUE, we do not output anything during the scan, + * just count the Huffman symbols used and generate Huffman code tables. + */ + +METHODDEF(void) +start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + int ci, tbl; + jpeg_component_info * compptr; + + if (gather_statistics) + entropy->pub.finish_pass = finish_pass_gather; + else + entropy->pub.finish_pass = finish_pass_huff; + + if (cinfo->progressive_mode) { + entropy->cinfo = cinfo; + entropy->gather_statistics = gather_statistics; + + /* We assume jcmaster.c already validated the scan parameters. */ + + /* Select execution routine */ + if (cinfo->Ah == 0) { + if (cinfo->Ss == 0) + entropy->pub.encode_mcu = encode_mcu_DC_first; + else + entropy->pub.encode_mcu = encode_mcu_AC_first; + } else { + if (cinfo->Ss == 0) + entropy->pub.encode_mcu = encode_mcu_DC_refine; + else { + entropy->pub.encode_mcu = encode_mcu_AC_refine; + /* AC refinement needs a correction bit buffer */ + if (entropy->bit_buffer == NULL) + entropy->bit_buffer = (char *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + MAX_CORR_BITS * SIZEOF(char)); + } + } + + /* Initialize AC stuff */ + entropy->ac_tbl_no = cinfo->cur_comp_info[0]->ac_tbl_no; + entropy->EOBRUN = 0; + entropy->BE = 0; + } else { + if (gather_statistics) + entropy->pub.encode_mcu = encode_mcu_gather; + else + entropy->pub.encode_mcu = encode_mcu_huff; + } + + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + /* DC needs no table for refinement scan */ + if (cinfo->Ss == 0 && cinfo->Ah == 0) { + tbl = compptr->dc_tbl_no; + if (gather_statistics) { + /* Check for invalid table index */ + /* (make_c_derived_tbl does this in the other path) */ + if (tbl < 0 || tbl >= NUM_HUFF_TBLS) + ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl); + /* Allocate and zero the statistics tables */ + /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ + if (entropy->dc_count_ptrs[tbl] == NULL) + entropy->dc_count_ptrs[tbl] = (long *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + 257 * SIZEOF(long)); + MEMZERO(entropy->dc_count_ptrs[tbl], 257 * SIZEOF(long)); + } else { + /* Compute derived values for Huffman tables */ + /* We may do this more than once for a table, but it's not expensive */ + jpeg_make_c_derived_tbl(cinfo, TRUE, tbl, + & entropy->dc_derived_tbls[tbl]); + } + /* Initialize DC predictions to 0 */ + entropy->saved.last_dc_val[ci] = 0; + } + /* AC needs no table when not present */ + if (cinfo->Se) { + tbl = compptr->ac_tbl_no; + if (gather_statistics) { + if (tbl < 0 || tbl >= NUM_HUFF_TBLS) + ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl); + if (entropy->ac_count_ptrs[tbl] == NULL) + entropy->ac_count_ptrs[tbl] = (long *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + 257 * SIZEOF(long)); + MEMZERO(entropy->ac_count_ptrs[tbl], 257 * SIZEOF(long)); + } else { + jpeg_make_c_derived_tbl(cinfo, FALSE, tbl, + & entropy->ac_derived_tbls[tbl]); + } + } + } + + /* Initialize bit buffer to empty */ + entropy->saved.put_buffer = 0; + entropy->saved.put_bits = 0; + + /* Initialize restart stuff */ + entropy->restarts_to_go = cinfo->restart_interval; + entropy->next_restart_num = 0; +} + + +/* + * Module initialization routine for Huffman entropy encoding. + */ + +GLOBAL(void) +jinit_huff_encoder (j_compress_ptr cinfo) +{ + huff_entropy_ptr entropy; + int i; + + entropy = (huff_entropy_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + SIZEOF(huff_entropy_encoder)); + cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; + entropy->pub.start_pass = start_pass_huff; + + /* Mark tables unallocated */ + for (i = 0; i < NUM_HUFF_TBLS; i++) { + entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL; + entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL; + } + + if (cinfo->progressive_mode) + entropy->bit_buffer = NULL; /* needed only in AC refinement scan */ +} diff --git a/code/jpeg-6b/jcinit.c b/code/jpeg-8c/jcinit.c similarity index 88% rename from code/jpeg-6b/jcinit.c rename to code/jpeg-8c/jcinit.c index 5efffe33..0ba310f2 100644 --- a/code/jpeg-6b/jcinit.c +++ b/code/jpeg-8c/jcinit.c @@ -41,17 +41,10 @@ jinit_compress_master (j_compress_ptr cinfo) /* Forward DCT */ jinit_forward_dct(cinfo); /* Entropy encoding: either Huffman or arithmetic coding. */ - if (cinfo->arith_code) { - ERREXIT(cinfo, JERR_ARITH_NOTIMPL); - } else { - if (cinfo->progressive_mode) { -#ifdef C_PROGRESSIVE_SUPPORTED - jinit_phuff_encoder(cinfo); -#else - ERREXIT(cinfo, JERR_NOT_COMPILED); -#endif - } else - jinit_huff_encoder(cinfo); + if (cinfo->arith_code) + jinit_arith_encoder(cinfo); + else { + jinit_huff_encoder(cinfo); } /* Need a full-image coefficient buffer in any multi-pass mode. */ diff --git a/code/jpeg-6b/jcmainct.c b/code/jpeg-8c/jcmainct.c similarity index 72% rename from code/jpeg-6b/jcmainct.c rename to code/jpeg-8c/jcmainct.c index 03a61a7e..7de75d16 100644 --- a/code/jpeg-6b/jcmainct.c +++ b/code/jpeg-8c/jcmainct.c @@ -68,32 +68,32 @@ METHODDEF(void) process_data_buffer_main METHODDEF(void) start_pass_main (j_compress_ptr cinfo, J_BUF_MODE pass_mode) { - my_main_ptr jmain = (my_main_ptr) cinfo->main; + my_main_ptr main = (my_main_ptr) cinfo->main; /* Do nothing in raw-data mode. */ if (cinfo->raw_data_in) return; - jmain->cur_iMCU_row = 0; /* initialize counters */ - jmain->rowgroup_ctr = 0; - jmain->suspended = FALSE; - jmain->pass_mode = pass_mode; /* save mode for use by process_data */ + main->cur_iMCU_row = 0; /* initialize counters */ + main->rowgroup_ctr = 0; + main->suspended = FALSE; + main->pass_mode = pass_mode; /* save mode for use by process_data */ switch (pass_mode) { case JBUF_PASS_THRU: #ifdef FULL_MAIN_BUFFER_SUPPORTED - if (jmain->whole_image[0] != NULL) + if (main->whole_image[0] != NULL) ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); #endif - jmain->pub.process_data = process_data_simple_main; + main->pub.process_data = process_data_simple_main; break; #ifdef FULL_MAIN_BUFFER_SUPPORTED case JBUF_SAVE_SOURCE: case JBUF_CRANK_DEST: case JBUF_SAVE_AND_PASS: - if (jmain->whole_image[0] == NULL) + if (main->whole_image[0] == NULL) ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); - jmain->pub.process_data = process_data_buffer_main; + main->pub.process_data = process_data_buffer_main; break; #endif default: @@ -114,46 +114,46 @@ process_data_simple_main (j_compress_ptr cinfo, JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail) { - my_main_ptr jmain = (my_main_ptr) cinfo->main; + my_main_ptr main = (my_main_ptr) cinfo->main; - while (jmain->cur_iMCU_row < cinfo->total_iMCU_rows) { - /* Read input data if we haven't filled the jmain buffer yet */ - if (jmain->rowgroup_ctr < DCTSIZE) + while (main->cur_iMCU_row < cinfo->total_iMCU_rows) { + /* Read input data if we haven't filled the main buffer yet */ + if (main->rowgroup_ctr < (JDIMENSION) cinfo->min_DCT_v_scaled_size) (*cinfo->prep->pre_process_data) (cinfo, input_buf, in_row_ctr, in_rows_avail, - jmain->buffer, &jmain->rowgroup_ctr, - (JDIMENSION) DCTSIZE); + main->buffer, &main->rowgroup_ctr, + (JDIMENSION) cinfo->min_DCT_v_scaled_size); /* If we don't have a full iMCU row buffered, return to application for * more data. Note that preprocessor will always pad to fill the iMCU row * at the bottom of the image. */ - if (jmain->rowgroup_ctr != DCTSIZE) + if (main->rowgroup_ctr != (JDIMENSION) cinfo->min_DCT_v_scaled_size) return; /* Send the completed row to the compressor */ - if (! (*cinfo->coef->compress_data) (cinfo, jmain->buffer)) { + if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) { /* If compressor did not consume the whole row, then we must need to * suspend processing and return to the application. In this situation * we pretend we didn't yet consume the last input row; otherwise, if * it happened to be the last row of the image, the application would * think we were done. */ - if (! jmain->suspended) { + if (! main->suspended) { (*in_row_ctr)--; - jmain->suspended = TRUE; + main->suspended = TRUE; } return; } /* We did finish the row. Undo our little suspension hack if a previous - * call suspended; then mark the jmain buffer empty. + * call suspended; then mark the main buffer empty. */ - if (jmain->suspended) { + if (main->suspended) { (*in_row_ctr)++; - jmain->suspended = FALSE; + main->suspended = FALSE; } - jmain->rowgroup_ctr = 0; - jmain->cur_iMCU_row++; + main->rowgroup_ctr = 0; + main->cur_iMCU_row++; } } @@ -170,25 +170,25 @@ process_data_buffer_main (j_compress_ptr cinfo, JSAMPARRAY input_buf, JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail) { - my_main_ptr jmain = (my_main_ptr) cinfo->main; + my_main_ptr main = (my_main_ptr) cinfo->main; int ci; jpeg_component_info *compptr; - boolean writing = (jmain->pass_mode != JBUF_CRANK_DEST); + boolean writing = (main->pass_mode != JBUF_CRANK_DEST); - while (jmain->cur_iMCU_row < cinfo->total_iMCU_rows) { + while (main->cur_iMCU_row < cinfo->total_iMCU_rows) { /* Realign the virtual buffers if at the start of an iMCU row. */ - if (jmain->rowgroup_ctr == 0) { + if (main->rowgroup_ctr == 0) { for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { - jmain->buffer[ci] = (*cinfo->mem->access_virt_sarray) - ((j_common_ptr) cinfo, jmain->whole_image[ci], - jmain->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE), + main->buffer[ci] = (*cinfo->mem->access_virt_sarray) + ((j_common_ptr) cinfo, main->whole_image[ci], + main->cur_iMCU_row * (compptr->v_samp_factor * DCTSIZE), (JDIMENSION) (compptr->v_samp_factor * DCTSIZE), writing); } /* In a read pass, pretend we just read some source data. */ if (! writing) { *in_row_ctr += cinfo->max_v_samp_factor * DCTSIZE; - jmain->rowgroup_ctr = DCTSIZE; + main->rowgroup_ctr = DCTSIZE; } } @@ -197,40 +197,40 @@ process_data_buffer_main (j_compress_ptr cinfo, if (writing) { (*cinfo->prep->pre_process_data) (cinfo, input_buf, in_row_ctr, in_rows_avail, - jmain->buffer, &jmain->rowgroup_ctr, + main->buffer, &main->rowgroup_ctr, (JDIMENSION) DCTSIZE); /* Return to application if we need more data to fill the iMCU row. */ - if (jmain->rowgroup_ctr < DCTSIZE) + if (main->rowgroup_ctr < DCTSIZE) return; } /* Emit data, unless this is a sink-only pass. */ - if (jmain->pass_mode != JBUF_SAVE_SOURCE) { - if (! (*cinfo->coef->compress_data) (cinfo, jmain->buffer)) { + if (main->pass_mode != JBUF_SAVE_SOURCE) { + if (! (*cinfo->coef->compress_data) (cinfo, main->buffer)) { /* If compressor did not consume the whole row, then we must need to * suspend processing and return to the application. In this situation * we pretend we didn't yet consume the last input row; otherwise, if * it happened to be the last row of the image, the application would * think we were done. */ - if (! jmain->suspended) { + if (! main->suspended) { (*in_row_ctr)--; - jmain->suspended = TRUE; + main->suspended = TRUE; } return; } /* We did finish the row. Undo our little suspension hack if a previous - * call suspended; then mark the jmain buffer empty. + * call suspended; then mark the main buffer empty. */ - if (jmain->suspended) { + if (main->suspended) { (*in_row_ctr)++; - jmain->suspended = FALSE; + main->suspended = FALSE; } } /* If get here, we are done with this iMCU row. Mark buffer empty. */ - jmain->rowgroup_ctr = 0; - jmain->cur_iMCU_row++; + main->rowgroup_ctr = 0; + main->cur_iMCU_row++; } } @@ -238,21 +238,21 @@ process_data_buffer_main (j_compress_ptr cinfo, /* - * Initialize jmain buffer controller. + * Initialize main buffer controller. */ GLOBAL(void) jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer) { - my_main_ptr jmain; + my_main_ptr main; int ci; jpeg_component_info *compptr; - jmain = (my_main_ptr) + main = (my_main_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_main_controller)); - cinfo->main = (struct jpeg_c_main_controller *) jmain; - jmain->pub.start_pass = start_pass_main; + cinfo->main = (struct jpeg_c_main_controller *) main; + main->pub.start_pass = start_pass_main; /* We don't need to create a buffer in raw-data mode. */ if (cinfo->raw_data_in) @@ -267,27 +267,27 @@ jinit_c_main_controller (j_compress_ptr cinfo, boolean need_full_buffer) /* Note we pad the bottom to a multiple of the iMCU height */ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { - jmain->whole_image[ci] = (*cinfo->mem->request_virt_sarray) + main->whole_image[ci] = (*cinfo->mem->request_virt_sarray) ((j_common_ptr) cinfo, JPOOL_IMAGE, FALSE, - compptr->width_in_blocks * DCTSIZE, + compptr->width_in_blocks * compptr->DCT_h_scaled_size, (JDIMENSION) jround_up((long) compptr->height_in_blocks, (long) compptr->v_samp_factor) * DCTSIZE, - (JDIMENSION) (compptr->v_samp_factor * DCTSIZE)); + (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size)); } #else ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); #endif } else { #ifdef FULL_MAIN_BUFFER_SUPPORTED - jmain->whole_image[0] = NULL; /* flag for no virtual arrays */ + main->whole_image[0] = NULL; /* flag for no virtual arrays */ #endif /* Allocate a strip buffer for each component */ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { - jmain->buffer[ci] = (*cinfo->mem->alloc_sarray) + main->buffer[ci] = (*cinfo->mem->alloc_sarray) ((j_common_ptr) cinfo, JPOOL_IMAGE, - compptr->width_in_blocks * DCTSIZE, - (JDIMENSION) (compptr->v_samp_factor * DCTSIZE)); + compptr->width_in_blocks * compptr->DCT_h_scaled_size, + (JDIMENSION) (compptr->v_samp_factor * compptr->DCT_v_scaled_size)); } } } diff --git a/code/jpeg-6b/jcmarker.c b/code/jpeg-8c/jcmarker.c similarity index 86% rename from code/jpeg-6b/jcmarker.c rename to code/jpeg-8c/jcmarker.c index 3d1e6c6d..606c19af 100644 --- a/code/jpeg-6b/jcmarker.c +++ b/code/jpeg-8c/jcmarker.c @@ -2,6 +2,7 @@ * jcmarker.c * * Copyright (C) 1991-1998, Thomas G. Lane. + * Modified 2003-2010 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -153,21 +154,22 @@ emit_dqt (j_compress_ptr cinfo, int index) ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, index); prec = 0; - for (i = 0; i < DCTSIZE2; i++) { - if (qtbl->quantval[i] > 255) + for (i = 0; i <= cinfo->lim_Se; i++) { + if (qtbl->quantval[cinfo->natural_order[i]] > 255) prec = 1; } if (! qtbl->sent_table) { emit_marker(cinfo, M_DQT); - emit_2bytes(cinfo, prec ? DCTSIZE2*2 + 1 + 2 : DCTSIZE2 + 1 + 2); + emit_2bytes(cinfo, + prec ? cinfo->lim_Se * 2 + 2 + 1 + 2 : cinfo->lim_Se + 1 + 1 + 2); emit_byte(cinfo, index + (prec<<4)); - for (i = 0; i < DCTSIZE2; i++) { + for (i = 0; i <= cinfo->lim_Se; i++) { /* The table entries must be emitted in zigzag order. */ - unsigned int qval = qtbl->quantval[jpeg_natural_order[i]]; + unsigned int qval = qtbl->quantval[cinfo->natural_order[i]]; if (prec) emit_byte(cinfo, (int) (qval >> 8)); emit_byte(cinfo, (int) (qval & 0xFF)); @@ -229,32 +231,38 @@ emit_dac (j_compress_ptr cinfo) char ac_in_use[NUM_ARITH_TBLS]; int length, i; jpeg_component_info *compptr; - + for (i = 0; i < NUM_ARITH_TBLS; i++) dc_in_use[i] = ac_in_use[i] = 0; - + for (i = 0; i < cinfo->comps_in_scan; i++) { compptr = cinfo->cur_comp_info[i]; - dc_in_use[compptr->dc_tbl_no] = 1; - ac_in_use[compptr->ac_tbl_no] = 1; + /* DC needs no table for refinement scan */ + if (cinfo->Ss == 0 && cinfo->Ah == 0) + dc_in_use[compptr->dc_tbl_no] = 1; + /* AC needs no table when not present */ + if (cinfo->Se) + ac_in_use[compptr->ac_tbl_no] = 1; } - + length = 0; for (i = 0; i < NUM_ARITH_TBLS; i++) length += dc_in_use[i] + ac_in_use[i]; - - emit_marker(cinfo, M_DAC); - - emit_2bytes(cinfo, length*2 + 2); - - for (i = 0; i < NUM_ARITH_TBLS; i++) { - if (dc_in_use[i]) { - emit_byte(cinfo, i); - emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4)); - } - if (ac_in_use[i]) { - emit_byte(cinfo, i + 0x10); - emit_byte(cinfo, cinfo->arith_ac_K[i]); + + if (length) { + emit_marker(cinfo, M_DAC); + + emit_2bytes(cinfo, length*2 + 2); + + for (i = 0; i < NUM_ARITH_TBLS; i++) { + if (dc_in_use[i]) { + emit_byte(cinfo, i); + emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i]<<4)); + } + if (ac_in_use[i]) { + emit_byte(cinfo, i + 0x10); + emit_byte(cinfo, cinfo->arith_ac_K[i]); + } } } #endif /* C_ARITH_CODING_SUPPORTED */ @@ -285,13 +293,13 @@ emit_sof (j_compress_ptr cinfo, JPEG_MARKER code) emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */ /* Make sure image isn't bigger than SOF field can handle */ - if ((long) cinfo->image_height > 65535L || - (long) cinfo->image_width > 65535L) + if ((long) cinfo->jpeg_height > 65535L || + (long) cinfo->jpeg_width > 65535L) ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) 65535); emit_byte(cinfo, cinfo->data_precision); - emit_2bytes(cinfo, (int) cinfo->image_height); - emit_2bytes(cinfo, (int) cinfo->image_width); + emit_2bytes(cinfo, (int) cinfo->jpeg_height); + emit_2bytes(cinfo, (int) cinfo->jpeg_width); emit_byte(cinfo, cinfo->num_components); @@ -320,22 +328,16 @@ emit_sos (j_compress_ptr cinfo) for (i = 0; i < cinfo->comps_in_scan; i++) { compptr = cinfo->cur_comp_info[i]; emit_byte(cinfo, compptr->component_id); - td = compptr->dc_tbl_no; - ta = compptr->ac_tbl_no; - if (cinfo->progressive_mode) { - /* Progressive mode: only DC or only AC tables are used in one scan; - * furthermore, Huffman coding of DC refinement uses no table at all. - * We emit 0 for unused field(s); this is recommended by the P&M text - * but does not seem to be specified in the standard. - */ - if (cinfo->Ss == 0) { - ta = 0; /* DC scan */ - if (cinfo->Ah != 0 && !cinfo->arith_code) - td = 0; /* no DC table either */ - } else { - td = 0; /* AC scan */ - } - } + + /* We emit 0 for unused field(s); this is recommended by the P&M text + * but does not seem to be specified in the standard. + */ + + /* DC needs no table for refinement scan */ + td = cinfo->Ss == 0 && cinfo->Ah == 0 ? compptr->dc_tbl_no : 0; + /* AC needs no table when not present */ + ta = cinfo->Se ? compptr->ac_tbl_no : 0; + emit_byte(cinfo, (td << 4) + ta); } @@ -345,6 +347,22 @@ emit_sos (j_compress_ptr cinfo) } +LOCAL(void) +emit_pseudo_sos (j_compress_ptr cinfo) +/* Emit a pseudo SOS marker */ +{ + emit_marker(cinfo, M_SOS); + + emit_2bytes(cinfo, 2 + 1 + 3); /* length */ + + emit_byte(cinfo, 0); /* Ns */ + + emit_byte(cinfo, 0); /* Ss */ + emit_byte(cinfo, cinfo->block_size * cinfo->block_size - 1); /* Se */ + emit_byte(cinfo, 0); /* Ah/Al */ +} + + LOCAL(void) emit_jfif_app0 (j_compress_ptr cinfo) /* Emit a JFIF-compliant APP0 marker */ @@ -484,7 +502,7 @@ write_file_header (j_compress_ptr cinfo) /* * Write frame header. - * This consists of DQT and SOFn markers. + * This consists of DQT and SOFn markers, and a conditional pseudo SOS marker. * Note that we do not emit the SOF until we have emitted the DQT(s). * This avoids compatibility problems with incorrect implementations that * try to error-check the quant table numbers as soon as they see the SOF. @@ -511,7 +529,7 @@ write_frame_header (j_compress_ptr cinfo) * Note we assume that Huffman table numbers won't be changed later. */ if (cinfo->arith_code || cinfo->progressive_mode || - cinfo->data_precision != 8) { + cinfo->data_precision != 8 || cinfo->block_size != DCTSIZE) { is_baseline = FALSE; } else { is_baseline = TRUE; @@ -529,7 +547,10 @@ write_frame_header (j_compress_ptr cinfo) /* Emit the proper SOF marker */ if (cinfo->arith_code) { - emit_sof(cinfo, M_SOF9); /* SOF code for arithmetic coding */ + if (cinfo->progressive_mode) + emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */ + else + emit_sof(cinfo, M_SOF9); /* SOF code for sequential arithmetic */ } else { if (cinfo->progressive_mode) emit_sof(cinfo, M_SOF2); /* SOF code for progressive Huffman */ @@ -538,6 +559,10 @@ write_frame_header (j_compress_ptr cinfo) else emit_sof(cinfo, M_SOF1); /* SOF code for non-baseline Huffman file */ } + + /* Check to emit pseudo SOS marker */ + if (cinfo->progressive_mode && cinfo->block_size != DCTSIZE) + emit_pseudo_sos(cinfo); } @@ -566,19 +591,12 @@ write_scan_header (j_compress_ptr cinfo) */ for (i = 0; i < cinfo->comps_in_scan; i++) { compptr = cinfo->cur_comp_info[i]; - if (cinfo->progressive_mode) { - /* Progressive mode: only DC or only AC tables are used in one scan */ - if (cinfo->Ss == 0) { - if (cinfo->Ah == 0) /* DC needs no table for refinement scan */ - emit_dht(cinfo, compptr->dc_tbl_no, FALSE); - } else { - emit_dht(cinfo, compptr->ac_tbl_no, TRUE); - } - } else { - /* Sequential mode: need both DC and AC tables */ + /* DC needs no table for refinement scan */ + if (cinfo->Ss == 0 && cinfo->Ah == 0) emit_dht(cinfo, compptr->dc_tbl_no, FALSE); + /* AC needs no table when not present */ + if (cinfo->Se) emit_dht(cinfo, compptr->ac_tbl_no, TRUE); - } } } diff --git a/code/jpeg-6b/jcmaster.c b/code/jpeg-8c/jcmaster.c similarity index 57% rename from code/jpeg-6b/jcmaster.c rename to code/jpeg-8c/jcmaster.c index aab4020b..caf80a53 100644 --- a/code/jpeg-6b/jcmaster.c +++ b/code/jpeg-8c/jcmaster.c @@ -2,6 +2,7 @@ * jcmaster.c * * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 2003-2011 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -42,23 +43,220 @@ typedef my_comp_master * my_master_ptr; * Support routines that do various essential calculations. */ -LOCAL(void) -initial_setup (j_compress_ptr cinfo) +/* + * Compute JPEG image dimensions and related values. + * NOTE: this is exported for possible use by application. + * Hence it mustn't do anything that can't be done twice. + */ + +GLOBAL(void) +jpeg_calc_jpeg_dimensions (j_compress_ptr cinfo) /* Do computations that are needed before master selection phase */ { - int ci; +#ifdef DCT_SCALING_SUPPORTED + + /* Sanity check on input image dimensions to prevent overflow in + * following calculation. + * We do check jpeg_width and jpeg_height in initial_setup below, + * but image_width and image_height can come from arbitrary data, + * and we need some space for multiplication by block_size. + */ + if (((long) cinfo->image_width >> 24) || ((long) cinfo->image_height >> 24)) + ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION); + + /* Compute actual JPEG image dimensions and DCT scaling choices. */ + if (cinfo->scale_num >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/1 scaling */ + cinfo->jpeg_width = cinfo->image_width * cinfo->block_size; + cinfo->jpeg_height = cinfo->image_height * cinfo->block_size; + cinfo->min_DCT_h_scaled_size = 1; + cinfo->min_DCT_v_scaled_size = 1; + } else if (cinfo->scale_num * 2 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/2 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 2L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 2L); + cinfo->min_DCT_h_scaled_size = 2; + cinfo->min_DCT_v_scaled_size = 2; + } else if (cinfo->scale_num * 3 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/3 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 3L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 3L); + cinfo->min_DCT_h_scaled_size = 3; + cinfo->min_DCT_v_scaled_size = 3; + } else if (cinfo->scale_num * 4 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/4 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 4L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 4L); + cinfo->min_DCT_h_scaled_size = 4; + cinfo->min_DCT_v_scaled_size = 4; + } else if (cinfo->scale_num * 5 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/5 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 5L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 5L); + cinfo->min_DCT_h_scaled_size = 5; + cinfo->min_DCT_v_scaled_size = 5; + } else if (cinfo->scale_num * 6 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/6 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 6L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 6L); + cinfo->min_DCT_h_scaled_size = 6; + cinfo->min_DCT_v_scaled_size = 6; + } else if (cinfo->scale_num * 7 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/7 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 7L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 7L); + cinfo->min_DCT_h_scaled_size = 7; + cinfo->min_DCT_v_scaled_size = 7; + } else if (cinfo->scale_num * 8 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/8 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 8L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 8L); + cinfo->min_DCT_h_scaled_size = 8; + cinfo->min_DCT_v_scaled_size = 8; + } else if (cinfo->scale_num * 9 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/9 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 9L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 9L); + cinfo->min_DCT_h_scaled_size = 9; + cinfo->min_DCT_v_scaled_size = 9; + } else if (cinfo->scale_num * 10 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/10 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 10L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 10L); + cinfo->min_DCT_h_scaled_size = 10; + cinfo->min_DCT_v_scaled_size = 10; + } else if (cinfo->scale_num * 11 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/11 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 11L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 11L); + cinfo->min_DCT_h_scaled_size = 11; + cinfo->min_DCT_v_scaled_size = 11; + } else if (cinfo->scale_num * 12 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/12 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 12L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 12L); + cinfo->min_DCT_h_scaled_size = 12; + cinfo->min_DCT_v_scaled_size = 12; + } else if (cinfo->scale_num * 13 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/13 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 13L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 13L); + cinfo->min_DCT_h_scaled_size = 13; + cinfo->min_DCT_v_scaled_size = 13; + } else if (cinfo->scale_num * 14 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/14 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 14L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 14L); + cinfo->min_DCT_h_scaled_size = 14; + cinfo->min_DCT_v_scaled_size = 14; + } else if (cinfo->scale_num * 15 >= cinfo->scale_denom * cinfo->block_size) { + /* Provide block_size/15 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 15L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 15L); + cinfo->min_DCT_h_scaled_size = 15; + cinfo->min_DCT_v_scaled_size = 15; + } else { + /* Provide block_size/16 scaling */ + cinfo->jpeg_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * cinfo->block_size, 16L); + cinfo->jpeg_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * cinfo->block_size, 16L); + cinfo->min_DCT_h_scaled_size = 16; + cinfo->min_DCT_v_scaled_size = 16; + } + +#else /* !DCT_SCALING_SUPPORTED */ + + /* Hardwire it to "no scaling" */ + cinfo->jpeg_width = cinfo->image_width; + cinfo->jpeg_height = cinfo->image_height; + cinfo->min_DCT_h_scaled_size = DCTSIZE; + cinfo->min_DCT_v_scaled_size = DCTSIZE; + +#endif /* DCT_SCALING_SUPPORTED */ +} + + +LOCAL(void) +jpeg_calc_trans_dimensions (j_compress_ptr cinfo) +{ + if (cinfo->min_DCT_h_scaled_size != cinfo->min_DCT_v_scaled_size) + ERREXIT2(cinfo, JERR_BAD_DCTSIZE, + cinfo->min_DCT_h_scaled_size, cinfo->min_DCT_v_scaled_size); + + cinfo->block_size = cinfo->min_DCT_h_scaled_size; +} + + +LOCAL(void) +initial_setup (j_compress_ptr cinfo, boolean transcode_only) +/* Do computations that are needed before master selection phase */ +{ + int ci, ssize; jpeg_component_info *compptr; long samplesperrow; JDIMENSION jd_samplesperrow; + if (transcode_only) + jpeg_calc_trans_dimensions(cinfo); + else + jpeg_calc_jpeg_dimensions(cinfo); + + /* Sanity check on block_size */ + if (cinfo->block_size < 1 || cinfo->block_size > 16) + ERREXIT2(cinfo, JERR_BAD_DCTSIZE, cinfo->block_size, cinfo->block_size); + + /* Derive natural_order from block_size */ + switch (cinfo->block_size) { + case 2: cinfo->natural_order = jpeg_natural_order2; break; + case 3: cinfo->natural_order = jpeg_natural_order3; break; + case 4: cinfo->natural_order = jpeg_natural_order4; break; + case 5: cinfo->natural_order = jpeg_natural_order5; break; + case 6: cinfo->natural_order = jpeg_natural_order6; break; + case 7: cinfo->natural_order = jpeg_natural_order7; break; + default: cinfo->natural_order = jpeg_natural_order; break; + } + + /* Derive lim_Se from block_size */ + cinfo->lim_Se = cinfo->block_size < DCTSIZE ? + cinfo->block_size * cinfo->block_size - 1 : DCTSIZE2-1; + /* Sanity check on image dimensions */ - if (cinfo->image_height <= 0 || cinfo->image_width <= 0 - || cinfo->num_components <= 0 || cinfo->input_components <= 0) + if (cinfo->jpeg_height <= 0 || cinfo->jpeg_width <= 0 || + cinfo->num_components <= 0 || cinfo->input_components <= 0) ERREXIT(cinfo, JERR_EMPTY_IMAGE); /* Make sure image isn't bigger than I can handle */ - if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION || - (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION) + if ((long) cinfo->jpeg_height > (long) JPEG_MAX_DIMENSION || + (long) cinfo->jpeg_width > (long) JPEG_MAX_DIMENSION) ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION); /* Width of an input scanline must be representable as JDIMENSION. */ @@ -95,22 +293,52 @@ initial_setup (j_compress_ptr cinfo) ci++, compptr++) { /* Fill in the correct component_index value; don't rely on application */ compptr->component_index = ci; - /* For compression, we never do DCT scaling. */ - compptr->DCT_scaled_size = DCTSIZE; + /* In selecting the actual DCT scaling for each component, we try to + * scale down the chroma components via DCT scaling rather than downsampling. + * This saves time if the downsampler gets to use 1:1 scaling. + * Note this code adapts subsampling ratios which are powers of 2. + */ + ssize = 1; +#ifdef DCT_SCALING_SUPPORTED + while (cinfo->min_DCT_h_scaled_size * ssize <= + (cinfo->do_fancy_downsampling ? DCTSIZE : DCTSIZE / 2) && + (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) == 0) { + ssize = ssize * 2; + } +#endif + compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize; + ssize = 1; +#ifdef DCT_SCALING_SUPPORTED + while (cinfo->min_DCT_v_scaled_size * ssize <= + (cinfo->do_fancy_downsampling ? DCTSIZE : DCTSIZE / 2) && + (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) == 0) { + ssize = ssize * 2; + } +#endif + compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize; + + /* We don't support DCT ratios larger than 2. */ + if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2) + compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2; + else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2) + compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2; + /* Size in DCT blocks */ compptr->width_in_blocks = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, - (long) (cinfo->max_h_samp_factor * DCTSIZE)); + jdiv_round_up((long) cinfo->jpeg_width * (long) compptr->h_samp_factor, + (long) (cinfo->max_h_samp_factor * cinfo->block_size)); compptr->height_in_blocks = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, - (long) (cinfo->max_v_samp_factor * DCTSIZE)); + jdiv_round_up((long) cinfo->jpeg_height * (long) compptr->v_samp_factor, + (long) (cinfo->max_v_samp_factor * cinfo->block_size)); /* Size in samples */ compptr->downsampled_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, - (long) cinfo->max_h_samp_factor); + jdiv_round_up((long) cinfo->jpeg_width * + (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size), + (long) (cinfo->max_h_samp_factor * cinfo->block_size)); compptr->downsampled_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, - (long) cinfo->max_v_samp_factor); + jdiv_round_up((long) cinfo->jpeg_height * + (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size), + (long) (cinfo->max_v_samp_factor * cinfo->block_size)); /* Mark component needed (this flag isn't actually used for compression) */ compptr->component_needed = TRUE; } @@ -119,8 +347,8 @@ initial_setup (j_compress_ptr cinfo) * main controller will call coefficient controller). */ cinfo->total_iMCU_rows = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); + jdiv_round_up((long) cinfo->jpeg_height, + (long) (cinfo->max_v_samp_factor * cinfo->block_size)); } @@ -260,6 +488,39 @@ validate_script (j_compress_ptr cinfo) } } + +LOCAL(void) +reduce_script (j_compress_ptr cinfo) +/* Adapt scan script for use with reduced block size; + * assume that script has been validated before. + */ +{ + jpeg_scan_info * scanptr; + int idxout, idxin; + + /* Circumvent const declaration for this function */ + scanptr = (jpeg_scan_info *) cinfo->scan_info; + idxout = 0; + + for (idxin = 0; idxin < cinfo->num_scans; idxin++) { + /* After skipping, idxout becomes smaller than idxin */ + if (idxin != idxout) + /* Copy rest of data; + * note we stay in given chunk of allocated memory. + */ + scanptr[idxout] = scanptr[idxin]; + if (scanptr[idxout].Ss > cinfo->lim_Se) + /* Entire scan out of range - skip this entry */ + continue; + if (scanptr[idxout].Se > cinfo->lim_Se) + /* Limit scan to end of block */ + scanptr[idxout].Se = cinfo->lim_Se; + idxout++; + } + + cinfo->num_scans = idxout; +} + #endif /* C_MULTISCAN_FILES_SUPPORTED */ @@ -280,10 +541,13 @@ select_scan_parameters (j_compress_ptr cinfo) cinfo->cur_comp_info[ci] = &cinfo->comp_info[scanptr->component_index[ci]]; } - cinfo->Ss = scanptr->Ss; - cinfo->Se = scanptr->Se; - cinfo->Ah = scanptr->Ah; - cinfo->Al = scanptr->Al; + if (cinfo->progressive_mode) { + cinfo->Ss = scanptr->Ss; + cinfo->Se = scanptr->Se; + cinfo->Ah = scanptr->Ah; + cinfo->Al = scanptr->Al; + return; + } } else #endif @@ -296,11 +560,11 @@ select_scan_parameters (j_compress_ptr cinfo) for (ci = 0; ci < cinfo->num_components; ci++) { cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci]; } - cinfo->Ss = 0; - cinfo->Se = DCTSIZE2-1; - cinfo->Ah = 0; - cinfo->Al = 0; } + cinfo->Ss = 0; + cinfo->Se = cinfo->block_size * cinfo->block_size - 1; + cinfo->Ah = 0; + cinfo->Al = 0; } @@ -325,7 +589,7 @@ per_scan_setup (j_compress_ptr cinfo) compptr->MCU_width = 1; compptr->MCU_height = 1; compptr->MCU_blocks = 1; - compptr->MCU_sample_width = DCTSIZE; + compptr->MCU_sample_width = compptr->DCT_h_scaled_size; compptr->last_col_width = 1; /* For noninterleaved scans, it is convenient to define last_row_height * as the number of block rows present in the last iMCU row. @@ -347,11 +611,11 @@ per_scan_setup (j_compress_ptr cinfo) /* Overall image size in MCUs */ cinfo->MCUs_per_row = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, - (long) (cinfo->max_h_samp_factor*DCTSIZE)); + jdiv_round_up((long) cinfo->jpeg_width, + (long) (cinfo->max_h_samp_factor * cinfo->block_size)); cinfo->MCU_rows_in_scan = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, - (long) (cinfo->max_v_samp_factor*DCTSIZE)); + jdiv_round_up((long) cinfo->jpeg_height, + (long) (cinfo->max_v_samp_factor * cinfo->block_size)); cinfo->blocks_in_MCU = 0; @@ -361,7 +625,7 @@ per_scan_setup (j_compress_ptr cinfo) compptr->MCU_width = compptr->h_samp_factor; compptr->MCU_height = compptr->v_samp_factor; compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height; - compptr->MCU_sample_width = compptr->MCU_width * DCTSIZE; + compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size; /* Figure number of non-dummy blocks in last MCU column & row */ tmp = (int) (compptr->width_in_blocks % compptr->MCU_width); if (tmp == 0) tmp = compptr->MCU_width; @@ -433,7 +697,7 @@ prepare_for_pass (j_compress_ptr cinfo) /* Do Huffman optimization for a scan after the first one. */ select_scan_parameters(cinfo); per_scan_setup(cinfo); - if (cinfo->Ss != 0 || cinfo->Ah == 0 || cinfo->arith_code) { + if (cinfo->Ss != 0 || cinfo->Ah == 0) { (*cinfo->entropy->start_pass) (cinfo, TRUE); (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST); master->pub.call_pass_startup = FALSE; @@ -554,11 +818,13 @@ jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only) master->pub.is_last_pass = FALSE; /* Validate parameters, determine derived values */ - initial_setup(cinfo); + initial_setup(cinfo, transcode_only); if (cinfo->scan_info != NULL) { #ifdef C_MULTISCAN_FILES_SUPPORTED validate_script(cinfo); + if (cinfo->block_size < DCTSIZE) + reduce_script(cinfo); #else ERREXIT(cinfo, JERR_NOT_COMPILED); #endif @@ -567,8 +833,10 @@ jinit_c_master_control (j_compress_ptr cinfo, boolean transcode_only) cinfo->num_scans = 1; } - if (cinfo->progressive_mode) /* TEMPORARY HACK ??? */ - cinfo->optimize_coding = TRUE; /* assume default tables no good for progressive mode */ + if ((cinfo->progressive_mode || cinfo->block_size < DCTSIZE) && + !cinfo->arith_code) /* TEMPORARY HACK ??? */ + /* assume default tables no good for progressive or downscale mode */ + cinfo->optimize_coding = TRUE; /* Initialize my private state */ if (transcode_only) { diff --git a/code/jpeg-6b/jcomapi.c b/code/jpeg-8c/jcomapi.c similarity index 100% rename from code/jpeg-6b/jcomapi.c rename to code/jpeg-8c/jcomapi.c diff --git a/code/jpeg-6b/jcparam.c b/code/jpeg-8c/jcparam.c similarity index 92% rename from code/jpeg-6b/jcparam.c rename to code/jpeg-8c/jcparam.c index 6fc48f53..c5e85dda 100644 --- a/code/jpeg-6b/jcparam.c +++ b/code/jpeg-8c/jcparam.c @@ -2,6 +2,7 @@ * jcparam.c * * Copyright (C) 1991-1998, Thomas G. Lane. + * Modified 2003-2008 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -60,6 +61,47 @@ jpeg_add_quant_table (j_compress_ptr cinfo, int which_tbl, } +/* These are the sample quantization tables given in JPEG spec section K.1. + * The spec says that the values given produce "good" quality, and + * when divided by 2, "very good" quality. + */ +static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = { + 16, 11, 10, 16, 24, 40, 51, 61, + 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, + 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 56, 68, 109, 103, 77, + 24, 35, 55, 64, 81, 104, 113, 92, + 49, 64, 78, 87, 103, 121, 120, 101, + 72, 92, 95, 98, 112, 100, 103, 99 +}; +static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = { + 17, 18, 24, 47, 99, 99, 99, 99, + 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, + 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99 +}; + + +GLOBAL(void) +jpeg_default_qtables (j_compress_ptr cinfo, boolean force_baseline) +/* Set or change the 'quality' (quantization) setting, using default tables + * and straight percentage-scaling quality scales. + * This entry point allows different scalings for luminance and chrominance. + */ +{ + /* Set up two quantization tables using the specified scaling */ + jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, + cinfo->q_scale_factor[0], force_baseline); + jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl, + cinfo->q_scale_factor[1], force_baseline); +} + + GLOBAL(void) jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, boolean force_baseline) @@ -69,31 +111,6 @@ jpeg_set_linear_quality (j_compress_ptr cinfo, int scale_factor, * applications that insist on a linear percentage scaling. */ { - /* These are the sample quantization tables given in JPEG spec section K.1. - * The spec says that the values given produce "good" quality, and - * when divided by 2, "very good" quality. - */ - static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = { - 16, 11, 10, 16, 24, 40, 51, 61, - 12, 12, 14, 19, 26, 58, 60, 55, - 14, 13, 16, 24, 40, 57, 69, 56, - 14, 17, 22, 29, 51, 87, 80, 62, - 18, 22, 37, 56, 68, 109, 103, 77, - 24, 35, 55, 64, 81, 104, 113, 92, - 49, 64, 78, 87, 103, 121, 120, 101, - 72, 92, 95, 98, 112, 100, 103, 99 - }; - static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = { - 17, 18, 24, 47, 99, 99, 99, 99, - 18, 21, 26, 66, 99, 99, 99, 99, - 24, 26, 56, 99, 99, 99, 99, 99, - 47, 66, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99 - }; - /* Set up two quantization tables using the specified scaling */ jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl, scale_factor, force_baseline); @@ -284,6 +301,8 @@ jpeg_set_defaults (j_compress_ptr cinfo) /* Initialize everything not dependent on the color space */ + cinfo->scale_num = 1; /* 1:1 scaling */ + cinfo->scale_denom = 1; cinfo->data_precision = BITS_IN_JSAMPLE; /* Set up two quantization tables using default quality of 75 */ jpeg_set_quality(cinfo, 75, TRUE); @@ -320,6 +339,9 @@ jpeg_set_defaults (j_compress_ptr cinfo) /* By default, use the simpler non-cosited sampling alignment */ cinfo->CCIR601_sampling = FALSE; + /* By default, apply fancy downsampling */ + cinfo->do_fancy_downsampling = TRUE; + /* No input smoothing */ cinfo->smoothing_factor = 0; diff --git a/code/jpeg-6b/jcprepct.c b/code/jpeg-8c/jcprepct.c similarity index 96% rename from code/jpeg-6b/jcprepct.c rename to code/jpeg-8c/jcprepct.c index fa93333d..be44cc4b 100644 --- a/code/jpeg-6b/jcprepct.c +++ b/code/jpeg-8c/jcprepct.c @@ -173,10 +173,12 @@ pre_process_data (j_compress_ptr cinfo, *out_row_group_ctr < out_row_groups_avail) { for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { + numrows = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) / + cinfo->min_DCT_v_scaled_size; expand_bottom_edge(output_buf[ci], - compptr->width_in_blocks * DCTSIZE, - (int) (*out_row_group_ctr * compptr->v_samp_factor), - (int) (out_row_groups_avail * compptr->v_samp_factor)); + compptr->width_in_blocks * compptr->DCT_h_scaled_size, + (int) (*out_row_group_ctr * numrows), + (int) (out_row_groups_avail * numrows)); } *out_row_group_ctr = out_row_groups_avail; break; /* can exit outer loop without test */ @@ -288,7 +290,8 @@ create_context_buffer (j_compress_ptr cinfo) */ true_buffer = (*cinfo->mem->alloc_sarray) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE * + (JDIMENSION) (((long) compptr->width_in_blocks * + cinfo->min_DCT_h_scaled_size * cinfo->max_h_samp_factor) / compptr->h_samp_factor), (JDIMENSION) (3 * rgroup_height)); /* Copy true buffer row pointers into the middle of the fake row array */ @@ -346,7 +349,8 @@ jinit_c_prep_controller (j_compress_ptr cinfo, boolean need_full_buffer) ci++, compptr++) { prep->color_buf[ci] = (*cinfo->mem->alloc_sarray) ((j_common_ptr) cinfo, JPOOL_IMAGE, - (JDIMENSION) (((long) compptr->width_in_blocks * DCTSIZE * + (JDIMENSION) (((long) compptr->width_in_blocks * + cinfo->min_DCT_h_scaled_size * cinfo->max_h_samp_factor) / compptr->h_samp_factor), (JDIMENSION) cinfo->max_v_samp_factor); } diff --git a/code/jpeg-6b/jcsample.c b/code/jpeg-8c/jcsample.c similarity index 85% rename from code/jpeg-6b/jcsample.c rename to code/jpeg-8c/jcsample.c index 212ec875..4d36f85f 100644 --- a/code/jpeg-6b/jcsample.c +++ b/code/jpeg-8c/jcsample.c @@ -62,6 +62,15 @@ typedef struct { /* Downsampling method pointers, one per component */ downsample1_ptr methods[MAX_COMPONENTS]; + + /* Height of an output row group for each component. */ + int rowgroup_height[MAX_COMPONENTS]; + + /* These arrays save pixel expansion factors so that int_downsample need not + * recompute them each time. They are unused for other downsampling methods. + */ + UINT8 h_expand[MAX_COMPONENTS]; + UINT8 v_expand[MAX_COMPONENTS]; } my_downsampler; typedef my_downsampler * my_downsample_ptr; @@ -123,7 +132,8 @@ sep_downsample (j_compress_ptr cinfo, for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { in_ptr = input_buf[ci] + in_row_index; - out_ptr = output_buf[ci] + (out_row_group_index * compptr->v_samp_factor); + out_ptr = output_buf[ci] + + (out_row_group_index * downsample->rowgroup_height[ci]); (*downsample->methods[ci]) (cinfo, compptr, in_ptr, out_ptr); } } @@ -140,14 +150,15 @@ METHODDEF(void) int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, JSAMPARRAY input_data, JSAMPARRAY output_data) { + my_downsample_ptr downsample = (my_downsample_ptr) cinfo->downsample; int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v; JDIMENSION outcol, outcol_h; /* outcol_h == outcol*h_expand */ - JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; + JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size; JSAMPROW inptr, outptr; INT32 outvalue; - h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor; - v_expand = cinfo->max_v_samp_factor / compptr->v_samp_factor; + h_expand = downsample->h_expand[compptr->component_index]; + v_expand = downsample->v_expand[compptr->component_index]; numpix = h_expand * v_expand; numpix2 = numpix/2; @@ -158,8 +169,8 @@ int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, expand_right_edge(input_data, cinfo->max_v_samp_factor, cinfo->image_width, output_cols * h_expand); - inrow = 0; - for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { + inrow = outrow = 0; + while (inrow < cinfo->max_v_samp_factor) { outptr = output_data[outrow]; for (outcol = 0, outcol_h = 0; outcol < output_cols; outcol++, outcol_h += h_expand) { @@ -173,6 +184,7 @@ int_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, *outptr++ = (JSAMPLE) ((outvalue + numpix2) / numpix); } inrow += v_expand; + outrow++; } } @@ -191,8 +203,8 @@ fullsize_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, jcopy_sample_rows(input_data, 0, output_data, 0, cinfo->max_v_samp_factor, cinfo->image_width); /* Edge-expand */ - expand_right_edge(output_data, cinfo->max_v_samp_factor, - cinfo->image_width, compptr->width_in_blocks * DCTSIZE); + expand_right_edge(output_data, cinfo->max_v_samp_factor, cinfo->image_width, + compptr->width_in_blocks * compptr->DCT_h_scaled_size); } @@ -212,9 +224,9 @@ METHODDEF(void) h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, JSAMPARRAY input_data, JSAMPARRAY output_data) { - int outrow; + int inrow; JDIMENSION outcol; - JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; + JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size; register JSAMPROW inptr, outptr; register int bias; @@ -225,9 +237,9 @@ h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, expand_right_edge(input_data, cinfo->max_v_samp_factor, cinfo->image_width, output_cols * 2); - for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { - outptr = output_data[outrow]; - inptr = input_data[outrow]; + for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) { + outptr = output_data[inrow]; + inptr = input_data[inrow]; bias = 0; /* bias = 0,1,0,1,... for successive samples */ for (outcol = 0; outcol < output_cols; outcol++) { *outptr++ = (JSAMPLE) ((GETJSAMPLE(*inptr) + GETJSAMPLE(inptr[1]) @@ -251,7 +263,7 @@ h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, { int inrow, outrow; JDIMENSION outcol; - JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; + JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size; register JSAMPROW inptr0, inptr1, outptr; register int bias; @@ -262,8 +274,8 @@ h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, expand_right_edge(input_data, cinfo->max_v_samp_factor, cinfo->image_width, output_cols * 2); - inrow = 0; - for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { + inrow = outrow = 0; + while (inrow < cinfo->max_v_samp_factor) { outptr = output_data[outrow]; inptr0 = input_data[inrow]; inptr1 = input_data[inrow+1]; @@ -276,6 +288,7 @@ h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, inptr0 += 2; inptr1 += 2; } inrow += 2; + outrow++; } } @@ -294,7 +307,7 @@ h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, { int inrow, outrow; JDIMENSION colctr; - JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; + JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size; register JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr; INT32 membersum, neighsum, memberscale, neighscale; @@ -321,8 +334,8 @@ h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, memberscale = 16384 - cinfo->smoothing_factor * 80; /* scaled (1-5*SF)/4 */ neighscale = cinfo->smoothing_factor * 16; /* scaled SF/4 */ - inrow = 0; - for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { + inrow = outrow = 0; + while (inrow < cinfo->max_v_samp_factor) { outptr = output_data[outrow]; inptr0 = input_data[inrow]; inptr1 = input_data[inrow+1]; @@ -378,6 +391,7 @@ h2v2_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr, *outptr = (JSAMPLE) ((membersum + 32768) >> 16); inrow += 2; + outrow++; } } @@ -392,9 +406,9 @@ METHODDEF(void) fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, JSAMPARRAY input_data, JSAMPARRAY output_data) { - int outrow; + int inrow; JDIMENSION colctr; - JDIMENSION output_cols = compptr->width_in_blocks * DCTSIZE; + JDIMENSION output_cols = compptr->width_in_blocks * compptr->DCT_h_scaled_size; register JSAMPROW inptr, above_ptr, below_ptr, outptr; INT32 membersum, neighsum, memberscale, neighscale; int colsum, lastcolsum, nextcolsum; @@ -415,11 +429,11 @@ fullsize_smooth_downsample (j_compress_ptr cinfo, jpeg_component_info *compptr, memberscale = 65536L - cinfo->smoothing_factor * 512L; /* scaled 1-8*SF */ neighscale = cinfo->smoothing_factor * 64; /* scaled SF */ - for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) { - outptr = output_data[outrow]; - inptr = input_data[outrow]; - above_ptr = input_data[outrow-1]; - below_ptr = input_data[outrow+1]; + for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) { + outptr = output_data[inrow]; + inptr = input_data[inrow]; + above_ptr = input_data[inrow-1]; + below_ptr = input_data[inrow+1]; /* Special case for first column */ colsum = GETJSAMPLE(*above_ptr++) + GETJSAMPLE(*below_ptr++) + @@ -467,6 +481,7 @@ jinit_downsampler (j_compress_ptr cinfo) int ci; jpeg_component_info * compptr; boolean smoothok = TRUE; + int h_in_group, v_in_group, h_out_group, v_out_group; downsample = (my_downsample_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, @@ -482,8 +497,17 @@ jinit_downsampler (j_compress_ptr cinfo) /* Verify we can handle the sampling factors, and set up method pointers */ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { - if (compptr->h_samp_factor == cinfo->max_h_samp_factor && - compptr->v_samp_factor == cinfo->max_v_samp_factor) { + /* Compute size of an "output group" for DCT scaling. This many samples + * are to be converted from max_h_samp_factor * max_v_samp_factor pixels. + */ + h_out_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) / + cinfo->min_DCT_h_scaled_size; + v_out_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) / + cinfo->min_DCT_v_scaled_size; + h_in_group = cinfo->max_h_samp_factor; + v_in_group = cinfo->max_v_samp_factor; + downsample->rowgroup_height[ci] = v_out_group; /* save for use later */ + if (h_in_group == h_out_group && v_in_group == v_out_group) { #ifdef INPUT_SMOOTHING_SUPPORTED if (cinfo->smoothing_factor) { downsample->methods[ci] = fullsize_smooth_downsample; @@ -491,12 +515,12 @@ jinit_downsampler (j_compress_ptr cinfo) } else #endif downsample->methods[ci] = fullsize_downsample; - } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor && - compptr->v_samp_factor == cinfo->max_v_samp_factor) { + } else if (h_in_group == h_out_group * 2 && + v_in_group == v_out_group) { smoothok = FALSE; downsample->methods[ci] = h2v1_downsample; - } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor && - compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) { + } else if (h_in_group == h_out_group * 2 && + v_in_group == v_out_group * 2) { #ifdef INPUT_SMOOTHING_SUPPORTED if (cinfo->smoothing_factor) { downsample->methods[ci] = h2v2_smooth_downsample; @@ -504,10 +528,12 @@ jinit_downsampler (j_compress_ptr cinfo) } else #endif downsample->methods[ci] = h2v2_downsample; - } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 && - (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) { + } else if ((h_in_group % h_out_group) == 0 && + (v_in_group % v_out_group) == 0) { smoothok = FALSE; downsample->methods[ci] = int_downsample; + downsample->h_expand[ci] = (UINT8) (h_in_group / h_out_group); + downsample->v_expand[ci] = (UINT8) (v_in_group / v_out_group); } else ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL); } diff --git a/code/jpeg-6b/jctrans.c b/code/jpeg-8c/jctrans.c similarity index 96% rename from code/jpeg-6b/jctrans.c rename to code/jpeg-8c/jctrans.c index 0e6d7076..cee6b0f3 100644 --- a/code/jpeg-6b/jctrans.c +++ b/code/jpeg-8c/jctrans.c @@ -2,6 +2,7 @@ * jctrans.c * * Copyright (C) 1995-1998, Thomas G. Lane. + * Modified 2000-2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -76,6 +77,10 @@ jpeg_copy_critical_parameters (j_decompress_ptr srcinfo, dstinfo->image_height = srcinfo->image_height; dstinfo->input_components = srcinfo->num_components; dstinfo->in_color_space = srcinfo->jpeg_color_space; + dstinfo->jpeg_width = srcinfo->output_width; + dstinfo->jpeg_height = srcinfo->output_height; + dstinfo->min_DCT_h_scaled_size = srcinfo->min_DCT_h_scaled_size; + dstinfo->min_DCT_v_scaled_size = srcinfo->min_DCT_v_scaled_size; /* Initialize all parameters to default values */ jpeg_set_defaults(dstinfo); /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB. @@ -158,25 +163,14 @@ LOCAL(void) transencode_master_selection (j_compress_ptr cinfo, jvirt_barray_ptr * coef_arrays) { - /* Although we don't actually use input_components for transcoding, - * jcmaster.c's initial_setup will complain if input_components is 0. - */ - cinfo->input_components = 1; /* Initialize master control (includes parameter checking/processing) */ jinit_c_master_control(cinfo, TRUE /* transcode only */); /* Entropy encoding: either Huffman or arithmetic coding. */ - if (cinfo->arith_code) { - ERREXIT(cinfo, JERR_ARITH_NOTIMPL); - } else { - if (cinfo->progressive_mode) { -#ifdef C_PROGRESSIVE_SUPPORTED - jinit_phuff_encoder(cinfo); -#else - ERREXIT(cinfo, JERR_NOT_COMPILED); -#endif - } else - jinit_huff_encoder(cinfo); + if (cinfo->arith_code) + jinit_arith_encoder(cinfo); + else { + jinit_huff_encoder(cinfo); } /* We need a special coefficient buffer controller. */ diff --git a/code/jpeg-6b/jdapimin.c b/code/jpeg-8c/jdapimin.c similarity index 98% rename from code/jpeg-6b/jdapimin.c rename to code/jpeg-8c/jdapimin.c index cadb59fc..7f1ce4c0 100644 --- a/code/jpeg-6b/jdapimin.c +++ b/code/jpeg-8c/jdapimin.c @@ -2,6 +2,7 @@ * jdapimin.c * * Copyright (C) 1994-1998, Thomas G. Lane. + * Modified 2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -185,8 +186,8 @@ default_decompress_parms (j_decompress_ptr cinfo) } /* Set defaults for other decompression parameters. */ - cinfo->scale_num = 1; /* 1:1 scaling */ - cinfo->scale_denom = 1; + cinfo->scale_num = cinfo->block_size; /* 1:1 scaling */ + cinfo->scale_denom = cinfo->block_size; cinfo->output_gamma = 1.0; cinfo->buffered_image = FALSE; cinfo->raw_data_out = FALSE; diff --git a/code/jpeg-6b/jdapistd.c b/code/jpeg-8c/jdapistd.c similarity index 99% rename from code/jpeg-6b/jdapistd.c rename to code/jpeg-8c/jdapistd.c index c8e3fa0c..9d745377 100644 --- a/code/jpeg-6b/jdapistd.c +++ b/code/jpeg-8c/jdapistd.c @@ -202,7 +202,7 @@ jpeg_read_raw_data (j_decompress_ptr cinfo, JSAMPIMAGE data, } /* Verify that at least one iMCU row can be returned. */ - lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size; + lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->min_DCT_v_scaled_size; if (max_lines < lines_per_iMCU_row) ERREXIT(cinfo, JERR_BUFFER_SIZE); diff --git a/code/jpeg-8c/jdarith.c b/code/jpeg-8c/jdarith.c new file mode 100644 index 00000000..c858b248 --- /dev/null +++ b/code/jpeg-8c/jdarith.c @@ -0,0 +1,772 @@ +/* + * jdarith.c + * + * Developed 1997-2009 by Guido Vollbeding. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains portable arithmetic entropy decoding routines for JPEG + * (implementing the ISO/IEC IS 10918-1 and CCITT Recommendation ITU-T T.81). + * + * Both sequential and progressive modes are supported in this single module. + * + * Suspension is not currently supported in this module. + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" + + +/* Expanded entropy decoder object for arithmetic decoding. */ + +typedef struct { + struct jpeg_entropy_decoder pub; /* public fields */ + + INT32 c; /* C register, base of coding interval + input bit buffer */ + INT32 a; /* A register, normalized size of coding interval */ + int ct; /* bit shift counter, # of bits left in bit buffer part of C */ + /* init: ct = -16 */ + /* run: ct = 0..7 */ + /* error: ct = -1 */ + int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ + int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */ + + unsigned int restarts_to_go; /* MCUs left in this restart interval */ + + /* Pointers to statistics areas (these workspaces have image lifespan) */ + unsigned char * dc_stats[NUM_ARITH_TBLS]; + unsigned char * ac_stats[NUM_ARITH_TBLS]; + + /* Statistics bin for coding with fixed probability 0.5 */ + unsigned char fixed_bin[4]; +} arith_entropy_decoder; + +typedef arith_entropy_decoder * arith_entropy_ptr; + +/* The following two definitions specify the allocation chunk size + * for the statistics area. + * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least + * 49 statistics bins for DC, and 245 statistics bins for AC coding. + * + * We use a compact representation with 1 byte per statistics bin, + * thus the numbers directly represent byte sizes. + * This 1 byte per statistics bin contains the meaning of the MPS + * (more probable symbol) in the highest bit (mask 0x80), and the + * index into the probability estimation state machine table + * in the lower bits (mask 0x7F). + */ + +#define DC_STAT_BINS 64 +#define AC_STAT_BINS 256 + + +LOCAL(int) +get_byte (j_decompress_ptr cinfo) +/* Read next input byte; we do not support suspension in this module. */ +{ + struct jpeg_source_mgr * src = cinfo->src; + + if (src->bytes_in_buffer == 0) + if (! (*src->fill_input_buffer) (cinfo)) + ERREXIT(cinfo, JERR_CANT_SUSPEND); + src->bytes_in_buffer--; + return GETJOCTET(*src->next_input_byte++); +} + + +/* + * The core arithmetic decoding routine (common in JPEG and JBIG). + * This needs to go as fast as possible. + * Machine-dependent optimization facilities + * are not utilized in this portable implementation. + * However, this code should be fairly efficient and + * may be a good base for further optimizations anyway. + * + * Return value is 0 or 1 (binary decision). + * + * Note: I've changed the handling of the code base & bit + * buffer register C compared to other implementations + * based on the standards layout & procedures. + * While it also contains both the actual base of the + * coding interval (16 bits) and the next-bits buffer, + * the cut-point between these two parts is floating + * (instead of fixed) with the bit shift counter CT. + * Thus, we also need only one (variable instead of + * fixed size) shift for the LPS/MPS decision, and + * we can get away with any renormalization update + * of C (except for new data insertion, of course). + * + * I've also introduced a new scheme for accessing + * the probability estimation state machine table, + * derived from Markus Kuhn's JBIG implementation. + */ + +LOCAL(int) +arith_decode (j_decompress_ptr cinfo, unsigned char *st) +{ + register arith_entropy_ptr e = (arith_entropy_ptr) cinfo->entropy; + register unsigned char nl, nm; + register INT32 qe, temp; + register int sv, data; + + /* Renormalization & data input per section D.2.6 */ + while (e->a < 0x8000L) { + if (--e->ct < 0) { + /* Need to fetch next data byte */ + if (cinfo->unread_marker) + data = 0; /* stuff zero data */ + else { + data = get_byte(cinfo); /* read next input byte */ + if (data == 0xFF) { /* zero stuff or marker code */ + do data = get_byte(cinfo); + while (data == 0xFF); /* swallow extra 0xFF bytes */ + if (data == 0) + data = 0xFF; /* discard stuffed zero byte */ + else { + /* Note: Different from the Huffman decoder, hitting + * a marker while processing the compressed data + * segment is legal in arithmetic coding. + * The convention is to supply zero data + * then until decoding is complete. + */ + cinfo->unread_marker = data; + data = 0; + } + } + } + e->c = (e->c << 8) | data; /* insert data into C register */ + if ((e->ct += 8) < 0) /* update bit shift counter */ + /* Need more initial bytes */ + if (++e->ct == 0) + /* Got 2 initial bytes -> re-init A and exit loop */ + e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */ + } + e->a <<= 1; + } + + /* Fetch values from our compact representation of Table D.2: + * Qe values and probability estimation state machine + */ + sv = *st; + qe = jpeg_aritab[sv & 0x7F]; /* => Qe_Value */ + nl = qe & 0xFF; qe >>= 8; /* Next_Index_LPS + Switch_MPS */ + nm = qe & 0xFF; qe >>= 8; /* Next_Index_MPS */ + + /* Decode & estimation procedures per sections D.2.4 & D.2.5 */ + temp = e->a - qe; + e->a = temp; + temp <<= e->ct; + if (e->c >= temp) { + e->c -= temp; + /* Conditional LPS (less probable symbol) exchange */ + if (e->a < qe) { + e->a = qe; + *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ + } else { + e->a = qe; + *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ + sv ^= 0x80; /* Exchange LPS/MPS */ + } + } else if (e->a < 0x8000L) { + /* Conditional MPS (more probable symbol) exchange */ + if (e->a < qe) { + *st = (sv & 0x80) ^ nl; /* Estimate_after_LPS */ + sv ^= 0x80; /* Exchange LPS/MPS */ + } else { + *st = (sv & 0x80) ^ nm; /* Estimate_after_MPS */ + } + } + + return sv >> 7; +} + + +/* + * Check for a restart marker & resynchronize decoder. + */ + +LOCAL(void) +process_restart (j_decompress_ptr cinfo) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + int ci; + jpeg_component_info * compptr; + + /* Advance past the RSTn marker */ + if (! (*cinfo->marker->read_restart_marker) (cinfo)) + ERREXIT(cinfo, JERR_CANT_SUSPEND); + + /* Re-initialize statistics areas */ + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) { + MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS); + /* Reset DC predictions to 0 */ + entropy->last_dc_val[ci] = 0; + entropy->dc_context[ci] = 0; + } + if ((! cinfo->progressive_mode && cinfo->lim_Se) || + (cinfo->progressive_mode && cinfo->Ss)) { + MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS); + } + } + + /* Reset arithmetic decoding variables */ + entropy->c = 0; + entropy->a = 0; + entropy->ct = -16; /* force reading 2 initial bytes to fill C */ + + /* Reset restart counter */ + entropy->restarts_to_go = cinfo->restart_interval; +} + + +/* + * Arithmetic MCU decoding. + * Each of these routines decodes and returns one MCU's worth of + * arithmetic-compressed coefficients. + * The coefficients are reordered from zigzag order into natural array order, + * but are not dequantized. + * + * The i'th block of the MCU is stored into the block pointed to by + * MCU_data[i]. WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER. + */ + +/* + * MCU decoding for DC initial scan (either spectral selection, + * or first pass of successive approximation). + */ + +METHODDEF(boolean) +decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + JBLOCKROW block; + unsigned char *st; + int blkn, ci, tbl, sign; + int v, m; + + /* Process restart marker if needed */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) + process_restart(cinfo); + entropy->restarts_to_go--; + } + + if (entropy->ct == -1) return TRUE; /* if error do nothing */ + + /* Outer loop handles each block in the MCU */ + + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + block = MCU_data[blkn]; + ci = cinfo->MCU_membership[blkn]; + tbl = cinfo->cur_comp_info[ci]->dc_tbl_no; + + /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */ + + /* Table F.4: Point to statistics bin S0 for DC coefficient coding */ + st = entropy->dc_stats[tbl] + entropy->dc_context[ci]; + + /* Figure F.19: Decode_DC_DIFF */ + if (arith_decode(cinfo, st) == 0) + entropy->dc_context[ci] = 0; + else { + /* Figure F.21: Decoding nonzero value v */ + /* Figure F.22: Decoding the sign of v */ + sign = arith_decode(cinfo, st + 1); + st += 2; st += sign; + /* Figure F.23: Decoding the magnitude category of v */ + if ((m = arith_decode(cinfo, st)) != 0) { + st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ + while (arith_decode(cinfo, st)) { + if ((m <<= 1) == 0x8000) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* magnitude overflow */ + return TRUE; + } + st += 1; + } + } + /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ + if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) + entropy->dc_context[ci] = 0; /* zero diff category */ + else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) + entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */ + else + entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */ + v = m; + /* Figure F.24: Decoding the magnitude bit pattern of v */ + st += 14; + while (m >>= 1) + if (arith_decode(cinfo, st)) v |= m; + v += 1; if (sign) v = -v; + entropy->last_dc_val[ci] += v; + } + + /* Scale and output the DC coefficient (assumes jpeg_natural_order[0]=0) */ + (*block)[0] = (JCOEF) (entropy->last_dc_val[ci] << cinfo->Al); + } + + return TRUE; +} + + +/* + * MCU decoding for AC initial scan (either spectral selection, + * or first pass of successive approximation). + */ + +METHODDEF(boolean) +decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + JBLOCKROW block; + unsigned char *st; + int tbl, sign, k; + int v, m; + const int * natural_order; + + /* Process restart marker if needed */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) + process_restart(cinfo); + entropy->restarts_to_go--; + } + + if (entropy->ct == -1) return TRUE; /* if error do nothing */ + + natural_order = cinfo->natural_order; + + /* There is always only one block per MCU */ + block = MCU_data[0]; + tbl = cinfo->cur_comp_info[0]->ac_tbl_no; + + /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */ + + /* Figure F.20: Decode_AC_coefficients */ + for (k = cinfo->Ss; k <= cinfo->Se; k++) { + st = entropy->ac_stats[tbl] + 3 * (k - 1); + if (arith_decode(cinfo, st)) break; /* EOB flag */ + while (arith_decode(cinfo, st + 1) == 0) { + st += 3; k++; + if (k > cinfo->Se) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* spectral overflow */ + return TRUE; + } + } + /* Figure F.21: Decoding nonzero value v */ + /* Figure F.22: Decoding the sign of v */ + sign = arith_decode(cinfo, entropy->fixed_bin); + st += 2; + /* Figure F.23: Decoding the magnitude category of v */ + if ((m = arith_decode(cinfo, st)) != 0) { + if (arith_decode(cinfo, st)) { + m <<= 1; + st = entropy->ac_stats[tbl] + + (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); + while (arith_decode(cinfo, st)) { + if ((m <<= 1) == 0x8000) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* magnitude overflow */ + return TRUE; + } + st += 1; + } + } + } + v = m; + /* Figure F.24: Decoding the magnitude bit pattern of v */ + st += 14; + while (m >>= 1) + if (arith_decode(cinfo, st)) v |= m; + v += 1; if (sign) v = -v; + /* Scale and output coefficient in natural (dezigzagged) order */ + (*block)[natural_order[k]] = (JCOEF) (v << cinfo->Al); + } + + return TRUE; +} + + +/* + * MCU decoding for DC successive approximation refinement scan. + */ + +METHODDEF(boolean) +decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + unsigned char *st; + int p1, blkn; + + /* Process restart marker if needed */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) + process_restart(cinfo); + entropy->restarts_to_go--; + } + + st = entropy->fixed_bin; /* use fixed probability estimation */ + p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ + + /* Outer loop handles each block in the MCU */ + + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + /* Encoded data is simply the next bit of the two's-complement DC value */ + if (arith_decode(cinfo, st)) + MCU_data[blkn][0][0] |= p1; + } + + return TRUE; +} + + +/* + * MCU decoding for AC successive approximation refinement scan. + */ + +METHODDEF(boolean) +decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + JBLOCKROW block; + JCOEFPTR thiscoef; + unsigned char *st; + int tbl, k, kex; + int p1, m1; + const int * natural_order; + + /* Process restart marker if needed */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) + process_restart(cinfo); + entropy->restarts_to_go--; + } + + if (entropy->ct == -1) return TRUE; /* if error do nothing */ + + natural_order = cinfo->natural_order; + + /* There is always only one block per MCU */ + block = MCU_data[0]; + tbl = cinfo->cur_comp_info[0]->ac_tbl_no; + + p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ + m1 = (-1) << cinfo->Al; /* -1 in the bit position being coded */ + + /* Establish EOBx (previous stage end-of-block) index */ + for (kex = cinfo->Se; kex > 0; kex--) + if ((*block)[natural_order[kex]]) break; + + for (k = cinfo->Ss; k <= cinfo->Se; k++) { + st = entropy->ac_stats[tbl] + 3 * (k - 1); + if (k > kex) + if (arith_decode(cinfo, st)) break; /* EOB flag */ + for (;;) { + thiscoef = *block + natural_order[k]; + if (*thiscoef) { /* previously nonzero coef */ + if (arith_decode(cinfo, st + 2)) { + if (*thiscoef < 0) + *thiscoef += m1; + else + *thiscoef += p1; + } + break; + } + if (arith_decode(cinfo, st + 1)) { /* newly nonzero coef */ + if (arith_decode(cinfo, entropy->fixed_bin)) + *thiscoef = m1; + else + *thiscoef = p1; + break; + } + st += 3; k++; + if (k > cinfo->Se) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* spectral overflow */ + return TRUE; + } + } + } + + return TRUE; +} + + +/* + * Decode one MCU's worth of arithmetic-compressed coefficients. + */ + +METHODDEF(boolean) +decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + jpeg_component_info * compptr; + JBLOCKROW block; + unsigned char *st; + int blkn, ci, tbl, sign, k; + int v, m; + const int * natural_order; + + /* Process restart marker if needed */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) + process_restart(cinfo); + entropy->restarts_to_go--; + } + + if (entropy->ct == -1) return TRUE; /* if error do nothing */ + + natural_order = cinfo->natural_order; + + /* Outer loop handles each block in the MCU */ + + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + block = MCU_data[blkn]; + ci = cinfo->MCU_membership[blkn]; + compptr = cinfo->cur_comp_info[ci]; + + /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */ + + tbl = compptr->dc_tbl_no; + + /* Table F.4: Point to statistics bin S0 for DC coefficient coding */ + st = entropy->dc_stats[tbl] + entropy->dc_context[ci]; + + /* Figure F.19: Decode_DC_DIFF */ + if (arith_decode(cinfo, st) == 0) + entropy->dc_context[ci] = 0; + else { + /* Figure F.21: Decoding nonzero value v */ + /* Figure F.22: Decoding the sign of v */ + sign = arith_decode(cinfo, st + 1); + st += 2; st += sign; + /* Figure F.23: Decoding the magnitude category of v */ + if ((m = arith_decode(cinfo, st)) != 0) { + st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */ + while (arith_decode(cinfo, st)) { + if ((m <<= 1) == 0x8000) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* magnitude overflow */ + return TRUE; + } + st += 1; + } + } + /* Section F.1.4.4.1.2: Establish dc_context conditioning category */ + if (m < (int) ((1L << cinfo->arith_dc_L[tbl]) >> 1)) + entropy->dc_context[ci] = 0; /* zero diff category */ + else if (m > (int) ((1L << cinfo->arith_dc_U[tbl]) >> 1)) + entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */ + else + entropy->dc_context[ci] = 4 + (sign * 4); /* small diff category */ + v = m; + /* Figure F.24: Decoding the magnitude bit pattern of v */ + st += 14; + while (m >>= 1) + if (arith_decode(cinfo, st)) v |= m; + v += 1; if (sign) v = -v; + entropy->last_dc_val[ci] += v; + } + + (*block)[0] = (JCOEF) entropy->last_dc_val[ci]; + + /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */ + + tbl = compptr->ac_tbl_no; + + /* Figure F.20: Decode_AC_coefficients */ + for (k = 1; k <= cinfo->lim_Se; k++) { + st = entropy->ac_stats[tbl] + 3 * (k - 1); + if (arith_decode(cinfo, st)) break; /* EOB flag */ + while (arith_decode(cinfo, st + 1) == 0) { + st += 3; k++; + if (k > cinfo->lim_Se) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* spectral overflow */ + return TRUE; + } + } + /* Figure F.21: Decoding nonzero value v */ + /* Figure F.22: Decoding the sign of v */ + sign = arith_decode(cinfo, entropy->fixed_bin); + st += 2; + /* Figure F.23: Decoding the magnitude category of v */ + if ((m = arith_decode(cinfo, st)) != 0) { + if (arith_decode(cinfo, st)) { + m <<= 1; + st = entropy->ac_stats[tbl] + + (k <= cinfo->arith_ac_K[tbl] ? 189 : 217); + while (arith_decode(cinfo, st)) { + if ((m <<= 1) == 0x8000) { + WARNMS(cinfo, JWRN_ARITH_BAD_CODE); + entropy->ct = -1; /* magnitude overflow */ + return TRUE; + } + st += 1; + } + } + } + v = m; + /* Figure F.24: Decoding the magnitude bit pattern of v */ + st += 14; + while (m >>= 1) + if (arith_decode(cinfo, st)) v |= m; + v += 1; if (sign) v = -v; + (*block)[natural_order[k]] = (JCOEF) v; + } + } + + return TRUE; +} + + +/* + * Initialize for an arithmetic-compressed scan. + */ + +METHODDEF(void) +start_pass (j_decompress_ptr cinfo) +{ + arith_entropy_ptr entropy = (arith_entropy_ptr) cinfo->entropy; + int ci, tbl; + jpeg_component_info * compptr; + + if (cinfo->progressive_mode) { + /* Validate progressive scan parameters */ + if (cinfo->Ss == 0) { + if (cinfo->Se != 0) + goto bad; + } else { + /* need not check Ss/Se < 0 since they came from unsigned bytes */ + if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se) + goto bad; + /* AC scans may have only one component */ + if (cinfo->comps_in_scan != 1) + goto bad; + } + if (cinfo->Ah != 0) { + /* Successive approximation refinement scan: must have Al = Ah-1. */ + if (cinfo->Ah-1 != cinfo->Al) + goto bad; + } + if (cinfo->Al > 13) { /* need not check for < 0 */ + bad: + ERREXIT4(cinfo, JERR_BAD_PROGRESSION, + cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al); + } + /* Update progression status, and verify that scan order is legal. + * Note that inter-scan inconsistencies are treated as warnings + * not fatal errors ... not clear if this is right way to behave. + */ + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + int coefi, cindex = cinfo->cur_comp_info[ci]->component_index; + int *coef_bit_ptr = & cinfo->coef_bits[cindex][0]; + if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */ + WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0); + for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) { + int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi]; + if (cinfo->Ah != expected) + WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi); + coef_bit_ptr[coefi] = cinfo->Al; + } + } + /* Select MCU decoding routine */ + if (cinfo->Ah == 0) { + if (cinfo->Ss == 0) + entropy->pub.decode_mcu = decode_mcu_DC_first; + else + entropy->pub.decode_mcu = decode_mcu_AC_first; + } else { + if (cinfo->Ss == 0) + entropy->pub.decode_mcu = decode_mcu_DC_refine; + else + entropy->pub.decode_mcu = decode_mcu_AC_refine; + } + } else { + /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG. + * This ought to be an error condition, but we make it a warning. + */ + if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 || + (cinfo->Se < DCTSIZE2 && cinfo->Se != cinfo->lim_Se)) + WARNMS(cinfo, JWRN_NOT_SEQUENTIAL); + /* Select MCU decoding routine */ + entropy->pub.decode_mcu = decode_mcu; + } + + /* Allocate & initialize requested statistics areas */ + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + if (! cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) { + tbl = compptr->dc_tbl_no; + if (tbl < 0 || tbl >= NUM_ARITH_TBLS) + ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); + if (entropy->dc_stats[tbl] == NULL) + entropy->dc_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, DC_STAT_BINS); + MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS); + /* Initialize DC predictions to 0 */ + entropy->last_dc_val[ci] = 0; + entropy->dc_context[ci] = 0; + } + if ((! cinfo->progressive_mode && cinfo->lim_Se) || + (cinfo->progressive_mode && cinfo->Ss)) { + tbl = compptr->ac_tbl_no; + if (tbl < 0 || tbl >= NUM_ARITH_TBLS) + ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl); + if (entropy->ac_stats[tbl] == NULL) + entropy->ac_stats[tbl] = (unsigned char *) (*cinfo->mem->alloc_small) + ((j_common_ptr) cinfo, JPOOL_IMAGE, AC_STAT_BINS); + MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS); + } + } + + /* Initialize arithmetic decoding variables */ + entropy->c = 0; + entropy->a = 0; + entropy->ct = -16; /* force reading 2 initial bytes to fill C */ + + /* Initialize restart counter */ + entropy->restarts_to_go = cinfo->restart_interval; +} + + +/* + * Module initialization routine for arithmetic entropy decoding. + */ + +GLOBAL(void) +jinit_arith_decoder (j_decompress_ptr cinfo) +{ + arith_entropy_ptr entropy; + int i; + + entropy = (arith_entropy_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + SIZEOF(arith_entropy_decoder)); + cinfo->entropy = (struct jpeg_entropy_decoder *) entropy; + entropy->pub.start_pass = start_pass; + + /* Mark tables unallocated */ + for (i = 0; i < NUM_ARITH_TBLS; i++) { + entropy->dc_stats[i] = NULL; + entropy->ac_stats[i] = NULL; + } + + /* Initialize index for fixed probability estimation */ + entropy->fixed_bin[0] = 113; + + if (cinfo->progressive_mode) { + /* Create progression status table */ + int *coef_bit_ptr, ci; + cinfo->coef_bits = (int (*)[DCTSIZE2]) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + cinfo->num_components*DCTSIZE2*SIZEOF(int)); + coef_bit_ptr = & cinfo->coef_bits[0][0]; + for (ci = 0; ci < cinfo->num_components; ci++) + for (i = 0; i < DCTSIZE2; i++) + *coef_bit_ptr++ = -1; + } +} diff --git a/code/jpeg-6b/jdatadst.c b/code/jpeg-8c/jdatadst.c similarity index 58% rename from code/jpeg-6b/jdatadst.c rename to code/jpeg-8c/jdatadst.c index a8f6fb0e..472d5f32 100644 --- a/code/jpeg-6b/jdatadst.c +++ b/code/jpeg-8c/jdatadst.c @@ -2,13 +2,14 @@ * jdatadst.c * * Copyright (C) 1994-1996, Thomas G. Lane. + * Modified 2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * * This file contains compression data destination routines for the case of - * emitting JPEG data to a file (or any stdio stream). While these routines - * are sufficient for most applications, some will want to use a different - * destination manager. + * emitting JPEG data to memory or to a file (or any stdio stream). + * While these routines are sufficient for most applications, + * some will want to use a different destination manager. * IMPORTANT: we assume that fwrite() will correctly transcribe an array of * JOCTETs into 8-bit-wide elements on external storage. If char is wider * than 8 bits on your machine, you may need to do some tweaking. @@ -19,6 +20,11 @@ #include "jpeglib.h" #include "jerror.h" +#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ +extern void * malloc JPP((size_t size)); +extern void free JPP((void *ptr)); +#endif + /* Expanded data destination object for stdio output */ @@ -34,6 +40,21 @@ typedef my_destination_mgr * my_dest_ptr; #define OUTPUT_BUF_SIZE 4096 /* choose an efficiently fwrite'able size */ +/* Expanded data destination object for memory output */ + +typedef struct { + struct jpeg_destination_mgr pub; /* public fields */ + + unsigned char ** outbuffer; /* target buffer */ + unsigned long * outsize; + unsigned char * newbuffer; /* newly allocated buffer */ + JOCTET * buffer; /* start of buffer */ + size_t bufsize; +} my_mem_destination_mgr; + +typedef my_mem_destination_mgr * my_mem_dest_ptr; + + /* * Initialize destination --- called by jpeg_start_compress * before any data is actually written. @@ -53,6 +74,12 @@ init_destination (j_compress_ptr cinfo) dest->pub.free_in_buffer = OUTPUT_BUF_SIZE; } +METHODDEF(void) +init_mem_destination (j_compress_ptr cinfo) +{ + /* no work necessary here */ +} + /* * Empty the output buffer --- called whenever buffer fills up. @@ -92,6 +119,36 @@ empty_output_buffer (j_compress_ptr cinfo) return TRUE; } +METHODDEF(boolean) +empty_mem_output_buffer (j_compress_ptr cinfo) +{ + size_t nextsize; + JOCTET * nextbuffer; + my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; + + /* Try to allocate new buffer with double size */ + nextsize = dest->bufsize * 2; + nextbuffer = malloc(nextsize); + + if (nextbuffer == NULL) + ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); + + MEMCOPY(nextbuffer, dest->buffer, dest->bufsize); + + if (dest->newbuffer != NULL) + free(dest->newbuffer); + + dest->newbuffer = nextbuffer; + + dest->pub.next_output_byte = nextbuffer + dest->bufsize; + dest->pub.free_in_buffer = dest->bufsize; + + dest->buffer = nextbuffer; + dest->bufsize = nextsize; + + return TRUE; +} + /* * Terminate destination --- called by jpeg_finish_compress @@ -119,6 +176,15 @@ term_destination (j_compress_ptr cinfo) ERREXIT(cinfo, JERR_FILE_WRITE); } +METHODDEF(void) +term_mem_destination (j_compress_ptr cinfo) +{ + my_mem_dest_ptr dest = (my_mem_dest_ptr) cinfo->dest; + + *dest->outbuffer = dest->buffer; + *dest->outsize = dest->bufsize - dest->pub.free_in_buffer; +} + /* * Prepare for output to a stdio stream. @@ -149,3 +215,53 @@ jpeg_stdio_dest (j_compress_ptr cinfo, FILE * outfile) dest->pub.term_destination = term_destination; dest->outfile = outfile; } + + +/* + * Prepare for output to a memory buffer. + * The caller may supply an own initial buffer with appropriate size. + * Otherwise, or when the actual data output exceeds the given size, + * the library adapts the buffer size as necessary. + * The standard library functions malloc/free are used for allocating + * larger memory, so the buffer is available to the application after + * finishing compression, and then the application is responsible for + * freeing the requested memory. + */ + +GLOBAL(void) +jpeg_mem_dest (j_compress_ptr cinfo, + unsigned char ** outbuffer, unsigned long * outsize) +{ + my_mem_dest_ptr dest; + + if (outbuffer == NULL || outsize == NULL) /* sanity check */ + ERREXIT(cinfo, JERR_BUFFER_SIZE); + + /* The destination object is made permanent so that multiple JPEG images + * can be written to the same buffer without re-executing jpeg_mem_dest. + */ + if (cinfo->dest == NULL) { /* first time for this JPEG object? */ + cinfo->dest = (struct jpeg_destination_mgr *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + SIZEOF(my_mem_destination_mgr)); + } + + dest = (my_mem_dest_ptr) cinfo->dest; + dest->pub.init_destination = init_mem_destination; + dest->pub.empty_output_buffer = empty_mem_output_buffer; + dest->pub.term_destination = term_mem_destination; + dest->outbuffer = outbuffer; + dest->outsize = outsize; + dest->newbuffer = NULL; + + if (*outbuffer == NULL || *outsize == 0) { + /* Allocate initial buffer */ + dest->newbuffer = *outbuffer = malloc(OUTPUT_BUF_SIZE); + if (dest->newbuffer == NULL) + ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10); + *outsize = OUTPUT_BUF_SIZE; + } + + dest->pub.next_output_byte = dest->buffer = *outbuffer; + dest->pub.free_in_buffer = dest->bufsize = *outsize; +} diff --git a/code/jpeg-6b/jdatasrc.c b/code/jpeg-8c/jdatasrc.c similarity index 72% rename from code/jpeg-6b/jdatasrc.c rename to code/jpeg-8c/jdatasrc.c index 795f65ed..c8fe3daf 100644 --- a/code/jpeg-6b/jdatasrc.c +++ b/code/jpeg-8c/jdatasrc.c @@ -2,13 +2,14 @@ * jdatasrc.c * * Copyright (C) 1994-1996, Thomas G. Lane. + * Modified 2009-2010 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * * This file contains decompression data source routines for the case of - * reading JPEG data from a file (or any stdio stream). While these routines - * are sufficient for most applications, some will want to use a different - * source manager. + * reading JPEG data from memory or from a file (or any stdio stream). + * While these routines are sufficient for most applications, + * some will want to use a different source manager. * IMPORTANT: we assume that fread() will correctly transcribe an array of * JOCTETs from 8-bit-wide elements on external storage. If char is wider * than 8 bits on your machine, you may need to do some tweaking. @@ -19,17 +20,13 @@ #include "jpeglib.h" #include "jerror.h" -#ifndef MIN -#define MIN(a, b) ((a)<(b)?(a):(b)) -#endif /* Expanded data source object for stdio input */ typedef struct { struct jpeg_source_mgr pub; /* public fields */ - unsigned char *inbuf; /* source stream */ - size_t inbufbytes; + FILE * infile; /* source stream */ JOCTET * buffer; /* start of buffer */ boolean start_of_file; /* have we gotten any data yet? */ } my_source_mgr; @@ -56,6 +53,12 @@ init_source (j_decompress_ptr cinfo) src->start_of_file = TRUE; } +METHODDEF(void) +init_mem_source (j_decompress_ptr cinfo) +{ + /* no work necessary here */ +} + /* * Fill the input buffer --- called whenever buffer is emptied. @@ -94,19 +97,18 @@ METHODDEF(boolean) fill_input_buffer (j_decompress_ptr cinfo) { my_src_ptr src = (my_src_ptr) cinfo->src; - size_t nbytes = MIN(src->inbufbytes, INPUT_BUF_SIZE); + size_t nbytes; + + nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE); if (nbytes <= 0) { + if (src->start_of_file) /* Treat empty input file as fatal error */ + ERREXIT(cinfo, JERR_INPUT_EMPTY); WARNMS(cinfo, JWRN_JPEG_EOF); /* Insert a fake EOI marker */ src->buffer[0] = (JOCTET) 0xFF; src->buffer[1] = (JOCTET) JPEG_EOI; nbytes = 2; - } else { - memcpy( src->buffer, src->inbuf, nbytes); - - src->inbuf += nbytes; - src->inbufbytes -= nbytes; } src->pub.next_input_byte = src->buffer; @@ -116,6 +118,26 @@ fill_input_buffer (j_decompress_ptr cinfo) return TRUE; } +METHODDEF(boolean) +fill_mem_input_buffer (j_decompress_ptr cinfo) +{ + static JOCTET mybuffer[4]; + + /* The whole JPEG data is expected to reside in the supplied memory + * buffer, so any request for more data beyond the given buffer size + * is treated as an error. + */ + WARNMS(cinfo, JWRN_JPEG_EOF); + /* Insert a fake EOI marker */ + mybuffer[0] = (JOCTET) 0xFF; + mybuffer[1] = (JOCTET) JPEG_EOI; + + cinfo->src->next_input_byte = mybuffer; + cinfo->src->bytes_in_buffer = 2; + + return TRUE; +} + /* * Skip data --- used to skip over a potentially large amount of @@ -132,22 +154,22 @@ fill_input_buffer (j_decompress_ptr cinfo) METHODDEF(void) skip_input_data (j_decompress_ptr cinfo, long num_bytes) { - my_src_ptr src = (my_src_ptr) cinfo->src; + struct jpeg_source_mgr * src = cinfo->src; /* Just a dumb implementation for now. Could use fseek() except * it doesn't work on pipes. Not clear that being smart is worth * any trouble anyway --- large skips are infrequent. */ if (num_bytes > 0) { - while (num_bytes > (long) src->pub.bytes_in_buffer) { - num_bytes -= (long) src->pub.bytes_in_buffer; - (void) fill_input_buffer(cinfo); + while (num_bytes > (long) src->bytes_in_buffer) { + num_bytes -= (long) src->bytes_in_buffer; + (void) (*src->fill_input_buffer) (cinfo); /* note we assume that fill_input_buffer will never return FALSE, * so suspension need not be handled. */ } - src->pub.next_input_byte += (size_t) num_bytes; - src->pub.bytes_in_buffer -= (size_t) num_bytes; + src->next_input_byte += (size_t) num_bytes; + src->bytes_in_buffer -= (size_t) num_bytes; } } @@ -184,7 +206,7 @@ term_source (j_decompress_ptr cinfo) */ GLOBAL(void) -jpeg_mem_src (j_decompress_ptr cinfo, unsigned char *inbuf, size_t size) +jpeg_stdio_src (j_decompress_ptr cinfo, FILE * infile) { my_src_ptr src; @@ -211,8 +233,42 @@ jpeg_mem_src (j_decompress_ptr cinfo, unsigned char *inbuf, size_t size) src->pub.skip_input_data = skip_input_data; src->pub.resync_to_restart = jpeg_resync_to_restart; /* use default method */ src->pub.term_source = term_source; - src->inbuf = inbuf; - src->inbufbytes = size; + src->infile = infile; src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */ src->pub.next_input_byte = NULL; /* until buffer loaded */ } + + +/* + * Prepare for input from a supplied memory buffer. + * The buffer must contain the whole JPEG data. + */ + +GLOBAL(void) +jpeg_mem_src (j_decompress_ptr cinfo, + unsigned char * inbuffer, unsigned long insize) +{ + struct jpeg_source_mgr * src; + + if (inbuffer == NULL || insize == 0) /* Treat empty input as fatal error */ + ERREXIT(cinfo, JERR_INPUT_EMPTY); + + /* The source object is made permanent so that a series of JPEG images + * can be read from the same buffer by calling jpeg_mem_src only before + * the first one. + */ + if (cinfo->src == NULL) { /* first time for this JPEG object? */ + cinfo->src = (struct jpeg_source_mgr *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + SIZEOF(struct jpeg_source_mgr)); + } + + src = cinfo->src; + src->init_source = init_mem_source; + src->fill_input_buffer = fill_mem_input_buffer; + src->skip_input_data = skip_input_data; + src->resync_to_restart = jpeg_resync_to_restart; /* use default method */ + src->term_source = term_source; + src->bytes_in_buffer = (size_t) insize; + src->next_input_byte = (JOCTET *) inbuffer; +} diff --git a/code/jpeg-6b/jdcoefct.c b/code/jpeg-8c/jdcoefct.c similarity index 98% rename from code/jpeg-6b/jdcoefct.c rename to code/jpeg-8c/jdcoefct.c index 4938d20f..462e92c6 100644 --- a/code/jpeg-6b/jdcoefct.c +++ b/code/jpeg-8c/jdcoefct.c @@ -187,7 +187,7 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) useful_width = (MCU_col_num < last_MCU_col) ? compptr->MCU_width : compptr->last_col_width; output_ptr = output_buf[compptr->component_index] + - yoffset * compptr->DCT_scaled_size; + yoffset * compptr->DCT_v_scaled_size; start_col = MCU_col_num * compptr->MCU_sample_width; for (yindex = 0; yindex < compptr->MCU_height; yindex++) { if (cinfo->input_iMCU_row < last_iMCU_row || @@ -197,11 +197,11 @@ decompress_onepass (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) coef->MCU_buffer[blkn+xindex], output_ptr, output_col); - output_col += compptr->DCT_scaled_size; + output_col += compptr->DCT_h_scaled_size; } } blkn += compptr->MCU_width; - output_ptr += compptr->DCT_scaled_size; + output_ptr += compptr->DCT_v_scaled_size; } } } @@ -362,9 +362,9 @@ decompress_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) (*inverse_DCT) (cinfo, compptr, (JCOEFPTR) buffer_ptr, output_ptr, output_col); buffer_ptr++; - output_col += compptr->DCT_scaled_size; + output_col += compptr->DCT_h_scaled_size; } - output_ptr += compptr->DCT_scaled_size; + output_ptr += compptr->DCT_v_scaled_size; } } @@ -654,9 +654,9 @@ decompress_smooth_data (j_decompress_ptr cinfo, JSAMPIMAGE output_buf) DC4 = DC5; DC5 = DC6; DC7 = DC8; DC8 = DC9; buffer_ptr++, prev_block_row++, next_block_row++; - output_col += compptr->DCT_scaled_size; + output_col += compptr->DCT_h_scaled_size; } - output_ptr += compptr->DCT_scaled_size; + output_ptr += compptr->DCT_v_scaled_size; } } diff --git a/code/jpeg-6b/jdcolor.c b/code/jpeg-8c/jdcolor.c similarity index 100% rename from code/jpeg-6b/jdcolor.c rename to code/jpeg-8c/jdcolor.c diff --git a/code/jpeg-8c/jdct.h b/code/jpeg-8c/jdct.h new file mode 100644 index 00000000..360dec80 --- /dev/null +++ b/code/jpeg-8c/jdct.h @@ -0,0 +1,393 @@ +/* + * jdct.h + * + * Copyright (C) 1994-1996, Thomas G. Lane. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This include file contains common declarations for the forward and + * inverse DCT modules. These declarations are private to the DCT managers + * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms. + * The individual DCT algorithms are kept in separate files to ease + * machine-dependent tuning (e.g., assembly coding). + */ + + +/* + * A forward DCT routine is given a pointer to an input sample array and + * a pointer to a work area of type DCTELEM[]; the DCT is to be performed + * in-place in that buffer. Type DCTELEM is int for 8-bit samples, INT32 + * for 12-bit samples. (NOTE: Floating-point DCT implementations use an + * array of type FAST_FLOAT, instead.) + * The input data is to be fetched from the sample array starting at a + * specified column. (Any row offset needed will be applied to the array + * pointer before it is passed to the FDCT code.) + * Note that the number of samples fetched by the FDCT routine is + * DCT_h_scaled_size * DCT_v_scaled_size. + * The DCT outputs are returned scaled up by a factor of 8; they therefore + * have a range of +-8K for 8-bit data, +-128K for 12-bit data. This + * convention improves accuracy in integer implementations and saves some + * work in floating-point ones. + * Quantization of the output coefficients is done by jcdctmgr.c. + */ + +#if BITS_IN_JSAMPLE == 8 +typedef int DCTELEM; /* 16 or 32 bits is fine */ +#else +typedef INT32 DCTELEM; /* must have 32 bits */ +#endif + +typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data, + JSAMPARRAY sample_data, + JDIMENSION start_col)); +typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data, + JSAMPARRAY sample_data, + JDIMENSION start_col)); + + +/* + * An inverse DCT routine is given a pointer to the input JBLOCK and a pointer + * to an output sample array. The routine must dequantize the input data as + * well as perform the IDCT; for dequantization, it uses the multiplier table + * pointed to by compptr->dct_table. The output data is to be placed into the + * sample array starting at a specified column. (Any row offset needed will + * be applied to the array pointer before it is passed to the IDCT code.) + * Note that the number of samples emitted by the IDCT routine is + * DCT_h_scaled_size * DCT_v_scaled_size. + */ + +/* typedef inverse_DCT_method_ptr is declared in jpegint.h */ + +/* + * Each IDCT routine has its own ideas about the best dct_table element type. + */ + +typedef MULTIPLIER ISLOW_MULT_TYPE; /* short or int, whichever is faster */ +#if BITS_IN_JSAMPLE == 8 +typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */ +#define IFAST_SCALE_BITS 2 /* fractional bits in scale factors */ +#else +typedef INT32 IFAST_MULT_TYPE; /* need 32 bits for scaled quantizers */ +#define IFAST_SCALE_BITS 13 /* fractional bits in scale factors */ +#endif +typedef FAST_FLOAT FLOAT_MULT_TYPE; /* preferred floating type */ + + +/* + * Each IDCT routine is responsible for range-limiting its results and + * converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could + * be quite far out of range if the input data is corrupt, so a bulletproof + * range-limiting step is required. We use a mask-and-table-lookup method + * to do the combined operations quickly. See the comments with + * prepare_range_limit_table (in jdmaster.c) for more info. + */ + +#define IDCT_range_limit(cinfo) ((cinfo)->sample_range_limit + CENTERJSAMPLE) + +#define RANGE_MASK (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */ + + +/* Short forms of external names for systems with brain-damaged linkers. */ + +#ifdef NEED_SHORT_EXTERNAL_NAMES +#define jpeg_fdct_islow jFDislow +#define jpeg_fdct_ifast jFDifast +#define jpeg_fdct_float jFDfloat +#define jpeg_fdct_7x7 jFD7x7 +#define jpeg_fdct_6x6 jFD6x6 +#define jpeg_fdct_5x5 jFD5x5 +#define jpeg_fdct_4x4 jFD4x4 +#define jpeg_fdct_3x3 jFD3x3 +#define jpeg_fdct_2x2 jFD2x2 +#define jpeg_fdct_1x1 jFD1x1 +#define jpeg_fdct_9x9 jFD9x9 +#define jpeg_fdct_10x10 jFD10x10 +#define jpeg_fdct_11x11 jFD11x11 +#define jpeg_fdct_12x12 jFD12x12 +#define jpeg_fdct_13x13 jFD13x13 +#define jpeg_fdct_14x14 jFD14x14 +#define jpeg_fdct_15x15 jFD15x15 +#define jpeg_fdct_16x16 jFD16x16 +#define jpeg_fdct_16x8 jFD16x8 +#define jpeg_fdct_14x7 jFD14x7 +#define jpeg_fdct_12x6 jFD12x6 +#define jpeg_fdct_10x5 jFD10x5 +#define jpeg_fdct_8x4 jFD8x4 +#define jpeg_fdct_6x3 jFD6x3 +#define jpeg_fdct_4x2 jFD4x2 +#define jpeg_fdct_2x1 jFD2x1 +#define jpeg_fdct_8x16 jFD8x16 +#define jpeg_fdct_7x14 jFD7x14 +#define jpeg_fdct_6x12 jFD6x12 +#define jpeg_fdct_5x10 jFD5x10 +#define jpeg_fdct_4x8 jFD4x8 +#define jpeg_fdct_3x6 jFD3x6 +#define jpeg_fdct_2x4 jFD2x4 +#define jpeg_fdct_1x2 jFD1x2 +#define jpeg_idct_islow jRDislow +#define jpeg_idct_ifast jRDifast +#define jpeg_idct_float jRDfloat +#define jpeg_idct_7x7 jRD7x7 +#define jpeg_idct_6x6 jRD6x6 +#define jpeg_idct_5x5 jRD5x5 +#define jpeg_idct_4x4 jRD4x4 +#define jpeg_idct_3x3 jRD3x3 +#define jpeg_idct_2x2 jRD2x2 +#define jpeg_idct_1x1 jRD1x1 +#define jpeg_idct_9x9 jRD9x9 +#define jpeg_idct_10x10 jRD10x10 +#define jpeg_idct_11x11 jRD11x11 +#define jpeg_idct_12x12 jRD12x12 +#define jpeg_idct_13x13 jRD13x13 +#define jpeg_idct_14x14 jRD14x14 +#define jpeg_idct_15x15 jRD15x15 +#define jpeg_idct_16x16 jRD16x16 +#define jpeg_idct_16x8 jRD16x8 +#define jpeg_idct_14x7 jRD14x7 +#define jpeg_idct_12x6 jRD12x6 +#define jpeg_idct_10x5 jRD10x5 +#define jpeg_idct_8x4 jRD8x4 +#define jpeg_idct_6x3 jRD6x3 +#define jpeg_idct_4x2 jRD4x2 +#define jpeg_idct_2x1 jRD2x1 +#define jpeg_idct_8x16 jRD8x16 +#define jpeg_idct_7x14 jRD7x14 +#define jpeg_idct_6x12 jRD6x12 +#define jpeg_idct_5x10 jRD5x10 +#define jpeg_idct_4x8 jRD4x8 +#define jpeg_idct_3x6 jRD3x8 +#define jpeg_idct_2x4 jRD2x4 +#define jpeg_idct_1x2 jRD1x2 +#endif /* NEED_SHORT_EXTERNAL_NAMES */ + +/* Extern declarations for the forward and inverse DCT routines. */ + +EXTERN(void) jpeg_fdct_islow + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_ifast + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_float + JPP((FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_7x7 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_6x6 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_5x5 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_4x4 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_3x3 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_2x2 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_1x1 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_9x9 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_10x10 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_11x11 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_12x12 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_13x13 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_14x14 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_15x15 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_16x16 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_16x8 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_14x7 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_12x6 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_10x5 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_8x4 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_6x3 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_4x2 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_2x1 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_8x16 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_7x14 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_6x12 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_5x10 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_4x8 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_3x6 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_2x4 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); +EXTERN(void) jpeg_fdct_1x2 + JPP((DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)); + +EXTERN(void) jpeg_idct_islow + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_ifast + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_float + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_7x7 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_6x6 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_5x5 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_4x4 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_3x3 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_2x2 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_1x1 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_9x9 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_10x10 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_11x11 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_12x12 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_13x13 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_14x14 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_15x15 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_16x16 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_16x8 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_14x7 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_12x6 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_10x5 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_8x4 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_6x3 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_4x2 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_2x1 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_8x16 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_7x14 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_6x12 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_5x10 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_4x8 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_3x6 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_2x4 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); +EXTERN(void) jpeg_idct_1x2 + JPP((j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, JSAMPARRAY output_buf, JDIMENSION output_col)); + + +/* + * Macros for handling fixed-point arithmetic; these are used by many + * but not all of the DCT/IDCT modules. + * + * All values are expected to be of type INT32. + * Fractional constants are scaled left by CONST_BITS bits. + * CONST_BITS is defined within each module using these macros, + * and may differ from one module to the next. + */ + +#define ONE ((INT32) 1) +#define CONST_SCALE (ONE << CONST_BITS) + +/* Convert a positive real constant to an integer scaled by CONST_SCALE. + * Caution: some C compilers fail to reduce "FIX(constant)" at compile time, + * thus causing a lot of useless floating-point operations at run time. + */ + +#define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5)) + +/* Descale and correctly round an INT32 value that's scaled by N bits. + * We assume RIGHT_SHIFT rounds towards minus infinity, so adding + * the fudge factor is correct for either sign of X. + */ + +#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n) + +/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. + * This macro is used only when the two inputs will actually be no more than + * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a + * full 32x32 multiply. This provides a useful speedup on many machines. + * Unfortunately there is no way to specify a 16x16->32 multiply portably + * in C, but some C compilers will do the right thing if you provide the + * correct combination of casts. + */ + +#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ +#define MULTIPLY16C16(var,const) (((INT16) (var)) * ((INT16) (const))) +#endif +#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ +#define MULTIPLY16C16(var,const) (((INT16) (var)) * ((INT32) (const))) +#endif + +#ifndef MULTIPLY16C16 /* default definition */ +#define MULTIPLY16C16(var,const) ((var) * (const)) +#endif + +/* Same except both inputs are variables. */ + +#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ +#define MULTIPLY16V16(var1,var2) (((INT16) (var1)) * ((INT16) (var2))) +#endif + +#ifndef MULTIPLY16V16 /* default definition */ +#define MULTIPLY16V16(var1,var2) ((var1) * (var2)) +#endif diff --git a/code/jpeg-6b/jddctmgr.c b/code/jpeg-8c/jddctmgr.c similarity index 63% rename from code/jpeg-6b/jddctmgr.c rename to code/jpeg-8c/jddctmgr.c index bbf8d0e9..0ded9d57 100644 --- a/code/jpeg-6b/jddctmgr.c +++ b/code/jpeg-8c/jddctmgr.c @@ -2,6 +2,7 @@ * jddctmgr.c * * Copyright (C) 1994-1996, Thomas G. Lane. + * Modified 2002-2010 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -98,22 +99,134 @@ start_pass (j_decompress_ptr cinfo) for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { /* Select the proper IDCT routine for this component's scaling */ - switch (compptr->DCT_scaled_size) { + switch ((compptr->DCT_h_scaled_size << 8) + compptr->DCT_v_scaled_size) { #ifdef IDCT_SCALING_SUPPORTED - case 1: + case ((1 << 8) + 1): method_ptr = jpeg_idct_1x1; - method = JDCT_ISLOW; /* jidctred uses islow-style table */ + method = JDCT_ISLOW; /* jidctint uses islow-style table */ break; - case 2: + case ((2 << 8) + 2): method_ptr = jpeg_idct_2x2; - method = JDCT_ISLOW; /* jidctred uses islow-style table */ + method = JDCT_ISLOW; /* jidctint uses islow-style table */ break; - case 4: + case ((3 << 8) + 3): + method_ptr = jpeg_idct_3x3; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((4 << 8) + 4): method_ptr = jpeg_idct_4x4; - method = JDCT_ISLOW; /* jidctred uses islow-style table */ + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((5 << 8) + 5): + method_ptr = jpeg_idct_5x5; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((6 << 8) + 6): + method_ptr = jpeg_idct_6x6; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((7 << 8) + 7): + method_ptr = jpeg_idct_7x7; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((9 << 8) + 9): + method_ptr = jpeg_idct_9x9; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((10 << 8) + 10): + method_ptr = jpeg_idct_10x10; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((11 << 8) + 11): + method_ptr = jpeg_idct_11x11; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((12 << 8) + 12): + method_ptr = jpeg_idct_12x12; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((13 << 8) + 13): + method_ptr = jpeg_idct_13x13; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((14 << 8) + 14): + method_ptr = jpeg_idct_14x14; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((15 << 8) + 15): + method_ptr = jpeg_idct_15x15; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((16 << 8) + 16): + method_ptr = jpeg_idct_16x16; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((16 << 8) + 8): + method_ptr = jpeg_idct_16x8; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((14 << 8) + 7): + method_ptr = jpeg_idct_14x7; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((12 << 8) + 6): + method_ptr = jpeg_idct_12x6; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((10 << 8) + 5): + method_ptr = jpeg_idct_10x5; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((8 << 8) + 4): + method_ptr = jpeg_idct_8x4; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((6 << 8) + 3): + method_ptr = jpeg_idct_6x3; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((4 << 8) + 2): + method_ptr = jpeg_idct_4x2; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((2 << 8) + 1): + method_ptr = jpeg_idct_2x1; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((8 << 8) + 16): + method_ptr = jpeg_idct_8x16; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((7 << 8) + 14): + method_ptr = jpeg_idct_7x14; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((6 << 8) + 12): + method_ptr = jpeg_idct_6x12; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((5 << 8) + 10): + method_ptr = jpeg_idct_5x10; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((4 << 8) + 8): + method_ptr = jpeg_idct_4x8; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((3 << 8) + 6): + method_ptr = jpeg_idct_3x6; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((2 << 8) + 4): + method_ptr = jpeg_idct_2x4; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ + break; + case ((1 << 8) + 2): + method_ptr = jpeg_idct_1x2; + method = JDCT_ISLOW; /* jidctint uses islow-style table */ break; #endif - case DCTSIZE: + case ((DCTSIZE << 8) + DCTSIZE): switch (cinfo->dct_method) { #ifdef DCT_ISLOW_SUPPORTED case JDCT_ISLOW: @@ -139,7 +252,8 @@ start_pass (j_decompress_ptr cinfo) } break; default: - ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->DCT_scaled_size); + ERREXIT2(cinfo, JERR_BAD_DCTSIZE, + compptr->DCT_h_scaled_size, compptr->DCT_v_scaled_size); break; } idct->pub.inverse_DCT[ci] = method_ptr; @@ -211,6 +325,7 @@ start_pass (j_decompress_ptr cinfo) * coefficients scaled by scalefactor[row]*scalefactor[col], where * scalefactor[0] = 1 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 + * We apply a further scale factor of 1/8. */ FLOAT_MULT_TYPE * fmtbl = (FLOAT_MULT_TYPE *) compptr->dct_table; int row, col; @@ -224,7 +339,7 @@ start_pass (j_decompress_ptr cinfo) for (col = 0; col < DCTSIZE; col++) { fmtbl[i] = (FLOAT_MULT_TYPE) ((double) qtbl->quantval[i] * - aanscalefactor[row] * aanscalefactor[col]); + aanscalefactor[row] * aanscalefactor[col] * 0.125); i++; } } diff --git a/code/jpeg-8c/jdhuff.c b/code/jpeg-8c/jdhuff.c new file mode 100644 index 00000000..06f92fe4 --- /dev/null +++ b/code/jpeg-8c/jdhuff.c @@ -0,0 +1,1541 @@ +/* + * jdhuff.c + * + * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 2006-2009 by Guido Vollbeding. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains Huffman entropy decoding routines. + * Both sequential and progressive modes are supported in this single module. + * + * Much of the complexity here has to do with supporting input suspension. + * If the data source module demands suspension, we want to be able to back + * up to the start of the current MCU. To do this, we copy state variables + * into local working storage, and update them back to the permanent + * storage only upon successful completion of an MCU. + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" + + +/* Derived data constructed for each Huffman table */ + +#define HUFF_LOOKAHEAD 8 /* # of bits of lookahead */ + +typedef struct { + /* Basic tables: (element [0] of each array is unused) */ + INT32 maxcode[18]; /* largest code of length k (-1 if none) */ + /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */ + INT32 valoffset[17]; /* huffval[] offset for codes of length k */ + /* valoffset[k] = huffval[] index of 1st symbol of code length k, less + * the smallest code of length k; so given a code of length k, the + * corresponding symbol is huffval[code + valoffset[k]] + */ + + /* Link to public Huffman table (needed only in jpeg_huff_decode) */ + JHUFF_TBL *pub; + + /* Lookahead tables: indexed by the next HUFF_LOOKAHEAD bits of + * the input data stream. If the next Huffman code is no more + * than HUFF_LOOKAHEAD bits long, we can obtain its length and + * the corresponding symbol directly from these tables. + */ + int look_nbits[1< 32 bits on your machine, and shifting/masking longs is + * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE + * appropriately should be a win. Unfortunately we can't define the size + * with something like #define BIT_BUF_SIZE (sizeof(bit_buf_type)*8) + * because not all machines measure sizeof in 8-bit bytes. + */ + +typedef struct { /* Bitreading state saved across MCUs */ + bit_buf_type get_buffer; /* current bit-extraction buffer */ + int bits_left; /* # of unused bits in it */ +} bitread_perm_state; + +typedef struct { /* Bitreading working state within an MCU */ + /* Current data source location */ + /* We need a copy, rather than munging the original, in case of suspension */ + const JOCTET * next_input_byte; /* => next byte to read from source */ + size_t bytes_in_buffer; /* # of bytes remaining in source buffer */ + /* Bit input buffer --- note these values are kept in register variables, + * not in this struct, inside the inner loops. + */ + bit_buf_type get_buffer; /* current bit-extraction buffer */ + int bits_left; /* # of unused bits in it */ + /* Pointer needed by jpeg_fill_bit_buffer. */ + j_decompress_ptr cinfo; /* back link to decompress master record */ +} bitread_working_state; + +/* Macros to declare and load/save bitread local variables. */ +#define BITREAD_STATE_VARS \ + register bit_buf_type get_buffer; \ + register int bits_left; \ + bitread_working_state br_state + +#define BITREAD_LOAD_STATE(cinfop,permstate) \ + br_state.cinfo = cinfop; \ + br_state.next_input_byte = cinfop->src->next_input_byte; \ + br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \ + get_buffer = permstate.get_buffer; \ + bits_left = permstate.bits_left; + +#define BITREAD_SAVE_STATE(cinfop,permstate) \ + cinfop->src->next_input_byte = br_state.next_input_byte; \ + cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \ + permstate.get_buffer = get_buffer; \ + permstate.bits_left = bits_left + +/* + * These macros provide the in-line portion of bit fetching. + * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer + * before using GET_BITS, PEEK_BITS, or DROP_BITS. + * The variables get_buffer and bits_left are assumed to be locals, + * but the state struct might not be (jpeg_huff_decode needs this). + * CHECK_BIT_BUFFER(state,n,action); + * Ensure there are N bits in get_buffer; if suspend, take action. + * val = GET_BITS(n); + * Fetch next N bits. + * val = PEEK_BITS(n); + * Fetch next N bits without removing them from the buffer. + * DROP_BITS(n); + * Discard next N bits. + * The value N should be a simple variable, not an expression, because it + * is evaluated multiple times. + */ + +#define CHECK_BIT_BUFFER(state,nbits,action) \ + { if (bits_left < (nbits)) { \ + if (! jpeg_fill_bit_buffer(&(state),get_buffer,bits_left,nbits)) \ + { action; } \ + get_buffer = (state).get_buffer; bits_left = (state).bits_left; } } + +#define GET_BITS(nbits) \ + (((int) (get_buffer >> (bits_left -= (nbits)))) & BIT_MASK(nbits)) + +#define PEEK_BITS(nbits) \ + (((int) (get_buffer >> (bits_left - (nbits)))) & BIT_MASK(nbits)) + +#define DROP_BITS(nbits) \ + (bits_left -= (nbits)) + + +/* + * Code for extracting next Huffman-coded symbol from input bit stream. + * Again, this is time-critical and we make the main paths be macros. + * + * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits + * without looping. Usually, more than 95% of the Huffman codes will be 8 + * or fewer bits long. The few overlength codes are handled with a loop, + * which need not be inline code. + * + * Notes about the HUFF_DECODE macro: + * 1. Near the end of the data segment, we may fail to get enough bits + * for a lookahead. In that case, we do it the hard way. + * 2. If the lookahead table contains no entry, the next code must be + * more than HUFF_LOOKAHEAD bits long. + * 3. jpeg_huff_decode returns -1 if forced to suspend. + */ + +#define HUFF_DECODE(result,state,htbl,failaction,slowlabel) \ +{ register int nb, look; \ + if (bits_left < HUFF_LOOKAHEAD) { \ + if (! jpeg_fill_bit_buffer(&state,get_buffer,bits_left, 0)) {failaction;} \ + get_buffer = state.get_buffer; bits_left = state.bits_left; \ + if (bits_left < HUFF_LOOKAHEAD) { \ + nb = 1; goto slowlabel; \ + } \ + } \ + look = PEEK_BITS(HUFF_LOOKAHEAD); \ + if ((nb = htbl->look_nbits[look]) != 0) { \ + DROP_BITS(nb); \ + result = htbl->look_sym[look]; \ + } else { \ + nb = HUFF_LOOKAHEAD+1; \ +slowlabel: \ + if ((result=jpeg_huff_decode(&state,get_buffer,bits_left,htbl,nb)) < 0) \ + { failaction; } \ + get_buffer = state.get_buffer; bits_left = state.bits_left; \ + } \ +} + + +/* + * Expanded entropy decoder object for Huffman decoding. + * + * The savable_state subrecord contains fields that change within an MCU, + * but must not be updated permanently until we complete the MCU. + */ + +typedef struct { + unsigned int EOBRUN; /* remaining EOBs in EOBRUN */ + int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ +} savable_state; + +/* This macro is to work around compilers with missing or broken + * structure assignment. You'll need to fix this code if you have + * such a compiler and you change MAX_COMPS_IN_SCAN. + */ + +#ifndef NO_STRUCT_ASSIGN +#define ASSIGN_STATE(dest,src) ((dest) = (src)) +#else +#if MAX_COMPS_IN_SCAN == 4 +#define ASSIGN_STATE(dest,src) \ + ((dest).EOBRUN = (src).EOBRUN, \ + (dest).last_dc_val[0] = (src).last_dc_val[0], \ + (dest).last_dc_val[1] = (src).last_dc_val[1], \ + (dest).last_dc_val[2] = (src).last_dc_val[2], \ + (dest).last_dc_val[3] = (src).last_dc_val[3]) +#endif +#endif + + +typedef struct { + struct jpeg_entropy_decoder pub; /* public fields */ + + /* These fields are loaded into local variables at start of each MCU. + * In case of suspension, we exit WITHOUT updating them. + */ + bitread_perm_state bitstate; /* Bit buffer at start of MCU */ + savable_state saved; /* Other state at start of MCU */ + + /* These fields are NOT loaded into local working state. */ + boolean insufficient_data; /* set TRUE after emitting warning */ + unsigned int restarts_to_go; /* MCUs left in this restart interval */ + + /* Following two fields used only in progressive mode */ + + /* Pointers to derived tables (these workspaces have image lifespan) */ + d_derived_tbl * derived_tbls[NUM_HUFF_TBLS]; + + d_derived_tbl * ac_derived_tbl; /* active table during an AC scan */ + + /* Following fields used only in sequential mode */ + + /* Pointers to derived tables (these workspaces have image lifespan) */ + d_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS]; + d_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS]; + + /* Precalculated info set up by start_pass for use in decode_mcu: */ + + /* Pointers to derived tables to be used for each block within an MCU */ + d_derived_tbl * dc_cur_tbls[D_MAX_BLOCKS_IN_MCU]; + d_derived_tbl * ac_cur_tbls[D_MAX_BLOCKS_IN_MCU]; + /* Whether we care about the DC and AC coefficient values for each block */ + int coef_limit[D_MAX_BLOCKS_IN_MCU]; +} huff_entropy_decoder; + +typedef huff_entropy_decoder * huff_entropy_ptr; + + +static const int jpeg_zigzag_order[8][8] = { + { 0, 1, 5, 6, 14, 15, 27, 28 }, + { 2, 4, 7, 13, 16, 26, 29, 42 }, + { 3, 8, 12, 17, 25, 30, 41, 43 }, + { 9, 11, 18, 24, 31, 40, 44, 53 }, + { 10, 19, 23, 32, 39, 45, 52, 54 }, + { 20, 22, 33, 38, 46, 51, 55, 60 }, + { 21, 34, 37, 47, 50, 56, 59, 61 }, + { 35, 36, 48, 49, 57, 58, 62, 63 } +}; + +static const int jpeg_zigzag_order7[7][7] = { + { 0, 1, 5, 6, 14, 15, 27 }, + { 2, 4, 7, 13, 16, 26, 28 }, + { 3, 8, 12, 17, 25, 29, 38 }, + { 9, 11, 18, 24, 30, 37, 39 }, + { 10, 19, 23, 31, 36, 40, 45 }, + { 20, 22, 32, 35, 41, 44, 46 }, + { 21, 33, 34, 42, 43, 47, 48 } +}; + +static const int jpeg_zigzag_order6[6][6] = { + { 0, 1, 5, 6, 14, 15 }, + { 2, 4, 7, 13, 16, 25 }, + { 3, 8, 12, 17, 24, 26 }, + { 9, 11, 18, 23, 27, 32 }, + { 10, 19, 22, 28, 31, 33 }, + { 20, 21, 29, 30, 34, 35 } +}; + +static const int jpeg_zigzag_order5[5][5] = { + { 0, 1, 5, 6, 14 }, + { 2, 4, 7, 13, 15 }, + { 3, 8, 12, 16, 21 }, + { 9, 11, 17, 20, 22 }, + { 10, 18, 19, 23, 24 } +}; + +static const int jpeg_zigzag_order4[4][4] = { + { 0, 1, 5, 6 }, + { 2, 4, 7, 12 }, + { 3, 8, 11, 13 }, + { 9, 10, 14, 15 } +}; + +static const int jpeg_zigzag_order3[3][3] = { + { 0, 1, 5 }, + { 2, 4, 6 }, + { 3, 7, 8 } +}; + +static const int jpeg_zigzag_order2[2][2] = { + { 0, 1 }, + { 2, 3 } +}; + + +/* + * Compute the derived values for a Huffman table. + * This routine also performs some validation checks on the table. + */ + +LOCAL(void) +jpeg_make_d_derived_tbl (j_decompress_ptr cinfo, boolean isDC, int tblno, + d_derived_tbl ** pdtbl) +{ + JHUFF_TBL *htbl; + d_derived_tbl *dtbl; + int p, i, l, si, numsymbols; + int lookbits, ctr; + char huffsize[257]; + unsigned int huffcode[257]; + unsigned int code; + + /* Note that huffsize[] and huffcode[] are filled in code-length order, + * paralleling the order of the symbols themselves in htbl->huffval[]. + */ + + /* Find the input Huffman table */ + if (tblno < 0 || tblno >= NUM_HUFF_TBLS) + ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); + htbl = + isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno]; + if (htbl == NULL) + ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); + + /* Allocate a workspace if we haven't already done so. */ + if (*pdtbl == NULL) + *pdtbl = (d_derived_tbl *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + SIZEOF(d_derived_tbl)); + dtbl = *pdtbl; + dtbl->pub = htbl; /* fill in back link */ + + /* Figure C.1: make table of Huffman code length for each symbol */ + + p = 0; + for (l = 1; l <= 16; l++) { + i = (int) htbl->bits[l]; + if (i < 0 || p + i > 256) /* protect against table overrun */ + ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); + while (i--) + huffsize[p++] = (char) l; + } + huffsize[p] = 0; + numsymbols = p; + + /* Figure C.2: generate the codes themselves */ + /* We also validate that the counts represent a legal Huffman code tree. */ + + code = 0; + si = huffsize[0]; + p = 0; + while (huffsize[p]) { + while (((int) huffsize[p]) == si) { + huffcode[p++] = code; + code++; + } + /* code is now 1 more than the last code used for codelength si; but + * it must still fit in si bits, since no code is allowed to be all ones. + */ + if (((INT32) code) >= (((INT32) 1) << si)) + ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); + code <<= 1; + si++; + } + + /* Figure F.15: generate decoding tables for bit-sequential decoding */ + + p = 0; + for (l = 1; l <= 16; l++) { + if (htbl->bits[l]) { + /* valoffset[l] = huffval[] index of 1st symbol of code length l, + * minus the minimum code of length l + */ + dtbl->valoffset[l] = (INT32) p - (INT32) huffcode[p]; + p += htbl->bits[l]; + dtbl->maxcode[l] = huffcode[p-1]; /* maximum code of length l */ + } else { + dtbl->maxcode[l] = -1; /* -1 if no codes of this length */ + } + } + dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */ + + /* Compute lookahead tables to speed up decoding. + * First we set all the table entries to 0, indicating "too long"; + * then we iterate through the Huffman codes that are short enough and + * fill in all the entries that correspond to bit sequences starting + * with that code. + */ + + MEMZERO(dtbl->look_nbits, SIZEOF(dtbl->look_nbits)); + + p = 0; + for (l = 1; l <= HUFF_LOOKAHEAD; l++) { + for (i = 1; i <= (int) htbl->bits[l]; i++, p++) { + /* l = current code's length, p = its index in huffcode[] & huffval[]. */ + /* Generate left-justified code followed by all possible bit sequences */ + lookbits = huffcode[p] << (HUFF_LOOKAHEAD-l); + for (ctr = 1 << (HUFF_LOOKAHEAD-l); ctr > 0; ctr--) { + dtbl->look_nbits[lookbits] = l; + dtbl->look_sym[lookbits] = htbl->huffval[p]; + lookbits++; + } + } + } + + /* Validate symbols as being reasonable. + * For AC tables, we make no check, but accept all byte values 0..255. + * For DC tables, we require the symbols to be in range 0..15. + * (Tighter bounds could be applied depending on the data depth and mode, + * but this is sufficient to ensure safe decoding.) + */ + if (isDC) { + for (i = 0; i < numsymbols; i++) { + int sym = htbl->huffval[i]; + if (sym < 0 || sym > 15) + ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); + } + } +} + + +/* + * Out-of-line code for bit fetching. + * Note: current values of get_buffer and bits_left are passed as parameters, + * but are returned in the corresponding fields of the state struct. + * + * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width + * of get_buffer to be used. (On machines with wider words, an even larger + * buffer could be used.) However, on some machines 32-bit shifts are + * quite slow and take time proportional to the number of places shifted. + * (This is true with most PC compilers, for instance.) In this case it may + * be a win to set MIN_GET_BITS to the minimum value of 15. This reduces the + * average shift distance at the cost of more calls to jpeg_fill_bit_buffer. + */ + +#ifdef SLOW_SHIFT_32 +#define MIN_GET_BITS 15 /* minimum allowable value */ +#else +#define MIN_GET_BITS (BIT_BUF_SIZE-7) +#endif + + +LOCAL(boolean) +jpeg_fill_bit_buffer (bitread_working_state * state, + register bit_buf_type get_buffer, register int bits_left, + int nbits) +/* Load up the bit buffer to a depth of at least nbits */ +{ + /* Copy heavily used state fields into locals (hopefully registers) */ + register const JOCTET * next_input_byte = state->next_input_byte; + register size_t bytes_in_buffer = state->bytes_in_buffer; + j_decompress_ptr cinfo = state->cinfo; + + /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */ + /* (It is assumed that no request will be for more than that many bits.) */ + /* We fail to do so only if we hit a marker or are forced to suspend. */ + + if (cinfo->unread_marker == 0) { /* cannot advance past a marker */ + while (bits_left < MIN_GET_BITS) { + register int c; + + /* Attempt to read a byte */ + if (bytes_in_buffer == 0) { + if (! (*cinfo->src->fill_input_buffer) (cinfo)) + return FALSE; + next_input_byte = cinfo->src->next_input_byte; + bytes_in_buffer = cinfo->src->bytes_in_buffer; + } + bytes_in_buffer--; + c = GETJOCTET(*next_input_byte++); + + /* If it's 0xFF, check and discard stuffed zero byte */ + if (c == 0xFF) { + /* Loop here to discard any padding FF's on terminating marker, + * so that we can save a valid unread_marker value. NOTE: we will + * accept multiple FF's followed by a 0 as meaning a single FF data + * byte. This data pattern is not valid according to the standard. + */ + do { + if (bytes_in_buffer == 0) { + if (! (*cinfo->src->fill_input_buffer) (cinfo)) + return FALSE; + next_input_byte = cinfo->src->next_input_byte; + bytes_in_buffer = cinfo->src->bytes_in_buffer; + } + bytes_in_buffer--; + c = GETJOCTET(*next_input_byte++); + } while (c == 0xFF); + + if (c == 0) { + /* Found FF/00, which represents an FF data byte */ + c = 0xFF; + } else { + /* Oops, it's actually a marker indicating end of compressed data. + * Save the marker code for later use. + * Fine point: it might appear that we should save the marker into + * bitread working state, not straight into permanent state. But + * once we have hit a marker, we cannot need to suspend within the + * current MCU, because we will read no more bytes from the data + * source. So it is OK to update permanent state right away. + */ + cinfo->unread_marker = c; + /* See if we need to insert some fake zero bits. */ + goto no_more_bytes; + } + } + + /* OK, load c into get_buffer */ + get_buffer = (get_buffer << 8) | c; + bits_left += 8; + } /* end while */ + } else { + no_more_bytes: + /* We get here if we've read the marker that terminates the compressed + * data segment. There should be enough bits in the buffer register + * to satisfy the request; if so, no problem. + */ + if (nbits > bits_left) { + /* Uh-oh. Report corrupted data to user and stuff zeroes into + * the data stream, so that we can produce some kind of image. + * We use a nonvolatile flag to ensure that only one warning message + * appears per data segment. + */ + if (! ((huff_entropy_ptr) cinfo->entropy)->insufficient_data) { + WARNMS(cinfo, JWRN_HIT_MARKER); + ((huff_entropy_ptr) cinfo->entropy)->insufficient_data = TRUE; + } + /* Fill the buffer with zero bits */ + get_buffer <<= MIN_GET_BITS - bits_left; + bits_left = MIN_GET_BITS; + } + } + + /* Unload the local registers */ + state->next_input_byte = next_input_byte; + state->bytes_in_buffer = bytes_in_buffer; + state->get_buffer = get_buffer; + state->bits_left = bits_left; + + return TRUE; +} + + +/* + * Figure F.12: extend sign bit. + * On some machines, a shift and sub will be faster than a table lookup. + */ + +#ifdef AVOID_TABLES + +#define BIT_MASK(nbits) ((1<<(nbits))-1) +#define HUFF_EXTEND(x,s) ((x) < (1<<((s)-1)) ? (x) - ((1<<(s))-1) : (x)) + +#else + +#define BIT_MASK(nbits) bmask[nbits] +#define HUFF_EXTEND(x,s) ((x) <= bmask[(s) - 1] ? (x) - bmask[s] : (x)) + +static const int bmask[16] = /* bmask[n] is mask for n rightmost bits */ + { 0, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, + 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF }; + +#endif /* AVOID_TABLES */ + + +/* + * Out-of-line code for Huffman code decoding. + */ + +LOCAL(int) +jpeg_huff_decode (bitread_working_state * state, + register bit_buf_type get_buffer, register int bits_left, + d_derived_tbl * htbl, int min_bits) +{ + register int l = min_bits; + register INT32 code; + + /* HUFF_DECODE has determined that the code is at least min_bits */ + /* bits long, so fetch that many bits in one swoop. */ + + CHECK_BIT_BUFFER(*state, l, return -1); + code = GET_BITS(l); + + /* Collect the rest of the Huffman code one bit at a time. */ + /* This is per Figure F.16 in the JPEG spec. */ + + while (code > htbl->maxcode[l]) { + code <<= 1; + CHECK_BIT_BUFFER(*state, 1, return -1); + code |= GET_BITS(1); + l++; + } + + /* Unload the local registers */ + state->get_buffer = get_buffer; + state->bits_left = bits_left; + + /* With garbage input we may reach the sentinel value l = 17. */ + + if (l > 16) { + WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE); + return 0; /* fake a zero as the safest result */ + } + + return htbl->pub->huffval[ (int) (code + htbl->valoffset[l]) ]; +} + + +/* + * Check for a restart marker & resynchronize decoder. + * Returns FALSE if must suspend. + */ + +LOCAL(boolean) +process_restart (j_decompress_ptr cinfo) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + int ci; + + /* Throw away any unused bits remaining in bit buffer; */ + /* include any full bytes in next_marker's count of discarded bytes */ + cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8; + entropy->bitstate.bits_left = 0; + + /* Advance past the RSTn marker */ + if (! (*cinfo->marker->read_restart_marker) (cinfo)) + return FALSE; + + /* Re-initialize DC predictions to 0 */ + for (ci = 0; ci < cinfo->comps_in_scan; ci++) + entropy->saved.last_dc_val[ci] = 0; + /* Re-init EOB run count, too */ + entropy->saved.EOBRUN = 0; + + /* Reset restart counter */ + entropy->restarts_to_go = cinfo->restart_interval; + + /* Reset out-of-data flag, unless read_restart_marker left us smack up + * against a marker. In that case we will end up treating the next data + * segment as empty, and we can avoid producing bogus output pixels by + * leaving the flag set. + */ + if (cinfo->unread_marker == 0) + entropy->insufficient_data = FALSE; + + return TRUE; +} + + +/* + * Huffman MCU decoding. + * Each of these routines decodes and returns one MCU's worth of + * Huffman-compressed coefficients. + * The coefficients are reordered from zigzag order into natural array order, + * but are not dequantized. + * + * The i'th block of the MCU is stored into the block pointed to by + * MCU_data[i]. WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER. + * (Wholesale zeroing is usually a little faster than retail...) + * + * We return FALSE if data source requested suspension. In that case no + * changes have been made to permanent state. (Exception: some output + * coefficients may already have been assigned. This is harmless for + * spectral selection, since we'll just re-assign them on the next call. + * Successive approximation AC refinement has to be more careful, however.) + */ + +/* + * MCU decoding for DC initial scan (either spectral selection, + * or first pass of successive approximation). + */ + +METHODDEF(boolean) +decode_mcu_DC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + int Al = cinfo->Al; + register int s, r; + int blkn, ci; + JBLOCKROW block; + BITREAD_STATE_VARS; + savable_state state; + d_derived_tbl * tbl; + jpeg_component_info * compptr; + + /* Process restart marker if needed; may have to suspend */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) + if (! process_restart(cinfo)) + return FALSE; + } + + /* If we've run out of data, just leave the MCU set to zeroes. + * This way, we return uniform gray for the remainder of the segment. + */ + if (! entropy->insufficient_data) { + + /* Load up working state */ + BITREAD_LOAD_STATE(cinfo,entropy->bitstate); + ASSIGN_STATE(state, entropy->saved); + + /* Outer loop handles each block in the MCU */ + + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + block = MCU_data[blkn]; + ci = cinfo->MCU_membership[blkn]; + compptr = cinfo->cur_comp_info[ci]; + tbl = entropy->derived_tbls[compptr->dc_tbl_no]; + + /* Decode a single block's worth of coefficients */ + + /* Section F.2.2.1: decode the DC coefficient difference */ + HUFF_DECODE(s, br_state, tbl, return FALSE, label1); + if (s) { + CHECK_BIT_BUFFER(br_state, s, return FALSE); + r = GET_BITS(s); + s = HUFF_EXTEND(r, s); + } + + /* Convert DC difference to actual value, update last_dc_val */ + s += state.last_dc_val[ci]; + state.last_dc_val[ci] = s; + /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0) */ + (*block)[0] = (JCOEF) (s << Al); + } + + /* Completed MCU, so update state */ + BITREAD_SAVE_STATE(cinfo,entropy->bitstate); + ASSIGN_STATE(entropy->saved, state); + } + + /* Account for restart interval (no-op if not using restarts) */ + entropy->restarts_to_go--; + + return TRUE; +} + + +/* + * MCU decoding for AC initial scan (either spectral selection, + * or first pass of successive approximation). + */ + +METHODDEF(boolean) +decode_mcu_AC_first (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + register int s, k, r; + unsigned int EOBRUN; + int Se, Al; + const int * natural_order; + JBLOCKROW block; + BITREAD_STATE_VARS; + d_derived_tbl * tbl; + + /* Process restart marker if needed; may have to suspend */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) + if (! process_restart(cinfo)) + return FALSE; + } + + /* If we've run out of data, just leave the MCU set to zeroes. + * This way, we return uniform gray for the remainder of the segment. + */ + if (! entropy->insufficient_data) { + + Se = cinfo->Se; + Al = cinfo->Al; + natural_order = cinfo->natural_order; + + /* Load up working state. + * We can avoid loading/saving bitread state if in an EOB run. + */ + EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */ + + /* There is always only one block per MCU */ + + if (EOBRUN > 0) /* if it's a band of zeroes... */ + EOBRUN--; /* ...process it now (we do nothing) */ + else { + BITREAD_LOAD_STATE(cinfo,entropy->bitstate); + block = MCU_data[0]; + tbl = entropy->ac_derived_tbl; + + for (k = cinfo->Ss; k <= Se; k++) { + HUFF_DECODE(s, br_state, tbl, return FALSE, label2); + r = s >> 4; + s &= 15; + if (s) { + k += r; + CHECK_BIT_BUFFER(br_state, s, return FALSE); + r = GET_BITS(s); + s = HUFF_EXTEND(r, s); + /* Scale and output coefficient in natural (dezigzagged) order */ + (*block)[natural_order[k]] = (JCOEF) (s << Al); + } else { + if (r == 15) { /* ZRL */ + k += 15; /* skip 15 zeroes in band */ + } else { /* EOBr, run length is 2^r + appended bits */ + EOBRUN = 1 << r; + if (r) { /* EOBr, r > 0 */ + CHECK_BIT_BUFFER(br_state, r, return FALSE); + r = GET_BITS(r); + EOBRUN += r; + } + EOBRUN--; /* this band is processed at this moment */ + break; /* force end-of-band */ + } + } + } + + BITREAD_SAVE_STATE(cinfo,entropy->bitstate); + } + + /* Completed MCU, so update state */ + entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */ + } + + /* Account for restart interval (no-op if not using restarts) */ + entropy->restarts_to_go--; + + return TRUE; +} + + +/* + * MCU decoding for DC successive approximation refinement scan. + * Note: we assume such scans can be multi-component, although the spec + * is not very clear on the point. + */ + +METHODDEF(boolean) +decode_mcu_DC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + int p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ + int blkn; + JBLOCKROW block; + BITREAD_STATE_VARS; + + /* Process restart marker if needed; may have to suspend */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) + if (! process_restart(cinfo)) + return FALSE; + } + + /* Not worth the cycles to check insufficient_data here, + * since we will not change the data anyway if we read zeroes. + */ + + /* Load up working state */ + BITREAD_LOAD_STATE(cinfo,entropy->bitstate); + + /* Outer loop handles each block in the MCU */ + + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + block = MCU_data[blkn]; + + /* Encoded data is simply the next bit of the two's-complement DC value */ + CHECK_BIT_BUFFER(br_state, 1, return FALSE); + if (GET_BITS(1)) + (*block)[0] |= p1; + /* Note: since we use |=, repeating the assignment later is safe */ + } + + /* Completed MCU, so update state */ + BITREAD_SAVE_STATE(cinfo,entropy->bitstate); + + /* Account for restart interval (no-op if not using restarts) */ + entropy->restarts_to_go--; + + return TRUE; +} + + +/* + * MCU decoding for AC successive approximation refinement scan. + */ + +METHODDEF(boolean) +decode_mcu_AC_refine (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + register int s, k, r; + unsigned int EOBRUN; + int Se, p1, m1; + const int * natural_order; + JBLOCKROW block; + JCOEFPTR thiscoef; + BITREAD_STATE_VARS; + d_derived_tbl * tbl; + int num_newnz; + int newnz_pos[DCTSIZE2]; + + /* Process restart marker if needed; may have to suspend */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) + if (! process_restart(cinfo)) + return FALSE; + } + + /* If we've run out of data, don't modify the MCU. + */ + if (! entropy->insufficient_data) { + + Se = cinfo->Se; + p1 = 1 << cinfo->Al; /* 1 in the bit position being coded */ + m1 = (-1) << cinfo->Al; /* -1 in the bit position being coded */ + natural_order = cinfo->natural_order; + + /* Load up working state */ + BITREAD_LOAD_STATE(cinfo,entropy->bitstate); + EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */ + + /* There is always only one block per MCU */ + block = MCU_data[0]; + tbl = entropy->ac_derived_tbl; + + /* If we are forced to suspend, we must undo the assignments to any newly + * nonzero coefficients in the block, because otherwise we'd get confused + * next time about which coefficients were already nonzero. + * But we need not undo addition of bits to already-nonzero coefficients; + * instead, we can test the current bit to see if we already did it. + */ + num_newnz = 0; + + /* initialize coefficient loop counter to start of band */ + k = cinfo->Ss; + + if (EOBRUN == 0) { + for (; k <= Se; k++) { + HUFF_DECODE(s, br_state, tbl, goto undoit, label3); + r = s >> 4; + s &= 15; + if (s) { + if (s != 1) /* size of new coef should always be 1 */ + WARNMS(cinfo, JWRN_HUFF_BAD_CODE); + CHECK_BIT_BUFFER(br_state, 1, goto undoit); + if (GET_BITS(1)) + s = p1; /* newly nonzero coef is positive */ + else + s = m1; /* newly nonzero coef is negative */ + } else { + if (r != 15) { + EOBRUN = 1 << r; /* EOBr, run length is 2^r + appended bits */ + if (r) { + CHECK_BIT_BUFFER(br_state, r, goto undoit); + r = GET_BITS(r); + EOBRUN += r; + } + break; /* rest of block is handled by EOB logic */ + } + /* note s = 0 for processing ZRL */ + } + /* Advance over already-nonzero coefs and r still-zero coefs, + * appending correction bits to the nonzeroes. A correction bit is 1 + * if the absolute value of the coefficient must be increased. + */ + do { + thiscoef = *block + natural_order[k]; + if (*thiscoef != 0) { + CHECK_BIT_BUFFER(br_state, 1, goto undoit); + if (GET_BITS(1)) { + if ((*thiscoef & p1) == 0) { /* do nothing if already set it */ + if (*thiscoef >= 0) + *thiscoef += p1; + else + *thiscoef += m1; + } + } + } else { + if (--r < 0) + break; /* reached target zero coefficient */ + } + k++; + } while (k <= Se); + if (s) { + int pos = natural_order[k]; + /* Output newly nonzero coefficient */ + (*block)[pos] = (JCOEF) s; + /* Remember its position in case we have to suspend */ + newnz_pos[num_newnz++] = pos; + } + } + } + + if (EOBRUN > 0) { + /* Scan any remaining coefficient positions after the end-of-band + * (the last newly nonzero coefficient, if any). Append a correction + * bit to each already-nonzero coefficient. A correction bit is 1 + * if the absolute value of the coefficient must be increased. + */ + for (; k <= Se; k++) { + thiscoef = *block + natural_order[k]; + if (*thiscoef != 0) { + CHECK_BIT_BUFFER(br_state, 1, goto undoit); + if (GET_BITS(1)) { + if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */ + if (*thiscoef >= 0) + *thiscoef += p1; + else + *thiscoef += m1; + } + } + } + } + /* Count one block completed in EOB run */ + EOBRUN--; + } + + /* Completed MCU, so update state */ + BITREAD_SAVE_STATE(cinfo,entropy->bitstate); + entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */ + } + + /* Account for restart interval (no-op if not using restarts) */ + entropy->restarts_to_go--; + + return TRUE; + +undoit: + /* Re-zero any output coefficients that we made newly nonzero */ + while (num_newnz > 0) + (*block)[newnz_pos[--num_newnz]] = 0; + + return FALSE; +} + + +/* + * Decode one MCU's worth of Huffman-compressed coefficients, + * partial blocks. + */ + +METHODDEF(boolean) +decode_mcu_sub (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + const int * natural_order; + int Se, blkn; + BITREAD_STATE_VARS; + savable_state state; + + /* Process restart marker if needed; may have to suspend */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) + if (! process_restart(cinfo)) + return FALSE; + } + + /* If we've run out of data, just leave the MCU set to zeroes. + * This way, we return uniform gray for the remainder of the segment. + */ + if (! entropy->insufficient_data) { + + natural_order = cinfo->natural_order; + Se = cinfo->lim_Se; + + /* Load up working state */ + BITREAD_LOAD_STATE(cinfo,entropy->bitstate); + ASSIGN_STATE(state, entropy->saved); + + /* Outer loop handles each block in the MCU */ + + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + JBLOCKROW block = MCU_data[blkn]; + d_derived_tbl * htbl; + register int s, k, r; + int coef_limit, ci; + + /* Decode a single block's worth of coefficients */ + + /* Section F.2.2.1: decode the DC coefficient difference */ + htbl = entropy->dc_cur_tbls[blkn]; + HUFF_DECODE(s, br_state, htbl, return FALSE, label1); + + htbl = entropy->ac_cur_tbls[blkn]; + k = 1; + coef_limit = entropy->coef_limit[blkn]; + if (coef_limit) { + /* Convert DC difference to actual value, update last_dc_val */ + if (s) { + CHECK_BIT_BUFFER(br_state, s, return FALSE); + r = GET_BITS(s); + s = HUFF_EXTEND(r, s); + } + ci = cinfo->MCU_membership[blkn]; + s += state.last_dc_val[ci]; + state.last_dc_val[ci] = s; + /* Output the DC coefficient */ + (*block)[0] = (JCOEF) s; + + /* Section F.2.2.2: decode the AC coefficients */ + /* Since zeroes are skipped, output area must be cleared beforehand */ + for (; k < coef_limit; k++) { + HUFF_DECODE(s, br_state, htbl, return FALSE, label2); + + r = s >> 4; + s &= 15; + + if (s) { + k += r; + CHECK_BIT_BUFFER(br_state, s, return FALSE); + r = GET_BITS(s); + s = HUFF_EXTEND(r, s); + /* Output coefficient in natural (dezigzagged) order. + * Note: the extra entries in natural_order[] will save us + * if k > Se, which could happen if the data is corrupted. + */ + (*block)[natural_order[k]] = (JCOEF) s; + } else { + if (r != 15) + goto EndOfBlock; + k += 15; + } + } + } else { + if (s) { + CHECK_BIT_BUFFER(br_state, s, return FALSE); + DROP_BITS(s); + } + } + + /* Section F.2.2.2: decode the AC coefficients */ + /* In this path we just discard the values */ + for (; k <= Se; k++) { + HUFF_DECODE(s, br_state, htbl, return FALSE, label3); + + r = s >> 4; + s &= 15; + + if (s) { + k += r; + CHECK_BIT_BUFFER(br_state, s, return FALSE); + DROP_BITS(s); + } else { + if (r != 15) + break; + k += 15; + } + } + + EndOfBlock: ; + } + + /* Completed MCU, so update state */ + BITREAD_SAVE_STATE(cinfo,entropy->bitstate); + ASSIGN_STATE(entropy->saved, state); + } + + /* Account for restart interval (no-op if not using restarts) */ + entropy->restarts_to_go--; + + return TRUE; +} + + +/* + * Decode one MCU's worth of Huffman-compressed coefficients, + * full-size blocks. + */ + +METHODDEF(boolean) +decode_mcu (j_decompress_ptr cinfo, JBLOCKROW *MCU_data) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + int blkn; + BITREAD_STATE_VARS; + savable_state state; + + /* Process restart marker if needed; may have to suspend */ + if (cinfo->restart_interval) { + if (entropy->restarts_to_go == 0) + if (! process_restart(cinfo)) + return FALSE; + } + + /* If we've run out of data, just leave the MCU set to zeroes. + * This way, we return uniform gray for the remainder of the segment. + */ + if (! entropy->insufficient_data) { + + /* Load up working state */ + BITREAD_LOAD_STATE(cinfo,entropy->bitstate); + ASSIGN_STATE(state, entropy->saved); + + /* Outer loop handles each block in the MCU */ + + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + JBLOCKROW block = MCU_data[blkn]; + d_derived_tbl * htbl; + register int s, k, r; + int coef_limit, ci; + + /* Decode a single block's worth of coefficients */ + + /* Section F.2.2.1: decode the DC coefficient difference */ + htbl = entropy->dc_cur_tbls[blkn]; + HUFF_DECODE(s, br_state, htbl, return FALSE, label1); + + htbl = entropy->ac_cur_tbls[blkn]; + k = 1; + coef_limit = entropy->coef_limit[blkn]; + if (coef_limit) { + /* Convert DC difference to actual value, update last_dc_val */ + if (s) { + CHECK_BIT_BUFFER(br_state, s, return FALSE); + r = GET_BITS(s); + s = HUFF_EXTEND(r, s); + } + ci = cinfo->MCU_membership[blkn]; + s += state.last_dc_val[ci]; + state.last_dc_val[ci] = s; + /* Output the DC coefficient */ + (*block)[0] = (JCOEF) s; + + /* Section F.2.2.2: decode the AC coefficients */ + /* Since zeroes are skipped, output area must be cleared beforehand */ + for (; k < coef_limit; k++) { + HUFF_DECODE(s, br_state, htbl, return FALSE, label2); + + r = s >> 4; + s &= 15; + + if (s) { + k += r; + CHECK_BIT_BUFFER(br_state, s, return FALSE); + r = GET_BITS(s); + s = HUFF_EXTEND(r, s); + /* Output coefficient in natural (dezigzagged) order. + * Note: the extra entries in jpeg_natural_order[] will save us + * if k >= DCTSIZE2, which could happen if the data is corrupted. + */ + (*block)[jpeg_natural_order[k]] = (JCOEF) s; + } else { + if (r != 15) + goto EndOfBlock; + k += 15; + } + } + } else { + if (s) { + CHECK_BIT_BUFFER(br_state, s, return FALSE); + DROP_BITS(s); + } + } + + /* Section F.2.2.2: decode the AC coefficients */ + /* In this path we just discard the values */ + for (; k < DCTSIZE2; k++) { + HUFF_DECODE(s, br_state, htbl, return FALSE, label3); + + r = s >> 4; + s &= 15; + + if (s) { + k += r; + CHECK_BIT_BUFFER(br_state, s, return FALSE); + DROP_BITS(s); + } else { + if (r != 15) + break; + k += 15; + } + } + + EndOfBlock: ; + } + + /* Completed MCU, so update state */ + BITREAD_SAVE_STATE(cinfo,entropy->bitstate); + ASSIGN_STATE(entropy->saved, state); + } + + /* Account for restart interval (no-op if not using restarts) */ + entropy->restarts_to_go--; + + return TRUE; +} + + +/* + * Initialize for a Huffman-compressed scan. + */ + +METHODDEF(void) +start_pass_huff_decoder (j_decompress_ptr cinfo) +{ + huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; + int ci, blkn, tbl, i; + jpeg_component_info * compptr; + + if (cinfo->progressive_mode) { + /* Validate progressive scan parameters */ + if (cinfo->Ss == 0) { + if (cinfo->Se != 0) + goto bad; + } else { + /* need not check Ss/Se < 0 since they came from unsigned bytes */ + if (cinfo->Se < cinfo->Ss || cinfo->Se > cinfo->lim_Se) + goto bad; + /* AC scans may have only one component */ + if (cinfo->comps_in_scan != 1) + goto bad; + } + if (cinfo->Ah != 0) { + /* Successive approximation refinement scan: must have Al = Ah-1. */ + if (cinfo->Ah-1 != cinfo->Al) + goto bad; + } + if (cinfo->Al > 13) { /* need not check for < 0 */ + /* Arguably the maximum Al value should be less than 13 for 8-bit precision, + * but the spec doesn't say so, and we try to be liberal about what we + * accept. Note: large Al values could result in out-of-range DC + * coefficients during early scans, leading to bizarre displays due to + * overflows in the IDCT math. But we won't crash. + */ + bad: + ERREXIT4(cinfo, JERR_BAD_PROGRESSION, + cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al); + } + /* Update progression status, and verify that scan order is legal. + * Note that inter-scan inconsistencies are treated as warnings + * not fatal errors ... not clear if this is right way to behave. + */ + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + int coefi, cindex = cinfo->cur_comp_info[ci]->component_index; + int *coef_bit_ptr = & cinfo->coef_bits[cindex][0]; + if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */ + WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0); + for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) { + int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi]; + if (cinfo->Ah != expected) + WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi); + coef_bit_ptr[coefi] = cinfo->Al; + } + } + + /* Select MCU decoding routine */ + if (cinfo->Ah == 0) { + if (cinfo->Ss == 0) + entropy->pub.decode_mcu = decode_mcu_DC_first; + else + entropy->pub.decode_mcu = decode_mcu_AC_first; + } else { + if (cinfo->Ss == 0) + entropy->pub.decode_mcu = decode_mcu_DC_refine; + else + entropy->pub.decode_mcu = decode_mcu_AC_refine; + } + + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + /* Make sure requested tables are present, and compute derived tables. + * We may build same derived table more than once, but it's not expensive. + */ + if (cinfo->Ss == 0) { + if (cinfo->Ah == 0) { /* DC refinement needs no table */ + tbl = compptr->dc_tbl_no; + jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, + & entropy->derived_tbls[tbl]); + } + } else { + tbl = compptr->ac_tbl_no; + jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, + & entropy->derived_tbls[tbl]); + /* remember the single active table */ + entropy->ac_derived_tbl = entropy->derived_tbls[tbl]; + } + /* Initialize DC predictions to 0 */ + entropy->saved.last_dc_val[ci] = 0; + } + + /* Initialize private state variables */ + entropy->saved.EOBRUN = 0; + } else { + /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG. + * This ought to be an error condition, but we make it a warning because + * there are some baseline files out there with all zeroes in these bytes. + */ + if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 || + ((cinfo->is_baseline || cinfo->Se < DCTSIZE2) && + cinfo->Se != cinfo->lim_Se)) + WARNMS(cinfo, JWRN_NOT_SEQUENTIAL); + + /* Select MCU decoding routine */ + /* We retain the hard-coded case for full-size blocks. + * This is not necessary, but it appears that this version is slightly + * more performant in the given implementation. + * With an improved implementation we would prefer a single optimized + * function. + */ + if (cinfo->lim_Se != DCTSIZE2-1) + entropy->pub.decode_mcu = decode_mcu_sub; + else + entropy->pub.decode_mcu = decode_mcu; + + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + /* Compute derived values for Huffman tables */ + /* We may do this more than once for a table, but it's not expensive */ + tbl = compptr->dc_tbl_no; + jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, + & entropy->dc_derived_tbls[tbl]); + if (cinfo->lim_Se) { /* AC needs no table when not present */ + tbl = compptr->ac_tbl_no; + jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, + & entropy->ac_derived_tbls[tbl]); + } + /* Initialize DC predictions to 0 */ + entropy->saved.last_dc_val[ci] = 0; + } + + /* Precalculate decoding info for each block in an MCU of this scan */ + for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { + ci = cinfo->MCU_membership[blkn]; + compptr = cinfo->cur_comp_info[ci]; + /* Precalculate which table to use for each block */ + entropy->dc_cur_tbls[blkn] = entropy->dc_derived_tbls[compptr->dc_tbl_no]; + entropy->ac_cur_tbls[blkn] = entropy->ac_derived_tbls[compptr->ac_tbl_no]; + /* Decide whether we really care about the coefficient values */ + if (compptr->component_needed) { + ci = compptr->DCT_v_scaled_size; + i = compptr->DCT_h_scaled_size; + switch (cinfo->lim_Se) { + case (1*1-1): + entropy->coef_limit[blkn] = 1; + break; + case (2*2-1): + if (ci <= 0 || ci > 2) ci = 2; + if (i <= 0 || i > 2) i = 2; + entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order2[ci - 1][i - 1]; + break; + case (3*3-1): + if (ci <= 0 || ci > 3) ci = 3; + if (i <= 0 || i > 3) i = 3; + entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order3[ci - 1][i - 1]; + break; + case (4*4-1): + if (ci <= 0 || ci > 4) ci = 4; + if (i <= 0 || i > 4) i = 4; + entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order4[ci - 1][i - 1]; + break; + case (5*5-1): + if (ci <= 0 || ci > 5) ci = 5; + if (i <= 0 || i > 5) i = 5; + entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order5[ci - 1][i - 1]; + break; + case (6*6-1): + if (ci <= 0 || ci > 6) ci = 6; + if (i <= 0 || i > 6) i = 6; + entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order6[ci - 1][i - 1]; + break; + case (7*7-1): + if (ci <= 0 || ci > 7) ci = 7; + if (i <= 0 || i > 7) i = 7; + entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order7[ci - 1][i - 1]; + break; + default: + if (ci <= 0 || ci > 8) ci = 8; + if (i <= 0 || i > 8) i = 8; + entropy->coef_limit[blkn] = 1 + jpeg_zigzag_order[ci - 1][i - 1]; + break; + } + } else { + entropy->coef_limit[blkn] = 0; + } + } + } + + /* Initialize bitread state variables */ + entropy->bitstate.bits_left = 0; + entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */ + entropy->insufficient_data = FALSE; + + /* Initialize restart counter */ + entropy->restarts_to_go = cinfo->restart_interval; +} + + +/* + * Module initialization routine for Huffman entropy decoding. + */ + +GLOBAL(void) +jinit_huff_decoder (j_decompress_ptr cinfo) +{ + huff_entropy_ptr entropy; + int i; + + entropy = (huff_entropy_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + SIZEOF(huff_entropy_decoder)); + cinfo->entropy = (struct jpeg_entropy_decoder *) entropy; + entropy->pub.start_pass = start_pass_huff_decoder; + + if (cinfo->progressive_mode) { + /* Create progression status table */ + int *coef_bit_ptr, ci; + cinfo->coef_bits = (int (*)[DCTSIZE2]) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + cinfo->num_components*DCTSIZE2*SIZEOF(int)); + coef_bit_ptr = & cinfo->coef_bits[0][0]; + for (ci = 0; ci < cinfo->num_components; ci++) + for (i = 0; i < DCTSIZE2; i++) + *coef_bit_ptr++ = -1; + + /* Mark derived tables unallocated */ + for (i = 0; i < NUM_HUFF_TBLS; i++) { + entropy->derived_tbls[i] = NULL; + } + } else { + /* Mark tables unallocated */ + for (i = 0; i < NUM_HUFF_TBLS; i++) { + entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL; + } + } +} diff --git a/code/jpeg-8c/jdinput.c b/code/jpeg-8c/jdinput.c new file mode 100644 index 00000000..2c5c717b --- /dev/null +++ b/code/jpeg-8c/jdinput.c @@ -0,0 +1,661 @@ +/* + * jdinput.c + * + * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 2002-2009 by Guido Vollbeding. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains input control logic for the JPEG decompressor. + * These routines are concerned with controlling the decompressor's input + * processing (marker reading and coefficient decoding). The actual input + * reading is done in jdmarker.c, jdhuff.c, and jdarith.c. + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" + + +/* Private state */ + +typedef struct { + struct jpeg_input_controller pub; /* public fields */ + + int inheaders; /* Nonzero until first SOS is reached */ +} my_input_controller; + +typedef my_input_controller * my_inputctl_ptr; + + +/* Forward declarations */ +METHODDEF(int) consume_markers JPP((j_decompress_ptr cinfo)); + + +/* + * Routines to calculate various quantities related to the size of the image. + */ + + +/* + * Compute output image dimensions and related values. + * NOTE: this is exported for possible use by application. + * Hence it mustn't do anything that can't be done twice. + */ + +GLOBAL(void) +jpeg_core_output_dimensions (j_decompress_ptr cinfo) +/* Do computations that are needed before master selection phase. + * This function is used for transcoding and full decompression. + */ +{ +#ifdef IDCT_SCALING_SUPPORTED + int ci; + jpeg_component_info *compptr; + + /* Compute actual output image dimensions and DCT scaling choices. */ + if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom) { + /* Provide 1/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 1; + cinfo->min_DCT_v_scaled_size = 1; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 2) { + /* Provide 2/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 2L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 2L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 2; + cinfo->min_DCT_v_scaled_size = 2; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 3) { + /* Provide 3/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 3L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 3L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 3; + cinfo->min_DCT_v_scaled_size = 3; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 4) { + /* Provide 4/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 4L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 4L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 4; + cinfo->min_DCT_v_scaled_size = 4; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 5) { + /* Provide 5/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 5L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 5L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 5; + cinfo->min_DCT_v_scaled_size = 5; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 6) { + /* Provide 6/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 6L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 6L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 6; + cinfo->min_DCT_v_scaled_size = 6; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 7) { + /* Provide 7/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 7L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 7L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 7; + cinfo->min_DCT_v_scaled_size = 7; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 8) { + /* Provide 8/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 8L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 8L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 8; + cinfo->min_DCT_v_scaled_size = 8; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 9) { + /* Provide 9/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 9L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 9L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 9; + cinfo->min_DCT_v_scaled_size = 9; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 10) { + /* Provide 10/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 10L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 10L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 10; + cinfo->min_DCT_v_scaled_size = 10; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 11) { + /* Provide 11/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 11L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 11L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 11; + cinfo->min_DCT_v_scaled_size = 11; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 12) { + /* Provide 12/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 12L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 12L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 12; + cinfo->min_DCT_v_scaled_size = 12; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 13) { + /* Provide 13/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 13L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 13L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 13; + cinfo->min_DCT_v_scaled_size = 13; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 14) { + /* Provide 14/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 14L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 14L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 14; + cinfo->min_DCT_v_scaled_size = 14; + } else if (cinfo->scale_num * cinfo->block_size <= cinfo->scale_denom * 15) { + /* Provide 15/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 15L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 15L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 15; + cinfo->min_DCT_v_scaled_size = 15; + } else { + /* Provide 16/block_size scaling */ + cinfo->output_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * 16L, (long) cinfo->block_size); + cinfo->output_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * 16L, (long) cinfo->block_size); + cinfo->min_DCT_h_scaled_size = 16; + cinfo->min_DCT_v_scaled_size = 16; + } + + /* Recompute dimensions of components */ + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; + ci++, compptr++) { + compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size; + compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size; + } + +#else /* !IDCT_SCALING_SUPPORTED */ + + /* Hardwire it to "no scaling" */ + cinfo->output_width = cinfo->image_width; + cinfo->output_height = cinfo->image_height; + /* jdinput.c has already initialized DCT_scaled_size, + * and has computed unscaled downsampled_width and downsampled_height. + */ + +#endif /* IDCT_SCALING_SUPPORTED */ +} + + +LOCAL(void) +initial_setup (j_decompress_ptr cinfo) +/* Called once, when first SOS marker is reached */ +{ + int ci; + jpeg_component_info *compptr; + + /* Make sure image isn't bigger than I can handle */ + if ((long) cinfo->image_height > (long) JPEG_MAX_DIMENSION || + (long) cinfo->image_width > (long) JPEG_MAX_DIMENSION) + ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int) JPEG_MAX_DIMENSION); + + /* For now, precision must match compiled-in value... */ + if (cinfo->data_precision != BITS_IN_JSAMPLE) + ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision); + + /* Check that number of components won't exceed internal array sizes */ + if (cinfo->num_components > MAX_COMPONENTS) + ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components, + MAX_COMPONENTS); + + /* Compute maximum sampling factors; check factor validity */ + cinfo->max_h_samp_factor = 1; + cinfo->max_v_samp_factor = 1; + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; + ci++, compptr++) { + if (compptr->h_samp_factor<=0 || compptr->h_samp_factor>MAX_SAMP_FACTOR || + compptr->v_samp_factor<=0 || compptr->v_samp_factor>MAX_SAMP_FACTOR) + ERREXIT(cinfo, JERR_BAD_SAMPLING); + cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor, + compptr->h_samp_factor); + cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor, + compptr->v_samp_factor); + } + + /* Derive block_size, natural_order, and lim_Se */ + if (cinfo->is_baseline || (cinfo->progressive_mode && + cinfo->comps_in_scan)) { /* no pseudo SOS marker */ + cinfo->block_size = DCTSIZE; + cinfo->natural_order = jpeg_natural_order; + cinfo->lim_Se = DCTSIZE2-1; + } else + switch (cinfo->Se) { + case (1*1-1): + cinfo->block_size = 1; + cinfo->natural_order = jpeg_natural_order; /* not needed */ + cinfo->lim_Se = cinfo->Se; + break; + case (2*2-1): + cinfo->block_size = 2; + cinfo->natural_order = jpeg_natural_order2; + cinfo->lim_Se = cinfo->Se; + break; + case (3*3-1): + cinfo->block_size = 3; + cinfo->natural_order = jpeg_natural_order3; + cinfo->lim_Se = cinfo->Se; + break; + case (4*4-1): + cinfo->block_size = 4; + cinfo->natural_order = jpeg_natural_order4; + cinfo->lim_Se = cinfo->Se; + break; + case (5*5-1): + cinfo->block_size = 5; + cinfo->natural_order = jpeg_natural_order5; + cinfo->lim_Se = cinfo->Se; + break; + case (6*6-1): + cinfo->block_size = 6; + cinfo->natural_order = jpeg_natural_order6; + cinfo->lim_Se = cinfo->Se; + break; + case (7*7-1): + cinfo->block_size = 7; + cinfo->natural_order = jpeg_natural_order7; + cinfo->lim_Se = cinfo->Se; + break; + case (8*8-1): + cinfo->block_size = 8; + cinfo->natural_order = jpeg_natural_order; + cinfo->lim_Se = DCTSIZE2-1; + break; + case (9*9-1): + cinfo->block_size = 9; + cinfo->natural_order = jpeg_natural_order; + cinfo->lim_Se = DCTSIZE2-1; + break; + case (10*10-1): + cinfo->block_size = 10; + cinfo->natural_order = jpeg_natural_order; + cinfo->lim_Se = DCTSIZE2-1; + break; + case (11*11-1): + cinfo->block_size = 11; + cinfo->natural_order = jpeg_natural_order; + cinfo->lim_Se = DCTSIZE2-1; + break; + case (12*12-1): + cinfo->block_size = 12; + cinfo->natural_order = jpeg_natural_order; + cinfo->lim_Se = DCTSIZE2-1; + break; + case (13*13-1): + cinfo->block_size = 13; + cinfo->natural_order = jpeg_natural_order; + cinfo->lim_Se = DCTSIZE2-1; + break; + case (14*14-1): + cinfo->block_size = 14; + cinfo->natural_order = jpeg_natural_order; + cinfo->lim_Se = DCTSIZE2-1; + break; + case (15*15-1): + cinfo->block_size = 15; + cinfo->natural_order = jpeg_natural_order; + cinfo->lim_Se = DCTSIZE2-1; + break; + case (16*16-1): + cinfo->block_size = 16; + cinfo->natural_order = jpeg_natural_order; + cinfo->lim_Se = DCTSIZE2-1; + break; + default: + ERREXIT4(cinfo, JERR_BAD_PROGRESSION, + cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al); + break; + } + + /* We initialize DCT_scaled_size and min_DCT_scaled_size to block_size. + * In the full decompressor, + * this will be overridden by jpeg_calc_output_dimensions in jdmaster.c; + * but in the transcoder, + * jpeg_calc_output_dimensions is not used, so we must do it here. + */ + cinfo->min_DCT_h_scaled_size = cinfo->block_size; + cinfo->min_DCT_v_scaled_size = cinfo->block_size; + + /* Compute dimensions of components */ + for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; + ci++, compptr++) { + compptr->DCT_h_scaled_size = cinfo->block_size; + compptr->DCT_v_scaled_size = cinfo->block_size; + /* Size in DCT blocks */ + compptr->width_in_blocks = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, + (long) (cinfo->max_h_samp_factor * cinfo->block_size)); + compptr->height_in_blocks = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, + (long) (cinfo->max_v_samp_factor * cinfo->block_size)); + /* downsampled_width and downsampled_height will also be overridden by + * jdmaster.c if we are doing full decompression. The transcoder library + * doesn't use these values, but the calling application might. + */ + /* Size in samples */ + compptr->downsampled_width = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width * (long) compptr->h_samp_factor, + (long) cinfo->max_h_samp_factor); + compptr->downsampled_height = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height * (long) compptr->v_samp_factor, + (long) cinfo->max_v_samp_factor); + /* Mark component needed, until color conversion says otherwise */ + compptr->component_needed = TRUE; + /* Mark no quantization table yet saved for component */ + compptr->quant_table = NULL; + } + + /* Compute number of fully interleaved MCU rows. */ + cinfo->total_iMCU_rows = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height, + (long) (cinfo->max_v_samp_factor * cinfo->block_size)); + + /* Decide whether file contains multiple scans */ + if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode) + cinfo->inputctl->has_multiple_scans = TRUE; + else + cinfo->inputctl->has_multiple_scans = FALSE; +} + + +LOCAL(void) +per_scan_setup (j_decompress_ptr cinfo) +/* Do computations that are needed before processing a JPEG scan */ +/* cinfo->comps_in_scan and cinfo->cur_comp_info[] were set from SOS marker */ +{ + int ci, mcublks, tmp; + jpeg_component_info *compptr; + + if (cinfo->comps_in_scan == 1) { + + /* Noninterleaved (single-component) scan */ + compptr = cinfo->cur_comp_info[0]; + + /* Overall image size in MCUs */ + cinfo->MCUs_per_row = compptr->width_in_blocks; + cinfo->MCU_rows_in_scan = compptr->height_in_blocks; + + /* For noninterleaved scan, always one block per MCU */ + compptr->MCU_width = 1; + compptr->MCU_height = 1; + compptr->MCU_blocks = 1; + compptr->MCU_sample_width = compptr->DCT_h_scaled_size; + compptr->last_col_width = 1; + /* For noninterleaved scans, it is convenient to define last_row_height + * as the number of block rows present in the last iMCU row. + */ + tmp = (int) (compptr->height_in_blocks % compptr->v_samp_factor); + if (tmp == 0) tmp = compptr->v_samp_factor; + compptr->last_row_height = tmp; + + /* Prepare array describing MCU composition */ + cinfo->blocks_in_MCU = 1; + cinfo->MCU_membership[0] = 0; + + } else { + + /* Interleaved (multi-component) scan */ + if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN) + ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan, + MAX_COMPS_IN_SCAN); + + /* Overall image size in MCUs */ + cinfo->MCUs_per_row = (JDIMENSION) + jdiv_round_up((long) cinfo->image_width, + (long) (cinfo->max_h_samp_factor * cinfo->block_size)); + cinfo->MCU_rows_in_scan = (JDIMENSION) + jdiv_round_up((long) cinfo->image_height, + (long) (cinfo->max_v_samp_factor * cinfo->block_size)); + + cinfo->blocks_in_MCU = 0; + + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + /* Sampling factors give # of blocks of component in each MCU */ + compptr->MCU_width = compptr->h_samp_factor; + compptr->MCU_height = compptr->v_samp_factor; + compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height; + compptr->MCU_sample_width = compptr->MCU_width * compptr->DCT_h_scaled_size; + /* Figure number of non-dummy blocks in last MCU column & row */ + tmp = (int) (compptr->width_in_blocks % compptr->MCU_width); + if (tmp == 0) tmp = compptr->MCU_width; + compptr->last_col_width = tmp; + tmp = (int) (compptr->height_in_blocks % compptr->MCU_height); + if (tmp == 0) tmp = compptr->MCU_height; + compptr->last_row_height = tmp; + /* Prepare array describing MCU composition */ + mcublks = compptr->MCU_blocks; + if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU) + ERREXIT(cinfo, JERR_BAD_MCU_SIZE); + while (mcublks-- > 0) { + cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci; + } + } + + } +} + + +/* + * Save away a copy of the Q-table referenced by each component present + * in the current scan, unless already saved during a prior scan. + * + * In a multiple-scan JPEG file, the encoder could assign different components + * the same Q-table slot number, but change table definitions between scans + * so that each component uses a different Q-table. (The IJG encoder is not + * currently capable of doing this, but other encoders might.) Since we want + * to be able to dequantize all the components at the end of the file, this + * means that we have to save away the table actually used for each component. + * We do this by copying the table at the start of the first scan containing + * the component. + * The JPEG spec prohibits the encoder from changing the contents of a Q-table + * slot between scans of a component using that slot. If the encoder does so + * anyway, this decoder will simply use the Q-table values that were current + * at the start of the first scan for the component. + * + * The decompressor output side looks only at the saved quant tables, + * not at the current Q-table slots. + */ + +LOCAL(void) +latch_quant_tables (j_decompress_ptr cinfo) +{ + int ci, qtblno; + jpeg_component_info *compptr; + JQUANT_TBL * qtbl; + + for (ci = 0; ci < cinfo->comps_in_scan; ci++) { + compptr = cinfo->cur_comp_info[ci]; + /* No work if we already saved Q-table for this component */ + if (compptr->quant_table != NULL) + continue; + /* Make sure specified quantization table is present */ + qtblno = compptr->quant_tbl_no; + if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || + cinfo->quant_tbl_ptrs[qtblno] == NULL) + ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); + /* OK, save away the quantization table */ + qtbl = (JQUANT_TBL *) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, + SIZEOF(JQUANT_TBL)); + MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], SIZEOF(JQUANT_TBL)); + compptr->quant_table = qtbl; + } +} + + +/* + * Initialize the input modules to read a scan of compressed data. + * The first call to this is done by jdmaster.c after initializing + * the entire decompressor (during jpeg_start_decompress). + * Subsequent calls come from consume_markers, below. + */ + +METHODDEF(void) +start_input_pass (j_decompress_ptr cinfo) +{ + per_scan_setup(cinfo); + latch_quant_tables(cinfo); + (*cinfo->entropy->start_pass) (cinfo); + (*cinfo->coef->start_input_pass) (cinfo); + cinfo->inputctl->consume_input = cinfo->coef->consume_data; +} + + +/* + * Finish up after inputting a compressed-data scan. + * This is called by the coefficient controller after it's read all + * the expected data of the scan. + */ + +METHODDEF(void) +finish_input_pass (j_decompress_ptr cinfo) +{ + cinfo->inputctl->consume_input = consume_markers; +} + + +/* + * Read JPEG markers before, between, or after compressed-data scans. + * Change state as necessary when a new scan is reached. + * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI. + * + * The consume_input method pointer points either here or to the + * coefficient controller's consume_data routine, depending on whether + * we are reading a compressed data segment or inter-segment markers. + * + * Note: This function should NOT return a pseudo SOS marker (with zero + * component number) to the caller. A pseudo marker received by + * read_markers is processed and then skipped for other markers. + */ + +METHODDEF(int) +consume_markers (j_decompress_ptr cinfo) +{ + my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl; + int val; + + if (inputctl->pub.eoi_reached) /* After hitting EOI, read no further */ + return JPEG_REACHED_EOI; + + for (;;) { /* Loop to pass pseudo SOS marker */ + val = (*cinfo->marker->read_markers) (cinfo); + + switch (val) { + case JPEG_REACHED_SOS: /* Found SOS */ + if (inputctl->inheaders) { /* 1st SOS */ + if (inputctl->inheaders == 1) + initial_setup(cinfo); + if (cinfo->comps_in_scan == 0) { /* pseudo SOS marker */ + inputctl->inheaders = 2; + break; + } + inputctl->inheaders = 0; + /* Note: start_input_pass must be called by jdmaster.c + * before any more input can be consumed. jdapimin.c is + * responsible for enforcing this sequencing. + */ + } else { /* 2nd or later SOS marker */ + if (! inputctl->pub.has_multiple_scans) + ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */ + if (cinfo->comps_in_scan == 0) /* unexpected pseudo SOS marker */ + break; + start_input_pass(cinfo); + } + return val; + case JPEG_REACHED_EOI: /* Found EOI */ + inputctl->pub.eoi_reached = TRUE; + if (inputctl->inheaders) { /* Tables-only datastream, apparently */ + if (cinfo->marker->saw_SOF) + ERREXIT(cinfo, JERR_SOF_NO_SOS); + } else { + /* Prevent infinite loop in coef ctlr's decompress_data routine + * if user set output_scan_number larger than number of scans. + */ + if (cinfo->output_scan_number > cinfo->input_scan_number) + cinfo->output_scan_number = cinfo->input_scan_number; + } + return val; + case JPEG_SUSPENDED: + return val; + default: + return val; + } + } +} + + +/* + * Reset state to begin a fresh datastream. + */ + +METHODDEF(void) +reset_input_controller (j_decompress_ptr cinfo) +{ + my_inputctl_ptr inputctl = (my_inputctl_ptr) cinfo->inputctl; + + inputctl->pub.consume_input = consume_markers; + inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */ + inputctl->pub.eoi_reached = FALSE; + inputctl->inheaders = 1; + /* Reset other modules */ + (*cinfo->err->reset_error_mgr) ((j_common_ptr) cinfo); + (*cinfo->marker->reset_marker_reader) (cinfo); + /* Reset progression state -- would be cleaner if entropy decoder did this */ + cinfo->coef_bits = NULL; +} + + +/* + * Initialize the input controller module. + * This is called only once, when the decompression object is created. + */ + +GLOBAL(void) +jinit_input_controller (j_decompress_ptr cinfo) +{ + my_inputctl_ptr inputctl; + + /* Create subobject in permanent pool */ + inputctl = (my_inputctl_ptr) + (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_PERMANENT, + SIZEOF(my_input_controller)); + cinfo->inputctl = (struct jpeg_input_controller *) inputctl; + /* Initialize method pointers */ + inputctl->pub.consume_input = consume_markers; + inputctl->pub.reset_input_controller = reset_input_controller; + inputctl->pub.start_input_pass = start_input_pass; + inputctl->pub.finish_input_pass = finish_input_pass; + /* Initialize state: can't use reset_input_controller since we don't + * want to try to reset other modules yet. + */ + inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */ + inputctl->pub.eoi_reached = FALSE; + inputctl->inheaders = 1; +} diff --git a/code/jpeg-6b/jdmainct.c b/code/jpeg-8c/jdmainct.c similarity index 79% rename from code/jpeg-6b/jdmainct.c rename to code/jpeg-8c/jdmainct.c index 2f265336..02723ca7 100644 --- a/code/jpeg-6b/jdmainct.c +++ b/code/jpeg-8c/jdmainct.c @@ -159,24 +159,24 @@ alloc_funny_pointers (j_decompress_ptr cinfo) * This is done only once, not once per pass. */ { - my_main_ptr jmain = (my_main_ptr) cinfo->main; + my_main_ptr main = (my_main_ptr) cinfo->main; int ci, rgroup; - int M = cinfo->min_DCT_scaled_size; + int M = cinfo->min_DCT_v_scaled_size; jpeg_component_info *compptr; JSAMPARRAY xbuf; /* Get top-level space for component array pointers. * We alloc both arrays with one call to save a few cycles. */ - jmain->xbuffer[0] = (JSAMPIMAGE) + main->xbuffer[0] = (JSAMPIMAGE) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, cinfo->num_components * 2 * SIZEOF(JSAMPARRAY)); - jmain->xbuffer[1] = jmain->xbuffer[0] + cinfo->num_components; + main->xbuffer[1] = main->xbuffer[0] + cinfo->num_components; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { - rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) / - cinfo->min_DCT_scaled_size; /* height of a row group of component */ + rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) / + cinfo->min_DCT_v_scaled_size; /* height of a row group of component */ /* Get space for pointer lists --- M+4 row groups in each list. * We alloc both pointer lists with one call to save a few cycles. */ @@ -184,9 +184,9 @@ alloc_funny_pointers (j_decompress_ptr cinfo) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 2 * (rgroup * (M + 4)) * SIZEOF(JSAMPROW)); xbuf += rgroup; /* want one row group at negative offsets */ - jmain->xbuffer[0][ci] = xbuf; + main->xbuffer[0][ci] = xbuf; xbuf += rgroup * (M + 4); - jmain->xbuffer[1][ci] = xbuf; + main->xbuffer[1][ci] = xbuf; } } @@ -194,26 +194,26 @@ alloc_funny_pointers (j_decompress_ptr cinfo) LOCAL(void) make_funny_pointers (j_decompress_ptr cinfo) /* Create the funny pointer lists discussed in the comments above. - * The actual workspace is already allocated (in jmain->buffer), + * The actual workspace is already allocated (in main->buffer), * and the space for the pointer lists is allocated too. * This routine just fills in the curiously ordered lists. * This will be repeated at the beginning of each pass. */ { - my_main_ptr jmain = (my_main_ptr) cinfo->main; + my_main_ptr main = (my_main_ptr) cinfo->main; int ci, i, rgroup; - int M = cinfo->min_DCT_scaled_size; + int M = cinfo->min_DCT_v_scaled_size; jpeg_component_info *compptr; JSAMPARRAY buf, xbuf0, xbuf1; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { - rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) / - cinfo->min_DCT_scaled_size; /* height of a row group of component */ - xbuf0 = jmain->xbuffer[0][ci]; - xbuf1 = jmain->xbuffer[1][ci]; + rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) / + cinfo->min_DCT_v_scaled_size; /* height of a row group of component */ + xbuf0 = main->xbuffer[0][ci]; + xbuf1 = main->xbuffer[1][ci]; /* First copy the workspace pointers as-is */ - buf = jmain->buffer[ci]; + buf = main->buffer[ci]; for (i = 0; i < rgroup * (M + 2); i++) { xbuf0[i] = xbuf1[i] = buf[i]; } @@ -240,18 +240,18 @@ set_wraparound_pointers (j_decompress_ptr cinfo) * This changes the pointer list state from top-of-image to the normal state. */ { - my_main_ptr jmain = (my_main_ptr) cinfo->main; + my_main_ptr main = (my_main_ptr) cinfo->main; int ci, i, rgroup; - int M = cinfo->min_DCT_scaled_size; + int M = cinfo->min_DCT_v_scaled_size; jpeg_component_info *compptr; JSAMPARRAY xbuf0, xbuf1; for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { - rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) / - cinfo->min_DCT_scaled_size; /* height of a row group of component */ - xbuf0 = jmain->xbuffer[0][ci]; - xbuf1 = jmain->xbuffer[1][ci]; + rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) / + cinfo->min_DCT_v_scaled_size; /* height of a row group of component */ + xbuf0 = main->xbuffer[0][ci]; + xbuf1 = main->xbuffer[1][ci]; for (i = 0; i < rgroup; i++) { xbuf0[i - rgroup] = xbuf0[rgroup*(M+1) + i]; xbuf1[i - rgroup] = xbuf1[rgroup*(M+1) + i]; @@ -269,7 +269,7 @@ set_bottom_pointers (j_decompress_ptr cinfo) * Also sets rowgroups_avail to indicate number of nondummy row groups in row. */ { - my_main_ptr jmain = (my_main_ptr) cinfo->main; + my_main_ptr main = (my_main_ptr) cinfo->main; int ci, i, rgroup, iMCUheight, rows_left; jpeg_component_info *compptr; JSAMPARRAY xbuf; @@ -277,8 +277,8 @@ set_bottom_pointers (j_decompress_ptr cinfo) for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { /* Count sample rows in one iMCU row and in one row group */ - iMCUheight = compptr->v_samp_factor * compptr->DCT_scaled_size; - rgroup = iMCUheight / cinfo->min_DCT_scaled_size; + iMCUheight = compptr->v_samp_factor * compptr->DCT_v_scaled_size; + rgroup = iMCUheight / cinfo->min_DCT_v_scaled_size; /* Count nondummy sample rows remaining for this component */ rows_left = (int) (compptr->downsampled_height % (JDIMENSION) iMCUheight); if (rows_left == 0) rows_left = iMCUheight; @@ -286,12 +286,12 @@ set_bottom_pointers (j_decompress_ptr cinfo) * so we need only do it once. */ if (ci == 0) { - jmain->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1); + main->rowgroups_avail = (JDIMENSION) ((rows_left-1) / rgroup + 1); } /* Duplicate the last real sample row rgroup*2 times; this pads out the * last partial rowgroup and ensures at least one full rowgroup of context. */ - xbuf = jmain->xbuffer[jmain->whichptr][ci]; + xbuf = main->xbuffer[main->whichptr][ci]; for (i = 0; i < rgroup * 2; i++) { xbuf[rows_left + i] = xbuf[rows_left-1]; } @@ -306,27 +306,27 @@ set_bottom_pointers (j_decompress_ptr cinfo) METHODDEF(void) start_pass_main (j_decompress_ptr cinfo, J_BUF_MODE pass_mode) { - my_main_ptr jmain = (my_main_ptr) cinfo->main; + my_main_ptr main = (my_main_ptr) cinfo->main; switch (pass_mode) { case JBUF_PASS_THRU: if (cinfo->upsample->need_context_rows) { - jmain->pub.process_data = process_data_context_main; + main->pub.process_data = process_data_context_main; make_funny_pointers(cinfo); /* Create the xbuffer[] lists */ - jmain->whichptr = 0; /* Read first iMCU row into xbuffer[0] */ - jmain->context_state = CTX_PREPARE_FOR_IMCU; - jmain->iMCU_row_ctr = 0; + main->whichptr = 0; /* Read first iMCU row into xbuffer[0] */ + main->context_state = CTX_PREPARE_FOR_IMCU; + main->iMCU_row_ctr = 0; } else { /* Simple case with no context needed */ - jmain->pub.process_data = process_data_simple_main; + main->pub.process_data = process_data_simple_main; } - jmain->buffer_full = FALSE; /* Mark buffer empty */ - jmain->rowgroup_ctr = 0; + main->buffer_full = FALSE; /* Mark buffer empty */ + main->rowgroup_ctr = 0; break; #ifdef QUANT_2PASS_SUPPORTED case JBUF_CRANK_DEST: /* For last pass of 2-pass quantization, just crank the postprocessor */ - jmain->pub.process_data = process_data_crank_post; + main->pub.process_data = process_data_crank_post; break; #endif default: @@ -346,32 +346,32 @@ process_data_simple_main (j_decompress_ptr cinfo, JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail) { - my_main_ptr jmain = (my_main_ptr) cinfo->main; + my_main_ptr main = (my_main_ptr) cinfo->main; JDIMENSION rowgroups_avail; - /* Read input data if we haven't filled the jmain buffer yet */ - if (! jmain->buffer_full) { - if (! (*cinfo->coef->decompress_data) (cinfo, jmain->buffer)) + /* Read input data if we haven't filled the main buffer yet */ + if (! main->buffer_full) { + if (! (*cinfo->coef->decompress_data) (cinfo, main->buffer)) return; /* suspension forced, can do nothing more */ - jmain->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ + main->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ } /* There are always min_DCT_scaled_size row groups in an iMCU row. */ - rowgroups_avail = (JDIMENSION) cinfo->min_DCT_scaled_size; + rowgroups_avail = (JDIMENSION) cinfo->min_DCT_v_scaled_size; /* Note: at the bottom of the image, we may pass extra garbage row groups * to the postprocessor. The postprocessor has to check for bottom * of image anyway (at row resolution), so no point in us doing it too. */ /* Feed the postprocessor */ - (*cinfo->post->post_process_data) (cinfo, jmain->buffer, - &jmain->rowgroup_ctr, rowgroups_avail, + (*cinfo->post->post_process_data) (cinfo, main->buffer, + &main->rowgroup_ctr, rowgroups_avail, output_buf, out_row_ctr, out_rows_avail); /* Has postprocessor consumed all the data yet? If so, mark buffer empty */ - if (jmain->rowgroup_ctr >= rowgroups_avail) { - jmain->buffer_full = FALSE; - jmain->rowgroup_ctr = 0; + if (main->rowgroup_ctr >= rowgroups_avail) { + main->buffer_full = FALSE; + main->rowgroup_ctr = 0; } } @@ -386,15 +386,15 @@ process_data_context_main (j_decompress_ptr cinfo, JSAMPARRAY output_buf, JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail) { - my_main_ptr jmain = (my_main_ptr) cinfo->main; + my_main_ptr main = (my_main_ptr) cinfo->main; - /* Read input data if we haven't filled the jmain buffer yet */ - if (! jmain->buffer_full) { + /* Read input data if we haven't filled the main buffer yet */ + if (! main->buffer_full) { if (! (*cinfo->coef->decompress_data) (cinfo, - jmain->xbuffer[jmain->whichptr])) + main->xbuffer[main->whichptr])) return; /* suspension forced, can do nothing more */ - jmain->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ - jmain->iMCU_row_ctr++; /* count rows received */ + main->buffer_full = TRUE; /* OK, we have an iMCU row to work with */ + main->iMCU_row_ctr++; /* count rows received */ } /* Postprocessor typically will not swallow all the input data it is handed @@ -402,47 +402,47 @@ process_data_context_main (j_decompress_ptr cinfo, * to exit and restart. This switch lets us keep track of how far we got. * Note that each case falls through to the next on successful completion. */ - switch (jmain->context_state) { + switch (main->context_state) { case CTX_POSTPONED_ROW: /* Call postprocessor using previously set pointers for postponed row */ - (*cinfo->post->post_process_data) (cinfo, jmain->xbuffer[jmain->whichptr], - &jmain->rowgroup_ctr, jmain->rowgroups_avail, + (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr], + &main->rowgroup_ctr, main->rowgroups_avail, output_buf, out_row_ctr, out_rows_avail); - if (jmain->rowgroup_ctr < jmain->rowgroups_avail) + if (main->rowgroup_ctr < main->rowgroups_avail) return; /* Need to suspend */ - jmain->context_state = CTX_PREPARE_FOR_IMCU; + main->context_state = CTX_PREPARE_FOR_IMCU; if (*out_row_ctr >= out_rows_avail) return; /* Postprocessor exactly filled output buf */ /*FALLTHROUGH*/ case CTX_PREPARE_FOR_IMCU: /* Prepare to process first M-1 row groups of this iMCU row */ - jmain->rowgroup_ctr = 0; - jmain->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size - 1); + main->rowgroup_ctr = 0; + main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size - 1); /* Check for bottom of image: if so, tweak pointers to "duplicate" * the last sample row, and adjust rowgroups_avail to ignore padding rows. */ - if (jmain->iMCU_row_ctr == cinfo->total_iMCU_rows) + if (main->iMCU_row_ctr == cinfo->total_iMCU_rows) set_bottom_pointers(cinfo); - jmain->context_state = CTX_PROCESS_IMCU; + main->context_state = CTX_PROCESS_IMCU; /*FALLTHROUGH*/ case CTX_PROCESS_IMCU: /* Call postprocessor using previously set pointers */ - (*cinfo->post->post_process_data) (cinfo, jmain->xbuffer[jmain->whichptr], - &jmain->rowgroup_ctr, jmain->rowgroups_avail, + (*cinfo->post->post_process_data) (cinfo, main->xbuffer[main->whichptr], + &main->rowgroup_ctr, main->rowgroups_avail, output_buf, out_row_ctr, out_rows_avail); - if (jmain->rowgroup_ctr < jmain->rowgroups_avail) + if (main->rowgroup_ctr < main->rowgroups_avail) return; /* Need to suspend */ /* After the first iMCU, change wraparound pointers to normal state */ - if (jmain->iMCU_row_ctr == 1) + if (main->iMCU_row_ctr == 1) set_wraparound_pointers(cinfo); /* Prepare to load new iMCU row using other xbuffer list */ - jmain->whichptr ^= 1; /* 0=>1 or 1=>0 */ - jmain->buffer_full = FALSE; + main->whichptr ^= 1; /* 0=>1 or 1=>0 */ + main->buffer_full = FALSE; /* Still need to process last row group of this iMCU row, */ /* which is saved at index M+1 of the other xbuffer */ - jmain->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_scaled_size + 1); - jmain->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_scaled_size + 2); - jmain->context_state = CTX_POSTPONED_ROW; + main->rowgroup_ctr = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 1); + main->rowgroups_avail = (JDIMENSION) (cinfo->min_DCT_v_scaled_size + 2); + main->context_state = CTX_POSTPONED_ROW; } } @@ -469,21 +469,21 @@ process_data_crank_post (j_decompress_ptr cinfo, /* - * Initialize jmain buffer controller. + * Initialize main buffer controller. */ GLOBAL(void) jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer) { - my_main_ptr jmain; + my_main_ptr main; int ci, rgroup, ngroups; jpeg_component_info *compptr; - jmain = (my_main_ptr) + main = (my_main_ptr) (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, SIZEOF(my_main_controller)); - cinfo->main = (struct jpeg_d_main_controller *) jmain; - jmain->pub.start_pass = start_pass_main; + cinfo->main = (struct jpeg_d_main_controller *) main; + main->pub.start_pass = start_pass_main; if (need_full_buffer) /* shouldn't happen */ ERREXIT(cinfo, JERR_BAD_BUFFER_MODE); @@ -492,21 +492,21 @@ jinit_d_main_controller (j_decompress_ptr cinfo, boolean need_full_buffer) * ngroups is the number of row groups we need. */ if (cinfo->upsample->need_context_rows) { - if (cinfo->min_DCT_scaled_size < 2) /* unsupported, see comments above */ + if (cinfo->min_DCT_v_scaled_size < 2) /* unsupported, see comments above */ ERREXIT(cinfo, JERR_NOTIMPL); alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */ - ngroups = cinfo->min_DCT_scaled_size + 2; + ngroups = cinfo->min_DCT_v_scaled_size + 2; } else { - ngroups = cinfo->min_DCT_scaled_size; + ngroups = cinfo->min_DCT_v_scaled_size; } for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { - rgroup = (compptr->v_samp_factor * compptr->DCT_scaled_size) / - cinfo->min_DCT_scaled_size; /* height of a row group of component */ - jmain->buffer[ci] = (*cinfo->mem->alloc_sarray) + rgroup = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) / + cinfo->min_DCT_v_scaled_size; /* height of a row group of component */ + main->buffer[ci] = (*cinfo->mem->alloc_sarray) ((j_common_ptr) cinfo, JPOOL_IMAGE, - compptr->width_in_blocks * compptr->DCT_scaled_size, + compptr->width_in_blocks * compptr->DCT_h_scaled_size, (JDIMENSION) (rgroup * ngroups)); } } diff --git a/code/jpeg-6b/jdmarker.c b/code/jpeg-8c/jdmarker.c similarity index 95% rename from code/jpeg-6b/jdmarker.c rename to code/jpeg-8c/jdmarker.c index f4cca8cc..f2a9cc42 100644 --- a/code/jpeg-6b/jdmarker.c +++ b/code/jpeg-8c/jdmarker.c @@ -2,6 +2,7 @@ * jdmarker.c * * Copyright (C) 1991-1998, Thomas G. Lane. + * Modified 2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -234,7 +235,8 @@ get_soi (j_decompress_ptr cinfo) LOCAL(boolean) -get_sof (j_decompress_ptr cinfo, boolean is_prog, boolean is_arith) +get_sof (j_decompress_ptr cinfo, boolean is_baseline, boolean is_prog, + boolean is_arith) /* Process a SOFn marker */ { INT32 length; @@ -242,6 +244,7 @@ get_sof (j_decompress_ptr cinfo, boolean is_prog, boolean is_arith) jpeg_component_info * compptr; INPUT_VARS(cinfo); + cinfo->is_baseline = is_baseline; cinfo->progressive_mode = is_prog; cinfo->arith_code = is_arith; @@ -315,7 +318,9 @@ get_sos (j_decompress_ptr cinfo) TRACEMS1(cinfo, 1, JTRC_SOS, n); - if (length != (n * 2 + 6) || n < 1 || n > MAX_COMPS_IN_SCAN) + if (length != (n * 2 + 6) || n > MAX_COMPS_IN_SCAN || + (n == 0 && !cinfo->progressive_mode)) + /* pseudo SOS marker only allowed in progressive mode */ ERREXIT(cinfo, JERR_BAD_LENGTH); cinfo->comps_in_scan = n; @@ -359,8 +364,8 @@ get_sos (j_decompress_ptr cinfo) /* Prepare to scan data & restart markers */ cinfo->marker->next_restart_num = 0; - /* Count another SOS marker */ - cinfo->input_scan_number++; + /* Count another (non-pseudo) SOS marker */ + if (n) cinfo->input_scan_number++; INPUT_SYNC(cinfo); return TRUE; @@ -490,16 +495,18 @@ LOCAL(boolean) get_dqt (j_decompress_ptr cinfo) /* Process a DQT marker */ { - INT32 length; - int n, i, prec; + INT32 length, count, i; + int n, prec; unsigned int tmp; JQUANT_TBL *quant_ptr; + const int *natural_order; INPUT_VARS(cinfo); INPUT_2BYTES(cinfo, length, return FALSE); length -= 2; while (length > 0) { + length--; INPUT_BYTE(cinfo, n, return FALSE); prec = n >> 4; n &= 0x0F; @@ -513,13 +520,43 @@ get_dqt (j_decompress_ptr cinfo) cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr) cinfo); quant_ptr = cinfo->quant_tbl_ptrs[n]; - for (i = 0; i < DCTSIZE2; i++) { + if (prec) { + if (length < DCTSIZE2 * 2) { + /* Initialize full table for safety. */ + for (i = 0; i < DCTSIZE2; i++) { + quant_ptr->quantval[i] = 1; + } + count = length >> 1; + } else + count = DCTSIZE2; + } else { + if (length < DCTSIZE2) { + /* Initialize full table for safety. */ + for (i = 0; i < DCTSIZE2; i++) { + quant_ptr->quantval[i] = 1; + } + count = length; + } else + count = DCTSIZE2; + } + + switch (count) { + case (2*2): natural_order = jpeg_natural_order2; break; + case (3*3): natural_order = jpeg_natural_order3; break; + case (4*4): natural_order = jpeg_natural_order4; break; + case (5*5): natural_order = jpeg_natural_order5; break; + case (6*6): natural_order = jpeg_natural_order6; break; + case (7*7): natural_order = jpeg_natural_order7; break; + default: natural_order = jpeg_natural_order; break; + } + + for (i = 0; i < count; i++) { if (prec) INPUT_2BYTES(cinfo, tmp, return FALSE); else INPUT_BYTE(cinfo, tmp, return FALSE); /* We convert the zigzag-order table to natural array order. */ - quant_ptr->quantval[jpeg_natural_order[i]] = (UINT16) tmp; + quant_ptr->quantval[natural_order[i]] = (UINT16) tmp; } if (cinfo->err->trace_level >= 2) { @@ -532,8 +569,8 @@ get_dqt (j_decompress_ptr cinfo) } } - length -= DCTSIZE2+1; - if (prec) length -= DCTSIZE2; + length -= count; + if (prec) length -= count; } if (length != 0) @@ -946,6 +983,11 @@ first_marker (j_decompress_ptr cinfo) * * Returns same codes as are defined for jpeg_consume_input: * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI. + * + * Note: This function may return a pseudo SOS marker (with zero + * component number) for treat by input controller's consume_input. + * consume_input itself should filter out (skip) the pseudo marker + * after processing for the caller. */ METHODDEF(int) @@ -975,23 +1017,27 @@ read_markers (j_decompress_ptr cinfo) break; case M_SOF0: /* Baseline */ + if (! get_sof(cinfo, TRUE, FALSE, FALSE)) + return JPEG_SUSPENDED; + break; + case M_SOF1: /* Extended sequential, Huffman */ - if (! get_sof(cinfo, FALSE, FALSE)) + if (! get_sof(cinfo, FALSE, FALSE, FALSE)) return JPEG_SUSPENDED; break; case M_SOF2: /* Progressive, Huffman */ - if (! get_sof(cinfo, TRUE, FALSE)) + if (! get_sof(cinfo, FALSE, TRUE, FALSE)) return JPEG_SUSPENDED; break; case M_SOF9: /* Extended sequential, arithmetic */ - if (! get_sof(cinfo, FALSE, TRUE)) + if (! get_sof(cinfo, FALSE, FALSE, TRUE)) return JPEG_SUSPENDED; break; case M_SOF10: /* Progressive, arithmetic */ - if (! get_sof(cinfo, TRUE, TRUE)) + if (! get_sof(cinfo, FALSE, TRUE, TRUE)) return JPEG_SUSPENDED; break; diff --git a/code/jpeg-6b/jdmaster.c b/code/jpeg-8c/jdmaster.c similarity index 86% rename from code/jpeg-6b/jdmaster.c rename to code/jpeg-8c/jdmaster.c index 2802c5b7..8c1146e4 100644 --- a/code/jpeg-6b/jdmaster.c +++ b/code/jpeg-8c/jdmaster.c @@ -2,6 +2,7 @@ * jdmaster.c * * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 2002-2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -61,9 +62,12 @@ use_merged_upsample (j_decompress_ptr cinfo) cinfo->comp_info[2].v_samp_factor != 1) return FALSE; /* furthermore, it doesn't work if we've scaled the IDCTs differently */ - if (cinfo->comp_info[0].DCT_scaled_size != cinfo->min_DCT_scaled_size || - cinfo->comp_info[1].DCT_scaled_size != cinfo->min_DCT_scaled_size || - cinfo->comp_info[2].DCT_scaled_size != cinfo->min_DCT_scaled_size) + if (cinfo->comp_info[0].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size || + cinfo->comp_info[1].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size || + cinfo->comp_info[2].DCT_h_scaled_size != cinfo->min_DCT_h_scaled_size || + cinfo->comp_info[0].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size || + cinfo->comp_info[1].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size || + cinfo->comp_info[2].DCT_v_scaled_size != cinfo->min_DCT_v_scaled_size) return FALSE; /* ??? also need to test for upsample-time rescaling, when & if supported */ return TRUE; /* by golly, it'll work... */ @@ -82,7 +86,9 @@ use_merged_upsample (j_decompress_ptr cinfo) GLOBAL(void) jpeg_calc_output_dimensions (j_decompress_ptr cinfo) -/* Do computations that are needed before master selection phase */ +/* Do computations that are needed before master selection phase. + * This function is used for full decompression. + */ { #ifdef IDCT_SCALING_SUPPORTED int ci; @@ -93,52 +99,38 @@ jpeg_calc_output_dimensions (j_decompress_ptr cinfo) if (cinfo->global_state != DSTATE_READY) ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state); + /* Compute core output image dimensions and DCT scaling choices. */ + jpeg_core_output_dimensions(cinfo); + #ifdef IDCT_SCALING_SUPPORTED - /* Compute actual output image dimensions and DCT scaling choices. */ - if (cinfo->scale_num * 8 <= cinfo->scale_denom) { - /* Provide 1/8 scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, 8L); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, 8L); - cinfo->min_DCT_scaled_size = 1; - } else if (cinfo->scale_num * 4 <= cinfo->scale_denom) { - /* Provide 1/4 scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, 4L); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, 4L); - cinfo->min_DCT_scaled_size = 2; - } else if (cinfo->scale_num * 2 <= cinfo->scale_denom) { - /* Provide 1/2 scaling */ - cinfo->output_width = (JDIMENSION) - jdiv_round_up((long) cinfo->image_width, 2L); - cinfo->output_height = (JDIMENSION) - jdiv_round_up((long) cinfo->image_height, 2L); - cinfo->min_DCT_scaled_size = 4; - } else { - /* Provide 1/1 scaling */ - cinfo->output_width = cinfo->image_width; - cinfo->output_height = cinfo->image_height; - cinfo->min_DCT_scaled_size = DCTSIZE; - } /* In selecting the actual DCT scaling for each component, we try to * scale up the chroma components via IDCT scaling rather than upsampling. * This saves time if the upsampler gets to use 1:1 scaling. - * Note this code assumes that the supported DCT scalings are powers of 2. + * Note this code adapts subsampling ratios which are powers of 2. */ for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; ci++, compptr++) { - int ssize = cinfo->min_DCT_scaled_size; - while (ssize < DCTSIZE && - (compptr->h_samp_factor * ssize * 2 <= - cinfo->max_h_samp_factor * cinfo->min_DCT_scaled_size) && - (compptr->v_samp_factor * ssize * 2 <= - cinfo->max_v_samp_factor * cinfo->min_DCT_scaled_size)) { + int ssize = 1; + while (cinfo->min_DCT_h_scaled_size * ssize <= + (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) && + (cinfo->max_h_samp_factor % (compptr->h_samp_factor * ssize * 2)) == 0) { ssize = ssize * 2; } - compptr->DCT_scaled_size = ssize; + compptr->DCT_h_scaled_size = cinfo->min_DCT_h_scaled_size * ssize; + ssize = 1; + while (cinfo->min_DCT_v_scaled_size * ssize <= + (cinfo->do_fancy_upsampling ? DCTSIZE : DCTSIZE / 2) && + (cinfo->max_v_samp_factor % (compptr->v_samp_factor * ssize * 2)) == 0) { + ssize = ssize * 2; + } + compptr->DCT_v_scaled_size = cinfo->min_DCT_v_scaled_size * ssize; + + /* We don't support IDCT ratios larger than 2. */ + if (compptr->DCT_h_scaled_size > compptr->DCT_v_scaled_size * 2) + compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size * 2; + else if (compptr->DCT_v_scaled_size > compptr->DCT_h_scaled_size * 2) + compptr->DCT_v_scaled_size = compptr->DCT_h_scaled_size * 2; } /* Recompute downsampled dimensions of components; @@ -149,23 +141,14 @@ jpeg_calc_output_dimensions (j_decompress_ptr cinfo) /* Size in samples, after IDCT scaling */ compptr->downsampled_width = (JDIMENSION) jdiv_round_up((long) cinfo->image_width * - (long) (compptr->h_samp_factor * compptr->DCT_scaled_size), - (long) (cinfo->max_h_samp_factor * DCTSIZE)); + (long) (compptr->h_samp_factor * compptr->DCT_h_scaled_size), + (long) (cinfo->max_h_samp_factor * cinfo->block_size)); compptr->downsampled_height = (JDIMENSION) jdiv_round_up((long) cinfo->image_height * - (long) (compptr->v_samp_factor * compptr->DCT_scaled_size), - (long) (cinfo->max_v_samp_factor * DCTSIZE)); + (long) (compptr->v_samp_factor * compptr->DCT_v_scaled_size), + (long) (cinfo->max_v_samp_factor * cinfo->block_size)); } -#else /* !IDCT_SCALING_SUPPORTED */ - - /* Hardwire it to "no scaling" */ - cinfo->output_width = cinfo->image_width; - cinfo->output_height = cinfo->image_height; - /* jdinput.c has already initialized DCT_scaled_size to DCTSIZE, - * and has computed unscaled downsampled_width and downsampled_height. - */ - #endif /* IDCT_SCALING_SUPPORTED */ /* Report number of components in selected colorspace. */ @@ -372,17 +355,10 @@ master_selection (j_decompress_ptr cinfo) /* Inverse DCT */ jinit_inverse_dct(cinfo); /* Entropy decoding: either Huffman or arithmetic coding. */ - if (cinfo->arith_code) { - ERREXIT(cinfo, JERR_ARITH_NOTIMPL); - } else { - if (cinfo->progressive_mode) { -#ifdef D_PROGRESSIVE_SUPPORTED - jinit_phuff_decoder(cinfo); -#else - ERREXIT(cinfo, JERR_NOT_COMPILED); -#endif - } else - jinit_huff_decoder(cinfo); + if (cinfo->arith_code) + jinit_arith_decoder(cinfo); + else { + jinit_huff_decoder(cinfo); } /* Initialize principal buffer controllers. */ diff --git a/code/jpeg-6b/jdmerge.c b/code/jpeg-8c/jdmerge.c similarity index 100% rename from code/jpeg-6b/jdmerge.c rename to code/jpeg-8c/jdmerge.c diff --git a/code/jpeg-6b/jdpostct.c b/code/jpeg-8c/jdpostct.c similarity index 100% rename from code/jpeg-6b/jdpostct.c rename to code/jpeg-8c/jdpostct.c diff --git a/code/jpeg-6b/jdsample.c b/code/jpeg-8c/jdsample.c similarity index 68% rename from code/jpeg-6b/jdsample.c rename to code/jpeg-8c/jdsample.c index 80ffefb2..7bc8885b 100644 --- a/code/jpeg-6b/jdsample.c +++ b/code/jpeg-8c/jdsample.c @@ -2,13 +2,14 @@ * jdsample.c * * Copyright (C) 1991-1996, Thomas G. Lane. + * Modified 2002-2008 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * * This file contains upsampling routines. * * Upsampling input data is counted in "row groups". A row group - * is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size) + * is defined to be (v_samp_factor * DCT_v_scaled_size / min_DCT_v_scaled_size) * sample rows of each component. Upsampling will normally produce * max_v_samp_factor pixel rows from each row group (but this could vary * if the upsampler is applying a scale factor of its own). @@ -237,11 +238,11 @@ h2v1_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, register JSAMPROW inptr, outptr; register JSAMPLE invalue; JSAMPROW outend; - int inrow; + int outrow; - for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) { - inptr = input_data[inrow]; - outptr = output_data[inrow]; + for (outrow = 0; outrow < cinfo->max_v_samp_factor; outrow++) { + inptr = input_data[outrow]; + outptr = output_data[outrow]; outend = outptr + cinfo->output_width; while (outptr < outend) { invalue = *inptr++; /* don't need GETJSAMPLE() here */ @@ -285,112 +286,6 @@ h2v2_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, } -/* - * Fancy processing for the common case of 2:1 horizontal and 1:1 vertical. - * - * The upsampling algorithm is linear interpolation between pixel centers, - * also known as a "triangle filter". This is a good compromise between - * speed and visual quality. The centers of the output pixels are 1/4 and 3/4 - * of the way between input pixel centers. - * - * A note about the "bias" calculations: when rounding fractional values to - * integer, we do not want to always round 0.5 up to the next integer. - * If we did that, we'd introduce a noticeable bias towards larger values. - * Instead, this code is arranged so that 0.5 will be rounded up or down at - * alternate pixel locations (a simple ordered dither pattern). - */ - -METHODDEF(void) -h2v1_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) -{ - JSAMPARRAY output_data = *output_data_ptr; - register JSAMPROW inptr, outptr; - register int invalue; - register JDIMENSION colctr; - int inrow; - - for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) { - inptr = input_data[inrow]; - outptr = output_data[inrow]; - /* Special case for first column */ - invalue = GETJSAMPLE(*inptr++); - *outptr++ = (JSAMPLE) invalue; - *outptr++ = (JSAMPLE) ((invalue * 3 + GETJSAMPLE(*inptr) + 2) >> 2); - - for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) { - /* General case: 3/4 * nearer pixel + 1/4 * further pixel */ - invalue = GETJSAMPLE(*inptr++) * 3; - *outptr++ = (JSAMPLE) ((invalue + GETJSAMPLE(inptr[-2]) + 1) >> 2); - *outptr++ = (JSAMPLE) ((invalue + GETJSAMPLE(*inptr) + 2) >> 2); - } - - /* Special case for last column */ - invalue = GETJSAMPLE(*inptr); - *outptr++ = (JSAMPLE) ((invalue * 3 + GETJSAMPLE(inptr[-1]) + 1) >> 2); - *outptr++ = (JSAMPLE) invalue; - } -} - - -/* - * Fancy processing for the common case of 2:1 horizontal and 2:1 vertical. - * Again a triangle filter; see comments for h2v1 case, above. - * - * It is OK for us to reference the adjacent input rows because we demanded - * context from the main buffer controller (see initialization code). - */ - -METHODDEF(void) -h2v2_fancy_upsample (j_decompress_ptr cinfo, jpeg_component_info * compptr, - JSAMPARRAY input_data, JSAMPARRAY * output_data_ptr) -{ - JSAMPARRAY output_data = *output_data_ptr; - register JSAMPROW inptr0, inptr1, outptr; -#if BITS_IN_JSAMPLE == 8 - register int thiscolsum, lastcolsum, nextcolsum; -#else - register INT32 thiscolsum, lastcolsum, nextcolsum; -#endif - register JDIMENSION colctr; - int inrow, outrow, v; - - inrow = outrow = 0; - while (outrow < cinfo->max_v_samp_factor) { - for (v = 0; v < 2; v++) { - /* inptr0 points to nearest input row, inptr1 points to next nearest */ - inptr0 = input_data[inrow]; - if (v == 0) /* next nearest is row above */ - inptr1 = input_data[inrow-1]; - else /* next nearest is row below */ - inptr1 = input_data[inrow+1]; - outptr = output_data[outrow++]; - - /* Special case for first column */ - thiscolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); - nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); - *outptr++ = (JSAMPLE) ((thiscolsum * 4 + 8) >> 4); - *outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4); - lastcolsum = thiscolsum; thiscolsum = nextcolsum; - - for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) { - /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */ - /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */ - nextcolsum = GETJSAMPLE(*inptr0++) * 3 + GETJSAMPLE(*inptr1++); - *outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4); - *outptr++ = (JSAMPLE) ((thiscolsum * 3 + nextcolsum + 7) >> 4); - lastcolsum = thiscolsum; thiscolsum = nextcolsum; - } - - /* Special case for last column */ - *outptr++ = (JSAMPLE) ((thiscolsum * 3 + lastcolsum + 8) >> 4); - *outptr++ = (JSAMPLE) ((thiscolsum * 4 + 7) >> 4); - } - inrow++; - } -} - - /* * Module initialization routine for upsampling. */ @@ -401,7 +296,7 @@ jinit_upsampler (j_decompress_ptr cinfo) my_upsample_ptr upsample; int ci; jpeg_component_info * compptr; - boolean need_buffer, do_fancy; + boolean need_buffer; int h_in_group, v_in_group, h_out_group, v_out_group; upsample = (my_upsample_ptr) @@ -415,11 +310,6 @@ jinit_upsampler (j_decompress_ptr cinfo) if (cinfo->CCIR601_sampling) /* this isn't supported */ ERREXIT(cinfo, JERR_CCIR601_NOTIMPL); - /* jdmainct.c doesn't support context rows when min_DCT_scaled_size = 1, - * so don't ask for it. - */ - do_fancy = cinfo->do_fancy_upsampling && cinfo->min_DCT_scaled_size > 1; - /* Verify we can handle the sampling factors, select per-component methods, * and create storage as needed. */ @@ -428,10 +318,10 @@ jinit_upsampler (j_decompress_ptr cinfo) /* Compute size of an "input group" after IDCT scaling. This many samples * are to be converted to max_h_samp_factor * max_v_samp_factor pixels. */ - h_in_group = (compptr->h_samp_factor * compptr->DCT_scaled_size) / - cinfo->min_DCT_scaled_size; - v_in_group = (compptr->v_samp_factor * compptr->DCT_scaled_size) / - cinfo->min_DCT_scaled_size; + h_in_group = (compptr->h_samp_factor * compptr->DCT_h_scaled_size) / + cinfo->min_DCT_h_scaled_size; + v_in_group = (compptr->v_samp_factor * compptr->DCT_v_scaled_size) / + cinfo->min_DCT_v_scaled_size; h_out_group = cinfo->max_h_samp_factor; v_out_group = cinfo->max_v_samp_factor; upsample->rowgroup_height[ci] = v_in_group; /* save for use later */ @@ -446,19 +336,12 @@ jinit_upsampler (j_decompress_ptr cinfo) need_buffer = FALSE; } else if (h_in_group * 2 == h_out_group && v_in_group == v_out_group) { - /* Special cases for 2h1v upsampling */ - if (do_fancy && compptr->downsampled_width > 2) - upsample->methods[ci] = h2v1_fancy_upsample; - else - upsample->methods[ci] = h2v1_upsample; + /* Special case for 2h1v upsampling */ + upsample->methods[ci] = h2v1_upsample; } else if (h_in_group * 2 == h_out_group && v_in_group * 2 == v_out_group) { - /* Special cases for 2h2v upsampling */ - if (do_fancy && compptr->downsampled_width > 2) { - upsample->methods[ci] = h2v2_fancy_upsample; - upsample->pub.need_context_rows = TRUE; - } else - upsample->methods[ci] = h2v2_upsample; + /* Special case for 2h2v upsampling */ + upsample->methods[ci] = h2v2_upsample; } else if ((h_out_group % h_in_group) == 0 && (v_out_group % v_in_group) == 0) { /* Generic integral-factors upsampling method */ diff --git a/code/jpeg-6b/jdtrans.c b/code/jpeg-8c/jdtrans.c similarity index 94% rename from code/jpeg-6b/jdtrans.c rename to code/jpeg-8c/jdtrans.c index 6c0ab715..22dd47fb 100644 --- a/code/jpeg-6b/jdtrans.c +++ b/code/jpeg-8c/jdtrans.c @@ -2,6 +2,7 @@ * jdtrans.c * * Copyright (C) 1995-1997, Thomas G. Lane. + * Modified 2000-2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -99,18 +100,14 @@ transdecode_master_selection (j_decompress_ptr cinfo) /* This is effectively a buffered-image operation. */ cinfo->buffered_image = TRUE; + /* Compute output image dimensions and related values. */ + jpeg_core_output_dimensions(cinfo); + /* Entropy decoding: either Huffman or arithmetic coding. */ - if (cinfo->arith_code) { - ERREXIT(cinfo, JERR_ARITH_NOTIMPL); - } else { - if (cinfo->progressive_mode) { -#ifdef D_PROGRESSIVE_SUPPORTED - jinit_phuff_decoder(cinfo); -#else - ERREXIT(cinfo, JERR_NOT_COMPILED); -#endif - } else - jinit_huff_decoder(cinfo); + if (cinfo->arith_code) + jinit_arith_decoder(cinfo); + else { + jinit_huff_decoder(cinfo); } /* Always get a full-image coefficient buffer. */ diff --git a/code/jpeg-6b/jerror.c b/code/jpeg-8c/jerror.c similarity index 97% rename from code/jpeg-6b/jerror.c rename to code/jpeg-8c/jerror.c index 9df35d69..3da7be86 100644 --- a/code/jpeg-6b/jerror.c +++ b/code/jpeg-8c/jerror.c @@ -18,8 +18,6 @@ * These routines are used by both the compression and decompression code. */ -#include "../renderer/tr_local.h" - /* this is not a core library module, so it doesn't define JPEG_INTERNALS */ #include "jinclude.h" #include "jpeglib.h" @@ -71,15 +69,13 @@ const char * const jpeg_std_message_table[] = { METHODDEF(void) error_exit (j_common_ptr cinfo) { - char buffer[JMSG_LENGTH_MAX]; - - /* Create the message */ - (*cinfo->err->format_message) (cinfo, buffer); + /* Always display the message */ + (*cinfo->err->output_message) (cinfo); /* Let the memory manager delete any temp files before we die */ jpeg_destroy(cinfo); - ri.Error( ERR_FATAL, "%s\n", buffer ); + exit(EXIT_FAILURE); } @@ -112,7 +108,7 @@ output_message (j_common_ptr cinfo) MB_OK | MB_ICONERROR); #else /* Send it to stderr, adding a newline */ - ri.Printf(PRINT_ALL, "%s\n", buffer); + fprintf(stderr, "%s\n", buffer); #endif } diff --git a/code/jpeg-6b/jerror.h b/code/jpeg-8c/jerror.h similarity index 94% rename from code/jpeg-6b/jerror.h rename to code/jpeg-8c/jerror.h index fc2fffea..1cfb2b19 100644 --- a/code/jpeg-6b/jerror.h +++ b/code/jpeg-8c/jerror.h @@ -2,6 +2,7 @@ * jerror.h * * Copyright (C) 1994-1997, Thomas G. Lane. + * Modified 1997-2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -39,14 +40,15 @@ typedef enum { JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */ /* For maintenance convenience, list is alphabetical by message code name */ -JMESSAGE(JERR_ARITH_NOTIMPL, - "Sorry, there are legal restrictions on arithmetic coding") JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix") JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix") JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode") JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS") +JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request") JMESSAGE(JERR_BAD_DCT_COEF, "DCT coefficient out of range") -JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported") +JMESSAGE(JERR_BAD_DCTSIZE, "DCT scaled block size %dx%d not supported") +JMESSAGE(JERR_BAD_DROP_SAMPLING, + "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c") JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition") JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace") JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace") @@ -93,6 +95,7 @@ JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data") JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change") JMESSAGE(JERR_NOTIMPL, "Not implemented yet") JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time") +JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined") JMESSAGE(JERR_NO_BACKING_STORE, "Backing store not supported") JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined") JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image") @@ -170,6 +173,7 @@ JMESSAGE(JTRC_UNKNOWN_IDS, JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u") JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u") JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d") +JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code") JMESSAGE(JWRN_BOGUS_PROGRESSION, "Inconsistent progression sequence for component %d coefficient %d") JMESSAGE(JWRN_EXTRANEOUS_DATA, @@ -227,6 +231,15 @@ JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines") (cinfo)->err->msg_parm.i[2] = (p3), \ (cinfo)->err->msg_parm.i[3] = (p4), \ (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo))) +#define ERREXIT6(cinfo,code,p1,p2,p3,p4,p5,p6) \ + ((cinfo)->err->msg_code = (code), \ + (cinfo)->err->msg_parm.i[0] = (p1), \ + (cinfo)->err->msg_parm.i[1] = (p2), \ + (cinfo)->err->msg_parm.i[2] = (p3), \ + (cinfo)->err->msg_parm.i[3] = (p4), \ + (cinfo)->err->msg_parm.i[4] = (p5), \ + (cinfo)->err->msg_parm.i[5] = (p6), \ + (*(cinfo)->err->error_exit) ((j_common_ptr) (cinfo))) #define ERREXITS(cinfo,code,str) \ ((cinfo)->err->msg_code = (code), \ strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \ diff --git a/code/jpeg-6b/jfdctflt.c b/code/jpeg-8c/jfdctflt.c similarity index 83% rename from code/jpeg-6b/jfdctflt.c rename to code/jpeg-8c/jfdctflt.c index 79d7a007..74d0d862 100644 --- a/code/jpeg-6b/jfdctflt.c +++ b/code/jpeg-8c/jfdctflt.c @@ -2,6 +2,7 @@ * jfdctflt.c * * Copyright (C) 1994-1996, Thomas G. Lane. + * Modified 2003-2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -56,41 +57,46 @@ */ GLOBAL(void) -jpeg_fdct_float (FAST_FLOAT * data) +jpeg_fdct_float (FAST_FLOAT * data, JSAMPARRAY sample_data, JDIMENSION start_col) { FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; FAST_FLOAT tmp10, tmp11, tmp12, tmp13; FAST_FLOAT z1, z2, z3, z4, z5, z11, z13; FAST_FLOAT *dataptr; + JSAMPROW elemptr; int ctr; /* Pass 1: process rows. */ dataptr = data; - for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { - tmp0 = dataptr[0] + dataptr[7]; - tmp7 = dataptr[0] - dataptr[7]; - tmp1 = dataptr[1] + dataptr[6]; - tmp6 = dataptr[1] - dataptr[6]; - tmp2 = dataptr[2] + dataptr[5]; - tmp5 = dataptr[2] - dataptr[5]; - tmp3 = dataptr[3] + dataptr[4]; - tmp4 = dataptr[3] - dataptr[4]; - + for (ctr = 0; ctr < DCTSIZE; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Load data into workspace */ + tmp0 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7])); + tmp7 = (FAST_FLOAT) (GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7])); + tmp1 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6])); + tmp6 = (FAST_FLOAT) (GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6])); + tmp2 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5])); + tmp5 = (FAST_FLOAT) (GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5])); + tmp3 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4])); + tmp4 = (FAST_FLOAT) (GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4])); + /* Even part */ - + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - - dataptr[0] = tmp10 + tmp11; /* phase 3 */ + + /* Apply unsigned->signed conversion */ + dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */ dataptr[4] = tmp10 - tmp11; - + z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */ dataptr[2] = tmp13 + z1; /* phase 5 */ dataptr[6] = tmp13 - z1; - + /* Odd part */ tmp10 = tmp4 + tmp5; /* phase 2 */ @@ -126,21 +132,21 @@ jpeg_fdct_float (FAST_FLOAT * data) tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; - + /* Even part */ - + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ dataptr[DCTSIZE*4] = tmp10 - tmp11; - + z1 = (tmp12 + tmp13) * ((FAST_FLOAT) 0.707106781); /* c4 */ dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ dataptr[DCTSIZE*6] = tmp13 - z1; - + /* Odd part */ tmp10 = tmp4 + tmp5; /* phase 2 */ diff --git a/code/jpeg-6b/jfdctfst.c b/code/jpeg-8c/jfdctfst.c similarity index 89% rename from code/jpeg-6b/jfdctfst.c rename to code/jpeg-8c/jfdctfst.c index ccb378a3..8cad5f22 100644 --- a/code/jpeg-6b/jfdctfst.c +++ b/code/jpeg-8c/jfdctfst.c @@ -2,6 +2,7 @@ * jfdctfst.c * * Copyright (C) 1994-1996, Thomas G. Lane. + * Modified 2003-2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -111,42 +112,47 @@ */ GLOBAL(void) -jpeg_fdct_ifast (DCTELEM * data) +jpeg_fdct_ifast (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) { DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; DCTELEM tmp10, tmp11, tmp12, tmp13; DCTELEM z1, z2, z3, z4, z5, z11, z13; DCTELEM *dataptr; + JSAMPROW elemptr; int ctr; SHIFT_TEMPS /* Pass 1: process rows. */ dataptr = data; - for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { - tmp0 = dataptr[0] + dataptr[7]; - tmp7 = dataptr[0] - dataptr[7]; - tmp1 = dataptr[1] + dataptr[6]; - tmp6 = dataptr[1] - dataptr[6]; - tmp2 = dataptr[2] + dataptr[5]; - tmp5 = dataptr[2] - dataptr[5]; - tmp3 = dataptr[3] + dataptr[4]; - tmp4 = dataptr[3] - dataptr[4]; - + for (ctr = 0; ctr < DCTSIZE; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Load data into workspace */ + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); + tmp7 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); + tmp6 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); + tmp5 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]); + tmp4 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); + /* Even part */ - + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - - dataptr[0] = tmp10 + tmp11; /* phase 3 */ + + /* Apply unsigned->signed conversion */ + dataptr[0] = tmp10 + tmp11 - 8 * CENTERJSAMPLE; /* phase 3 */ dataptr[4] = tmp10 - tmp11; - + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ dataptr[2] = tmp13 + z1; /* phase 5 */ dataptr[6] = tmp13 - z1; - + /* Odd part */ tmp10 = tmp4 + tmp5; /* phase 2 */ @@ -182,21 +188,21 @@ jpeg_fdct_ifast (DCTELEM * data) tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; - + /* Even part */ - + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ dataptr[DCTSIZE*4] = tmp10 - tmp11; - + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ dataptr[DCTSIZE*6] = tmp13 - z1; - + /* Odd part */ tmp10 = tmp4 + tmp5; /* phase 2 */ diff --git a/code/jpeg-8c/jfdctint.c b/code/jpeg-8c/jfdctint.c new file mode 100644 index 00000000..1dde58c4 --- /dev/null +++ b/code/jpeg-8c/jfdctint.c @@ -0,0 +1,4348 @@ +/* + * jfdctint.c + * + * Copyright (C) 1991-1996, Thomas G. Lane. + * Modification developed 2003-2009 by Guido Vollbeding. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains a slow-but-accurate integer implementation of the + * forward DCT (Discrete Cosine Transform). + * + * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT + * on each column. Direct algorithms are also available, but they are + * much more complex and seem not to be any faster when reduced to code. + * + * This implementation is based on an algorithm described in + * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT + * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, + * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. + * The primary algorithm described there uses 11 multiplies and 29 adds. + * We use their alternate method with 12 multiplies and 32 adds. + * The advantage of this method is that no data path contains more than one + * multiplication; this allows a very simple and accurate implementation in + * scaled fixed-point arithmetic, with a minimal number of shifts. + * + * We also provide FDCT routines with various input sample block sizes for + * direct resolution reduction or enlargement and for direct resolving the + * common 2x1 and 1x2 subsampling cases without additional resampling: NxN + * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 output DCT block. + * + * For N<8 we fill the remaining block coefficients with zero. + * For N>8 we apply a partial N-point FDCT on the input samples, computing + * just the lower 8 frequency coefficients and discarding the rest. + * + * We must scale the output coefficients of the N-point FDCT appropriately + * to the standard 8-point FDCT level by 8/N per 1-D pass. This scaling + * is folded into the constant multipliers (pass 2) and/or final/initial + * shifting. + * + * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases + * since there would be too many additional constants to pre-calculate. + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" +#include "jdct.h" /* Private declarations for DCT subsystem */ + +#ifdef DCT_ISLOW_SUPPORTED + + +/* + * This module is specialized to the case DCTSIZE = 8. + */ + +#if DCTSIZE != 8 + Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */ +#endif + + +/* + * The poop on this scaling stuff is as follows: + * + * Each 1-D DCT step produces outputs which are a factor of sqrt(N) + * larger than the true DCT outputs. The final outputs are therefore + * a factor of N larger than desired; since N=8 this can be cured by + * a simple right shift at the end of the algorithm. The advantage of + * this arrangement is that we save two multiplications per 1-D DCT, + * because the y0 and y4 outputs need not be divided by sqrt(N). + * In the IJG code, this factor of 8 is removed by the quantization step + * (in jcdctmgr.c), NOT in this module. + * + * We have to do addition and subtraction of the integer inputs, which + * is no problem, and multiplication by fractional constants, which is + * a problem to do in integer arithmetic. We multiply all the constants + * by CONST_SCALE and convert them to integer constants (thus retaining + * CONST_BITS bits of precision in the constants). After doing a + * multiplication we have to divide the product by CONST_SCALE, with proper + * rounding, to produce the correct output. This division can be done + * cheaply as a right shift of CONST_BITS bits. We postpone shifting + * as long as possible so that partial sums can be added together with + * full fractional precision. + * + * The outputs of the first pass are scaled up by PASS1_BITS bits so that + * they are represented to better-than-integral precision. These outputs + * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word + * with the recommended scaling. (For 12-bit sample data, the intermediate + * array is INT32 anyway.) + * + * To avoid overflow of the 32-bit intermediate results in pass 2, we must + * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis + * shows that the values given below are the most effective. + */ + +#if BITS_IN_JSAMPLE == 8 +#define CONST_BITS 13 +#define PASS1_BITS 2 +#else +#define CONST_BITS 13 +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#endif + +/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus + * causing a lot of useless floating-point operations at run time. + * To get around this we use the following pre-calculated constants. + * If you change CONST_BITS you may want to add appropriate values. + * (With a reasonable C compiler, you can just rely on the FIX() macro...) + */ + +#if CONST_BITS == 13 +#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ +#else +#define FIX_0_298631336 FIX(0.298631336) +#define FIX_0_390180644 FIX(0.390180644) +#define FIX_0_541196100 FIX(0.541196100) +#define FIX_0_765366865 FIX(0.765366865) +#define FIX_0_899976223 FIX(0.899976223) +#define FIX_1_175875602 FIX(1.175875602) +#define FIX_1_501321110 FIX(1.501321110) +#define FIX_1_847759065 FIX(1.847759065) +#define FIX_1_961570560 FIX(1.961570560) +#define FIX_2_053119869 FIX(2.053119869) +#define FIX_2_562915447 FIX(2.562915447) +#define FIX_3_072711026 FIX(3.072711026) +#endif + + +/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. + * For 8-bit samples with the recommended scaling, all the variable + * and constant values involved are no more than 16 bits wide, so a + * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. + * For 12-bit samples, a full 32-bit multiplication will be needed. + */ + +#if BITS_IN_JSAMPLE == 8 +#define MULTIPLY(var,const) MULTIPLY16C16(var,const) +#else +#define MULTIPLY(var,const) ((var) * (const)) +#endif + + +/* + * Perform the forward DCT on one block of samples. + */ + +GLOBAL(void) +jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3; + INT32 tmp10, tmp11, tmp12, tmp13; + INT32 z1; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + dataptr = data; + for (ctr = 0; ctr < DCTSIZE; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part per LL&M figure 1 --- note that published figure is faulty; + * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]); + + tmp10 = tmp0 + tmp3; + tmp12 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp13 = tmp1 - tmp2; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); + tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS); + dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), + CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), + CONST_BITS-PASS1_BITS); + + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). + * cK represents sqrt(2) * cos(K*pi/16). + * i0..i3 in the paper are tmp0..tmp3 here. + */ + + tmp10 = tmp0 + tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp0 + tmp2; + tmp13 = tmp1 + tmp3; + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + + tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ + tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ + tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ + tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ + tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ + tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ + + tmp12 += z1; + tmp13 += z1; + + dataptr[1] = (DCTELEM) + RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) + RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) + RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS); + dataptr[7] = (DCTELEM) + RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part per LL&M figure 1 --- note that published figure is faulty; + * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + */ + + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; + + /* Add fudge factor here for final descale. */ + tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1)); + tmp12 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp13 = tmp1 - tmp2; + + tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; + + dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); + dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS+PASS1_BITS-1); + dataptr[DCTSIZE*2] = (DCTELEM) + RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*6] = (DCTELEM) + RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS); + + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). + * cK represents sqrt(2) * cos(K*pi/16). + * i0..i3 in the paper are tmp0..tmp3 here. + */ + + tmp10 = tmp0 + tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp0 + tmp2; + tmp13 = tmp1 + tmp3; + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS+PASS1_BITS-1); + + tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ + tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ + tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ + tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ + tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ + tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ + + tmp12 += z1; + tmp13 += z1; + + dataptr[DCTSIZE*1] = (DCTELEM) + RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) + RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*5] = (DCTELEM) + RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*7] = (DCTELEM) + RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + +#ifdef DCT_SCALING_SUPPORTED + + +/* + * Perform the forward DCT on a 7x7 sample block. + */ + +GLOBAL(void) +jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3; + INT32 tmp10, tmp11, tmp12; + INT32 z1, z2, z3; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* cK represents sqrt(2) * cos(K*pi/14). */ + + dataptr = data; + for (ctr = 0; ctr < 7; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]); + tmp3 = GETJSAMPLE(elemptr[3]); + + tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]); + tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]); + tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]); + + z1 = tmp0 + tmp2; + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS); + tmp3 += tmp3; + z1 -= tmp3; + z1 -= tmp3; + z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */ + z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */ + z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */ + dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS); + z1 -= z2; + z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */ + dataptr[4] = (DCTELEM) + DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */ + CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS); + + /* Odd part */ + + tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */ + tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */ + tmp0 = tmp1 - tmp2; + tmp1 += tmp2; + tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */ + tmp1 += tmp2; + tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */ + tmp0 += tmp3; + tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */ + + dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by (8/7)**2 = 64/49, which we fold + * into the constant multipliers: + * cK now represents sqrt(2) * cos(K*pi/14) * 64/49. + */ + + dataptr = data; + for (ctr = 0; ctr < 7; ctr++) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4]; + tmp3 = dataptr[DCTSIZE*3]; + + tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6]; + tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5]; + tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4]; + + z1 = tmp0 + tmp2; + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */ + CONST_BITS+PASS1_BITS); + tmp3 += tmp3; + z1 -= tmp3; + z1 -= tmp3; + z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */ + z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */ + z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */ + dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS); + z1 -= z2; + z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */ + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS); + + /* Odd part */ + + tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */ + tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */ + tmp0 = tmp1 - tmp2; + tmp1 += tmp2; + tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */ + tmp1 += tmp2; + tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */ + tmp0 += tmp3; + tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */ + + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 6x6 sample block. + */ + +GLOBAL(void) +jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2; + INT32 tmp10, tmp11, tmp12; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* cK represents sqrt(2) * cos(K*pi/12). */ + + dataptr = data; + for (ctr = 0; ctr < 6; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]); + tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]); + + tmp10 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS); + dataptr[2] = (DCTELEM) + DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ + CONST_BITS-PASS1_BITS); + dataptr[4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */ + CONST_BITS-PASS1_BITS); + + /* Odd part */ + + tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */ + CONST_BITS-PASS1_BITS); + + dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS)); + dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS); + dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS)); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by (8/6)**2 = 16/9, which we fold + * into the constant multipliers: + * cK now represents sqrt(2) * cos(K*pi/12) * 16/9. + */ + + dataptr = data; + for (ctr = 0; ctr < 6; ctr++) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5]; + tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; + + tmp10 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + + tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5]; + tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4]; + tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */ + CONST_BITS+PASS1_BITS); + + /* Odd part */ + + tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */ + + dataptr[DCTSIZE*1] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) + DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*5] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 5x5 sample block. + */ + +GLOBAL(void) +jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2; + INT32 tmp10, tmp11; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* We scale the results further by 2 as part of output adaption */ + /* scaling for different DCT size. */ + /* cK represents sqrt(2) * cos(K*pi/10). */ + + dataptr = data; + for (ctr = 0; ctr < 5; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]); + tmp2 = GETJSAMPLE(elemptr[2]); + + tmp10 = tmp0 + tmp1; + tmp11 = tmp0 - tmp1; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1)); + tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */ + tmp10 -= tmp2 << 2; + tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */ + dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1); + dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1); + + /* Odd part */ + + tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */ + + dataptr[1] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */ + CONST_BITS-PASS1_BITS-1); + dataptr[3] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */ + CONST_BITS-PASS1_BITS-1); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by (8/5)**2 = 64/25, which we partially + * fold into the constant multipliers (other part was done in pass 1): + * cK now represents sqrt(2) * cos(K*pi/10) * 32/25. + */ + + dataptr = data; + for (ctr = 0; ctr < 5; ctr++) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3]; + tmp2 = dataptr[DCTSIZE*2]; + + tmp10 = tmp0 + tmp1; + tmp11 = tmp0 - tmp1; + + tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4]; + tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */ + CONST_BITS+PASS1_BITS); + tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */ + tmp10 -= tmp2 << 2; + tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */ + dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS); + + /* Odd part */ + + tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */ + + dataptr[DCTSIZE*1] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */ + CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 4x4 sample block. + */ + +GLOBAL(void) +jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1; + INT32 tmp10, tmp11; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */ + /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ + + dataptr = data; + for (ctr = 0; ctr < 4; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); + + tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); + tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2)); + dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2)); + + /* Odd part */ + + tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-3); + + dataptr[1] = (DCTELEM) + RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ + CONST_BITS-PASS1_BITS-2); + dataptr[3] = (DCTELEM) + RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ + CONST_BITS-PASS1_BITS-2); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + */ + + dataptr = data; + for (ctr = 0; ctr < 4; ctr++) { + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1)); + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2]; + + tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3]; + tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2]; + + dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS); + dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS); + + /* Odd part */ + + tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS+PASS1_BITS-1); + + dataptr[DCTSIZE*1] = (DCTELEM) + RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) + RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ + CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 3x3 sample block. + */ + +GLOBAL(void) +jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* We scale the results further by 2**2 as part of output adaption */ + /* scaling for different DCT size. */ + /* cK represents sqrt(2) * cos(K*pi/6). */ + + dataptr = data; + for (ctr = 0; ctr < 3; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]); + tmp1 = GETJSAMPLE(elemptr[1]); + + tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2)); + dataptr[2] = (DCTELEM) + DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */ + CONST_BITS-PASS1_BITS-2); + + /* Odd part */ + + dataptr[1] = (DCTELEM) + DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */ + CONST_BITS-PASS1_BITS-2); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by (8/3)**2 = 64/9, which we partially + * fold into the constant multipliers (other part was done in pass 1): + * cK now represents sqrt(2) * cos(K*pi/6) * 16/9. + */ + + dataptr = data; + for (ctr = 0; ctr < 3; ctr++) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2]; + tmp1 = dataptr[DCTSIZE*1]; + + tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */ + CONST_BITS+PASS1_BITS); + + /* Odd part */ + + dataptr[DCTSIZE*1] = (DCTELEM) + DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */ + CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 2x2 sample block. + */ + +GLOBAL(void) +jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3; + JSAMPROW elemptr; + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT. */ + + /* Row 0 */ + elemptr = sample_data[0] + start_col; + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]); + tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]); + + /* Row 1 */ + elemptr = sample_data[1] + start_col; + + tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]); + tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]); + + /* Pass 2: process columns. + * We leave the results scaled up by an overall factor of 8. + * We must also scale the output by (8/2)**2 = 2**4. + */ + + /* Column 0 */ + /* Apply unsigned->signed conversion */ + data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4); + data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp2) << 4); + + /* Column 1 */ + data[DCTSIZE*0+1] = (DCTELEM) ((tmp1 + tmp3) << 4); + data[DCTSIZE*1+1] = (DCTELEM) ((tmp1 - tmp3) << 4); +} + + +/* + * Perform the forward DCT on a 1x1 sample block. + */ + +GLOBAL(void) +jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* We leave the result scaled up by an overall factor of 8. */ + /* We must also scale the output by (8/1)**2 = 2**6. */ + /* Apply unsigned->signed conversion */ + data[0] = (DCTELEM) + ((GETJSAMPLE(sample_data[0][start_col]) - CENTERJSAMPLE) << 6); +} + + +/* + * Perform the forward DCT on a 9x9 sample block. + */ + +GLOBAL(void) +jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4; + INT32 tmp10, tmp11, tmp12, tmp13; + INT32 z1, z2; + DCTELEM workspace[8]; + DCTELEM *dataptr; + DCTELEM *wsptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* we scale the results further by 2 as part of output adaption */ + /* scaling for different DCT size. */ + /* cK represents sqrt(2) * cos(K*pi/18). */ + + dataptr = data; + ctr = 0; + for (;;) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]); + tmp4 = GETJSAMPLE(elemptr[4]); + + tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]); + tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]); + tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]); + tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]); + + z1 = tmp0 + tmp2 + tmp3; + z2 = tmp1 + tmp4; + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1); + dataptr[6] = (DCTELEM) + DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)), /* c6 */ + CONST_BITS-1); + z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049)); /* c2 */ + z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */ + dataptr[2] = (DCTELEM) + DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441)) /* c4 */ + + z1 + z2, CONST_BITS-1); + dataptr[4] = (DCTELEM) + DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608)) /* c8 */ + + z1 - z2, CONST_BITS-1); + + /* Odd part */ + + dataptr[3] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */ + CONST_BITS-1); + + tmp11 = MULTIPLY(tmp11, FIX(1.224744871)); /* c3 */ + tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */ + tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */ + + dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-1); + + tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */ + + dataptr[5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-1); + dataptr[7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-1); + + ctr++; + + if (ctr != DCTSIZE) { + if (ctr == 9) + break; /* Done. */ + dataptr += DCTSIZE; /* advance pointer to next row */ + } else + dataptr = workspace; /* switch pointer to extended workspace */ + } + + /* Pass 2: process columns. + * We leave the results scaled up by an overall factor of 8. + * We must also scale the output by (8/9)**2 = 64/81, which we partially + * fold into the constant multipliers and final/initial shifting: + * cK now represents sqrt(2) * cos(K*pi/18) * 128/81. + */ + + dataptr = data; + wsptr = workspace; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6]; + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5]; + tmp4 = dataptr[DCTSIZE*4]; + + tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0]; + tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7]; + tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6]; + tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5]; + + z1 = tmp0 + tmp2 + tmp3; + z2 = tmp1 + tmp4; + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)), /* 128/81 */ + CONST_BITS+2); + dataptr[DCTSIZE*6] = (DCTELEM) + DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)), /* c6 */ + CONST_BITS+2); + z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287)); /* c2 */ + z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */ + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190)) /* c4 */ + + z1 + z2, CONST_BITS+2); + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096)) /* c8 */ + + z1 - z2, CONST_BITS+2); + + /* Odd part */ + + dataptr[DCTSIZE*3] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */ + CONST_BITS+2); + + tmp11 = MULTIPLY(tmp11, FIX(1.935399303)); /* c3 */ + tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */ + tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */ + + dataptr[DCTSIZE*1] = (DCTELEM) + DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS+2); + + tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */ + + dataptr[DCTSIZE*5] = (DCTELEM) + DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS+2); + dataptr[DCTSIZE*7] = (DCTELEM) + DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS+2); + + dataptr++; /* advance pointer to next column */ + wsptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 10x10 sample block. + */ + +GLOBAL(void) +jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14; + DCTELEM workspace[8*2]; + DCTELEM *dataptr; + DCTELEM *wsptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* we scale the results further by 2 as part of output adaption */ + /* scaling for different DCT size. */ + /* cK represents sqrt(2) * cos(K*pi/20). */ + + dataptr = data; + ctr = 0; + for (;;) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]); + tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]); + tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]); + + tmp10 = tmp0 + tmp4; + tmp13 = tmp0 - tmp4; + tmp11 = tmp1 + tmp3; + tmp14 = tmp1 - tmp3; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]); + tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]); + tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1); + tmp12 += tmp12; + dataptr[4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */ + MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */ + CONST_BITS-1); + tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */ + dataptr[2] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */ + CONST_BITS-1); + dataptr[6] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */ + CONST_BITS-1); + + /* Odd part */ + + tmp10 = tmp0 + tmp4; + tmp11 = tmp1 - tmp3; + dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1); + tmp2 <<= CONST_BITS; + dataptr[1] = (DCTELEM) + DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */ + MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */ + MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */ + MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */ + CONST_BITS-1); + tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */ + MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */ + tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */ + (tmp11 << (CONST_BITS - 1)) - tmp2; + dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-1); + dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-1); + + ctr++; + + if (ctr != DCTSIZE) { + if (ctr == 10) + break; /* Done. */ + dataptr += DCTSIZE; /* advance pointer to next row */ + } else + dataptr = workspace; /* switch pointer to extended workspace */ + } + + /* Pass 2: process columns. + * We leave the results scaled up by an overall factor of 8. + * We must also scale the output by (8/10)**2 = 16/25, which we partially + * fold into the constant multipliers and final/initial shifting: + * cK now represents sqrt(2) * cos(K*pi/20) * 32/25. + */ + + dataptr = data; + wsptr = workspace; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1]; + tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0]; + tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7]; + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6]; + tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; + + tmp10 = tmp0 + tmp4; + tmp13 = tmp0 - tmp4; + tmp11 = tmp1 + tmp3; + tmp14 = tmp1 - tmp3; + + tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1]; + tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0]; + tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7]; + tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6]; + tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */ + CONST_BITS+2); + tmp12 += tmp12; + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */ + MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */ + CONST_BITS+2); + tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */ + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */ + CONST_BITS+2); + dataptr[DCTSIZE*6] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */ + CONST_BITS+2); + + /* Odd part */ + + tmp10 = tmp0 + tmp4; + tmp11 = tmp1 - tmp3; + dataptr[DCTSIZE*5] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */ + CONST_BITS+2); + tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */ + dataptr[DCTSIZE*1] = (DCTELEM) + DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */ + MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */ + MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */ + MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */ + CONST_BITS+2); + tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */ + MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */ + tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */ + MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */ + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+2); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+2); + + dataptr++; /* advance pointer to next column */ + wsptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on an 11x11 sample block. + */ + +GLOBAL(void) +jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14; + INT32 z1, z2, z3; + DCTELEM workspace[8*3]; + DCTELEM *dataptr; + DCTELEM *wsptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* we scale the results further by 2 as part of output adaption */ + /* scaling for different DCT size. */ + /* cK represents sqrt(2) * cos(K*pi/22). */ + + dataptr = data; + ctr = 0; + for (;;) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]); + tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]); + tmp5 = GETJSAMPLE(elemptr[5]); + + tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]); + tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]); + tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]); + tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]); + tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1); + tmp5 += tmp5; + tmp0 -= tmp5; + tmp1 -= tmp5; + tmp2 -= tmp5; + tmp3 -= tmp5; + tmp4 -= tmp5; + z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) + /* c2 */ + MULTIPLY(tmp2 + tmp4, FIX(0.201263574)); /* c10 */ + z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931)); /* c6 */ + z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156)); /* c4 */ + dataptr[2] = (DCTELEM) + DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */ + - MULTIPLY(tmp4, FIX(1.390975730)), /* c4+c10 */ + CONST_BITS-1); + dataptr[4] = (DCTELEM) + DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */ + - MULTIPLY(tmp2, FIX(1.356927976)) /* c2 */ + + MULTIPLY(tmp4, FIX(0.587485545)), /* c8 */ + CONST_BITS-1); + dataptr[6] = (DCTELEM) + DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */ + - MULTIPLY(tmp2, FIX(0.788749120)), /* c8+c10 */ + CONST_BITS-1); + + /* Odd part */ + + tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905)); /* c3 */ + tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298)); /* c5 */ + tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576)); /* c7 */ + tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */ + + MULTIPLY(tmp14, FIX(0.398430003)); /* c9 */ + tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576)); /* -c7 */ + tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907)); /* -c1 */ + tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */ + - MULTIPLY(tmp14, FIX(1.068791298)); /* c5 */ + tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003)); /* c9 */ + tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */ + + MULTIPLY(tmp14, FIX(1.399818907)); /* c1 */ + tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */ + - MULTIPLY(tmp14, FIX(1.286413905)); /* c3 */ + + dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-1); + dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-1); + dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-1); + dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-1); + + ctr++; + + if (ctr != DCTSIZE) { + if (ctr == 11) + break; /* Done. */ + dataptr += DCTSIZE; /* advance pointer to next row */ + } else + dataptr = workspace; /* switch pointer to extended workspace */ + } + + /* Pass 2: process columns. + * We leave the results scaled up by an overall factor of 8. + * We must also scale the output by (8/11)**2 = 64/121, which we partially + * fold into the constant multipliers and final/initial shifting: + * cK now represents sqrt(2) * cos(K*pi/22) * 128/121. + */ + + dataptr = data; + wsptr = workspace; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2]; + tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1]; + tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0]; + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7]; + tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6]; + tmp5 = dataptr[DCTSIZE*5]; + + tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2]; + tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1]; + tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0]; + tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7]; + tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5, + FIX(1.057851240)), /* 128/121 */ + CONST_BITS+2); + tmp5 += tmp5; + tmp0 -= tmp5; + tmp1 -= tmp5; + tmp2 -= tmp5; + tmp3 -= tmp5; + tmp4 -= tmp5; + z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) + /* c2 */ + MULTIPLY(tmp2 + tmp4, FIX(0.212906922)); /* c10 */ + z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713)); /* c6 */ + z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479)); /* c4 */ + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */ + - MULTIPLY(tmp4, FIX(1.471445400)), /* c4+c10 */ + CONST_BITS+2); + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */ + - MULTIPLY(tmp2, FIX(1.435427942)) /* c2 */ + + MULTIPLY(tmp4, FIX(0.621472312)), /* c8 */ + CONST_BITS+2); + dataptr[DCTSIZE*6] = (DCTELEM) + DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */ + - MULTIPLY(tmp2, FIX(0.834379234)), /* c8+c10 */ + CONST_BITS+2); + + /* Odd part */ + + tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544)); /* c3 */ + tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199)); /* c5 */ + tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568)); /* c7 */ + tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */ + + MULTIPLY(tmp14, FIX(0.421479672)); /* c9 */ + tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568)); /* -c7 */ + tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167)); /* -c1 */ + tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */ + - MULTIPLY(tmp14, FIX(1.130622199)); /* c5 */ + tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672)); /* c9 */ + tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */ + + MULTIPLY(tmp14, FIX(1.480800167)); /* c1 */ + tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */ + - MULTIPLY(tmp14, FIX(1.360834544)); /* c3 */ + + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2); + + dataptr++; /* advance pointer to next column */ + wsptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 12x12 sample block. + */ + +GLOBAL(void) +jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + DCTELEM workspace[8*4]; + DCTELEM *dataptr; + DCTELEM *wsptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT. */ + /* cK represents sqrt(2) * cos(K*pi/24). */ + + dataptr = data; + ctr = 0; + for (;;) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]); + tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]); + tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]); + + tmp10 = tmp0 + tmp5; + tmp13 = tmp0 - tmp5; + tmp11 = tmp1 + tmp4; + tmp14 = tmp1 - tmp4; + tmp12 = tmp2 + tmp3; + tmp15 = tmp2 - tmp3; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]); + tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]); + tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]); + tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE); + dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15); + dataptr[4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */ + CONST_BITS); + dataptr[2] = (DCTELEM) + DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */ + CONST_BITS); + + /* Odd part */ + + tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */ + tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */ + tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */ + tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */ + tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */ + tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */ + + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */ + tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */ + tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */ + + MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */ + tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */ + - MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */ + tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */ + - MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */ + + dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS); + dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS); + + ctr++; + + if (ctr != DCTSIZE) { + if (ctr == 12) + break; /* Done. */ + dataptr += DCTSIZE; /* advance pointer to next row */ + } else + dataptr = workspace; /* switch pointer to extended workspace */ + } + + /* Pass 2: process columns. + * We leave the results scaled up by an overall factor of 8. + * We must also scale the output by (8/12)**2 = 4/9, which we partially + * fold into the constant multipliers and final shifting: + * cK now represents sqrt(2) * cos(K*pi/24) * 8/9. + */ + + dataptr = data; + wsptr = workspace; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3]; + tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2]; + tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1]; + tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0]; + tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7]; + tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6]; + + tmp10 = tmp0 + tmp5; + tmp13 = tmp0 - tmp5; + tmp11 = tmp1 + tmp4; + tmp14 = tmp1 - tmp4; + tmp12 = tmp2 + tmp3; + tmp15 = tmp2 - tmp3; + + tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3]; + tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2]; + tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1]; + tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0]; + tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7]; + tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */ + CONST_BITS+1); + dataptr[DCTSIZE*6] = (DCTELEM) + DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */ + CONST_BITS+1); + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */ + CONST_BITS+1); + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */ + MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */ + CONST_BITS+1); + + /* Odd part */ + + tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */ + tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */ + tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */ + tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */ + tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */ + tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */ + + MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */ + tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */ + tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */ + + MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */ + tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */ + - MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */ + tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */ + - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */ + + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+1); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+1); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+1); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+1); + + dataptr++; /* advance pointer to next column */ + wsptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 13x13 sample block. + */ + +GLOBAL(void) +jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + INT32 z1, z2; + DCTELEM workspace[8*5]; + DCTELEM *dataptr; + DCTELEM *wsptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT. */ + /* cK represents sqrt(2) * cos(K*pi/26). */ + + dataptr = data; + ctr = 0; + for (;;) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]); + tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]); + tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]); + tmp6 = GETJSAMPLE(elemptr[6]); + + tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]); + tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]); + tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]); + tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]); + tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]); + tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE); + tmp6 += tmp6; + tmp0 -= tmp6; + tmp1 -= tmp6; + tmp2 -= tmp6; + tmp3 -= tmp6; + tmp4 -= tmp6; + tmp5 -= tmp6; + dataptr[2] = (DCTELEM) + DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) + /* c2 */ + MULTIPLY(tmp1, FIX(1.058554052)) + /* c6 */ + MULTIPLY(tmp2, FIX(0.501487041)) - /* c10 */ + MULTIPLY(tmp3, FIX(0.170464608)) - /* c12 */ + MULTIPLY(tmp4, FIX(0.803364869)) - /* c8 */ + MULTIPLY(tmp5, FIX(1.252223920)), /* c4 */ + CONST_BITS); + z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */ + MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */ + MULTIPLY(tmp1 - tmp5, FIX(0.316450131)); /* (c8-c12)/2 */ + z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */ + MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */ + MULTIPLY(tmp1 + tmp5, FIX(0.486914739)); /* (c8+c12)/2 */ + + dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS); + dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS); + + /* Odd part */ + + tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651)); /* c3 */ + tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945)); /* c5 */ + tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) + /* c7 */ + MULTIPLY(tmp14 + tmp15, FIX(0.338443458)); /* c11 */ + tmp0 = tmp1 + tmp2 + tmp3 - + MULTIPLY(tmp10, FIX(2.020082300)) + /* c3+c5+c7-c1 */ + MULTIPLY(tmp14, FIX(0.318774355)); /* c9-c11 */ + tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) - /* c7 */ + MULTIPLY(tmp11 + tmp12, FIX(0.338443458)); /* c11 */ + tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */ + tmp1 += tmp4 + tmp5 + + MULTIPLY(tmp11, FIX(0.837223564)) - /* c5+c9+c11-c3 */ + MULTIPLY(tmp14, FIX(2.341699410)); /* c1+c7 */ + tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */ + tmp2 += tmp4 + tmp6 - + MULTIPLY(tmp12, FIX(1.572116027)) + /* c1+c5-c9-c11 */ + MULTIPLY(tmp15, FIX(2.260109708)); /* c3+c7 */ + tmp3 += tmp5 + tmp6 + + MULTIPLY(tmp13, FIX(2.205608352)) - /* c3+c5+c9-c7 */ + MULTIPLY(tmp15, FIX(1.742345811)); /* c1+c11 */ + + dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS); + dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS); + + ctr++; + + if (ctr != DCTSIZE) { + if (ctr == 13) + break; /* Done. */ + dataptr += DCTSIZE; /* advance pointer to next row */ + } else + dataptr = workspace; /* switch pointer to extended workspace */ + } + + /* Pass 2: process columns. + * We leave the results scaled up by an overall factor of 8. + * We must also scale the output by (8/13)**2 = 64/169, which we partially + * fold into the constant multipliers and final shifting: + * cK now represents sqrt(2) * cos(K*pi/26) * 128/169. + */ + + dataptr = data; + wsptr = workspace; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4]; + tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3]; + tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2]; + tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1]; + tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0]; + tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7]; + tmp6 = dataptr[DCTSIZE*6]; + + tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4]; + tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3]; + tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2]; + tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1]; + tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0]; + tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6, + FIX(0.757396450)), /* 128/169 */ + CONST_BITS+1); + tmp6 += tmp6; + tmp0 -= tmp6; + tmp1 -= tmp6; + tmp2 -= tmp6; + tmp3 -= tmp6; + tmp4 -= tmp6; + tmp5 -= tmp6; + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) + /* c2 */ + MULTIPLY(tmp1, FIX(0.801745081)) + /* c6 */ + MULTIPLY(tmp2, FIX(0.379824504)) - /* c10 */ + MULTIPLY(tmp3, FIX(0.129109289)) - /* c12 */ + MULTIPLY(tmp4, FIX(0.608465700)) - /* c8 */ + MULTIPLY(tmp5, FIX(0.948429952)), /* c4 */ + CONST_BITS+1); + z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */ + MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */ + MULTIPLY(tmp1 - tmp5, FIX(0.239678205)); /* (c8-c12)/2 */ + z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */ + MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */ + MULTIPLY(tmp1 + tmp5, FIX(0.368787494)); /* (c8+c12)/2 */ + + dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+1); + dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS+1); + + /* Odd part */ + + tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908)); /* c3 */ + tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751)); /* c5 */ + tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) + /* c7 */ + MULTIPLY(tmp14 + tmp15, FIX(0.256335874)); /* c11 */ + tmp0 = tmp1 + tmp2 + tmp3 - + MULTIPLY(tmp10, FIX(1.530003162)) + /* c3+c5+c7-c1 */ + MULTIPLY(tmp14, FIX(0.241438564)); /* c9-c11 */ + tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) - /* c7 */ + MULTIPLY(tmp11 + tmp12, FIX(0.256335874)); /* c11 */ + tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */ + tmp1 += tmp4 + tmp5 + + MULTIPLY(tmp11, FIX(0.634110155)) - /* c5+c9+c11-c3 */ + MULTIPLY(tmp14, FIX(1.773594819)); /* c1+c7 */ + tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */ + tmp2 += tmp4 + tmp6 - + MULTIPLY(tmp12, FIX(1.190715098)) + /* c1+c5-c9-c11 */ + MULTIPLY(tmp15, FIX(1.711799069)); /* c3+c7 */ + tmp3 += tmp5 + tmp6 + + MULTIPLY(tmp13, FIX(1.670519935)) - /* c3+c5+c9-c7 */ + MULTIPLY(tmp15, FIX(1.319646532)); /* c1+c11 */ + + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+1); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+1); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+1); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+1); + + dataptr++; /* advance pointer to next column */ + wsptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 14x14 sample block. + */ + +GLOBAL(void) +jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; + DCTELEM workspace[8*6]; + DCTELEM *dataptr; + DCTELEM *wsptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT. */ + /* cK represents sqrt(2) * cos(K*pi/28). */ + + dataptr = data; + ctr = 0; + for (;;) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]); + tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]); + tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]); + tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]); + tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]); + + tmp10 = tmp0 + tmp6; + tmp14 = tmp0 - tmp6; + tmp11 = tmp1 + tmp5; + tmp15 = tmp1 - tmp5; + tmp12 = tmp2 + tmp4; + tmp16 = tmp2 - tmp4; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]); + tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]); + tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]); + tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]); + tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE); + tmp13 += tmp13; + dataptr[4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */ + MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */ + MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */ + CONST_BITS); + + tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */ + + dataptr[2] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */ + + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */ + CONST_BITS); + dataptr[6] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */ + - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */ + CONST_BITS); + + /* Odd part */ + + tmp10 = tmp1 + tmp2; + tmp11 = tmp5 - tmp4; + dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6); + tmp3 <<= CONST_BITS; + tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */ + tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */ + tmp10 += tmp11 - tmp3; + tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */ + MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */ + dataptr[5] = (DCTELEM) + DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */ + + MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */ + CONST_BITS); + tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */ + MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */ + dataptr[3] = (DCTELEM) + DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */ + - MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */ + CONST_BITS); + dataptr[1] = (DCTELEM) + DESCALE(tmp11 + tmp12 + tmp3 + tmp6 - + MULTIPLY(tmp0 + tmp6, FIX(1.126980169)), /* c3+c5-c1 */ + CONST_BITS); + + ctr++; + + if (ctr != DCTSIZE) { + if (ctr == 14) + break; /* Done. */ + dataptr += DCTSIZE; /* advance pointer to next row */ + } else + dataptr = workspace; /* switch pointer to extended workspace */ + } + + /* Pass 2: process columns. + * We leave the results scaled up by an overall factor of 8. + * We must also scale the output by (8/14)**2 = 16/49, which we partially + * fold into the constant multipliers and final shifting: + * cK now represents sqrt(2) * cos(K*pi/28) * 32/49. + */ + + dataptr = data; + wsptr = workspace; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5]; + tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4]; + tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3]; + tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2]; + tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1]; + tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0]; + tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; + + tmp10 = tmp0 + tmp6; + tmp14 = tmp0 - tmp6; + tmp11 = tmp1 + tmp5; + tmp15 = tmp1 - tmp5; + tmp12 = tmp2 + tmp4; + tmp16 = tmp2 - tmp4; + + tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5]; + tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4]; + tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3]; + tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2]; + tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1]; + tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0]; + tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13, + FIX(0.653061224)), /* 32/49 */ + CONST_BITS+1); + tmp13 += tmp13; + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */ + MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */ + MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */ + CONST_BITS+1); + + tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */ + + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */ + + MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */ + CONST_BITS+1); + dataptr[DCTSIZE*6] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */ + - MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */ + CONST_BITS+1); + + /* Odd part */ + + tmp10 = tmp1 + tmp2; + tmp11 = tmp5 - tmp4; + dataptr[DCTSIZE*7] = (DCTELEM) + DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6, + FIX(0.653061224)), /* 32/49 */ + CONST_BITS+1); + tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */ + tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */ + tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */ + tmp10 += tmp11 - tmp3; + tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */ + MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */ + dataptr[DCTSIZE*5] = (DCTELEM) + DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */ + + MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */ + CONST_BITS+1); + tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */ + MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */ + dataptr[DCTSIZE*3] = (DCTELEM) + DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */ + - MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */ + CONST_BITS+1); + dataptr[DCTSIZE*1] = (DCTELEM) + DESCALE(tmp11 + tmp12 + tmp3 + - MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */ + - MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */ + CONST_BITS+1); + + dataptr++; /* advance pointer to next column */ + wsptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 15x15 sample block. + */ + +GLOBAL(void) +jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; + INT32 z1, z2, z3; + DCTELEM workspace[8*7]; + DCTELEM *dataptr; + DCTELEM *wsptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT. */ + /* cK represents sqrt(2) * cos(K*pi/30). */ + + dataptr = data; + ctr = 0; + for (;;) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]); + tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]); + tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]); + tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]); + tmp7 = GETJSAMPLE(elemptr[7]); + + tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]); + tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]); + tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]); + tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]); + tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]); + tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]); + tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]); + + z1 = tmp0 + tmp4 + tmp5; + z2 = tmp1 + tmp3 + tmp6; + z3 = tmp2 + tmp7; + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE); + z3 += z3; + dataptr[6] = (DCTELEM) + DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */ + MULTIPLY(z2 - z3, FIX(0.437016024)), /* c12 */ + CONST_BITS); + tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7; + z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) - /* c2+c14 */ + MULTIPLY(tmp6 - tmp2, FIX(2.238241955)); /* c4+c8 */ + z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) - /* c8-c14 */ + MULTIPLY(tmp0 - tmp2, FIX(0.091361227)); /* c2-c4 */ + z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) + /* c2 */ + MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) + /* c8 */ + MULTIPLY(tmp1 - tmp4, FIX(0.790569415)); /* (c6+c12)/2 */ + + dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS); + dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS); + + /* Odd part */ + + tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16, + FIX(1.224744871)); /* c5 */ + tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */ + MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876)); /* c9 */ + tmp12 = MULTIPLY(tmp12, FIX(1.224744871)); /* c5 */ + tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) + /* c1 */ + MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) + /* c3 */ + MULTIPLY(tmp13 + tmp15, FIX(0.575212477)); /* c11 */ + tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) - /* c7-c11 */ + MULTIPLY(tmp14, FIX(0.513743148)) + /* c3-c9 */ + MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12; /* c1+c13 */ + tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) - /* -(c1-c7) */ + MULTIPLY(tmp11, FIX(2.176250899)) - /* c3+c9 */ + MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12; /* c11+c13 */ + + dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS); + dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS); + + ctr++; + + if (ctr != DCTSIZE) { + if (ctr == 15) + break; /* Done. */ + dataptr += DCTSIZE; /* advance pointer to next row */ + } else + dataptr = workspace; /* switch pointer to extended workspace */ + } + + /* Pass 2: process columns. + * We leave the results scaled up by an overall factor of 8. + * We must also scale the output by (8/15)**2 = 64/225, which we partially + * fold into the constant multipliers and final shifting: + * cK now represents sqrt(2) * cos(K*pi/30) * 256/225. + */ + + dataptr = data; + wsptr = workspace; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6]; + tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5]; + tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4]; + tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3]; + tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2]; + tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1]; + tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0]; + tmp7 = dataptr[DCTSIZE*7]; + + tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6]; + tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5]; + tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4]; + tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3]; + tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2]; + tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1]; + tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0]; + + z1 = tmp0 + tmp4 + tmp5; + z2 = tmp1 + tmp3 + tmp6; + z3 = tmp2 + tmp7; + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */ + CONST_BITS+2); + z3 += z3; + dataptr[DCTSIZE*6] = (DCTELEM) + DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */ + MULTIPLY(z2 - z3, FIX(0.497227121)), /* c12 */ + CONST_BITS+2); + tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7; + z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) - /* c2+c14 */ + MULTIPLY(tmp6 - tmp2, FIX(2.546621957)); /* c4+c8 */ + z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) - /* c8-c14 */ + MULTIPLY(tmp0 - tmp2, FIX(0.103948774)); /* c2-c4 */ + z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) + /* c2 */ + MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) + /* c8 */ + MULTIPLY(tmp1 - tmp4, FIX(0.899492312)); /* (c6+c12)/2 */ + + dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS+2); + dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS+2); + + /* Odd part */ + + tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16, + FIX(1.393487498)); /* c5 */ + tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */ + MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187)); /* c9 */ + tmp12 = MULTIPLY(tmp12, FIX(1.393487498)); /* c5 */ + tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) + /* c1 */ + MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) + /* c3 */ + MULTIPLY(tmp13 + tmp15, FIX(0.654463974)); /* c11 */ + tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) - /* c7-c11 */ + MULTIPLY(tmp14, FIX(0.584525538)) + /* c3-c9 */ + MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12; /* c1+c13 */ + tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) - /* -(c1-c7) */ + MULTIPLY(tmp11, FIX(2.476089912)) - /* c3+c9 */ + MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12; /* c11+c13 */ + + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2); + + dataptr++; /* advance pointer to next column */ + wsptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 16x16 sample block. + */ + +GLOBAL(void) +jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17; + DCTELEM workspace[DCTSIZE2]; + DCTELEM *dataptr; + DCTELEM *wsptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* cK represents sqrt(2) * cos(K*pi/32). */ + + dataptr = data; + ctr = 0; + for (;;) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]); + tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]); + tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]); + tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]); + tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]); + + tmp10 = tmp0 + tmp7; + tmp14 = tmp0 - tmp7; + tmp11 = tmp1 + tmp6; + tmp15 = tmp1 - tmp6; + tmp12 = tmp2 + tmp5; + tmp16 = tmp2 - tmp5; + tmp13 = tmp3 + tmp4; + tmp17 = tmp3 - tmp4; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]); + tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]); + tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]); + tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]); + tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]); + tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS); + dataptr[4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ + MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ + CONST_BITS-PASS1_BITS); + + tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */ + MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */ + + dataptr[2] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */ + + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */ + CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */ + - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */ + CONST_BITS-PASS1_BITS); + + /* Odd part */ + + tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */ + MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */ + tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */ + MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */ + tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */ + MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */ + tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */ + MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */ + tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */ + MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */ + tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */ + MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */ + MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */ + tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */ + - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */ + tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */ + + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */ + tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */ + + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */ + + dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); + dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); + + ctr++; + + if (ctr != DCTSIZE) { + if (ctr == DCTSIZE * 2) + break; /* Done. */ + dataptr += DCTSIZE; /* advance pointer to next row */ + } else + dataptr = workspace; /* switch pointer to extended workspace */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by (8/16)**2 = 1/2**2. + */ + + dataptr = data; + wsptr = workspace; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4]; + tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3]; + tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2]; + tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1]; + tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0]; + + tmp10 = tmp0 + tmp7; + tmp14 = tmp0 - tmp7; + tmp11 = tmp1 + tmp6; + tmp15 = tmp1 - tmp6; + tmp12 = tmp2 + tmp5; + tmp16 = tmp2 - tmp5; + tmp13 = tmp3 + tmp4; + tmp17 = tmp3 - tmp4; + + tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4]; + tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3]; + tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2]; + tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1]; + tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+2); + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ + MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ + CONST_BITS+PASS1_BITS+2); + + tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */ + MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */ + + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */ + + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+10 */ + CONST_BITS+PASS1_BITS+2); + dataptr[DCTSIZE*6] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */ + - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */ + CONST_BITS+PASS1_BITS+2); + + /* Odd part */ + + tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */ + MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */ + tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */ + MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */ + tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */ + MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */ + tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */ + MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */ + tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */ + MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */ + tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */ + MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */ + MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */ + tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */ + - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */ + tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */ + + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */ + tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */ + + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */ + + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+2); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+2); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+2); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+2); + + dataptr++; /* advance pointer to next column */ + wsptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 16x8 sample block. + * + * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17; + INT32 z1; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */ + + dataptr = data; + ctr = 0; + for (ctr = 0; ctr < DCTSIZE; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]); + tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]); + tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]); + tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]); + tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]); + + tmp10 = tmp0 + tmp7; + tmp14 = tmp0 - tmp7; + tmp11 = tmp1 + tmp6; + tmp15 = tmp1 - tmp6; + tmp12 = tmp2 + tmp5; + tmp16 = tmp2 - tmp5; + tmp13 = tmp3 + tmp4; + tmp17 = tmp3 - tmp4; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]); + tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]); + tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]); + tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]); + tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]); + tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS); + dataptr[4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ + MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ + CONST_BITS-PASS1_BITS); + + tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */ + MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */ + + dataptr[2] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */ + + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */ + CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */ + - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */ + CONST_BITS-PASS1_BITS); + + /* Odd part */ + + tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */ + MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */ + tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */ + MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */ + tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */ + MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */ + tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */ + MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */ + tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */ + MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */ + tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */ + MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */ + MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */ + tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */ + - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */ + tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */ + + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */ + tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */ + + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */ + + dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); + dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by 8/16 = 1/2. + */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part per LL&M figure 1 --- note that published figure is faulty; + * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + */ + + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; + + tmp10 = tmp0 + tmp3; + tmp12 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp13 = tmp1 - tmp2; + + tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; + + dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1); + dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), + CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), + CONST_BITS+PASS1_BITS+1); + + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). + * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). + * i0..i3 in the paper are tmp0..tmp3 here. + */ + + tmp10 = tmp0 + tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp0 + tmp2; + tmp13 = tmp1 + tmp3; + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + + tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ + tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ + tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ + tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ + tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ + tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ + + tmp12 += z1; + tmp13 += z1; + + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, + CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, + CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, + CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, + CONST_BITS+PASS1_BITS+1); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 14x7 sample block. + * + * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; + INT32 z1, z2, z3; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Zero bottom row of output coefficient block. */ + MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */ + + dataptr = data; + for (ctr = 0; ctr < 7; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]); + tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]); + tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]); + tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]); + tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]); + + tmp10 = tmp0 + tmp6; + tmp14 = tmp0 - tmp6; + tmp11 = tmp1 + tmp5; + tmp15 = tmp1 - tmp5; + tmp12 = tmp2 + tmp4; + tmp16 = tmp2 - tmp4; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]); + tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]); + tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]); + tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]); + tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS); + tmp13 += tmp13; + dataptr[4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */ + MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */ + MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */ + CONST_BITS-PASS1_BITS); + + tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */ + + dataptr[2] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */ + + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */ + CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */ + - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */ + CONST_BITS-PASS1_BITS); + + /* Odd part */ + + tmp10 = tmp1 + tmp2; + tmp11 = tmp5 - tmp4; + dataptr[7] = (DCTELEM) ((tmp0 - tmp10 + tmp3 - tmp11 - tmp6) << PASS1_BITS); + tmp3 <<= CONST_BITS; + tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */ + tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */ + tmp10 += tmp11 - tmp3; + tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */ + MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */ + dataptr[5] = (DCTELEM) + DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */ + + MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */ + CONST_BITS-PASS1_BITS); + tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */ + MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */ + dataptr[3] = (DCTELEM) + DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */ + - MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */ + CONST_BITS-PASS1_BITS); + dataptr[1] = (DCTELEM) + DESCALE(tmp11 + tmp12 + tmp3 + tmp6 - + MULTIPLY(tmp0 + tmp6, FIX(1.126980169)), /* c3+c5-c1 */ + CONST_BITS-PASS1_BITS); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by (8/14)*(8/7) = 32/49, which we + * partially fold into the constant multipliers and final shifting: + * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49. + */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4]; + tmp3 = dataptr[DCTSIZE*3]; + + tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6]; + tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5]; + tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4]; + + z1 = tmp0 + tmp2; + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */ + CONST_BITS+PASS1_BITS+1); + tmp3 += tmp3; + z1 -= tmp3; + z1 -= tmp3; + z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */ + z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */ + z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */ + dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS+1); + z1 -= z2; + z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */ + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */ + CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS+1); + + /* Odd part */ + + tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */ + tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */ + tmp0 = tmp1 - tmp2; + tmp1 += tmp2; + tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */ + tmp1 += tmp2; + tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */ + tmp0 += tmp3; + tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */ + + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 12x6 sample block. + * + * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Zero 2 bottom rows of output coefficient block. */ + MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */ + + dataptr = data; + for (ctr = 0; ctr < 6; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]); + tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]); + tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]); + + tmp10 = tmp0 + tmp5; + tmp13 = tmp0 - tmp5; + tmp11 = tmp1 + tmp4; + tmp14 = tmp1 - tmp4; + tmp12 = tmp2 + tmp3; + tmp15 = tmp2 - tmp3; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]); + tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]); + tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]); + tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS); + dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS); + dataptr[4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */ + CONST_BITS-PASS1_BITS); + dataptr[2] = (DCTELEM) + DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */ + CONST_BITS-PASS1_BITS); + + /* Odd part */ + + tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */ + tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */ + tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */ + tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */ + tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */ + tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */ + + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */ + tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */ + tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */ + + MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */ + tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */ + - MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */ + tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */ + - MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */ + + dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); + dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by (8/12)*(8/6) = 8/9, which we + * partially fold into the constant multipliers and final shifting: + * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9. + */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5]; + tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; + + tmp10 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + + tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5]; + tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4]; + tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */ + CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */ + CONST_BITS+PASS1_BITS+1); + + /* Odd part */ + + tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */ + + dataptr[DCTSIZE*1] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*3] = (DCTELEM) + DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*5] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS+1); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 10x5 sample block. + * + * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Zero 3 bottom rows of output coefficient block. */ + MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */ + + dataptr = data; + for (ctr = 0; ctr < 5; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]); + tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]); + tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]); + + tmp10 = tmp0 + tmp4; + tmp13 = tmp0 - tmp4; + tmp11 = tmp1 + tmp3; + tmp14 = tmp1 - tmp3; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]); + tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]); + tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS); + tmp12 += tmp12; + dataptr[4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */ + MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */ + CONST_BITS-PASS1_BITS); + tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */ + dataptr[2] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */ + CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */ + CONST_BITS-PASS1_BITS); + + /* Odd part */ + + tmp10 = tmp0 + tmp4; + tmp11 = tmp1 - tmp3; + dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS); + tmp2 <<= CONST_BITS; + dataptr[1] = (DCTELEM) + DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */ + MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */ + MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */ + MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */ + CONST_BITS-PASS1_BITS); + tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */ + MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */ + tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */ + (tmp11 << (CONST_BITS - 1)) - tmp2; + dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS); + dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by (8/10)*(8/5) = 32/25, which we + * fold into the constant multipliers: + * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25. + */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3]; + tmp2 = dataptr[DCTSIZE*2]; + + tmp10 = tmp0 + tmp1; + tmp11 = tmp0 - tmp1; + + tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4]; + tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */ + CONST_BITS+PASS1_BITS); + tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */ + tmp10 -= tmp2 << 2; + tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */ + dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS); + + /* Odd part */ + + tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */ + + dataptr[DCTSIZE*1] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */ + CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on an 8x4 sample block. + * + * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3; + INT32 tmp10, tmp11, tmp12, tmp13; + INT32 z1; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Zero 4 bottom rows of output coefficient block. */ + MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* We must also scale the output by 8/4 = 2, which we add here. */ + + dataptr = data; + for (ctr = 0; ctr < 4; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part per LL&M figure 1 --- note that published figure is faulty; + * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]); + + tmp10 = tmp0 + tmp3; + tmp12 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp13 = tmp1 - tmp2; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); + tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1)); + dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1)); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-2); + dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), + CONST_BITS-PASS1_BITS-1); + dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), + CONST_BITS-PASS1_BITS-1); + + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). + * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). + * i0..i3 in the paper are tmp0..tmp3 here. + */ + + tmp10 = tmp0 + tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp0 + tmp2; + tmp13 = tmp1 + tmp3; + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-2); + + tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ + tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ + tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ + tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ + tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ + tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ + + tmp12 += z1; + tmp13 += z1; + + dataptr[1] = (DCTELEM) + RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1); + dataptr[3] = (DCTELEM) + RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1); + dataptr[5] = (DCTELEM) + RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1); + dataptr[7] = (DCTELEM) + RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). + */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1)); + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2]; + + tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3]; + tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2]; + + dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS); + dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS); + + /* Odd part */ + + tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS+PASS1_BITS-1); + + dataptr[DCTSIZE*1] = (DCTELEM) + RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) + RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ + CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 6x3 sample block. + * + * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2; + INT32 tmp10, tmp11, tmp12; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* We scale the results further by 2 as part of output adaption */ + /* scaling for different DCT size. */ + /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ + + dataptr = data; + for (ctr = 0; ctr < 3; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]); + tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]); + + tmp10 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1)); + dataptr[2] = (DCTELEM) + DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ + CONST_BITS-PASS1_BITS-1); + dataptr[4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */ + CONST_BITS-PASS1_BITS-1); + + /* Odd part */ + + tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */ + CONST_BITS-PASS1_BITS-1); + + dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1))); + dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1)); + dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1))); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially + * fold into the constant multipliers (other part was done in pass 1): + * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9. + */ + + dataptr = data; + for (ctr = 0; ctr < 6; ctr++) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2]; + tmp1 = dataptr[DCTSIZE*1]; + + tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */ + CONST_BITS+PASS1_BITS); + + /* Odd part */ + + dataptr[DCTSIZE*1] = (DCTELEM) + DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */ + CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 4x2 sample block. + * + * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1; + INT32 tmp10, tmp11; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. */ + /* 4-point FDCT kernel, */ + /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ + + dataptr = data; + for (ctr = 0; ctr < 2; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); + + tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); + tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+3)); + dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+3)); + + /* Odd part */ + + tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-4); + + dataptr[1] = (DCTELEM) + RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ + CONST_BITS-PASS1_BITS-3); + dataptr[3] = (DCTELEM) + RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ + CONST_BITS-PASS1_BITS-3); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + */ + + dataptr = data; + for (ctr = 0; ctr < 4; ctr++) { + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = dataptr[DCTSIZE*0] + (ONE << (PASS1_BITS-1)); + tmp1 = dataptr[DCTSIZE*1]; + + dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS); + + /* Odd part */ + + dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 2x1 sample block. + * + * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1; + JSAMPROW elemptr; + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + elemptr = sample_data[0] + start_col; + + tmp0 = GETJSAMPLE(elemptr[0]); + tmp1 = GETJSAMPLE(elemptr[1]); + + /* We leave the results scaled up by an overall factor of 8. + * We must also scale the output by (8/2)*(8/1) = 2**5. + */ + + /* Even part */ + /* Apply unsigned->signed conversion */ + data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5); + + /* Odd part */ + data[1] = (DCTELEM) ((tmp0 - tmp1) << 5); +} + + +/* + * Perform the forward DCT on an 8x16 sample block. + * + * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17; + INT32 z1; + DCTELEM workspace[DCTSIZE2]; + DCTELEM *dataptr; + DCTELEM *wsptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + dataptr = data; + ctr = 0; + for (;;) { + elemptr = sample_data[ctr] + start_col; + + /* Even part per LL&M figure 1 --- note that published figure is faulty; + * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); + tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]); + + tmp10 = tmp0 + tmp3; + tmp12 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp13 = tmp1 - tmp2; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); + tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS); + dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), + CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), + CONST_BITS-PASS1_BITS); + + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). + * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). + * i0..i3 in the paper are tmp0..tmp3 here. + */ + + tmp10 = tmp0 + tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp0 + tmp2; + tmp13 = tmp1 + tmp3; + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + + tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ + tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ + tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ + tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ + tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ + tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ + + tmp12 += z1; + tmp13 += z1; + + dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS); + dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS); + + ctr++; + + if (ctr != DCTSIZE) { + if (ctr == DCTSIZE * 2) + break; /* Done. */ + dataptr += DCTSIZE; /* advance pointer to next row */ + } else + dataptr = workspace; /* switch pointer to extended workspace */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by 8/16 = 1/2. + * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). + */ + + dataptr = data; + wsptr = workspace; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4]; + tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3]; + tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2]; + tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1]; + tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0]; + + tmp10 = tmp0 + tmp7; + tmp14 = tmp0 - tmp7; + tmp11 = tmp1 + tmp6; + tmp15 = tmp1 - tmp6; + tmp12 = tmp2 + tmp5; + tmp16 = tmp2 - tmp5; + tmp13 = tmp3 + tmp4; + tmp17 = tmp3 - tmp4; + + tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4]; + tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3]; + tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2]; + tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1]; + tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+1); + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ + MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ + CONST_BITS+PASS1_BITS+1); + + tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */ + MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */ + + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */ + + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */ + CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*6] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */ + - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */ + CONST_BITS+PASS1_BITS+1); + + /* Odd part */ + + tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */ + MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */ + tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */ + MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */ + tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */ + MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */ + tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */ + MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */ + tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */ + MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */ + tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */ + MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */ + MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */ + tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */ + - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */ + tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */ + + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */ + tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */ + + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */ + + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+1); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+1); + + dataptr++; /* advance pointer to next column */ + wsptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 7x14 sample block. + * + * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; + INT32 z1, z2, z3; + DCTELEM workspace[8*6]; + DCTELEM *dataptr; + DCTELEM *wsptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */ + + dataptr = data; + ctr = 0; + for (;;) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]); + tmp3 = GETJSAMPLE(elemptr[3]); + + tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]); + tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]); + tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]); + + z1 = tmp0 + tmp2; + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS); + tmp3 += tmp3; + z1 -= tmp3; + z1 -= tmp3; + z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */ + z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */ + z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */ + dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS); + z1 -= z2; + z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */ + dataptr[4] = (DCTELEM) + DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */ + CONST_BITS-PASS1_BITS); + dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS); + + /* Odd part */ + + tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */ + tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */ + tmp0 = tmp1 - tmp2; + tmp1 += tmp2; + tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */ + tmp1 += tmp2; + tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */ + tmp0 += tmp3; + tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */ + + dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS); + dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS); + + ctr++; + + if (ctr != DCTSIZE) { + if (ctr == 14) + break; /* Done. */ + dataptr += DCTSIZE; /* advance pointer to next row */ + } else + dataptr = workspace; /* switch pointer to extended workspace */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by (8/7)*(8/14) = 32/49, which we + * fold into the constant multipliers: + * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49. + */ + + dataptr = data; + wsptr = workspace; + for (ctr = 0; ctr < 7; ctr++) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5]; + tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4]; + tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3]; + tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2]; + tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1]; + tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0]; + tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; + + tmp10 = tmp0 + tmp6; + tmp14 = tmp0 - tmp6; + tmp11 = tmp1 + tmp5; + tmp15 = tmp1 - tmp5; + tmp12 = tmp2 + tmp4; + tmp16 = tmp2 - tmp4; + + tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5]; + tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4]; + tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3]; + tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2]; + tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1]; + tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0]; + tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13, + FIX(0.653061224)), /* 32/49 */ + CONST_BITS+PASS1_BITS); + tmp13 += tmp13; + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */ + MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */ + MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */ + CONST_BITS+PASS1_BITS); + + tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */ + + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */ + + MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*6] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */ + - MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */ + CONST_BITS+PASS1_BITS); + + /* Odd part */ + + tmp10 = tmp1 + tmp2; + tmp11 = tmp5 - tmp4; + dataptr[DCTSIZE*7] = (DCTELEM) + DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6, + FIX(0.653061224)), /* 32/49 */ + CONST_BITS+PASS1_BITS); + tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */ + tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */ + tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */ + tmp10 += tmp11 - tmp3; + tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */ + MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */ + dataptr[DCTSIZE*5] = (DCTELEM) + DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */ + + MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */ + CONST_BITS+PASS1_BITS); + tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */ + MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */ + dataptr[DCTSIZE*3] = (DCTELEM) + DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */ + - MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*1] = (DCTELEM) + DESCALE(tmp11 + tmp12 + tmp3 + - MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */ + - MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */ + CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + wsptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 6x12 sample block. + * + * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + DCTELEM workspace[8*4]; + DCTELEM *dataptr; + DCTELEM *wsptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ + + dataptr = data; + ctr = 0; + for (;;) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]); + tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]); + tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]); + + tmp10 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); + tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS); + dataptr[2] = (DCTELEM) + DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ + CONST_BITS-PASS1_BITS); + dataptr[4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */ + CONST_BITS-PASS1_BITS); + + /* Odd part */ + + tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */ + CONST_BITS-PASS1_BITS); + + dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS)); + dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS); + dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS)); + + ctr++; + + if (ctr != DCTSIZE) { + if (ctr == 12) + break; /* Done. */ + dataptr += DCTSIZE; /* advance pointer to next row */ + } else + dataptr = workspace; /* switch pointer to extended workspace */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by (8/6)*(8/12) = 8/9, which we + * fold into the constant multipliers: + * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9. + */ + + dataptr = data; + wsptr = workspace; + for (ctr = 0; ctr < 6; ctr++) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3]; + tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2]; + tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1]; + tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0]; + tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7]; + tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6]; + + tmp10 = tmp0 + tmp5; + tmp13 = tmp0 - tmp5; + tmp11 = tmp1 + tmp4; + tmp14 = tmp1 - tmp4; + tmp12 = tmp2 + tmp3; + tmp15 = tmp2 - tmp3; + + tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3]; + tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2]; + tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1]; + tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0]; + tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7]; + tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*6] = (DCTELEM) + DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */ + MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */ + CONST_BITS+PASS1_BITS); + + /* Odd part */ + + tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */ + tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */ + tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */ + tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */ + tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */ + tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */ + + MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */ + tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */ + tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */ + + MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */ + tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */ + - MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */ + tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */ + - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */ + + dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + wsptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 5x10 sample block. + * + * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4; + INT32 tmp10, tmp11, tmp12, tmp13, tmp14; + DCTELEM workspace[8*2]; + DCTELEM *dataptr; + DCTELEM *wsptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */ + + dataptr = data; + ctr = 0; + for (;;) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]); + tmp2 = GETJSAMPLE(elemptr[2]); + + tmp10 = tmp0 + tmp1; + tmp11 = tmp0 - tmp1; + + tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]); + tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS); + tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */ + tmp10 -= tmp2 << 2; + tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */ + dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS); + dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS); + + /* Odd part */ + + tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */ + + dataptr[1] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */ + CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */ + CONST_BITS-PASS1_BITS); + + ctr++; + + if (ctr != DCTSIZE) { + if (ctr == 10) + break; /* Done. */ + dataptr += DCTSIZE; /* advance pointer to next row */ + } else + dataptr = workspace; /* switch pointer to extended workspace */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by (8/5)*(8/10) = 32/25, which we + * fold into the constant multipliers: + * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25. + */ + + dataptr = data; + wsptr = workspace; + for (ctr = 0; ctr < 5; ctr++) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1]; + tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0]; + tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7]; + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6]; + tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; + + tmp10 = tmp0 + tmp4; + tmp13 = tmp0 - tmp4; + tmp11 = tmp1 + tmp3; + tmp14 = tmp1 - tmp3; + + tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1]; + tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0]; + tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7]; + tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6]; + tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */ + CONST_BITS+PASS1_BITS); + tmp12 += tmp12; + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */ + MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */ + CONST_BITS+PASS1_BITS); + tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */ + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*6] = (DCTELEM) + DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */ + CONST_BITS+PASS1_BITS); + + /* Odd part */ + + tmp10 = tmp0 + tmp4; + tmp11 = tmp1 - tmp3; + dataptr[DCTSIZE*5] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */ + CONST_BITS+PASS1_BITS); + tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */ + dataptr[DCTSIZE*1] = (DCTELEM) + DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */ + MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */ + MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */ + MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */ + CONST_BITS+PASS1_BITS); + tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */ + MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */ + tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */ + MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */ + dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + wsptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 4x8 sample block. + * + * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3; + INT32 tmp10, tmp11, tmp12, tmp13; + INT32 z1; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* We must also scale the output by 8/4 = 2, which we add here. */ + /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */ + + dataptr = data; + for (ctr = 0; ctr < DCTSIZE; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]); + tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); + + tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); + tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1)); + dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1)); + + /* Odd part */ + + tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-2); + + dataptr[1] = (DCTELEM) + RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ + CONST_BITS-PASS1_BITS-1); + dataptr[3] = (DCTELEM) + RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ + CONST_BITS-PASS1_BITS-1); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + */ + + dataptr = data; + for (ctr = 0; ctr < 4; ctr++) { + /* Even part per LL&M figure 1 --- note that published figure is faulty; + * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". + */ + + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; + + /* Add fudge factor here for final descale. */ + tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1)); + tmp12 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp13 = tmp1 - tmp2; + + tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; + tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; + tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; + + dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); + dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS+PASS1_BITS-1); + dataptr[DCTSIZE*2] = (DCTELEM) + RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*6] = (DCTELEM) + RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS); + + /* Odd part per figure 8 --- note paper omits factor of sqrt(2). + * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). + * i0..i3 in the paper are tmp0..tmp3 here. + */ + + tmp10 = tmp0 + tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp0 + tmp2; + tmp13 = tmp1 + tmp3; + z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS+PASS1_BITS-1); + + tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ + tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ + tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ + tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ + tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ + tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ + tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ + tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ + + tmp12 += z1; + tmp13 += z1; + + dataptr[DCTSIZE*1] = (DCTELEM) + RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) + RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*5] = (DCTELEM) + RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*7] = (DCTELEM) + RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 3x6 sample block. + * + * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1, tmp2; + INT32 tmp10, tmp11, tmp12; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + /* We scale the results further by 2 as part of output adaption */ + /* scaling for different DCT size. */ + /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */ + + dataptr = data; + for (ctr = 0; ctr < 6; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]); + tmp1 = GETJSAMPLE(elemptr[1]); + + tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) + ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1)); + dataptr[2] = (DCTELEM) + DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */ + CONST_BITS-PASS1_BITS-1); + + /* Odd part */ + + dataptr[1] = (DCTELEM) + DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */ + CONST_BITS-PASS1_BITS-1); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We remove the PASS1_BITS scaling, but leave the results scaled up + * by an overall factor of 8. + * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially + * fold into the constant multipliers (other part was done in pass 1): + * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9. + */ + + dataptr = data; + for (ctr = 0; ctr < 3; ctr++) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5]; + tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4]; + tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; + + tmp10 = tmp0 + tmp2; + tmp12 = tmp0 - tmp2; + + tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5]; + tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4]; + tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; + + dataptr[DCTSIZE*0] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*2] = (DCTELEM) + DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*4] = (DCTELEM) + DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */ + CONST_BITS+PASS1_BITS); + + /* Odd part */ + + tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */ + + dataptr[DCTSIZE*1] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) + DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS); + dataptr[DCTSIZE*5] = (DCTELEM) + DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */ + CONST_BITS+PASS1_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 2x4 sample block. + * + * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1; + INT32 tmp10, tmp11; + DCTELEM *dataptr; + JSAMPROW elemptr; + int ctr; + SHIFT_TEMPS + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true DCT. */ + /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */ + + dataptr = data; + for (ctr = 0; ctr < 4; ctr++) { + elemptr = sample_data[ctr] + start_col; + + /* Even part */ + + tmp0 = GETJSAMPLE(elemptr[0]); + tmp1 = GETJSAMPLE(elemptr[1]); + + /* Apply unsigned->signed conversion */ + dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 3); + + /* Odd part */ + + dataptr[1] = (DCTELEM) ((tmp0 - tmp1) << 3); + + dataptr += DCTSIZE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. + * We leave the results scaled up by an overall factor of 8. + * 4-point FDCT kernel, + * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. + */ + + dataptr = data; + for (ctr = 0; ctr < 2; ctr++) { + /* Even part */ + + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3]; + tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2]; + + tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3]; + tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2]; + + dataptr[DCTSIZE*0] = (DCTELEM) (tmp0 + tmp1); + dataptr[DCTSIZE*2] = (DCTELEM) (tmp0 - tmp1); + + /* Odd part */ + + tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-1); + + dataptr[DCTSIZE*1] = (DCTELEM) + RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ + CONST_BITS); + dataptr[DCTSIZE*3] = (DCTELEM) + RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ + CONST_BITS); + + dataptr++; /* advance pointer to next column */ + } +} + + +/* + * Perform the forward DCT on a 1x2 sample block. + * + * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns). + */ + +GLOBAL(void) +jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) +{ + INT32 tmp0, tmp1; + + /* Pre-zero output coefficient block. */ + MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); + + tmp0 = GETJSAMPLE(sample_data[0][start_col]); + tmp1 = GETJSAMPLE(sample_data[1][start_col]); + + /* We leave the results scaled up by an overall factor of 8. + * We must also scale the output by (8/1)*(8/2) = 2**5. + */ + + /* Even part */ + /* Apply unsigned->signed conversion */ + data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5); + + /* Odd part */ + data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5); +} + +#endif /* DCT_SCALING_SUPPORTED */ +#endif /* DCT_ISLOW_SUPPORTED */ diff --git a/code/jpeg-6b/jidctflt.c b/code/jpeg-8c/jidctflt.c similarity index 83% rename from code/jpeg-6b/jidctflt.c rename to code/jpeg-8c/jidctflt.c index 0188ce3d..23ae9d33 100644 --- a/code/jpeg-6b/jidctflt.c +++ b/code/jpeg-8c/jidctflt.c @@ -2,6 +2,7 @@ * jidctflt.c * * Copyright (C) 1994-1998, Thomas G. Lane. + * Modified 2010 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -76,10 +77,9 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, FLOAT_MULT_TYPE * quantptr; FAST_FLOAT * wsptr; JSAMPROW outptr; - JSAMPLE *range_limit = IDCT_range_limit(cinfo); + JSAMPLE *range_limit = cinfo->sample_range_limit; int ctr; FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */ - SHIFT_TEMPS /* Pass 1: process columns from input, store into work array. */ @@ -152,12 +152,12 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); /* 2*c4 */ z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ - tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ - tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ + tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */ + tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */ tmp6 = tmp12 - tmp7; /* phase 2 */ tmp5 = tmp11 - tmp6; - tmp4 = tmp10 + tmp5; + tmp4 = tmp10 - tmp5; wsptr[DCTSIZE*0] = tmp0 + tmp7; wsptr[DCTSIZE*7] = tmp0 - tmp7; @@ -165,8 +165,8 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, wsptr[DCTSIZE*6] = tmp1 - tmp6; wsptr[DCTSIZE*2] = tmp2 + tmp5; wsptr[DCTSIZE*5] = tmp2 - tmp5; - wsptr[DCTSIZE*4] = tmp3 + tmp4; - wsptr[DCTSIZE*3] = tmp3 - tmp4; + wsptr[DCTSIZE*3] = tmp3 + tmp4; + wsptr[DCTSIZE*4] = tmp3 - tmp4; inptr++; /* advance pointers to next column */ quantptr++; @@ -174,7 +174,6 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, } /* Pass 2: process rows from work array, store into output array. */ - /* Note that we must descale the results by a factor of 8 == 2**3. */ wsptr = workspace; for (ctr = 0; ctr < DCTSIZE; ctr++) { @@ -187,8 +186,10 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, /* Even part */ - tmp10 = wsptr[0] + wsptr[4]; - tmp11 = wsptr[0] - wsptr[4]; + /* Apply signed->unsigned and prepare float->int conversion */ + z5 = wsptr[0] + ((FAST_FLOAT) CENTERJSAMPLE + (FAST_FLOAT) 0.5); + tmp10 = z5 + wsptr[4]; + tmp11 = z5 - wsptr[4]; tmp13 = wsptr[2] + wsptr[6]; tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT) 1.414213562) - tmp13; @@ -209,31 +210,23 @@ jpeg_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr, tmp11 = (z11 - z13) * ((FAST_FLOAT) 1.414213562); z5 = (z10 + z12) * ((FAST_FLOAT) 1.847759065); /* 2*c2 */ - tmp10 = ((FAST_FLOAT) 1.082392200) * z12 - z5; /* 2*(c2-c6) */ - tmp12 = ((FAST_FLOAT) -2.613125930) * z10 + z5; /* -2*(c2+c6) */ + tmp10 = z5 - z12 * ((FAST_FLOAT) 1.082392200); /* 2*(c2-c6) */ + tmp12 = z5 - z10 * ((FAST_FLOAT) 2.613125930); /* 2*(c2+c6) */ tmp6 = tmp12 - tmp7; tmp5 = tmp11 - tmp6; - tmp4 = tmp10 + tmp5; + tmp4 = tmp10 - tmp5; - /* Final output stage: scale down by a factor of 8 and range-limit */ + /* Final output stage: float->int conversion and range-limit */ - outptr[0] = range_limit[(int) DESCALE((INT32) (tmp0 + tmp7), 3) - & RANGE_MASK]; - outptr[7] = range_limit[(int) DESCALE((INT32) (tmp0 - tmp7), 3) - & RANGE_MASK]; - outptr[1] = range_limit[(int) DESCALE((INT32) (tmp1 + tmp6), 3) - & RANGE_MASK]; - outptr[6] = range_limit[(int) DESCALE((INT32) (tmp1 - tmp6), 3) - & RANGE_MASK]; - outptr[2] = range_limit[(int) DESCALE((INT32) (tmp2 + tmp5), 3) - & RANGE_MASK]; - outptr[5] = range_limit[(int) DESCALE((INT32) (tmp2 - tmp5), 3) - & RANGE_MASK]; - outptr[4] = range_limit[(int) DESCALE((INT32) (tmp3 + tmp4), 3) - & RANGE_MASK]; - outptr[3] = range_limit[(int) DESCALE((INT32) (tmp3 - tmp4), 3) - & RANGE_MASK]; + outptr[0] = range_limit[((int) (tmp0 + tmp7)) & RANGE_MASK]; + outptr[7] = range_limit[((int) (tmp0 - tmp7)) & RANGE_MASK]; + outptr[1] = range_limit[((int) (tmp1 + tmp6)) & RANGE_MASK]; + outptr[6] = range_limit[((int) (tmp1 - tmp6)) & RANGE_MASK]; + outptr[2] = range_limit[((int) (tmp2 + tmp5)) & RANGE_MASK]; + outptr[5] = range_limit[((int) (tmp2 - tmp5)) & RANGE_MASK]; + outptr[3] = range_limit[((int) (tmp3 + tmp4)) & RANGE_MASK]; + outptr[4] = range_limit[((int) (tmp3 - tmp4)) & RANGE_MASK]; wsptr += DCTSIZE; /* advance pointer to next row */ } diff --git a/code/jpeg-6b/jidctfst.c b/code/jpeg-8c/jidctfst.c similarity index 100% rename from code/jpeg-6b/jidctfst.c rename to code/jpeg-8c/jidctfst.c diff --git a/code/jpeg-8c/jidctint.c b/code/jpeg-8c/jidctint.c new file mode 100644 index 00000000..dcdf7ce4 --- /dev/null +++ b/code/jpeg-8c/jidctint.c @@ -0,0 +1,5137 @@ +/* + * jidctint.c + * + * Copyright (C) 1991-1998, Thomas G. Lane. + * Modification developed 2002-2009 by Guido Vollbeding. + * This file is part of the Independent JPEG Group's software. + * For conditions of distribution and use, see the accompanying README file. + * + * This file contains a slow-but-accurate integer implementation of the + * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine + * must also perform dequantization of the input coefficients. + * + * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT + * on each row (or vice versa, but it's more convenient to emit a row at + * a time). Direct algorithms are also available, but they are much more + * complex and seem not to be any faster when reduced to code. + * + * This implementation is based on an algorithm described in + * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT + * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, + * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. + * The primary algorithm described there uses 11 multiplies and 29 adds. + * We use their alternate method with 12 multiplies and 32 adds. + * The advantage of this method is that no data path contains more than one + * multiplication; this allows a very simple and accurate implementation in + * scaled fixed-point arithmetic, with a minimal number of shifts. + * + * We also provide IDCT routines with various output sample block sizes for + * direct resolution reduction or enlargement and for direct resolving the + * common 2x1 and 1x2 subsampling cases without additional resampling: NxN + * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block. + * + * For N<8 we simply take the corresponding low-frequency coefficients of + * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block + * to yield the downscaled outputs. + * This can be seen as direct low-pass downsampling from the DCT domain + * point of view rather than the usual spatial domain point of view, + * yielding significant computational savings and results at least + * as good as common bilinear (averaging) spatial downsampling. + * + * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as + * lower frequencies and higher frequencies assumed to be zero. + * It turns out that the computational effort is similar to the 8x8 IDCT + * regarding the output size. + * Furthermore, the scaling and descaling is the same for all IDCT sizes. + * + * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases + * since there would be too many additional constants to pre-calculate. + */ + +#define JPEG_INTERNALS +#include "jinclude.h" +#include "jpeglib.h" +#include "jdct.h" /* Private declarations for DCT subsystem */ + +#ifdef DCT_ISLOW_SUPPORTED + + +/* + * This module is specialized to the case DCTSIZE = 8. + */ + +#if DCTSIZE != 8 + Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */ +#endif + + +/* + * The poop on this scaling stuff is as follows: + * + * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) + * larger than the true IDCT outputs. The final outputs are therefore + * a factor of N larger than desired; since N=8 this can be cured by + * a simple right shift at the end of the algorithm. The advantage of + * this arrangement is that we save two multiplications per 1-D IDCT, + * because the y0 and y4 inputs need not be divided by sqrt(N). + * + * We have to do addition and subtraction of the integer inputs, which + * is no problem, and multiplication by fractional constants, which is + * a problem to do in integer arithmetic. We multiply all the constants + * by CONST_SCALE and convert them to integer constants (thus retaining + * CONST_BITS bits of precision in the constants). After doing a + * multiplication we have to divide the product by CONST_SCALE, with proper + * rounding, to produce the correct output. This division can be done + * cheaply as a right shift of CONST_BITS bits. We postpone shifting + * as long as possible so that partial sums can be added together with + * full fractional precision. + * + * The outputs of the first pass are scaled up by PASS1_BITS bits so that + * they are represented to better-than-integral precision. These outputs + * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word + * with the recommended scaling. (To scale up 12-bit sample data further, an + * intermediate INT32 array would be needed.) + * + * To avoid overflow of the 32-bit intermediate results in pass 2, we must + * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis + * shows that the values given below are the most effective. + */ + +#if BITS_IN_JSAMPLE == 8 +#define CONST_BITS 13 +#define PASS1_BITS 2 +#else +#define CONST_BITS 13 +#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ +#endif + +/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus + * causing a lot of useless floating-point operations at run time. + * To get around this we use the following pre-calculated constants. + * If you change CONST_BITS you may want to add appropriate values. + * (With a reasonable C compiler, you can just rely on the FIX() macro...) + */ + +#if CONST_BITS == 13 +#define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ +#else +#define FIX_0_298631336 FIX(0.298631336) +#define FIX_0_390180644 FIX(0.390180644) +#define FIX_0_541196100 FIX(0.541196100) +#define FIX_0_765366865 FIX(0.765366865) +#define FIX_0_899976223 FIX(0.899976223) +#define FIX_1_175875602 FIX(1.175875602) +#define FIX_1_501321110 FIX(1.501321110) +#define FIX_1_847759065 FIX(1.847759065) +#define FIX_1_961570560 FIX(1.961570560) +#define FIX_2_053119869 FIX(2.053119869) +#define FIX_2_562915447 FIX(2.562915447) +#define FIX_3_072711026 FIX(3.072711026) +#endif + + +/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. + * For 8-bit samples with the recommended scaling, all the variable + * and constant values involved are no more than 16 bits wide, so a + * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. + * For 12-bit samples, a full 32-bit multiplication will be needed. + */ + +#if BITS_IN_JSAMPLE == 8 +#define MULTIPLY(var,const) MULTIPLY16C16(var,const) +#else +#define MULTIPLY(var,const) ((var) * (const)) +#endif + + +/* Dequantize a coefficient by multiplying it by the multiplier-table + * entry; produce an int result. In this module, both inputs and result + * are 16 bits or less, so either int or short multiply will work. + */ + +#define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval)) + + +/* + * Perform dequantization and inverse DCT on one block of coefficients. + */ + +GLOBAL(void) +jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3; + INT32 tmp10, tmp11, tmp12, tmp13; + INT32 z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[DCTSIZE2]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = DCTSIZE; ctr > 0; ctr--) { + /* Due to quantization, we will usually find that many of the input + * coefficients are zero, especially the AC terms. We can exploit this + * by short-circuiting the IDCT calculation for any column in which all + * the AC terms are zero. In that case each output is equal to the + * DC coefficient (with scale factor as needed). + * With typical images and quantization tables, half or more of the + * column DCT calculations can be simplified this way. + */ + + if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && + inptr[DCTSIZE*7] == 0) { + /* AC terms all zero */ + int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; + + wsptr[DCTSIZE*0] = dcval; + wsptr[DCTSIZE*1] = dcval; + wsptr[DCTSIZE*2] = dcval; + wsptr[DCTSIZE*3] = dcval; + wsptr[DCTSIZE*4] = dcval; + wsptr[DCTSIZE*5] = dcval; + wsptr[DCTSIZE*6] = dcval; + wsptr[DCTSIZE*7] = dcval; + + inptr++; /* advance pointers to next column */ + quantptr++; + wsptr++; + continue; + } + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); + + z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z2 <<= CONST_BITS; + z3 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + z2 += ONE << (CONST_BITS-PASS1_BITS-1); + + tmp0 = z2 + z3; + tmp1 = z2 - z3; + + tmp10 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + tmp11 = tmp1 + tmp3; + tmp12 = tmp1 - tmp3; + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + + z2 = tmp0 + tmp2; + z3 = tmp1 + tmp3; + + z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ + z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + z2 += z1; + z3 += z1; + + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ + tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ + tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ + tmp0 += z1 + z2; + tmp3 += z1 + z3; + + z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ + tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ + tmp1 += z1 + z3; + tmp2 += z1 + z2; + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); + + inptr++; /* advance pointers to next column */ + quantptr++; + wsptr++; + } + + /* Pass 2: process rows from work array, store into output array. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + + wsptr = workspace; + for (ctr = 0; ctr < DCTSIZE; ctr++) { + outptr = output_buf[ctr] + output_col; + /* Rows of zeroes can be exploited in the same way as we did with columns. + * However, the column calculation has created many nonzero AC terms, so + * the simplification applies less often (typically 5% to 10% of the time). + * On machines with very fast multiplication, it's possible that the + * test takes more time than it's worth. In that case this section + * may be commented out. + */ + +#ifndef NO_ZERO_ROW_TEST + if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && + wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { + /* AC terms all zero */ + JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) + & RANGE_MASK]; + + outptr[0] = dcval; + outptr[1] = dcval; + outptr[2] = dcval; + outptr[3] = dcval; + outptr[4] = dcval; + outptr[5] = dcval; + outptr[6] = dcval; + outptr[7] = dcval; + + wsptr += DCTSIZE; /* advance pointer to next row */ + continue; + } +#endif + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + + z2 = (INT32) wsptr[2]; + z3 = (INT32) wsptr[6]; + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); + + /* Add fudge factor here for final descale. */ + z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + z3 = (INT32) wsptr[4]; + + tmp0 = (z2 + z3) << CONST_BITS; + tmp1 = (z2 - z3) << CONST_BITS; + + tmp10 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + tmp11 = tmp1 + tmp3; + tmp12 = tmp1 - tmp3; + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + tmp0 = (INT32) wsptr[7]; + tmp1 = (INT32) wsptr[5]; + tmp2 = (INT32) wsptr[3]; + tmp3 = (INT32) wsptr[1]; + + z2 = tmp0 + tmp2; + z3 = tmp1 + tmp3; + + z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ + z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + z2 += z1; + z3 += z1; + + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ + tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ + tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ + tmp0 += z1 + z2; + tmp3 += z1 + z3; + + z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ + tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ + tmp1 += z1 + z3; + tmp2 += z1 + z2; + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += DCTSIZE; /* advance pointer to next row */ + } +} + +#ifdef IDCT_SCALING_SUPPORTED + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 7x7 output block. + * + * Optimized algorithm with 12 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/14). + */ + +GLOBAL(void) +jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13; + INT32 z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[7*7]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp13 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + tmp13 += ONE << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ + tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ + tmp0 = z1 + z3; + z2 -= tmp0; + tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ + tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ + tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ + tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + + tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ + tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ + tmp0 = tmp1 - tmp2; + tmp1 += tmp2; + tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ + tmp1 += tmp2; + z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ + tmp0 += z2; + tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ + + /* Final output stage */ + + wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 7 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 7; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp13 <<= CONST_BITS; + + z1 = (INT32) wsptr[2]; + z2 = (INT32) wsptr[4]; + z3 = (INT32) wsptr[6]; + + tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ + tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ + tmp0 = z1 + z3; + z2 -= tmp0; + tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ + tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ + tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ + tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + + tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ + tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ + tmp0 = tmp1 - tmp2; + tmp1 += tmp2; + tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ + tmp1 += tmp2; + z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ + tmp0 += z2; + tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 7; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 6x6 output block. + * + * Optimized algorithm with 3 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/12). + */ + +GLOBAL(void) +jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; + INT32 z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[6*6]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ + tmp1 = tmp0 + tmp10; + tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); + tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ + tmp10 = tmp1 + tmp0; + tmp12 = tmp1 - tmp0; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ + tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); + tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); + tmp1 = (z1 - z2 - z3) << PASS1_BITS; + + /* Final output stage */ + + wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[6*1] = (int) (tmp11 + tmp1); + wsptr[6*4] = (int) (tmp11 - tmp1); + wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 6 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 6; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 <<= CONST_BITS; + tmp2 = (INT32) wsptr[4]; + tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ + tmp1 = tmp0 + tmp10; + tmp11 = tmp0 - tmp10 - tmp10; + tmp10 = (INT32) wsptr[2]; + tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ + tmp10 = tmp1 + tmp0; + tmp12 = tmp1 - tmp0; + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ + tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); + tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); + tmp1 = (z1 - z2 - z3) << CONST_BITS; + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 6; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 5x5 output block. + * + * Optimized algorithm with 5 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/10). + */ + +GLOBAL(void) +jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp1, tmp10, tmp11, tmp12; + INT32 z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[5*5]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp12 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ + z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ + z3 = tmp12 + z2; + tmp10 = z3 + z1; + tmp11 = z3 - z1; + tmp12 -= z2 << 2; + + /* Odd part */ + + z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ + tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ + tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ + + /* Final output stage */ + + wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 5 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 5; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp12 <<= CONST_BITS; + tmp0 = (INT32) wsptr[2]; + tmp1 = (INT32) wsptr[4]; + z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ + z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ + z3 = tmp12 + z2; + tmp10 = z3 + z1; + tmp11 = z3 - z1; + tmp12 -= z2 << 2; + + /* Odd part */ + + z2 = (INT32) wsptr[1]; + z3 = (INT32) wsptr[3]; + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ + tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ + tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 5; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 4x4 output block. + * + * Optimized algorithm with 3 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. + */ + +GLOBAL(void) +jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp2, tmp10, tmp12; + INT32 z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[4*4]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + + tmp10 = (tmp0 + tmp2) << PASS1_BITS; + tmp12 = (tmp0 - tmp2) << PASS1_BITS; + + /* Odd part */ + /* Same rotation as in the even part of the 8x8 LL&M IDCT */ + + z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */ + CONST_BITS-PASS1_BITS); + tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */ + CONST_BITS-PASS1_BITS); + + /* Final output stage */ + + wsptr[4*0] = (int) (tmp10 + tmp0); + wsptr[4*3] = (int) (tmp10 - tmp0); + wsptr[4*1] = (int) (tmp12 + tmp2); + wsptr[4*2] = (int) (tmp12 - tmp2); + } + + /* Pass 2: process 4 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 4; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp2 = (INT32) wsptr[2]; + + tmp10 = (tmp0 + tmp2) << CONST_BITS; + tmp12 = (tmp0 - tmp2) << CONST_BITS; + + /* Odd part */ + /* Same rotation as in the even part of the 8x8 LL&M IDCT */ + + z2 = (INT32) wsptr[1]; + z3 = (INT32) wsptr[3]; + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ + tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 4; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 3x3 output block. + * + * Optimized algorithm with 2 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/6). + */ + +GLOBAL(void) +jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp2, tmp10, tmp12; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[3*3]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ + tmp10 = tmp0 + tmp12; + tmp2 = tmp0 - tmp12 - tmp12; + + /* Odd part */ + + tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ + + /* Final output stage */ + + wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 3 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 3; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 <<= CONST_BITS; + tmp2 = (INT32) wsptr[2]; + tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ + tmp10 = tmp0 + tmp12; + tmp2 = tmp0 - tmp12 - tmp12; + + /* Odd part */ + + tmp12 = (INT32) wsptr[1]; + tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 3; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 2x2 output block. + * + * Multiplication-less algorithm. + */ + +GLOBAL(void) +jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; + ISLOW_MULT_TYPE * quantptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + SHIFT_TEMPS + + /* Pass 1: process columns from input. */ + + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + + /* Column 0 */ + tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); + /* Add fudge factor here for final descale. */ + tmp4 += ONE << 2; + + tmp0 = tmp4 + tmp5; + tmp2 = tmp4 - tmp5; + + /* Column 1 */ + tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]); + tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]); + + tmp1 = tmp4 + tmp5; + tmp3 = tmp4 - tmp5; + + /* Pass 2: process 2 rows, store into output array. */ + + /* Row 0 */ + outptr = output_buf[0] + output_col; + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; + + /* Row 1 */ + outptr = output_buf[1] + output_col; + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK]; +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 1x1 output block. + * + * We hardly need an inverse DCT routine for this: just take the + * average pixel value, which is one-eighth of the DC coefficient. + */ + +GLOBAL(void) +jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + int dcval; + ISLOW_MULT_TYPE * quantptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + SHIFT_TEMPS + + /* 1x1 is trivial: just take the DC coefficient divided by 8. */ + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + dcval = DEQUANTIZE(coef_block[0], quantptr[0]); + dcval = (int) DESCALE((INT32) dcval, 3); + + output_buf[0][output_col] = range_limit[dcval & RANGE_MASK]; +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 9x9 output block. + * + * Optimized algorithm with 10 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/18). + */ + +GLOBAL(void) +jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*9]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ + tmp1 = tmp0 + tmp3; + tmp2 = tmp0 - tmp3 - tmp3; + + tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ + tmp11 = tmp2 + tmp0; + tmp14 = tmp2 - tmp0 - tmp0; + + tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ + tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ + tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ + + tmp10 = tmp1 + tmp0 - tmp3; + tmp12 = tmp1 - tmp0 + tmp2; + tmp13 = tmp1 - tmp2 + tmp3; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ + + tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ + tmp0 = tmp2 + tmp3 - z2; + tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ + tmp2 += z2 - tmp1; + tmp3 += z2 + tmp1; + tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 9 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 9; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 <<= CONST_BITS; + + z1 = (INT32) wsptr[2]; + z2 = (INT32) wsptr[4]; + z3 = (INT32) wsptr[6]; + + tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ + tmp1 = tmp0 + tmp3; + tmp2 = tmp0 - tmp3 - tmp3; + + tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ + tmp11 = tmp2 + tmp0; + tmp14 = tmp2 - tmp0 - tmp0; + + tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ + tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ + tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ + + tmp10 = tmp1 + tmp0 - tmp3; + tmp12 = tmp1 - tmp0 + tmp2; + tmp13 = tmp1 - tmp2 + tmp3; + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + z4 = (INT32) wsptr[7]; + + z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ + + tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ + tmp0 = tmp2 + tmp3 - z2; + tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ + tmp2 += z2 - tmp1; + tmp3 += z2 + tmp1; + tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 10x10 output block. + * + * Optimized algorithm with 12 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/20). + */ + +GLOBAL(void) +jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp10, tmp11, tmp12, tmp13, tmp14; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24; + INT32 z1, z2, z3, z4, z5; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*10]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z3 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + z3 += ONE << (CONST_BITS-PASS1_BITS-1); + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ + z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ + tmp10 = z3 + z1; + tmp11 = z3 - z2; + + tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */ + CONST_BITS-PASS1_BITS); + + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ + tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ + tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ + + tmp20 = tmp10 + tmp12; + tmp24 = tmp10 - tmp12; + tmp21 = tmp11 + tmp13; + tmp23 = tmp11 - tmp13; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = z2 + z4; + tmp13 = z2 - z4; + + tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ + z5 = z3 << CONST_BITS; + + z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ + z4 = z5 + tmp12; + + tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ + tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ + + z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ + z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); + + tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; + + tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ + tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) (tmp22 + tmp12); + wsptr[8*7] = (int) (tmp22 - tmp12); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 10 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 10; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + z3 <<= CONST_BITS; + z4 = (INT32) wsptr[4]; + z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ + z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ + tmp10 = z3 + z1; + tmp11 = z3 - z2; + + tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */ + + z2 = (INT32) wsptr[2]; + z3 = (INT32) wsptr[6]; + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ + tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ + tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ + + tmp20 = tmp10 + tmp12; + tmp24 = tmp10 - tmp12; + tmp21 = tmp11 + tmp13; + tmp23 = tmp11 - tmp13; + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + z3 <<= CONST_BITS; + z4 = (INT32) wsptr[7]; + + tmp11 = z2 + z4; + tmp13 = z2 - z4; + + tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ + + z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ + z4 = z3 + tmp12; + + tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ + tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ + + z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ + z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1)); + + tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; + + tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ + tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 11x11 output block. + * + * Optimized algorithm with 24 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/22). + */ + +GLOBAL(void) +jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp10, tmp11, tmp12, tmp13, tmp14; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*11]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp10 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ + tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ + z4 = z1 + z3; + tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ + z4 -= z2; + tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ + tmp21 = tmp20 + tmp23 + tmp25 - + MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ + tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ + tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ + tmp24 += tmp25; + tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ + tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ + MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ + tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = z1 + z2; + tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ + tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ + tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ + z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ + tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ + tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ + z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ + tmp11 += z1; + tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ + tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ + MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ + MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 11 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 11; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp10 <<= CONST_BITS; + + z1 = (INT32) wsptr[2]; + z2 = (INT32) wsptr[4]; + z3 = (INT32) wsptr[6]; + + tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ + tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ + z4 = z1 + z3; + tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ + z4 -= z2; + tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ + tmp21 = tmp20 + tmp23 + tmp25 - + MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ + tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ + tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ + tmp24 += tmp25; + tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ + tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ + MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ + tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + z4 = (INT32) wsptr[7]; + + tmp11 = z1 + z2; + tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ + tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ + tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ + z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ + tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ + tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ + z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ + tmp11 += z1; + tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ + tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ + MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ + MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 12x12 output block. + * + * Optimized algorithm with 15 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/24). + */ + +GLOBAL(void) +jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*12]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z3 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + z3 += ONE << (CONST_BITS-PASS1_BITS-1); + + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ + z1 <<= CONST_BITS; + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z2 <<= CONST_BITS; + + tmp12 = z1 - z2; + + tmp21 = z3 + tmp12; + tmp24 = z3 - tmp12; + + tmp12 = z4 + z2; + + tmp20 = tmp10 + tmp12; + tmp25 = tmp10 - tmp12; + + tmp12 = z4 - z1 - z2; + + tmp22 = tmp11 + tmp12; + tmp23 = tmp11 - tmp12; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ + tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ + + tmp10 = z1 + z3; + tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ + tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ + tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ + tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ + tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ + tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ + tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ + MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ + + z1 -= z4; + z2 -= z3; + z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ + tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ + tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 12 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 12; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + z3 <<= CONST_BITS; + + z4 = (INT32) wsptr[4]; + z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + z1 = (INT32) wsptr[2]; + z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ + z1 <<= CONST_BITS; + z2 = (INT32) wsptr[6]; + z2 <<= CONST_BITS; + + tmp12 = z1 - z2; + + tmp21 = z3 + tmp12; + tmp24 = z3 - tmp12; + + tmp12 = z4 + z2; + + tmp20 = tmp10 + tmp12; + tmp25 = tmp10 - tmp12; + + tmp12 = z4 - z1 - z2; + + tmp22 = tmp11 + tmp12; + tmp23 = tmp11 - tmp12; + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + z4 = (INT32) wsptr[7]; + + tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ + tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ + + tmp10 = z1 + z3; + tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ + tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ + tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ + tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ + tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ + tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ + tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ + MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ + + z1 -= z4; + z2 -= z3; + z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ + tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ + tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 13x13 output block. + * + * Optimized algorithm with 29 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/26). + */ + +GLOBAL(void) +jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*13]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z1 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ + + tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ + tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ + + tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ + + tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ + tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ + + tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ + + tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ + tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ + + tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ + tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ + tmp15 = z1 + z4; + tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ + tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ + tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ + tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ + tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ + tmp11 += tmp14; + tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ + tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ + tmp12 += tmp14; + tmp13 += tmp14; + tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ + tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ + MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ + z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ + tmp14 += z1; + tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ + MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 13 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 13; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + z1 <<= CONST_BITS; + + z2 = (INT32) wsptr[2]; + z3 = (INT32) wsptr[4]; + z4 = (INT32) wsptr[6]; + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ + + tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ + tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ + + tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ + + tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ + tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ + + tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ + tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ + + tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ + tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ + + tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + z4 = (INT32) wsptr[7]; + + tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ + tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ + tmp15 = z1 + z4; + tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ + tmp10 = tmp11 + tmp12 + tmp13 - + MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ + tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ + tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ + tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ + tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ + tmp11 += tmp14; + tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ + tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ + tmp12 += tmp14; + tmp13 += tmp14; + tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ + tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ + MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ + z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ + tmp14 += z1; + tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ + MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 14x14 output block. + * + * Optimized algorithm with 20 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/28). + */ + +GLOBAL(void) +jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*14]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z1 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ + z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ + z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ + + tmp10 = z1 + z2; + tmp11 = z1 + z3; + tmp12 = z1 - z4; + + tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */ + CONST_BITS-PASS1_BITS); + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ + + tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ + tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ + tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ + MULTIPLY(z2, FIX(1.378756276)); /* c2 */ + + tmp20 = tmp10 + tmp13; + tmp26 = tmp10 - tmp13; + tmp21 = tmp11 + tmp14; + tmp25 = tmp11 - tmp14; + tmp22 = tmp12 + tmp15; + tmp24 = tmp12 - tmp15; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + tmp13 = z4 << CONST_BITS; + + tmp14 = z1 + z3; + tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ + tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ + tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ + tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ + tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ + z1 -= z2; + tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ + tmp16 += tmp15; + z1 += z4; + z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ + tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ + tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ + z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ + tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ + tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ + + tmp13 = (z1 - z3) << PASS1_BITS; + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) (tmp23 + tmp13); + wsptr[8*10] = (int) (tmp23 - tmp13); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 14 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 14; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + z1 <<= CONST_BITS; + z4 = (INT32) wsptr[4]; + z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ + z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ + z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ + + tmp10 = z1 + z2; + tmp11 = z1 + z3; + tmp12 = z1 - z4; + + tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */ + + z1 = (INT32) wsptr[2]; + z2 = (INT32) wsptr[6]; + + z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ + + tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ + tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ + tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ + MULTIPLY(z2, FIX(1.378756276)); /* c2 */ + + tmp20 = tmp10 + tmp13; + tmp26 = tmp10 - tmp13; + tmp21 = tmp11 + tmp14; + tmp25 = tmp11 - tmp14; + tmp22 = tmp12 + tmp15; + tmp24 = tmp12 - tmp15; + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + z4 = (INT32) wsptr[7]; + z4 <<= CONST_BITS; + + tmp14 = z1 + z3; + tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ + tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ + tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ + tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ + tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ + z1 -= z2; + tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ + tmp16 += tmp15; + tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ + tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ + tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ + tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ + tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ + tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ + + tmp13 = ((z1 - z3) << CONST_BITS) + z4; + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 15x15 output block. + * + * Optimized algorithm with 22 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/30). + */ + +GLOBAL(void) +jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*15]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z1 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ + tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ + + tmp12 = z1 - tmp10; + tmp13 = z1 + tmp11; + z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ + + z4 = z2 - z3; + z3 += z2; + tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ + z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ + + tmp20 = tmp13 + tmp10 + tmp11; + tmp23 = tmp12 - tmp10 + tmp11 + z2; + + tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ + + tmp25 = tmp13 - tmp10 - tmp11; + tmp26 = tmp12 + tmp10 - tmp11 - z2; + + tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ + + tmp21 = tmp12 + tmp10 + tmp11; + tmp24 = tmp13 - tmp10 + tmp11; + tmp11 += tmp11; + tmp22 = z1 + tmp11; /* c10 = c6-c12 */ + tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp13 = z2 - z4; + tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ + tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ + tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ + + tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ + tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ + z2 = z1 - z4; + tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ + + tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ + tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ + tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ + z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ + tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ + tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 15 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 15; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + z1 <<= CONST_BITS; + + z2 = (INT32) wsptr[2]; + z3 = (INT32) wsptr[4]; + z4 = (INT32) wsptr[6]; + + tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ + tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ + + tmp12 = z1 - tmp10; + tmp13 = z1 + tmp11; + z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ + + z4 = z2 - z3; + z3 += z2; + tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ + z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ + + tmp20 = tmp13 + tmp10 + tmp11; + tmp23 = tmp12 - tmp10 + tmp11 + z2; + + tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ + + tmp25 = tmp13 - tmp10 - tmp11; + tmp26 = tmp12 + tmp10 - tmp11 - z2; + + tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ + tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ + + tmp21 = tmp12 + tmp10 + tmp11; + tmp24 = tmp13 - tmp10 + tmp11; + tmp11 += tmp11; + tmp22 = z1 + tmp11; /* c10 = c6-c12 */ + tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z4 = (INT32) wsptr[5]; + z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ + z4 = (INT32) wsptr[7]; + + tmp13 = z2 - z4; + tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ + tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ + tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ + + tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ + tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ + z2 = z1 - z4; + tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ + + tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ + tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ + tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ + z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ + tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ + tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 16x16 output block. + * + * Optimized algorithm with 28 multiplications in the 1-D kernel. + * cK represents sqrt(2) * cos(K*pi/32). + */ + +GLOBAL(void) +jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*16]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + tmp0 += 1 << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ + tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ + + tmp10 = tmp0 + tmp1; + tmp11 = tmp0 - tmp1; + tmp12 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z3 = z1 - z2; + z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ + z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ + + tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ + tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ + tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ + tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ + + tmp20 = tmp10 + tmp0; + tmp27 = tmp10 - tmp0; + tmp21 = tmp12 + tmp1; + tmp26 = tmp12 - tmp1; + tmp22 = tmp13 + tmp2; + tmp25 = tmp13 - tmp2; + tmp23 = tmp11 + tmp3; + tmp24 = tmp11 - tmp3; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = z1 + z3; + + tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ + tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ + tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ + tmp0 = tmp1 + tmp2 + tmp3 - + MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ + tmp13 = tmp10 + tmp11 + tmp12 - + MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ + z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ + tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ + tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ + z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ + tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ + tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ + z2 += z4; + z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ + tmp1 += z1; + tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ + z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ + tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ + tmp12 += z2; + z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ + tmp2 += z2; + tmp3 += z2; + z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ + tmp10 += z2; + tmp11 += z2; + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 16 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 16; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 <<= CONST_BITS; + + z1 = (INT32) wsptr[4]; + tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ + tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ + + tmp10 = tmp0 + tmp1; + tmp11 = tmp0 - tmp1; + tmp12 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + + z1 = (INT32) wsptr[2]; + z2 = (INT32) wsptr[6]; + z3 = z1 - z2; + z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ + z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ + + tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ + tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ + tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ + tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ + + tmp20 = tmp10 + tmp0; + tmp27 = tmp10 - tmp0; + tmp21 = tmp12 + tmp1; + tmp26 = tmp12 - tmp1; + tmp22 = tmp13 + tmp2; + tmp25 = tmp13 - tmp2; + tmp23 = tmp11 + tmp3; + tmp24 = tmp11 - tmp3; + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + z4 = (INT32) wsptr[7]; + + tmp11 = z1 + z3; + + tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ + tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ + tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ + tmp0 = tmp1 + tmp2 + tmp3 - + MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ + tmp13 = tmp10 + tmp11 + tmp12 - + MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ + z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ + tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ + tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ + z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ + tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ + tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ + z2 += z4; + z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ + tmp1 += z1; + tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ + z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ + tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ + tmp12 += z2; + z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ + tmp2 += z2; + tmp3 += z2; + z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ + tmp10 += z2; + tmp11 += z2; + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 16x8 output block. + * + * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*8]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = DCTSIZE; ctr > 0; ctr--) { + /* Due to quantization, we will usually find that many of the input + * coefficients are zero, especially the AC terms. We can exploit this + * by short-circuiting the IDCT calculation for any column in which all + * the AC terms are zero. In that case each output is equal to the + * DC coefficient (with scale factor as needed). + * With typical images and quantization tables, half or more of the + * column DCT calculations can be simplified this way. + */ + + if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && + inptr[DCTSIZE*7] == 0) { + /* AC terms all zero */ + int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; + + wsptr[DCTSIZE*0] = dcval; + wsptr[DCTSIZE*1] = dcval; + wsptr[DCTSIZE*2] = dcval; + wsptr[DCTSIZE*3] = dcval; + wsptr[DCTSIZE*4] = dcval; + wsptr[DCTSIZE*5] = dcval; + wsptr[DCTSIZE*6] = dcval; + wsptr[DCTSIZE*7] = dcval; + + inptr++; /* advance pointers to next column */ + quantptr++; + wsptr++; + continue; + } + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); + + z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z2 <<= CONST_BITS; + z3 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + z2 += ONE << (CONST_BITS-PASS1_BITS-1); + + tmp0 = z2 + z3; + tmp1 = z2 - z3; + + tmp10 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + tmp11 = tmp1 + tmp3; + tmp12 = tmp1 - tmp3; + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + + z2 = tmp0 + tmp2; + z3 = tmp1 + tmp3; + + z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ + z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + z2 += z1; + z3 += z1; + + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ + tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ + tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ + tmp0 += z1 + z2; + tmp3 += z1 + z3; + + z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ + tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ + tmp1 += z1 + z3; + tmp2 += z1 + z2; + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); + + inptr++; /* advance pointers to next column */ + quantptr++; + wsptr++; + } + + /* Pass 2: process 8 rows from work array, store into output array. + * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32). + */ + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 <<= CONST_BITS; + + z1 = (INT32) wsptr[4]; + tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ + tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ + + tmp10 = tmp0 + tmp1; + tmp11 = tmp0 - tmp1; + tmp12 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + + z1 = (INT32) wsptr[2]; + z2 = (INT32) wsptr[6]; + z3 = z1 - z2; + z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ + z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ + + tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ + tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ + tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ + tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ + + tmp20 = tmp10 + tmp0; + tmp27 = tmp10 - tmp0; + tmp21 = tmp12 + tmp1; + tmp26 = tmp12 - tmp1; + tmp22 = tmp13 + tmp2; + tmp25 = tmp13 - tmp2; + tmp23 = tmp11 + tmp3; + tmp24 = tmp11 - tmp3; + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + z4 = (INT32) wsptr[7]; + + tmp11 = z1 + z3; + + tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ + tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ + tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ + tmp0 = tmp1 + tmp2 + tmp3 - + MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ + tmp13 = tmp10 + tmp11 + tmp12 - + MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ + z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ + tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ + tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ + z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ + tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ + tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ + z2 += z4; + z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ + tmp1 += z1; + tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ + z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ + tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ + tmp12 += z2; + z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ + tmp2 += z2; + tmp3 += z2; + z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ + tmp10 += z2; + tmp11 += z2; + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 14x7 output block. + * + * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*7]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. + * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14). + */ + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp23 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + tmp23 += ONE << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ + tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ + tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ + tmp10 = z1 + z3; + z2 -= tmp10; + tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */ + tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ + tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ + tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + + tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ + tmp10 = tmp11 - tmp12; + tmp11 += tmp12; + tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ + tmp11 += tmp12; + z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ + tmp10 += z2; + tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 7 rows from work array, store into output array. + * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28). + */ + wsptr = workspace; + for (ctr = 0; ctr < 7; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + z1 <<= CONST_BITS; + z4 = (INT32) wsptr[4]; + z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ + z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ + z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ + + tmp10 = z1 + z2; + tmp11 = z1 + z3; + tmp12 = z1 - z4; + + tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */ + + z1 = (INT32) wsptr[2]; + z2 = (INT32) wsptr[6]; + + z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ + + tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ + tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ + tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ + MULTIPLY(z2, FIX(1.378756276)); /* c2 */ + + tmp20 = tmp10 + tmp13; + tmp26 = tmp10 - tmp13; + tmp21 = tmp11 + tmp14; + tmp25 = tmp11 - tmp14; + tmp22 = tmp12 + tmp15; + tmp24 = tmp12 - tmp15; + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + z4 = (INT32) wsptr[7]; + z4 <<= CONST_BITS; + + tmp14 = z1 + z3; + tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ + tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ + tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ + tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ + tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ + z1 -= z2; + tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ + tmp16 += tmp15; + tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ + tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ + tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ + tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ + tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ + tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ + + tmp13 = ((z1 - z3) << CONST_BITS) + z4; + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 12x6 output block. + * + * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*6]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. + * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). + */ + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp10 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */ + tmp11 = tmp10 + tmp20; + tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS); + tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */ + tmp20 = tmp11 + tmp10; + tmp22 = tmp11 - tmp10; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ + tmp10 = tmp11 + ((z1 + z2) << CONST_BITS); + tmp12 = tmp11 + ((z3 - z2) << CONST_BITS); + tmp11 = (z1 - z2 - z3) << PASS1_BITS; + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) (tmp21 + tmp11); + wsptr[8*4] = (int) (tmp21 - tmp11); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 6 rows from work array, store into output array. + * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24). + */ + wsptr = workspace; + for (ctr = 0; ctr < 6; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + z3 <<= CONST_BITS; + + z4 = (INT32) wsptr[4]; + z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + z1 = (INT32) wsptr[2]; + z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ + z1 <<= CONST_BITS; + z2 = (INT32) wsptr[6]; + z2 <<= CONST_BITS; + + tmp12 = z1 - z2; + + tmp21 = z3 + tmp12; + tmp24 = z3 - tmp12; + + tmp12 = z4 + z2; + + tmp20 = tmp10 + tmp12; + tmp25 = tmp10 - tmp12; + + tmp12 = z4 - z1 - z2; + + tmp22 = tmp11 + tmp12; + tmp23 = tmp11 - tmp12; + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + z4 = (INT32) wsptr[7]; + + tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ + tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ + + tmp10 = z1 + z3; + tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ + tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ + tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ + tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ + tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ + tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ + tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ + MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ + + z1 -= z4; + z2 -= z3; + z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ + tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ + tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 10x5 output block. + * + * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp10, tmp11, tmp12, tmp13, tmp14; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*5]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. + * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10). + */ + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp12 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */ + z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */ + z3 = tmp12 + z2; + tmp10 = z3 + z1; + tmp11 = z3 - z1; + tmp12 -= z2 << 2; + + /* Odd part */ + + z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ + tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ + tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 5 rows from work array, store into output array. + * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20). + */ + wsptr = workspace; + for (ctr = 0; ctr < 5; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + z3 <<= CONST_BITS; + z4 = (INT32) wsptr[4]; + z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ + z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ + tmp10 = z3 + z1; + tmp11 = z3 - z2; + + tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */ + + z2 = (INT32) wsptr[2]; + z3 = (INT32) wsptr[6]; + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ + tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ + tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ + + tmp20 = tmp10 + tmp12; + tmp24 = tmp10 - tmp12; + tmp21 = tmp11 + tmp13; + tmp23 = tmp11 - tmp13; + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + z3 <<= CONST_BITS; + z4 = (INT32) wsptr[7]; + + tmp11 = z2 + z4; + tmp13 = z2 - z4; + + tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ + + z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ + z4 = z3 + tmp12; + + tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ + tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ + + z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ + z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1)); + + tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; + + tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ + tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 8; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 8x4 output block. + * + * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3; + INT32 tmp10, tmp11, tmp12, tmp13; + INT32 z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*4]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. + * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). + */ + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + + tmp10 = (tmp0 + tmp2) << PASS1_BITS; + tmp12 = (tmp0 - tmp2) << PASS1_BITS; + + /* Odd part */ + /* Same rotation as in the even part of the 8x8 LL&M IDCT */ + + z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */ + CONST_BITS-PASS1_BITS); + tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */ + CONST_BITS-PASS1_BITS); + + /* Final output stage */ + + wsptr[8*0] = (int) (tmp10 + tmp0); + wsptr[8*3] = (int) (tmp10 - tmp0); + wsptr[8*1] = (int) (tmp12 + tmp2); + wsptr[8*2] = (int) (tmp12 - tmp2); + } + + /* Pass 2: process rows from work array, store into output array. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + + wsptr = workspace; + for (ctr = 0; ctr < 4; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + + z2 = (INT32) wsptr[2]; + z3 = (INT32) wsptr[6]; + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); + + /* Add fudge factor here for final descale. */ + z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + z3 = (INT32) wsptr[4]; + + tmp0 = (z2 + z3) << CONST_BITS; + tmp1 = (z2 - z3) << CONST_BITS; + + tmp10 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + tmp11 = tmp1 + tmp3; + tmp12 = tmp1 - tmp3; + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + tmp0 = (INT32) wsptr[7]; + tmp1 = (INT32) wsptr[5]; + tmp2 = (INT32) wsptr[3]; + tmp3 = (INT32) wsptr[1]; + + z2 = tmp0 + tmp2; + z3 = tmp1 + tmp3; + + z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ + z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + z2 += z1; + z3 += z1; + + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ + tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ + tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ + tmp0 += z1 + z2; + tmp3 += z1 + z3; + + z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ + tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ + tmp1 += z1 + z3; + tmp2 += z1 + z2; + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += DCTSIZE; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 6x3 output block. + * + * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; + INT32 z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[6*3]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. + * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6). + */ + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ + tmp10 = tmp0 + tmp12; + tmp2 = tmp0 - tmp12 - tmp12; + + /* Odd part */ + + tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ + + /* Final output stage */ + + wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 3 rows from work array, store into output array. + * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). + */ + wsptr = workspace; + for (ctr = 0; ctr < 3; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 <<= CONST_BITS; + tmp2 = (INT32) wsptr[4]; + tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ + tmp1 = tmp0 + tmp10; + tmp11 = tmp0 - tmp10 - tmp10; + tmp10 = (INT32) wsptr[2]; + tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ + tmp10 = tmp1 + tmp0; + tmp12 = tmp1 - tmp0; + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ + tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); + tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); + tmp1 = (z1 - z2 - z3) << CONST_BITS; + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 6; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 4x2 output block. + * + * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp2, tmp10, tmp12; + INT32 z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + INT32 * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + INT32 workspace[4*2]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + + /* Odd part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + + /* Final output stage */ + + wsptr[4*0] = tmp10 + tmp0; + wsptr[4*1] = tmp10 - tmp0; + } + + /* Pass 2: process 2 rows from work array, store into output array. + * 4-point IDCT kernel, + * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. + */ + wsptr = workspace; + for (ctr = 0; ctr < 2; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = wsptr[0] + (ONE << 2); + tmp2 = wsptr[2]; + + tmp10 = (tmp0 + tmp2) << CONST_BITS; + tmp12 = (tmp0 - tmp2) << CONST_BITS; + + /* Odd part */ + /* Same rotation as in the even part of the 8x8 LL&M IDCT */ + + z2 = wsptr[1]; + z3 = wsptr[3]; + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ + tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS+3) + & RANGE_MASK]; + + wsptr += 4; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 2x1 output block. + * + * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp10; + ISLOW_MULT_TYPE * quantptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + SHIFT_TEMPS + + /* Pass 1: empty. */ + + /* Pass 2: process 1 row from input, store into output array. */ + + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + outptr = output_buf[0] + output_col; + + /* Even part */ + + tmp10 = DEQUANTIZE(coef_block[0], quantptr[0]); + /* Add fudge factor here for final descale. */ + tmp10 += ONE << 2; + + /* Odd part */ + + tmp0 = DEQUANTIZE(coef_block[1], quantptr[1]); + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3) & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3) & RANGE_MASK]; +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 8x16 output block. + * + * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[8*16]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. + * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32). + */ + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); + + z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ + tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ + + tmp10 = tmp0 + tmp1; + tmp11 = tmp0 - tmp1; + tmp12 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z3 = z1 - z2; + z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ + z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ + + tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ + tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ + tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ + tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ + + tmp20 = tmp10 + tmp0; + tmp27 = tmp10 - tmp0; + tmp21 = tmp12 + tmp1; + tmp26 = tmp12 - tmp1; + tmp22 = tmp13 + tmp2; + tmp25 = tmp13 - tmp2; + tmp23 = tmp11 + tmp3; + tmp24 = tmp11 - tmp3; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = z1 + z3; + + tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ + tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ + tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ + tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ + tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ + tmp0 = tmp1 + tmp2 + tmp3 - + MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ + tmp13 = tmp10 + tmp11 + tmp12 - + MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ + z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ + tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ + tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ + z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ + tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ + tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ + z2 += z4; + z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ + tmp1 += z1; + tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ + z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ + tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ + tmp12 += z2; + z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ + tmp2 += z2; + tmp3 += z2; + z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ + tmp10 += z2; + tmp11 += z2; + + /* Final output stage */ + + wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); + wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process rows from work array, store into output array. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + + wsptr = workspace; + for (ctr = 0; ctr < 16; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + + z2 = (INT32) wsptr[2]; + z3 = (INT32) wsptr[6]; + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); + + /* Add fudge factor here for final descale. */ + z2 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + z3 = (INT32) wsptr[4]; + + tmp0 = (z2 + z3) << CONST_BITS; + tmp1 = (z2 - z3) << CONST_BITS; + + tmp10 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + tmp11 = tmp1 + tmp3; + tmp12 = tmp1 - tmp3; + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + tmp0 = (INT32) wsptr[7]; + tmp1 = (INT32) wsptr[5]; + tmp2 = (INT32) wsptr[3]; + tmp3 = (INT32) wsptr[1]; + + z2 = tmp0 + tmp2; + z3 = tmp1 + tmp3; + + z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ + z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + z2 += z1; + z3 += z1; + + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ + tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ + tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ + tmp0 += z1 + z2; + tmp3 += z1 + z3; + + z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ + tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ + tmp1 += z1 + z3; + tmp2 += z1 + z2; + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += DCTSIZE; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 7x14 output block. + * + * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[7*14]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. + * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28). + */ + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z1 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + z1 += ONE << (CONST_BITS-PASS1_BITS-1); + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ + z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ + z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ + + tmp10 = z1 + z2; + tmp11 = z1 + z3; + tmp12 = z1 - z4; + + tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */ + CONST_BITS-PASS1_BITS); + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ + + tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ + tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ + tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ + MULTIPLY(z2, FIX(1.378756276)); /* c2 */ + + tmp20 = tmp10 + tmp13; + tmp26 = tmp10 - tmp13; + tmp21 = tmp11 + tmp14; + tmp25 = tmp11 - tmp14; + tmp22 = tmp12 + tmp15; + tmp24 = tmp12 - tmp15; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + tmp13 = z4 << CONST_BITS; + + tmp14 = z1 + z3; + tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ + tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ + tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ + tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ + tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ + z1 -= z2; + tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ + tmp16 += tmp15; + z1 += z4; + z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ + tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ + tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ + z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ + tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ + tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ + + tmp13 = (z1 - z3) << PASS1_BITS; + + /* Final output stage */ + + wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[7*3] = (int) (tmp23 + tmp13); + wsptr[7*10] = (int) (tmp23 - tmp13); + wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); + wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 14 rows from work array, store into output array. + * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14). + */ + wsptr = workspace; + for (ctr = 0; ctr < 14; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp23 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp23 <<= CONST_BITS; + + z1 = (INT32) wsptr[2]; + z2 = (INT32) wsptr[4]; + z3 = (INT32) wsptr[6]; + + tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ + tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ + tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ + tmp10 = z1 + z3; + z2 -= tmp10; + tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */ + tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ + tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ + tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + + tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ + tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ + tmp10 = tmp11 - tmp12; + tmp11 += tmp12; + tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ + tmp11 += tmp12; + z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ + tmp10 += z2; + tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 7; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 6x12 output block. + * + * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; + INT32 z1, z2, z3, z4; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[6*12]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. + * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24). + */ + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z3 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + z3 += ONE << (CONST_BITS-PASS1_BITS-1); + + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ + + tmp10 = z3 + z4; + tmp11 = z3 - z4; + + z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ + z1 <<= CONST_BITS; + z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + z2 <<= CONST_BITS; + + tmp12 = z1 - z2; + + tmp21 = z3 + tmp12; + tmp24 = z3 - tmp12; + + tmp12 = z4 + z2; + + tmp20 = tmp10 + tmp12; + tmp25 = tmp10 - tmp12; + + tmp12 = z4 - z1 - z2; + + tmp22 = tmp11 + tmp12; + tmp23 = tmp11 - tmp12; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ + tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ + + tmp10 = z1 + z3; + tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ + tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ + tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ + tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ + tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ + tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ + tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ + MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ + + z1 -= z4; + z2 -= z3; + z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ + tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ + tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ + + /* Final output stage */ + + wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); + wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); + wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); + wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 12 rows from work array, store into output array. + * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). + */ + wsptr = workspace; + for (ctr = 0; ctr < 12; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp10 <<= CONST_BITS; + tmp12 = (INT32) wsptr[4]; + tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */ + tmp11 = tmp10 + tmp20; + tmp21 = tmp10 - tmp20 - tmp20; + tmp20 = (INT32) wsptr[2]; + tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */ + tmp20 = tmp11 + tmp10; + tmp22 = tmp11 - tmp10; + + /* Odd part */ + + z1 = (INT32) wsptr[1]; + z2 = (INT32) wsptr[3]; + z3 = (INT32) wsptr[5]; + tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ + tmp10 = tmp11 + ((z1 + z2) << CONST_BITS); + tmp12 = tmp11 + ((z3 - z2) << CONST_BITS); + tmp11 = (z1 - z2 - z3) << CONST_BITS; + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 6; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 5x10 output block. + * + * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp10, tmp11, tmp12, tmp13, tmp14; + INT32 tmp20, tmp21, tmp22, tmp23, tmp24; + INT32 z1, z2, z3, z4, z5; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[5*10]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. + * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20). + */ + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z3 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + z3 += ONE << (CONST_BITS-PASS1_BITS-1); + z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ + z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ + tmp10 = z3 + z1; + tmp11 = z3 - z2; + + tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */ + CONST_BITS-PASS1_BITS); + + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ + tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ + tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ + + tmp20 = tmp10 + tmp12; + tmp24 = tmp10 - tmp12; + tmp21 = tmp11 + tmp13; + tmp23 = tmp11 - tmp13; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + + tmp11 = z2 + z4; + tmp13 = z2 - z4; + + tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ + z5 = z3 << CONST_BITS; + + z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ + z4 = z5 + tmp12; + + tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ + tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ + + z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ + z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); + + tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; + + tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ + tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ + + /* Final output stage */ + + wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); + wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); + wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); + wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); + wsptr[5*2] = (int) (tmp22 + tmp12); + wsptr[5*7] = (int) (tmp22 - tmp12); + wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); + wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); + wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); + wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 10 rows from work array, store into output array. + * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10). + */ + wsptr = workspace; + for (ctr = 0; ctr < 10; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp12 <<= CONST_BITS; + tmp13 = (INT32) wsptr[2]; + tmp14 = (INT32) wsptr[4]; + z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */ + z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */ + z3 = tmp12 + z2; + tmp10 = z3 + z1; + tmp11 = z3 - z1; + tmp12 -= z2 << 2; + + /* Odd part */ + + z2 = (INT32) wsptr[1]; + z3 = (INT32) wsptr[3]; + + z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ + tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ + tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 5; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 4x8 output block. + * + * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp1, tmp2, tmp3; + INT32 tmp10, tmp11, tmp12, tmp13; + INT32 z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[4*8]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. */ + /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 4; ctr > 0; ctr--) { + /* Due to quantization, we will usually find that many of the input + * coefficients are zero, especially the AC terms. We can exploit this + * by short-circuiting the IDCT calculation for any column in which all + * the AC terms are zero. In that case each output is equal to the + * DC coefficient (with scale factor as needed). + * With typical images and quantization tables, half or more of the + * column DCT calculations can be simplified this way. + */ + + if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && + inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && + inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && + inptr[DCTSIZE*7] == 0) { + /* AC terms all zero */ + int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; + + wsptr[4*0] = dcval; + wsptr[4*1] = dcval; + wsptr[4*2] = dcval; + wsptr[4*3] = dcval; + wsptr[4*4] = dcval; + wsptr[4*5] = dcval; + wsptr[4*6] = dcval; + wsptr[4*7] = dcval; + + inptr++; /* advance pointers to next column */ + quantptr++; + wsptr++; + continue; + } + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + + z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); + tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); + + z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + z2 <<= CONST_BITS; + z3 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + z2 += ONE << (CONST_BITS-PASS1_BITS-1); + + tmp0 = z2 + z3; + tmp1 = z2 - z3; + + tmp10 = tmp0 + tmp2; + tmp13 = tmp0 - tmp2; + tmp11 = tmp1 + tmp3; + tmp12 = tmp1 - tmp3; + + /* Odd part per figure 8; the matrix is unitary and hence its + * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. + */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); + tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + + z2 = tmp0 + tmp2; + z3 = tmp1 + tmp3; + + z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* sqrt(2) * c3 */ + z2 = MULTIPLY(z2, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ + z3 = MULTIPLY(z3, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ + z2 += z1; + z3 += z1; + + z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ + tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ + tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ + tmp0 += z1 + z2; + tmp3 += z1 + z3; + + z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ + tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ + tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ + tmp1 += z1 + z3; + tmp2 += z1 + z2; + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); + wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); + wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); + wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); + wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); + wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); + + inptr++; /* advance pointers to next column */ + quantptr++; + wsptr++; + } + + /* Pass 2: process 8 rows from work array, store into output array. + * 4-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). + */ + wsptr = workspace; + for (ctr = 0; ctr < 8; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp2 = (INT32) wsptr[2]; + + tmp10 = (tmp0 + tmp2) << CONST_BITS; + tmp12 = (tmp0 - tmp2) << CONST_BITS; + + /* Odd part */ + /* Same rotation as in the even part of the 8x8 LL&M IDCT */ + + z2 = (INT32) wsptr[1]; + z3 = (INT32) wsptr[3]; + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ + tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 4; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a reduced-size 3x6 output block. + * + * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; + INT32 z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + int * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + int workspace[3*6]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. + * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). + */ + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp0 <<= CONST_BITS; + /* Add fudge factor here for final descale. */ + tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); + tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ + tmp1 = tmp0 + tmp10; + tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); + tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ + tmp10 = tmp1 + tmp0; + tmp12 = tmp1 - tmp0; + + /* Odd part */ + + z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); + tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ + tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); + tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); + tmp1 = (z1 - z2 - z3) << PASS1_BITS; + + /* Final output stage */ + + wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); + wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); + wsptr[3*1] = (int) (tmp11 + tmp1); + wsptr[3*4] = (int) (tmp11 - tmp1); + wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); + wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); + } + + /* Pass 2: process 6 rows from work array, store into output array. + * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6). + */ + wsptr = workspace; + for (ctr = 0; ctr < 6; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); + tmp0 <<= CONST_BITS; + tmp2 = (INT32) wsptr[2]; + tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ + tmp10 = tmp0 + tmp12; + tmp2 = tmp0 - tmp12 - tmp12; + + /* Odd part */ + + tmp12 = (INT32) wsptr[1]; + tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, + CONST_BITS+PASS1_BITS+3) + & RANGE_MASK]; + + wsptr += 3; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 2x4 output block. + * + * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp2, tmp10, tmp12; + INT32 z1, z2, z3; + JCOEFPTR inptr; + ISLOW_MULT_TYPE * quantptr; + INT32 * wsptr; + JSAMPROW outptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + int ctr; + INT32 workspace[2*4]; /* buffers data between passes */ + SHIFT_TEMPS + + /* Pass 1: process columns from input, store into work array. + * 4-point IDCT kernel, + * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. + */ + inptr = coef_block; + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + wsptr = workspace; + for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) { + /* Even part */ + + tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); + tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); + + tmp10 = (tmp0 + tmp2) << CONST_BITS; + tmp12 = (tmp0 - tmp2) << CONST_BITS; + + /* Odd part */ + /* Same rotation as in the even part of the 8x8 LL&M IDCT */ + + z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); + z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); + + z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ + tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ + tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ + + /* Final output stage */ + + wsptr[2*0] = tmp10 + tmp0; + wsptr[2*3] = tmp10 - tmp0; + wsptr[2*1] = tmp12 + tmp2; + wsptr[2*2] = tmp12 - tmp2; + } + + /* Pass 2: process 4 rows from work array, store into output array. */ + + wsptr = workspace; + for (ctr = 0; ctr < 4; ctr++) { + outptr = output_buf[ctr] + output_col; + + /* Even part */ + + /* Add fudge factor here for final descale. */ + tmp10 = wsptr[0] + (ONE << (CONST_BITS+2)); + + /* Odd part */ + + tmp0 = wsptr[1]; + + /* Final output stage */ + + outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3) + & RANGE_MASK]; + outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3) + & RANGE_MASK]; + + wsptr += 2; /* advance pointer to next row */ + } +} + + +/* + * Perform dequantization and inverse DCT on one block of coefficients, + * producing a 1x2 output block. + * + * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows). + */ + +GLOBAL(void) +jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, + JCOEFPTR coef_block, + JSAMPARRAY output_buf, JDIMENSION output_col) +{ + INT32 tmp0, tmp10; + ISLOW_MULT_TYPE * quantptr; + JSAMPLE *range_limit = IDCT_range_limit(cinfo); + SHIFT_TEMPS + + /* Process 1 column from input, store into output array. */ + + quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; + + /* Even part */ + + tmp10 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); + /* Add fudge factor here for final descale. */ + tmp10 += ONE << 2; + + /* Odd part */ + + tmp0 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); + + /* Final output stage */ + + output_buf[0][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 3) + & RANGE_MASK]; + output_buf[1][output_col] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 3) + & RANGE_MASK]; +} + +#endif /* IDCT_SCALING_SUPPORTED */ +#endif /* DCT_ISLOW_SUPPORTED */ diff --git a/code/jpeg-6b/jinclude.h b/code/jpeg-8c/jinclude.h similarity index 70% rename from code/jpeg-6b/jinclude.h rename to code/jpeg-8c/jinclude.h index 33d2cd5b..0a4f1514 100644 --- a/code/jpeg-6b/jinclude.h +++ b/code/jpeg-8c/jinclude.h @@ -14,38 +14,10 @@ * JPEG library. Most applications need only include jpeglib.h. */ -#ifdef __WIN64__ -#include "basetsd.h" -#endif - -#ifdef _MSC_VER - -#pragma warning(disable : 4018) // signed/unsigned mismatch -#pragma warning(disable : 4032) -#pragma warning(disable : 4051) -#pragma warning(disable : 4057) // slightly different base types -#pragma warning(disable : 4100) // unreferenced formal parameter -#pragma warning(disable : 4115) -#pragma warning(disable : 4125) // decimal digit terminates octal escape sequence -#pragma warning(disable : 4127) // conditional expression is constant -#pragma warning(disable : 4136) -#pragma warning(disable : 4152) // nonstandard extension, function/data pointer conversion in expression -#pragma warning(disable : 4201) -#pragma warning(disable : 4214) -#pragma warning(disable : 4244) -#pragma warning(disable : 4305) // truncation from const double to float -#pragma warning(disable : 4310) // cast truncates constant value -#pragma warning(disable: 4505) // unreferenced local function has been removed -#pragma warning(disable : 4514) -#pragma warning(disable : 4702) // unreachable code -#pragma warning(disable : 4711) // selected for automatic inline expansion -#pragma warning(disable : 4220) // varargs matches remaining parameters -#pragma warning(disable : 4761) // integral size mismatch -#endif /* Include auto-config file to find out which system include files we need. */ -#include "../jpeg-6b/jconfig.h" /* auto configuration options */ +#include "jconfig.h" /* auto configuration options */ #define JCONFIG_INCLUDED /* so that jpeglib.h doesn't do it again */ /* diff --git a/code/jpeg-6b/jmemmgr.c b/code/jpeg-8c/jmemmgr.c similarity index 100% rename from code/jpeg-6b/jmemmgr.c rename to code/jpeg-8c/jmemmgr.c diff --git a/code/jpeg-6b/jmemnobs.c b/code/jpeg-8c/jmemnobs.c similarity index 87% rename from code/jpeg-6b/jmemnobs.c rename to code/jpeg-8c/jmemnobs.c index cbd60814..eb8c3377 100644 --- a/code/jpeg-6b/jmemnobs.c +++ b/code/jpeg-8c/jmemnobs.c @@ -8,35 +8,39 @@ * This file provides a really simple implementation of the system- * dependent portion of the JPEG memory manager. This implementation * assumes that no backing-store files are needed: all required space - * can be obtained from ri.Malloc(). + * can be obtained from malloc(). * This is very portable in the sense that it'll compile on almost anything, * but you'd better have lots of main memory (or virtual memory) if you want * to process big images. * Note that the max_memory_to_use option is ignored by this implementation. */ -#include "../renderer/tr_local.h" - #define JPEG_INTERNALS #include "jinclude.h" #include "jpeglib.h" #include "jmemsys.h" /* import the system-dependent declarations */ +#ifndef HAVE_STDLIB_H /* should declare malloc(),free() */ +extern void * malloc JPP((size_t size)); +extern void free JPP((void *ptr)); +#endif + + /* * Memory allocation and freeing are controlled by the regular library - * routines ri.Malloc() and ri.Free(). + * routines malloc() and free(). */ GLOBAL(void *) jpeg_get_small (j_common_ptr cinfo, size_t sizeofobject) { - return (void *) ri.Malloc(sizeofobject); + return (void *) malloc(sizeofobject); } GLOBAL(void) jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject) { - ri.Free(object); + free(object); } @@ -50,13 +54,13 @@ jpeg_free_small (j_common_ptr cinfo, void * object, size_t sizeofobject) GLOBAL(void FAR *) jpeg_get_large (j_common_ptr cinfo, size_t sizeofobject) { - return (void FAR *) ri.Malloc(sizeofobject); + return (void FAR *) malloc(sizeofobject); } GLOBAL(void) jpeg_free_large (j_common_ptr cinfo, void FAR * object, size_t sizeofobject) { - ri.Free(object); + free(object); } diff --git a/code/jpeg-6b/jmemsys.h b/code/jpeg-8c/jmemsys.h similarity index 100% rename from code/jpeg-6b/jmemsys.h rename to code/jpeg-8c/jmemsys.h diff --git a/code/jpeg-6b/jmorecfg.h b/code/jpeg-8c/jmorecfg.h similarity index 89% rename from code/jpeg-6b/jmorecfg.h rename to code/jpeg-8c/jmorecfg.h index d677127d..928d052c 100644 --- a/code/jpeg-6b/jmorecfg.h +++ b/code/jpeg-8c/jmorecfg.h @@ -2,6 +2,7 @@ * jmorecfg.h * * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 1997-2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -157,12 +158,15 @@ typedef short INT16; /* INT32 must hold at least signed 32-bit values. */ -/* MinGW basetsd.h defines INT32 - don't redefine it */ -#ifndef __WIN64 -#if !(defined __MINGW32__ && (defined _BASETSD_H || defined _BASETSD_H_)) +#ifndef XMD_H /* X11/xmd.h correctly defines INT32 */ +#ifndef _BASETSD_H_ /* Microsoft defines it in basetsd.h */ +#ifndef _BASETSD_H /* MinGW is slightly different */ +#ifndef QGLOBAL_H /* Qt defines it in qglobal.h */ typedef long INT32; #endif #endif +#endif +#endif /* Datatype used for image dimensions. The JPEG standard only supports * images up to 64K*64K due to 16-bit fields in SOF markers. Therefore @@ -212,13 +216,13 @@ typedef unsigned int JDIMENSION; * explicit coding is needed; see uses of the NEED_FAR_POINTERS symbol. */ +#ifndef FAR #ifdef NEED_FAR_POINTERS -#undef FAR #define FAR far #else -#undef FAR #define FAR #endif +#endif /* @@ -228,7 +232,9 @@ typedef unsigned int JDIMENSION; * Defining HAVE_BOOLEAN before including jpeglib.h should make it work. */ -typedef unsigned char boolean; +#ifndef HAVE_BOOLEAN +typedef int boolean; +#endif #ifndef FALSE /* in case these macros already exist */ #define FALSE 0 /* values of boolean */ #endif @@ -259,19 +265,18 @@ typedef unsigned char boolean; * (You may HAVE to do that if your compiler doesn't like null source files.) */ -/* Arithmetic coding is unsupported for legal reasons. Complaints to IBM. */ - /* Capability options common to encoder and decoder: */ -#undef DCT_ISLOW_SUPPORTED /* slow but accurate integer algorithm */ -#undef DCT_IFAST_SUPPORTED /* faster, less accurate integer method */ +#define DCT_ISLOW_SUPPORTED /* slow but accurate integer algorithm */ +#define DCT_IFAST_SUPPORTED /* faster, less accurate integer method */ #define DCT_FLOAT_SUPPORTED /* floating-point: accurate, fast on fast HW */ /* Encoder capability options: */ -#undef C_ARITH_CODING_SUPPORTED /* Arithmetic coding back end? */ +#define C_ARITH_CODING_SUPPORTED /* Arithmetic coding back end? */ #define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */ #define C_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ +#define DCT_SCALING_SUPPORTED /* Input rescaling via DCT? (Requires DCT_ISLOW)*/ #define ENTROPY_OPT_SUPPORTED /* Optimization of entropy coding parms? */ /* Note: if you selected 12-bit data precision, it is dangerous to turn off * ENTROPY_OPT_SUPPORTED. The standard Huffman tables are only good for 8-bit @@ -285,16 +290,16 @@ typedef unsigned char boolean; /* Decoder capability options: */ -#undef D_ARITH_CODING_SUPPORTED /* Arithmetic coding back end? */ -#undef D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */ -#undef D_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ -#undef SAVE_MARKERS_SUPPORTED /* jpeg_save_markers() needed? */ -#undef BLOCK_SMOOTHING_SUPPORTED /* Block smoothing? (Progressive only) */ -#undef IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */ +#define D_ARITH_CODING_SUPPORTED /* Arithmetic coding back end? */ +#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */ +#define D_PROGRESSIVE_SUPPORTED /* Progressive JPEG? (Requires MULTISCAN)*/ +#define IDCT_SCALING_SUPPORTED /* Output rescaling via IDCT? */ +#define SAVE_MARKERS_SUPPORTED /* jpeg_save_markers() needed? */ +#define BLOCK_SMOOTHING_SUPPORTED /* Block smoothing? (Progressive only) */ #undef UPSAMPLE_SCALING_SUPPORTED /* Output rescaling at upsample stage? */ -#undef UPSAMPLE_MERGING_SUPPORTED /* Fast path for sloppy upsampling? */ -#undef QUANT_1PASS_SUPPORTED /* 1-pass color quantization? */ -#undef QUANT_2PASS_SUPPORTED /* 2-pass color quantization? */ +#define UPSAMPLE_MERGING_SUPPORTED /* Fast path for sloppy upsampling? */ +#define QUANT_1PASS_SUPPORTED /* 1-pass color quantization? */ +#define QUANT_2PASS_SUPPORTED /* 2-pass color quantization? */ /* more capability options later, no doubt */ @@ -317,7 +322,7 @@ typedef unsigned char boolean; #define RGB_RED 0 /* Offset of Red in an RGB scanline element */ #define RGB_GREEN 1 /* Offset of Green */ #define RGB_BLUE 2 /* Offset of Blue */ -#define RGB_PIXELSIZE 4 /* JSAMPLEs per RGB scanline element */ +#define RGB_PIXELSIZE 3 /* JSAMPLEs per RGB scanline element */ /* Definitions for speed-related optimizations. */ diff --git a/code/jpeg-6b/jpegint.h b/code/jpeg-8c/jpegint.h similarity index 91% rename from code/jpeg-6b/jpegint.h rename to code/jpeg-8c/jpegint.h index 95b00d40..0c27a4e4 100644 --- a/code/jpeg-6b/jpegint.h +++ b/code/jpeg-8c/jpegint.h @@ -2,6 +2,7 @@ * jpegint.h * * Copyright (C) 1991-1997, Thomas G. Lane. + * Modified 1997-2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -99,14 +100,16 @@ struct jpeg_downsampler { }; /* Forward DCT (also controls coefficient quantization) */ +typedef JMETHOD(void, forward_DCT_ptr, + (j_compress_ptr cinfo, jpeg_component_info * compptr, + JSAMPARRAY sample_data, JBLOCKROW coef_blocks, + JDIMENSION start_row, JDIMENSION start_col, + JDIMENSION num_blocks)); + struct jpeg_forward_dct { JMETHOD(void, start_pass, (j_compress_ptr cinfo)); - /* perhaps this should be an array??? */ - JMETHOD(void, forward_DCT, (j_compress_ptr cinfo, - jpeg_component_info * compptr, - JSAMPARRAY sample_data, JBLOCKROW coef_blocks, - JDIMENSION start_row, JDIMENSION start_col, - JDIMENSION num_blocks)); + /* It is useful to allow each component to have a separate FDCT method. */ + forward_DCT_ptr forward_DCT[MAX_COMPONENTS]; }; /* Entropy encoding */ @@ -210,10 +213,6 @@ struct jpeg_entropy_decoder { JMETHOD(void, start_pass, (j_decompress_ptr cinfo)); JMETHOD(boolean, decode_mcu, (j_decompress_ptr cinfo, JBLOCKROW *MCU_data)); - - /* This is here to share code between baseline and progressive decoders; */ - /* other modules probably should not use it */ - boolean insufficient_data; /* set TRUE after emitting warning */ }; /* Inverse DCT (also performs dequantization) */ @@ -303,7 +302,7 @@ struct jpeg_color_quantizer { #define jinit_downsampler jIDownsampler #define jinit_forward_dct jIFDCT #define jinit_huff_encoder jIHEncoder -#define jinit_phuff_encoder jIPHEncoder +#define jinit_arith_encoder jIAEncoder #define jinit_marker_writer jIMWriter #define jinit_master_decompress jIDMaster #define jinit_d_main_controller jIDMainC @@ -312,7 +311,7 @@ struct jpeg_color_quantizer { #define jinit_input_controller jIInCtlr #define jinit_marker_reader jIMReader #define jinit_huff_decoder jIHDecoder -#define jinit_phuff_decoder jIPHDecoder +#define jinit_arith_decoder jIADecoder #define jinit_inverse_dct jIIDCT #define jinit_upsampler jIUpsampler #define jinit_color_deconverter jIDColor @@ -327,6 +326,13 @@ struct jpeg_color_quantizer { #define jzero_far jZeroFar #define jpeg_zigzag_order jZIGTable #define jpeg_natural_order jZAGTable +#define jpeg_natural_order7 jZAGTable7 +#define jpeg_natural_order6 jZAGTable6 +#define jpeg_natural_order5 jZAGTable5 +#define jpeg_natural_order4 jZAGTable4 +#define jpeg_natural_order3 jZAGTable3 +#define jpeg_natural_order2 jZAGTable2 +#define jpeg_aritab jAriTab #endif /* NEED_SHORT_EXTERNAL_NAMES */ @@ -344,7 +350,7 @@ EXTERN(void) jinit_color_converter JPP((j_compress_ptr cinfo)); EXTERN(void) jinit_downsampler JPP((j_compress_ptr cinfo)); EXTERN(void) jinit_forward_dct JPP((j_compress_ptr cinfo)); EXTERN(void) jinit_huff_encoder JPP((j_compress_ptr cinfo)); -EXTERN(void) jinit_phuff_encoder JPP((j_compress_ptr cinfo)); +EXTERN(void) jinit_arith_encoder JPP((j_compress_ptr cinfo)); EXTERN(void) jinit_marker_writer JPP((j_compress_ptr cinfo)); /* Decompression module initialization routines */ EXTERN(void) jinit_master_decompress JPP((j_decompress_ptr cinfo)); @@ -357,7 +363,7 @@ EXTERN(void) jinit_d_post_controller JPP((j_decompress_ptr cinfo, EXTERN(void) jinit_input_controller JPP((j_decompress_ptr cinfo)); EXTERN(void) jinit_marker_reader JPP((j_decompress_ptr cinfo)); EXTERN(void) jinit_huff_decoder JPP((j_decompress_ptr cinfo)); -EXTERN(void) jinit_phuff_decoder JPP((j_decompress_ptr cinfo)); +EXTERN(void) jinit_arith_decoder JPP((j_decompress_ptr cinfo)); EXTERN(void) jinit_inverse_dct JPP((j_decompress_ptr cinfo)); EXTERN(void) jinit_upsampler JPP((j_decompress_ptr cinfo)); EXTERN(void) jinit_color_deconverter JPP((j_decompress_ptr cinfo)); @@ -381,6 +387,15 @@ EXTERN(void) jzero_far JPP((void FAR * target, size_t bytestozero)); extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */ #endif extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */ +extern const int jpeg_natural_order7[]; /* zz to natural order for 7x7 block */ +extern const int jpeg_natural_order6[]; /* zz to natural order for 6x6 block */ +extern const int jpeg_natural_order5[]; /* zz to natural order for 5x5 block */ +extern const int jpeg_natural_order4[]; /* zz to natural order for 4x4 block */ +extern const int jpeg_natural_order3[]; /* zz to natural order for 3x3 block */ +extern const int jpeg_natural_order2[]; /* zz to natural order for 2x2 block */ + +/* Arithmetic coding probability estimation tables in jaricom.c */ +extern const INT32 jpeg_aritab[]; /* Suppress undefined-structure complaints if necessary. */ diff --git a/code/jpeg-6b/jpeglib.h b/code/jpeg-8c/jpeglib.h similarity index 92% rename from code/jpeg-6b/jpeglib.h rename to code/jpeg-8c/jpeglib.h index 8ff305d2..1eb1fac0 100644 --- a/code/jpeg-6b/jpeglib.h +++ b/code/jpeg-8c/jpeglib.h @@ -2,6 +2,7 @@ * jpeglib.h * * Copyright (C) 1991-1998, Thomas G. Lane. + * Modified 2002-2010 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -21,16 +22,24 @@ */ #ifndef JCONFIG_INCLUDED /* in case jinclude.h already did */ -#include "../jpeg-6b/jconfig.h" /* widely used configuration options */ +#include "jconfig.h" /* widely used configuration options */ #endif -#include "../jpeg-6b/jmorecfg.h" /* seldom changed options */ +#include "jmorecfg.h" /* seldom changed options */ -/* Version ID for the JPEG library. - * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60". +#ifdef __cplusplus +#ifndef DONT_USE_EXTERN_C +extern "C" { +#endif +#endif + +/* Version IDs for the JPEG library. + * Might be useful for tests like "#if JPEG_LIB_VERSION >= 80". */ -#define JPEG_LIB_VERSION 62 /* Version 6b */ +#define JPEG_LIB_VERSION 80 /* Compatibility version 8.0 */ +#define JPEG_LIB_VERSION_MAJOR 8 +#define JPEG_LIB_VERSION_MINOR 3 /* Various constants determining the sizes of things. @@ -138,18 +147,18 @@ typedef struct { */ JDIMENSION width_in_blocks; JDIMENSION height_in_blocks; - /* Size of a DCT block in samples. Always DCTSIZE for compression. - * For decompression this is the size of the output from one DCT block, - * reflecting any scaling we choose to apply during the IDCT step. - * Values of 1,2,4,8 are likely to be supported. Note that different - * components may receive different IDCT scalings. + /* Size of a DCT block in samples, + * reflecting any scaling we choose to apply during the DCT step. + * Values from 1 to 16 are supported. + * Note that different components may receive different DCT scalings. */ - int DCT_scaled_size; + int DCT_h_scaled_size; + int DCT_v_scaled_size; /* The downsampled dimensions are the component's actual, unpadded number - * of samples at the main buffer (preprocessing/compression interface), thus - * downsampled_width = ceil(image_width * Hi/Hmax) - * and similarly for height. For decompression, IDCT scaling is included, so - * downsampled_width = ceil(image_width * Hi/Hmax * DCT_scaled_size/DCTSIZE) + * of samples at the main buffer (preprocessing/compression interface); + * DCT scaling is included, so + * downsampled_width = ceil(image_width * Hi/Hmax * DCT_h_scaled_size/DCTSIZE) + * and similarly for height. */ JDIMENSION downsampled_width; /* actual width in samples */ JDIMENSION downsampled_height; /* actual height in samples */ @@ -164,7 +173,7 @@ typedef struct { int MCU_width; /* number of blocks per MCU, horizontally */ int MCU_height; /* number of blocks per MCU, vertically */ int MCU_blocks; /* MCU_width * MCU_height */ - int MCU_sample_width; /* MCU width in samples, MCU_width*DCT_scaled_size */ + int MCU_sample_width; /* MCU width in samples: MCU_width * DCT_h_scaled_size */ int last_col_width; /* # of non-dummy blocks across in last MCU */ int last_row_height; /* # of non-dummy blocks down in last MCU */ @@ -291,6 +300,17 @@ struct jpeg_compress_struct { * helper routines to simplify changing parameters. */ + unsigned int scale_num, scale_denom; /* fraction by which to scale image */ + + JDIMENSION jpeg_width; /* scaled JPEG image width */ + JDIMENSION jpeg_height; /* scaled JPEG image height */ + /* Dimensions of actual JPEG image that will be written to file, + * derived from input dimensions by scaling factors above. + * These fields are computed by jpeg_start_compress(). + * You can also use jpeg_calc_jpeg_dimensions() to determine these values + * in advance of calling jpeg_start_compress(). + */ + int data_precision; /* bits of precision in image data */ int num_components; /* # of color components in JPEG image */ @@ -298,14 +318,17 @@ struct jpeg_compress_struct { jpeg_component_info * comp_info; /* comp_info[i] describes component that appears i'th in SOF */ - + JQUANT_TBL * quant_tbl_ptrs[NUM_QUANT_TBLS]; - /* ptrs to coefficient quantization tables, or NULL if not defined */ - + int q_scale_factor[NUM_QUANT_TBLS]; + /* ptrs to coefficient quantization tables, or NULL if not defined, + * and corresponding scale factors (percentage, initialized 100). + */ + JHUFF_TBL * dc_huff_tbl_ptrs[NUM_HUFF_TBLS]; JHUFF_TBL * ac_huff_tbl_ptrs[NUM_HUFF_TBLS]; /* ptrs to Huffman coding tables, or NULL if not defined */ - + UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */ UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */ UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */ @@ -321,6 +344,7 @@ struct jpeg_compress_struct { boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */ boolean optimize_coding; /* TRUE=optimize entropy encoding parms */ boolean CCIR601_sampling; /* TRUE=first samples are cosited */ + boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */ int smoothing_factor; /* 1..100, or 0 for no input smoothing */ J_DCT_METHOD dct_method; /* DCT algorithm selector */ @@ -364,6 +388,9 @@ struct jpeg_compress_struct { int max_h_samp_factor; /* largest h_samp_factor */ int max_v_samp_factor; /* largest v_samp_factor */ + int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */ + int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */ + JDIMENSION total_iMCU_rows; /* # of iMCU rows to be input to coef ctlr */ /* The coefficient controller receives data in units of MCU rows as defined * for fully interleaved scans (whether the JPEG file is interleaved or not). @@ -389,6 +416,10 @@ struct jpeg_compress_struct { int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */ + int block_size; /* the basic DCT block size: 1..16 */ + const int * natural_order; /* natural-order position array */ + int lim_Se; /* min( Se, DCTSIZE2-1 ) */ + /* * Links to compression subobjects (methods and private variables of modules) */ @@ -535,6 +566,7 @@ struct jpeg_decompress_struct { jpeg_component_info * comp_info; /* comp_info[i] describes component that appears i'th in SOF */ + boolean is_baseline; /* TRUE if Baseline SOF0 encountered */ boolean progressive_mode; /* TRUE if SOFn specifies progressive mode */ boolean arith_code; /* TRUE=arithmetic coding, FALSE=Huffman */ @@ -575,7 +607,8 @@ struct jpeg_decompress_struct { int max_h_samp_factor; /* largest h_samp_factor */ int max_v_samp_factor; /* largest v_samp_factor */ - int min_DCT_scaled_size; /* smallest DCT_scaled_size of any component */ + int min_DCT_h_scaled_size; /* smallest DCT_h_scaled_size of any component */ + int min_DCT_v_scaled_size; /* smallest DCT_v_scaled_size of any component */ JDIMENSION total_iMCU_rows; /* # of iMCU rows in image */ /* The coefficient controller's input and output progress is measured in @@ -583,7 +616,7 @@ struct jpeg_decompress_struct { * in fully interleaved JPEG scans, but are used whether the scan is * interleaved or not. We define an iMCU row as v_samp_factor DCT block * rows of each component. Therefore, the IDCT output contains - * v_samp_factor*DCT_scaled_size sample rows of a component per iMCU row. + * v_samp_factor*DCT_v_scaled_size sample rows of a component per iMCU row. */ JSAMPLE * sample_range_limit; /* table for fast range-limiting */ @@ -607,6 +640,12 @@ struct jpeg_decompress_struct { int Ss, Se, Ah, Al; /* progressive JPEG parameters for scan */ + /* These fields are derived from Se of first SOS marker. + */ + int block_size; /* the basic DCT block size: 1..16 */ + const int * natural_order; /* natural-order position array for entropy decode */ + int lim_Se; /* min( Se, DCTSIZE2-1 ) for entropy decode */ + /* This field is shared between entropy decoder and marker parser. * It is either zero or the code of a JPEG marker that has been * read from the data source, but has not yet been processed. @@ -835,12 +874,15 @@ typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo)); #define jpeg_destroy_compress jDestCompress #define jpeg_destroy_decompress jDestDecompress #define jpeg_stdio_dest jStdDest +#define jpeg_stdio_src jStdSrc +#define jpeg_mem_dest jMemDest #define jpeg_mem_src jMemSrc #define jpeg_set_defaults jSetDefaults #define jpeg_set_colorspace jSetColorspace #define jpeg_default_colorspace jDefColorspace #define jpeg_set_quality jSetQuality #define jpeg_set_linear_quality jSetLQuality +#define jpeg_default_qtables jDefQTables #define jpeg_add_quant_table jAddQuantTable #define jpeg_quality_scaling jQualityScaling #define jpeg_simple_progression jSimProgress @@ -850,6 +892,7 @@ typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo)); #define jpeg_start_compress jStrtCompress #define jpeg_write_scanlines jWrtScanlines #define jpeg_finish_compress jFinCompress +#define jpeg_calc_jpeg_dimensions jCjpegDimensions #define jpeg_write_raw_data jWrtRawData #define jpeg_write_marker jWrtMarker #define jpeg_write_m_header jWrtMHeader @@ -866,6 +909,7 @@ typedef JMETHOD(boolean, jpeg_marker_parser_method, (j_decompress_ptr cinfo)); #define jpeg_input_complete jInComplete #define jpeg_new_colormap jNewCMap #define jpeg_consume_input jConsumeInput +#define jpeg_core_output_dimensions jCoreDimensions #define jpeg_calc_output_dimensions jCalcDimensions #define jpeg_save_markers jSaveMarkers #define jpeg_set_marker_processor jSetMarker @@ -908,7 +952,15 @@ EXTERN(void) jpeg_destroy_decompress JPP((j_decompress_ptr cinfo)); /* Standard data source and destination managers: stdio streams. */ /* Caller is responsible for opening the file before and closing after. */ EXTERN(void) jpeg_stdio_dest JPP((j_compress_ptr cinfo, FILE * outfile)); -EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo, unsigned char *inbuf, size_t size)); +EXTERN(void) jpeg_stdio_src JPP((j_decompress_ptr cinfo, FILE * infile)); + +/* Data source and destination managers: memory buffers. */ +EXTERN(void) jpeg_mem_dest JPP((j_compress_ptr cinfo, + unsigned char ** outbuffer, + unsigned long * outsize)); +EXTERN(void) jpeg_mem_src JPP((j_decompress_ptr cinfo, + unsigned char * inbuffer, + unsigned long insize)); /* Default parameter setup for compression */ EXTERN(void) jpeg_set_defaults JPP((j_compress_ptr cinfo)); @@ -921,6 +973,8 @@ EXTERN(void) jpeg_set_quality JPP((j_compress_ptr cinfo, int quality, EXTERN(void) jpeg_set_linear_quality JPP((j_compress_ptr cinfo, int scale_factor, boolean force_baseline)); +EXTERN(void) jpeg_default_qtables JPP((j_compress_ptr cinfo, + boolean force_baseline)); EXTERN(void) jpeg_add_quant_table JPP((j_compress_ptr cinfo, int which_tbl, const unsigned int *basic_table, int scale_factor, @@ -940,12 +994,15 @@ EXTERN(JDIMENSION) jpeg_write_scanlines JPP((j_compress_ptr cinfo, JDIMENSION num_lines)); EXTERN(void) jpeg_finish_compress JPP((j_compress_ptr cinfo)); +/* Precalculate JPEG dimensions for current compression parameters. */ +EXTERN(void) jpeg_calc_jpeg_dimensions JPP((j_compress_ptr cinfo)); + /* Replaces jpeg_write_scanlines when writing raw downsampled data. */ EXTERN(JDIMENSION) jpeg_write_raw_data JPP((j_compress_ptr cinfo, JSAMPIMAGE data, JDIMENSION num_lines)); -/* Write a special marker. See libjpeg.doc concerning safe usage. */ +/* Write a special marker. See libjpeg.txt concerning safe usage. */ EXTERN(void) jpeg_write_marker JPP((j_compress_ptr cinfo, int marker, const JOCTET * dataptr, unsigned int datalen)); @@ -999,6 +1056,7 @@ EXTERN(int) jpeg_consume_input JPP((j_decompress_ptr cinfo)); #define JPEG_SCAN_COMPLETED 4 /* Completed last iMCU row of a scan */ /* Precalculate output dimensions for current decompression parameters. */ +EXTERN(void) jpeg_core_output_dimensions JPP((j_decompress_ptr cinfo)); EXTERN(void) jpeg_calc_output_dimensions JPP((j_decompress_ptr cinfo)); /* Control saving of COM and APPn markers into marker_list. */ @@ -1089,8 +1147,14 @@ struct jpeg_color_quantizer { long dummy; }; */ #ifdef JPEG_INTERNALS -#include "../jpeg-6b/jpegint.h" /* fetch private declarations */ -#include "../jpeg-6b/jerror.h" /* fetch error codes too */ +#include "jpegint.h" /* fetch private declarations */ +#include "jerror.h" /* fetch error codes too */ +#endif + +#ifdef __cplusplus +#ifndef DONT_USE_EXTERN_C +} +#endif #endif #endif /* JPEGLIB_H */ diff --git a/code/jpeg-6b/jquant1.c b/code/jpeg-8c/jquant1.c similarity index 100% rename from code/jpeg-6b/jquant1.c rename to code/jpeg-8c/jquant1.c diff --git a/code/jpeg-6b/jquant2.c b/code/jpeg-8c/jquant2.c similarity index 100% rename from code/jpeg-6b/jquant2.c rename to code/jpeg-8c/jquant2.c diff --git a/code/jpeg-6b/jutils.c b/code/jpeg-8c/jutils.c similarity index 77% rename from code/jpeg-6b/jutils.c rename to code/jpeg-8c/jutils.c index d18a9555..04351797 100644 --- a/code/jpeg-6b/jutils.c +++ b/code/jpeg-8c/jutils.c @@ -2,6 +2,7 @@ * jutils.c * * Copyright (C) 1991-1996, Thomas G. Lane. + * Modified 2009 by Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -63,6 +64,57 @@ const int jpeg_natural_order[DCTSIZE2+16] = { 63, 63, 63, 63, 63, 63, 63, 63 }; +const int jpeg_natural_order7[7*7+16] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 14, 21, 28, 35, + 42, 49, 50, 43, 36, 29, 22, 30, + 37, 44, 51, 52, 45, 38, 46, 53, + 54, + 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */ + 63, 63, 63, 63, 63, 63, 63, 63 +}; + +const int jpeg_natural_order6[6*6+16] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 41, 34, 27, + 20, 13, 21, 28, 35, 42, 43, 36, + 29, 37, 44, 45, + 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */ + 63, 63, 63, 63, 63, 63, 63, 63 +}; + +const int jpeg_natural_order5[5*5+16] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 12, + 19, 26, 33, 34, 27, 20, 28, 35, + 36, + 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */ + 63, 63, 63, 63, 63, 63, 63, 63 +}; + +const int jpeg_natural_order4[4*4+16] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 25, 18, 11, 19, 26, 27, + 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */ + 63, 63, 63, 63, 63, 63, 63, 63 +}; + +const int jpeg_natural_order3[3*3+16] = { + 0, 1, 8, 16, 9, 2, 10, 17, + 18, + 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */ + 63, 63, 63, 63, 63, 63, 63, 63 +}; + +const int jpeg_natural_order2[2*2+16] = { + 0, 1, 8, 9, + 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */ + 63, 63, 63, 63, 63, 63, 63, 63 +}; + /* * Arithmetic utilities diff --git a/code/jpeg-6b/jversion.h b/code/jpeg-8c/jversion.h similarity index 56% rename from code/jpeg-6b/jversion.h rename to code/jpeg-8c/jversion.h index 6472c58d..e868538c 100644 --- a/code/jpeg-6b/jversion.h +++ b/code/jpeg-8c/jversion.h @@ -1,7 +1,7 @@ /* * jversion.h * - * Copyright (C) 1991-1998, Thomas G. Lane. + * Copyright (C) 1991-2011, Thomas G. Lane, Guido Vollbeding. * This file is part of the Independent JPEG Group's software. * For conditions of distribution and use, see the accompanying README file. * @@ -9,6 +9,6 @@ */ -#define JVERSION "6b 27-Mar-1998" +#define JVERSION "8c 16-Jan-2011" -#define JCOPYRIGHT "Copyright (C) 1998, Thomas G. Lane" +#define JCOPYRIGHT "Copyright (C) 2011, Thomas G. Lane, Guido Vollbeding" diff --git a/code/renderer/tr_image_jpg.c b/code/renderer/tr_image_jpg.c index 2e1a52a7..c6c632ea 100644 --- a/code/renderer/tr_image_jpg.c +++ b/code/renderer/tr_image_jpg.c @@ -30,10 +30,43 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * You may also wish to include "jerror.h". */ -#define JPEG_INTERNALS -#include "../jpeg-6b/jpeglib.h" +#ifdef USE_INTERNAL_JPEG +# define JPEG_INTERNALS +#endif -void R_LoadJPG( const char *filename, unsigned char **pic, int *width, int *height ) { +#include + +#ifndef USE_INTERNAL_JPEG +# if JPEG_LIB_VERSION < 80 +# error Need system libjpeg >= 80 +# endif +#endif + +static void R_JPGErrorExit(j_common_ptr cinfo) +{ + char buffer[JMSG_LENGTH_MAX]; + + (*cinfo->err->format_message) (cinfo, buffer); + + /* Let the memory manager delete any temp files before we die */ + jpeg_destroy(cinfo); + + ri.Error(ERR_FATAL, "%s\n", buffer); +} + +static void R_JPGOutputMessage(j_common_ptr cinfo) +{ + char buffer[JMSG_LENGTH_MAX]; + + /* Create the message */ + (*cinfo->err->format_message) (cinfo, buffer); + + /* Send it to stderr, adding a newline */ + ri.Printf(PRINT_ALL, "%s\n", buffer); +} + +void R_LoadJPG(const char *filename, unsigned char **pic, int *width, int *height) +{ /* This struct contains the JPEG decompression parameters and pointers to * working space (which is allocated as needed by the JPEG library). */ @@ -55,6 +88,7 @@ void R_LoadJPG( const char *filename, unsigned char **pic, int *width, int *heig JSAMPARRAY buffer; /* Output row buffer */ unsigned row_stride; /* physical row width in output buffer */ unsigned pixelcount, memcount; + int sindex, dindex; unsigned char *out; int len; union { @@ -82,6 +116,8 @@ void R_LoadJPG( const char *filename, unsigned char **pic, int *width, int *heig * address which we place into the link field in cinfo. */ cinfo.err = jpeg_std_error(&jerr); + cinfo.err->error_exit = R_JPGErrorExit; + cinfo.err->output_message = R_JPGOutputMessage; /* Now we can initialize the JPEG decompression object. */ jpeg_create_decompress(&cinfo); @@ -101,9 +137,7 @@ void R_LoadJPG( const char *filename, unsigned char **pic, int *width, int *heig /* Step 4: set parameters for decompression */ - /* In this example, we don't need to change any of the defaults set by - * jpeg_read_header(), so we do nothing here. - */ + cinfo.out_color_space = JCS_RGB; /* Step 5: Start decompressor */ @@ -124,9 +158,10 @@ void R_LoadJPG( const char *filename, unsigned char **pic, int *width, int *heig if(!cinfo.output_width || !cinfo.output_height || ((pixelcount * 4) / cinfo.output_width) / 4 != cinfo.output_height - || pixelcount > 0x1FFFFFFF || cinfo.output_components > 4) // 4*1FFFFFFF == 0x7FFFFFFC < 0x7FFFFFFF + || pixelcount > 0x1FFFFFFF || cinfo.output_components != 3 + ) { - ri.Error (ERR_DROP, "LoadJPG: %s has an invalid image size: %dx%d*4=%d, components: %d\n", filename, + ri.Error (ERR_DROP, "LoadJPG: %s has an invalid image format: %dx%d*4=%d, components: %d\n", filename, cinfo.output_width, cinfo.output_height, pixelcount * 4, cinfo.output_components); } @@ -158,34 +193,19 @@ void R_LoadJPG( const char *filename, unsigned char **pic, int *width, int *heig // If we are processing an 8-bit JPEG (greyscale), we'll have to convert // the greyscale values to RGBA. - if(cinfo.output_components == 1) - { - int sindex = pixelcount, dindex = memcount; - unsigned char greyshade; + sindex = pixelcount * cinfo.output_components; + dindex = memcount; - // Only pixelcount number of bytes have been written. - // Expand the color values over the rest of the buffer, starting - // from the end. - do - { - greyshade = buf[--sindex]; - - buf[--dindex] = 255; - buf[--dindex] = greyshade; - buf[--dindex] = greyshade; - buf[--dindex] = greyshade; - } while(sindex); - } - else - { - // clear all the alphas to 255 - int i; - - for ( i = 3 ; i < memcount ; i+=4 ) - { - buf[i] = 255; - } - } + // Only pixelcount number of bytes have been written. + // Expand the color values over the rest of the buffer, starting + // from the end. + do + { + buf[--dindex] = 255; + buf[--dindex] = buf[--sindex]; + buf[--dindex] = buf[--sindex]; + buf[--dindex] = buf[--sindex]; + } while(sindex); *pic = out;