diff --git a/jpegturbo/BUILDING.md b/jpegturbo/BUILDING.md
index 9ef1f45e..a4ae1e06 100644
--- a/jpegturbo/BUILDING.md
+++ b/jpegturbo/BUILDING.md
@@ -15,13 +15,18 @@ Build Requirements
* If using NASM, 2.10 or later is required.
* If using NASM, 2.10 or later (except 2.11.08) is required for an x86-64 Mac
build (2.11.08 does not work properly with libjpeg-turbo's x86-64 SIMD code
- when building macho64 objects.) NASM or YASM can be obtained from
- [MacPorts](http://www.macports.org/) or [Homebrew](http://brew.sh/).
+ when building macho64 objects.)
* If using YASM, 1.2.0 or later is required.
+ * If building on macOS, NASM or YASM can be obtained from
+ [MacPorts](http://www.macports.org/) or [Homebrew](http://brew.sh/).
- NOTE: Currently, if it is desirable to hide the SIMD function symbols in
Mac executables or shared libraries that statically link with
- libjpeg-turbo, then YASM must be used when building libjpeg-turbo.
+ libjpeg-turbo, then NASM 2.14 or later or YASM must be used when
+ building libjpeg-turbo.
* If building on Windows, **nasm.exe**/**yasm.exe** should be in your `PATH`.
+ * NASM and YASM are located in the CRB (Code Ready Builder) repository on
+ Red Hat Enterprise Linux 8 and in the PowerTools repository on CentOS 8,
+ neither of which is enabled by default.
The binary RPMs released by the NASM project do not work on older Linux
systems, such as Red Hat Enterprise Linux 5. On such systems, you can easily
diff --git a/jpegturbo/Brewfile b/jpegturbo/Brewfile
deleted file mode 100644
index 4a9cb3d2..00000000
--- a/jpegturbo/Brewfile
+++ /dev/null
@@ -1,4 +0,0 @@
-brew 'yasm'
-brew 'gcc@5'
-brew 'md5sha1sum'
-cask 'Caskroom/versions/java6'
diff --git a/jpegturbo/CMakeLists.txt b/jpegturbo/CMakeLists.txt
index 28fd443d..4e2d5bd6 100644
--- a/jpegturbo/CMakeLists.txt
+++ b/jpegturbo/CMakeLists.txt
@@ -5,7 +5,7 @@ if(CMAKE_EXECUTABLE_SUFFIX)
endif()
project(libjpeg-turbo C)
-set(VERSION 2.0.3)
+set(VERSION 2.0.4)
string(REPLACE "." ";" VERSION_TRIPLET ${VERSION})
list(GET VERSION_TRIPLET 0 VERSION_MAJOR)
list(GET VERSION_TRIPLET 1 VERSION_MINOR)
diff --git a/jpegturbo/ChangeLog.md b/jpegturbo/ChangeLog.md
index 3667d120..4d1219e5 100644
--- a/jpegturbo/ChangeLog.md
+++ b/jpegturbo/ChangeLog.md
@@ -1,3 +1,44 @@
+2.0.4
+=====
+
+### Significant changes relative to 2.0.3:
+
+1. Fixed a regression in the Windows packaging system (introduced by
+2.0 beta1[2]) whereby, if both the 64-bit libjpeg-turbo SDK for GCC and the
+64-bit libjpeg-turbo SDK for Visual C++ were installed on the same system, only
+one of them could be uninstalled.
+
+2. Fixed a signed integer overflow and subsequent segfault that occurred when
+attempting to decompress images with more than 715827882 pixels using the
+64-bit C version of TJBench.
+
+3. Fixed out-of-bounds write in `tjDecompressToYUV2()` and
+`tjDecompressToYUVPlanes()` (sometimes manifesting as a double free) that
+occurred when attempting to decompress grayscale JPEG images that were
+compressed with a sampling factor other than 1 (for instance, with
+`cjpeg -grayscale -sample 2x2`).
+
+4. Fixed a regression introduced by 2.0.2[5] that caused the TurboJPEG API to
+incorrectly identify some JPEG images with unusual sampling factors as 4:4:4
+JPEG images. This was known to cause a buffer overflow when attempting to
+decompress some such images using `tjDecompressToYUV2()` or
+`tjDecompressToYUVPlanes()`.
+
+5. Fixed an issue, detected by ASan, whereby attempting to losslessly transform
+a specially-crafted malformed JPEG image containing an extremely-high-frequency
+coefficient block (junk image data that could never be generated by a
+legitimate JPEG compressor) could cause the Huffman encoder's local buffer to
+be overrun. (Refer to 1.4.0[9] and 1.4beta1[15].) Given that the buffer
+overrun was fully contained within the stack and did not cause a segfault or
+other user-visible errant behavior, and given that the lossless transformer
+(unlike the decompressor) is not generally exposed to arbitrary data exploits,
+this issue likely did not pose a security risk.
+
+6. The ARM 64-bit (ARMv8) NEON SIMD assembly code now stores constants in a
+separate read-only data section rather than in the text section, to support
+execute-only memory layouts.
+
+
2.0.3
=====
@@ -138,10 +179,11 @@ would produce a "Bogus message code" error message if the underlying bitmap and
PPM readers/writers threw an error that was specific to the readers/writers
(as opposed to a general libjpeg API error.)
-4. Fixed an issue whereby a specially-crafted malformed BMP file, one in which
-the header specified an image width of 1073741824 pixels, would trigger a
-floating point exception (division by zero) in the `tjLoadImage()` function
-when attempting to load the BMP file into a 4-component image buffer.
+4. Fixed an issue (CVE-2018-1152) whereby a specially-crafted malformed BMP
+file, one in which the header specified an image width of 1073741824 pixels,
+would trigger a floating point exception (division by zero) in the
+`tjLoadImage()` function when attempting to load the BMP file into a
+4-component image buffer.
5. Fixed an issue whereby certain combinations of calls to
`jpeg_skip_scanlines()` and `jpeg_read_scanlines()` could trigger an infinite
@@ -155,10 +197,10 @@ a 4:2:2 or 4:2:0 JPEG image using the merged (non-fancy) upsampling algorithms
7. The new CMake-based build system will now disable the MIPS DSPr2 SIMD
extensions if it detects that the compiler does not support DSPr2 instructions.
-8. Fixed out-of-bounds read in cjpeg that occurred when attempting to compress
-a specially-crafted malformed color-index (8-bit-per-sample) BMP file in which
-some of the samples (color indices) exceeded the bounds of the BMP file's color
-table.
+8. Fixed out-of-bounds read in cjpeg (CVE-2018-14498) that occurred when
+attempting to compress a specially-crafted malformed color-index
+(8-bit-per-sample) BMP file in which some of the samples (color indices)
+exceeded the bounds of the BMP file's color table.
9. Fixed a signed integer overflow in the progressive Huffman decoder, detected
by the Clang and GCC undefined behavior sanitizers, that could be triggered by
@@ -318,8 +360,8 @@ write scanlines in bottom-up order.) djpeg will now exit gracefully if an
output format other than PPM/PGM, GIF, or Targa is selected along with the
`-crop` option.
-4. Fixed an issue whereby `jpeg_skip_scanlines()` would segfault if color
-quantization was enabled.
+4. Fixed an issue (CVE-2017-15232) whereby `jpeg_skip_scanlines()` would
+segfault if color quantization was enabled.
5. TJBench (both C and Java versions) will now display usage information if any
command-line argument is unrecognized. This prevents the program from silently
@@ -946,13 +988,13 @@ and IDCT algorithms (both are used during JPEG decompression.) For unknown
reasons (probably related to clang), this code cannot currently be compiled for
iOS.
-15. Fixed an extremely rare bug that could cause the Huffman encoder's local
-buffer to overrun when a very high-frequency MCU is compressed using quality
-100 and no subsampling, and when the JPEG output buffer is being dynamically
-resized by the destination manager. This issue was so rare that, even with a
-test program specifically designed to make the bug occur (by injecting random
-high-frequency YUV data into the compressor), it was reproducible only once in
-about every 25 million iterations.
+15. Fixed an extremely rare bug (CVE-2014-9092) that could cause the Huffman
+encoder's local buffer to overrun when a very high-frequency MCU is compressed
+using quality 100 and no subsampling, and when the JPEG output buffer is being
+dynamically resized by the destination manager. This issue was so rare that,
+even with a test program specifically designed to make the bug occur (by
+injecting random high-frequency YUV data into the compressor), it was
+reproducible only once in about every 25 million iterations.
16. Fixed an oversight in the TurboJPEG C wrapper: if any of the JPEG
compression functions was called repeatedly with the same
@@ -987,8 +1029,9 @@ entropy coding (by passing arguments of `-progressive -arithmetic` to cjpeg or
jpegtran, for instance) would result in an error, `Requested feature was
omitted at compile time`.
-4. Fixed a couple of issues whereby malformed JPEG images would cause
-libjpeg-turbo to use uninitialized memory during decompression.
+4. Fixed a couple of issues (CVE-2013-6629 and CVE-2013-6630) whereby malformed
+JPEG images would cause libjpeg-turbo to use uninitialized memory during
+decompression.
5. Fixed an error (`Buffer passed to JPEG library is too small`) that occurred
when calling the TurboJPEG YUV encoding function with a very small (< 5x5)
@@ -1127,9 +1170,9 @@ correct behavior of the colorspace extensions when merged upsampling is used.
upper 64 bits of xmm6 and xmm7 on Win64 platforms, which violated the Win64
calling conventions.
-4. Fixed a regression caused by 1.2.0[6] whereby decompressing corrupt JPEG
-images (specifically, images in which the component count was erroneously set
-to a large value) would cause libjpeg-turbo to segfault.
+4. Fixed a regression (CVE-2012-2806) caused by 1.2.0[6] whereby decompressing
+corrupt JPEG images (specifically, images in which the component count was
+erroneously set to a large value) would cause libjpeg-turbo to segfault.
5. Worked around a severe performance issue with "Bobcat" (AMD Embedded APU)
processors. The `MASKMOVDQU` instruction, which was used by the libjpeg-turbo
diff --git a/jpegturbo/README.md b/jpegturbo/README.md
old mode 100755
new mode 100644
index c61b8556..e7ff743a
--- a/jpegturbo/README.md
+++ b/jpegturbo/README.md
@@ -1,14 +1,14 @@
Background
==========
-libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2,
-AVX2, NEON, AltiVec) to accelerate baseline JPEG compression and decompression
-on x86, x86-64, ARM, and PowerPC systems, as well as progressive JPEG
-compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is
-generally 2-6x as fast as libjpeg, all else being equal. On other types of
-systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by
-virtue of its highly-optimized Huffman coding routines. In many cases, the
-performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
+libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate
+baseline JPEG compression and decompression on x86, x86-64, ARM, PowerPC, and
+MIPS systems, as well as progressive JPEG compression on x86 and x86-64
+systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg,
+all else being equal. On other types of systems, libjpeg-turbo can still
+outperform libjpeg by a significant amount, by virtue of its highly-optimized
+Huffman coding routines. In many cases, the performance of libjpeg-turbo
+rivals that of proprietary high-speed JPEG codecs.
libjpeg-turbo implements both the traditional libjpeg API as well as the less
powerful but more straightforward TurboJPEG API. libjpeg-turbo also features
@@ -145,14 +145,14 @@ supported and which aren't.
#### Fully supported
-- **libjpeg: IDCT scaling extensions in decompressor**
+- **libjpeg API: IDCT scaling extensions in decompressor**
libjpeg-turbo supports IDCT scaling with scaling factors of 1/8, 1/4, 3/8,
1/2, 5/8, 3/4, 7/8, 9/8, 5/4, 11/8, 3/2, 13/8, 7/4, 15/8, and 2/1 (only 1/4
and 1/2 are SIMD-accelerated.)
-- **libjpeg: Arithmetic coding**
+- **libjpeg API: Arithmetic coding**
-- **libjpeg: In-memory source and destination managers**
+- **libjpeg API: In-memory source and destination managers**
See notes below.
- **cjpeg: Separate quality settings for luminance and chrominance**
@@ -184,14 +184,14 @@ means of quality improvement. The reader is invited to peruse the research at
but it is the general belief of our project that these features have not
demonstrated sufficient usefulness to justify inclusion in libjpeg-turbo.
-- **libjpeg: DCT scaling in compressor**
+- **libjpeg API: DCT scaling in compressor**
`cinfo.scale_num` and `cinfo.scale_denom` are silently ignored.
There is no technical reason why DCT scaling could not be supported when
emulating the libjpeg v7+ API/ABI, but without the SmartScale extension (see
below), only scaling factors of 1/2, 8/15, 4/7, 8/13, 2/3, 8/11, 4/5, and
8/9 would be available, which is of limited usefulness.
-- **libjpeg: SmartScale**
+- **libjpeg API: SmartScale**
`cinfo.block_size` is silently ignored.
SmartScale is an extension to the JPEG format that allows for DCT block
sizes other than 8x8. Providing support for this new format would be
@@ -204,7 +204,7 @@ demonstrated sufficient usefulness to justify inclusion in libjpeg-turbo.
interest in providing this feature would be as a means of supporting
additional DCT scaling factors.
-- **libjpeg: Fancy downsampling in compressor**
+- **libjpeg API: Fancy downsampling in compressor**
`cinfo.do_fancy_downsampling` is silently ignored.
This requires the DCT scaling feature, which is not supported.
@@ -252,8 +252,8 @@ building libjpeg-turbo. This will restore the pre-1.3 behavior, in which
libjpeg v8 API/ABI.
On Un*x systems, including the in-memory source/destination managers changes
-the dynamic library version from 62.1.0 to 62.2.0 if using libjpeg v6b API/ABI
-emulation and from 7.1.0 to 7.2.0 if using libjpeg v7 API/ABI emulation.
+the dynamic library version from 62.2.0 to 62.3.0 if using libjpeg v6b API/ABI
+emulation and from 7.2.0 to 7.3.0 if using libjpeg v7 API/ABI emulation.
Note that, on most Un*x systems, the dynamic linker will not look for a
function in a library until that function is actually used. Thus, if a program
@@ -329,7 +329,7 @@ in a way that makes the rest of the libjpeg infrastructure happy, so it is
necessary to use the slow Huffman decoder when decompressing a JPEG image that
has restart markers. This can cause the decompression performance to drop by
as much as 20%, but the performance will still be much greater than that of
-libjpeg. Many consumer packages, such as PhotoShop, use restart markers when
+libjpeg. Many consumer packages, such as Photoshop, use restart markers when
generating JPEG images, so images generated by those programs will experience
this issue.
diff --git a/jpegturbo/cmakescripts/BuildPackages.cmake b/jpegturbo/cmakescripts/BuildPackages.cmake
index 11d54268..395dd989 100644
--- a/jpegturbo/cmakescripts/BuildPackages.cmake
+++ b/jpegturbo/cmakescripts/BuildPackages.cmake
@@ -83,7 +83,7 @@ endif()
if(BITS EQUAL 64)
set(INST_PLATFORM "${INST_PLATFORM} 64-bit")
set(INST_NAME ${INST_NAME}64)
- set(INST_REG_NAME ${INST_DIR}64)
+ set(INST_REG_NAME ${INST_REG_NAME}64)
set(INST_DEFS ${INST_DEFS} -DWIN64)
endif()
diff --git a/jpegturbo/djpeg.c b/jpegturbo/djpeg.c
index 920e90d7..40e93e65 100644
--- a/jpegturbo/djpeg.c
+++ b/jpegturbo/djpeg.c
@@ -516,7 +516,9 @@ main(int argc, char **argv)
FILE *input_file;
FILE *output_file;
unsigned char *inbuffer = NULL;
+#if JPEG_LIB_VERSION >= 80 || defined(MEM_SRCDST_SUPPORTED)
unsigned long insize = 0;
+#endif
JDIMENSION num_scanlines;
/* On Mac, fetch a command line. */
diff --git a/jpegturbo/example.txt b/jpegturbo/example.txt
index 04c11fee..bc0ba49d 100644
--- a/jpegturbo/example.txt
+++ b/jpegturbo/example.txt
@@ -288,12 +288,14 @@ my_error_exit(j_common_ptr cinfo)
}
+METHODDEF(int) do_read_JPEG_file(struct jpeg_decompress_struct *cinfo,
+ char *filename);
+
/*
* Sample routine for JPEG decompression. We assume that the source file name
* is passed in. We want to return 1 on success, 0 on error.
*/
-
GLOBAL(int)
read_JPEG_file(char *filename)
{
@@ -301,6 +303,21 @@ read_JPEG_file(char *filename)
* working space (which is allocated as needed by the JPEG library).
*/
struct jpeg_decompress_struct cinfo;
+
+ return do_read_JPEG_file(&cinfo, filename);
+}
+
+/*
+ * We call the libjpeg API from within a separate function, because modifying
+ * the local non-volatile jpeg_decompress_struct instance below the setjmp()
+ * return point and then accessing the instance after setjmp() returns would
+ * result in undefined behavior that may potentially overwrite all or part of
+ * the structure.
+ */
+
+METHODDEF(int)
+do_read_JPEG_file(struct jpeg_decompress_struct *cinfo, char *filename)
+{
/* We use our private extension JPEG error handler.
* Note that this struct must live as long as the main JPEG parameter
* struct, to avoid dangling-pointer problems.
@@ -325,27 +342,27 @@ read_JPEG_file(char *filename)
/* Step 1: allocate and initialize JPEG decompression object */
/* We set up the normal JPEG error routines, then override error_exit. */
- cinfo.err = jpeg_std_error(&jerr.pub);
+ cinfo->err = jpeg_std_error(&jerr.pub);
jerr.pub.error_exit = my_error_exit;
/* Establish the setjmp return context for my_error_exit to use. */
if (setjmp(jerr.setjmp_buffer)) {
/* If we get here, the JPEG code has signaled an error.
* We need to clean up the JPEG object, close the input file, and return.
*/
- jpeg_destroy_decompress(&cinfo);
+ jpeg_destroy_decompress(cinfo);
fclose(infile);
return 0;
}
/* Now we can initialize the JPEG decompression object. */
- jpeg_create_decompress(&cinfo);
+ jpeg_create_decompress(cinfo);
/* Step 2: specify data source (eg, a file) */
- jpeg_stdio_src(&cinfo, infile);
+ jpeg_stdio_src(cinfo, infile);
/* Step 3: read file parameters with jpeg_read_header() */
- (void)jpeg_read_header(&cinfo, TRUE);
+ (void)jpeg_read_header(cinfo, TRUE);
/* We can ignore the return value from jpeg_read_header since
* (a) suspension is not possible with the stdio data source, and
* (b) we passed TRUE to reject a tables-only JPEG file as an error.
@@ -360,7 +377,7 @@ read_JPEG_file(char *filename)
/* Step 5: Start decompressor */
- (void)jpeg_start_decompress(&cinfo);
+ (void)jpeg_start_decompress(cinfo);
/* We can ignore the return value since suspension is not possible
* with the stdio data source.
*/
@@ -372,30 +389,30 @@ read_JPEG_file(char *filename)
* In this example, we need to make an output work buffer of the right size.
*/
/* JSAMPLEs per row in output buffer */
- row_stride = cinfo.output_width * cinfo.output_components;
+ row_stride = cinfo->output_width * cinfo->output_components;
/* Make a one-row-high sample array that will go away when done with image */
- buffer = (*cinfo.mem->alloc_sarray)
- ((j_common_ptr)&cinfo, JPOOL_IMAGE, row_stride, 1);
+ buffer = (*cinfo->mem->alloc_sarray)
+ ((j_common_ptr)cinfo, JPOOL_IMAGE, row_stride, 1);
/* Step 6: while (scan lines remain to be read) */
/* jpeg_read_scanlines(...); */
- /* Here we use the library's state variable cinfo.output_scanline as the
+ /* Here we use the library's state variable cinfo->output_scanline as the
* loop counter, so that we don't have to keep track ourselves.
*/
- while (cinfo.output_scanline < cinfo.output_height) {
+ while (cinfo->output_scanline < cinfo->output_height) {
/* jpeg_read_scanlines expects an array of pointers to scanlines.
* Here the array is only one element long, but you could ask for
* more than one scanline at a time if that's more convenient.
*/
- (void)jpeg_read_scanlines(&cinfo, buffer, 1);
+ (void)jpeg_read_scanlines(cinfo, buffer, 1);
/* Assume put_scanline_someplace wants a pointer and sample count. */
put_scanline_someplace(buffer[0], row_stride);
}
/* Step 7: Finish decompression */
- (void)jpeg_finish_decompress(&cinfo);
+ (void)jpeg_finish_decompress(cinfo);
/* We can ignore the return value since suspension is not possible
* with the stdio data source.
*/
@@ -403,7 +420,7 @@ read_JPEG_file(char *filename)
/* Step 8: Release JPEG decompression object */
/* This is an important step since it will release a good deal of memory. */
- jpeg_destroy_decompress(&cinfo);
+ jpeg_destroy_decompress(cinfo);
/* After finish_decompress, we can close the input file.
* Here we postpone it until after no more JPEG errors are possible,
diff --git a/jpegturbo/java/TJBench.java b/jpegturbo/java/TJBench.java
index 6fac4d46..e43645ea 100644
--- a/jpegturbo/java/TJBench.java
+++ b/jpegturbo/java/TJBench.java
@@ -478,6 +478,8 @@ final class TJBench {
if (!compOnly)
decomp(srcBuf, jpegBuf, jpegSize, tmpBuf, w, h, subsamp, jpegQual,
fileName, tilew, tileh);
+ else if (quiet == 1)
+ System.out.println("N/A");
if (tilew == w && tileh == h) break;
}
diff --git a/jpegturbo/jchuff.c b/jpegturbo/jchuff.c
index 526203e3..cb05055d 100644
--- a/jpegturbo/jchuff.c
+++ b/jpegturbo/jchuff.c
@@ -43,8 +43,8 @@
*/
/* NOTE: Both GCC and Clang define __GNUC__ */
-#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
-#if !defined __thumb__ || defined __thumb2__
+#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
+#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif
#endif
@@ -432,7 +432,7 @@ dump_buffer(working_state *state)
* scanning order-- 1, 8, 16, etc.), then this will produce an encoded block
* larger than 200 bytes.
*/
-#define BUFSIZE (DCTSIZE2 * 4)
+#define BUFSIZE (DCTSIZE2 * 8)
#define LOAD_BUFFER() { \
if (state->free_in_buffer < BUFSIZE) { \
diff --git a/jpegturbo/jconfig.h.cmake b/jpegturbo/jconfig.h.cmake
deleted file mode 100644
index aeb2b30c..00000000
--- a/jpegturbo/jconfig.h.cmake
+++ /dev/null
@@ -1,73 +0,0 @@
-/* Version ID for the JPEG library.
- * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60".
- */
-#define JPEG_LIB_VERSION 80
-
-/* libjpeg-turbo version */
-#define LIBJPEG_TURBO_VERSION 2.0.3
-
-/* libjpeg-turbo version in integer form */
-#define LIBJPEG_TURBO_VERSION_NUMBER 203
-
-/* Support arithmetic encoding */
-#define C_ARITH_CODING_SUPPORTED 1
-
-/* Support arithmetic decoding */
-#define D_ARITH_CODING_SUPPORTED 1
-
-/* Support in-memory source/destination managers */
-#define MEM_SRCDST_SUPPORTED 1
-
-/* Use accelerated SIMD routines. */
-#cmakedefine WITH_SIMD 1
-
-/*
- * Define BITS_IN_JSAMPLE as either
- * 8 for 8-bit sample values (the usual setting)
- * 12 for 12-bit sample values
- * Only 8 and 12 are legal data precisions for lossy JPEG according to the
- * JPEG standard, and the IJG code does not support anything else!
- * We do not support run-time selection of data precision, sorry.
- */
-
-#define BITS_IN_JSAMPLE 8 /* use 8 or 12 */
-
-/* Define to 1 if you have the header file. */
-#cmakedefine HAVE_LOCALE_H 1
-
-/* Define to 1 if you have the header file. */
-#cmakedefine HAVE_STDDEF_H 1
-
-/* Define to 1 if you have the header file. */
-#cmakedefine HAVE_STDLIB_H 1
-
-/* Define if you need to include to get size_t. */
-#cmakedefine NEED_SYS_TYPES_H 1
-
-/* Define if you have BSD-like bzero and bcopy in rather than
- memset/memcpy in . */
-#cmakedefine NEED_BSD_STRINGS 1
-
-/* Define to 1 if the system has the type `unsigned char'. */
-#define HAVE_UNSIGNED_CHAR 1
-
-/* Define to 1 if the system has the type `unsigned short'. */
-#define HAVE_UNSIGNED_SHORT 1
-
-/* Compiler does not support pointers to undefined structures. */
-#cmakedefine INCOMPLETE_TYPES_BROKEN 1
-
-/* Define if your (broken) compiler shifts signed values as if they were
- unsigned. */
-#cmakedefine RIGHT_SHIFT_IS_UNSIGNED 1
-
-/* Define to 1 if type `char' is unsigned and you are not using gcc. */
-#ifndef __CHAR_UNSIGNED__
- #cmakedefine __CHAR_UNSIGNED__ 1
-#endif
-
-/* Define to empty if `const' does not conform to ANSI C. */
-/* #undef const */
-
-/* Define to `unsigned int' if does not define. */
-/* #undef size_t */
diff --git a/jpegturbo/jconfigint.h.cmake b/jpegturbo/jconfigint.h.cmake
deleted file mode 100644
index 89ba876f..00000000
--- a/jpegturbo/jconfigint.h.cmake
+++ /dev/null
@@ -1,31 +0,0 @@
-/* libjpeg-turbo build number */
-#define BUILD "0"
-
-/* Compiler's inline keyword */
-#undef inline
-
-/* How to obtain function inlining. */
-#define INLINE __inline
-
-/* Define to the full name of this package. */
-#define PACKAGE_NAME "libjpeg-turbo"
-
-/* Version number of package */
-#define VERSION "2.0.3"
-
-/* The size of `size_t', as computed by sizeof. */
-#cmakedefine SIZEOF_SIZE_T @SIZEOF_SIZE_T@
-
-/* Define if your compiler has __builtin_ctzl() and sizeof(unsigned long) == sizeof(size_t). */
-#cmakedefine HAVE_BUILTIN_CTZL
-
-/* Define to 1 if you have the header file. */
-#cmakedefine HAVE_INTRIN_H
-
-#if defined(_MSC_VER) && defined(HAVE_INTRIN_H)
-#if (SIZEOF_SIZE_T == 8)
-#define HAVE_BITSCANFORWARD64
-#elif (SIZEOF_SIZE_T == 4)
-#define HAVE_BITSCANFORWARD
-#endif
-#endif
diff --git a/jpegturbo/jcphuff.c b/jpegturbo/jcphuff.c
index 024d3af0..8c4efaf1 100644
--- a/jpegturbo/jcphuff.c
+++ b/jpegturbo/jcphuff.c
@@ -52,8 +52,8 @@
*/
/* NOTE: Both GCC and Clang define __GNUC__ */
-#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
-#if !defined __thumb__ || defined __thumb2__
+#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))
+#if !defined(__thumb__) || defined(__thumb2__)
#define USE_CLZ_INTRINSIC
#endif
#endif
diff --git a/jpegturbo/jfdctint.c b/jpegturbo/jfdctint.c
index c0391a92..b47c3061 100644
--- a/jpegturbo/jfdctint.c
+++ b/jpegturbo/jfdctint.c
@@ -1,7 +1,7 @@
/*
* jfdctint.c
*
- * This file was part of the Independent JPEG Group's software.
+ * This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1996, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2015, D. R. Commander.
diff --git a/jpegturbo/jidctint.c b/jpegturbo/jidctint.c
index 55573429..98425d5f 100644
--- a/jpegturbo/jidctint.c
+++ b/jpegturbo/jidctint.c
@@ -1,7 +1,7 @@
/*
* jidctint.c
*
- * This file was part of the Independent JPEG Group's software.
+ * This file was part of the Independent JPEG Group's software:
* Copyright (C) 1991-1998, Thomas G. Lane.
* Modification developed 2002-2009 by Guido Vollbeding.
* libjpeg-turbo Modifications:
diff --git a/jpegturbo/jidctred.c b/jpegturbo/jidctred.c
index 1ff352f8..1dd65a94 100644
--- a/jpegturbo/jidctred.c
+++ b/jpegturbo/jidctred.c
@@ -1,7 +1,7 @@
/*
* jidctred.c
*
- * This file was part of the Independent JPEG Group's software.
+ * This file was part of the Independent JPEG Group's software:
* Copyright (C) 1994-1998, Thomas G. Lane.
* libjpeg-turbo Modifications:
* Copyright (C) 2015, D. R. Commander.
diff --git a/jpegturbo/release/License.rtf b/jpegturbo/release/License.rtf
old mode 100755
new mode 100644
diff --git a/jpegturbo/release/ReadMe.txt b/jpegturbo/release/ReadMe.txt
index cf9012af..0a087114 100644
--- a/jpegturbo/release/ReadMe.txt
+++ b/jpegturbo/release/ReadMe.txt
@@ -1,4 +1,4 @@
-libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2, AVX2, NEON, AltiVec) to accelerate baseline JPEG compression and decompression on x86, x86-64, ARM, and PowerPC systems, as well as progressive JPEG compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg, all else being equal. On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized Huffman coding routines. In many cases, the performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
+libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate baseline JPEG compression and decompression on x86, x86-64, ARM, PowerPC, and MIPS systems, as well as progressive JPEG compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg, all else being equal. On other types of systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by virtue of its highly-optimized Huffman coding routines. In many cases, the performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
libjpeg-turbo implements both the traditional libjpeg API as well as the less powerful but more straightforward TurboJPEG API. libjpeg-turbo also features colorspace extensions that allow it to compress from/decompress to 32-bit and big-endian pixel buffers (RGBX, XBGR, etc.), as well as a full-featured Java interface.
diff --git a/jpegturbo/release/Welcome.rtf b/jpegturbo/release/Welcome.rtf
old mode 100755
new mode 100644
diff --git a/jpegturbo/release/deb-control.in b/jpegturbo/release/deb-control.in
index 08131c66..c41c9a70 100644
--- a/jpegturbo/release/deb-control.in
+++ b/jpegturbo/release/deb-control.in
@@ -8,15 +8,14 @@ Maintainer: @PKGVENDOR@ <@PKGEMAIL@>
Homepage: @PKGURL@
Installed-Size: {__SIZE}
Description: A SIMD-accelerated JPEG codec that provides both the libjpeg and TurboJPEG APIs
- libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2,
- AVX2, NEON, AltiVec) to accelerate baseline JPEG compression and decompression
- on x86, x86-64, ARM, and PowerPC systems, as well as progressive JPEG
- compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is
- generally 2-6x as fast as libjpeg, all else being equal. On other types of
- systems, libjpeg-turbo can still outperform libjpeg by a significant amount,
- by virtue of its highly-optimized Huffman coding routines. In many cases, the
- performance of libjpeg-turbo rivals that of proprietary high-speed JPEG
- codecs.
+ libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate
+ baseline JPEG compression and decompression on x86, x86-64, ARM, PowerPC, and
+ MIPS systems, as well as progressive JPEG compression on x86 and x86-64
+ systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg,
+ all else being equal. On other types of systems, libjpeg-turbo can still
+ outperform libjpeg by a significant amount, by virtue of its highly-optimized
+ Huffman coding routines. In many cases, the performance of libjpeg-turbo
+ rivals that of proprietary high-speed JPEG codecs.
.
libjpeg-turbo implements both the traditional libjpeg API as well as the less
powerful but more straightforward TurboJPEG API. libjpeg-turbo also features
diff --git a/jpegturbo/release/makecygwinpkg.in b/jpegturbo/release/makecygwinpkg.in
old mode 100755
new mode 100644
diff --git a/jpegturbo/release/rpm.spec.in b/jpegturbo/release/rpm.spec.in
index e5730e6c..83a1669f 100644
--- a/jpegturbo/release/rpm.spec.in
+++ b/jpegturbo/release/rpm.spec.in
@@ -51,14 +51,14 @@ Provides: %{name} = %{version}-%{release}, @CMAKE_PROJECT_NAME@ = %{version}-%{r
%endif
%description
-libjpeg-turbo is a JPEG image codec that uses SIMD instructions (MMX, SSE2,
-AVX2, NEON, AltiVec) to accelerate baseline JPEG compression and decompression
-on x86, x86-64, ARM, and PowerPC systems, as well as progressive JPEG
-compression on x86 and x86-64 systems. On such systems, libjpeg-turbo is
-generally 2-6x as fast as libjpeg, all else being equal. On other types of
-systems, libjpeg-turbo can still outperform libjpeg by a significant amount, by
-virtue of its highly-optimized Huffman coding routines. In many cases, the
-performance of libjpeg-turbo rivals that of proprietary high-speed JPEG codecs.
+libjpeg-turbo is a JPEG image codec that uses SIMD instructions to accelerate
+baseline JPEG compression and decompression on x86, x86-64, ARM, PowerPC, and
+MIPS systems, as well as progressive JPEG compression on x86 and x86-64
+systems. On such systems, libjpeg-turbo is generally 2-6x as fast as libjpeg,
+all else being equal. On other types of systems, libjpeg-turbo can still
+outperform libjpeg by a significant amount, by virtue of its highly-optimized
+Huffman coding routines. In many cases, the performance of libjpeg-turbo
+rivals that of proprietary high-speed JPEG codecs.
libjpeg-turbo implements both the traditional libjpeg API as well as the less
powerful but more straightforward TurboJPEG API. libjpeg-turbo also features
diff --git a/jpegturbo/sharedlib/CMakeLists.txt b/jpegturbo/sharedlib/CMakeLists.txt
old mode 100755
new mode 100644
diff --git a/jpegturbo/simd/CMakeLists.txt b/jpegturbo/simd/CMakeLists.txt
old mode 100755
new mode 100644
index 3472c0db..5c8009a9
--- a/jpegturbo/simd/CMakeLists.txt
+++ b/jpegturbo/simd/CMakeLists.txt
@@ -38,6 +38,14 @@ elseif(CPU_TYPE STREQUAL "i386")
endif()
endif()
+# When SIMD is optional (REQUIRE_SIMD is off), probe for an ASM_NASM
+# assembler before the enable_language(ASM_NASM) call below.  If none is
+# found, report it through simd_fail() and stop processing this file, so
+# enable_language() is never reached without an assembler.  When
+# REQUIRE_SIMD is on, the probe is skipped and enable_language() runs
+# unconditionally.
+if(NOT REQUIRE_SIMD)
+ include(CheckLanguage)
+ check_language(ASM_NASM)
+ if(NOT CMAKE_ASM_NASM_COMPILER)
+ simd_fail("SIMD extensions disabled: could not find NASM compiler")
+ return()
+ endif()
+endif()
enable_language(ASM_NASM)
message(STATUS "CMAKE_ASM_NASM_COMPILER = ${CMAKE_ASM_NASM_COMPILER}")
diff --git a/jpegturbo/simd/arm64/jsimd_neon.S b/jpegturbo/simd/arm64/jsimd_neon.S
index 93472efc..a3aa4066 100644
--- a/jpegturbo/simd/arm64/jsimd_neon.S
+++ b/jpegturbo/simd/arm64/jsimd_neon.S
@@ -31,6 +31,251 @@
.section .note.GNU-stack, "", %progbits /* mark stack as non-executable */
#endif
+/*
+ * The constant tables that follow are emitted into a data section
+ * (__DATA,__const for Apple targets, .rodata otherwise); the .text directive
+ * after the last table switches back to code.
+ */
+#if defined(__APPLE__)
+.section __DATA, __const
+#else
+.section .rodata, "a", %progbits
+#endif
+
+/* Constants for jsimd_idct_islow_neon() */
+
+#define F_0_298 2446 /* FIX(0.298631336) */
+#define F_0_390 3196 /* FIX(0.390180644) */
+#define F_0_541 4433 /* FIX(0.541196100) */
+#define F_0_765 6270 /* FIX(0.765366865) */
+#define F_0_899 7373 /* FIX(0.899976223) */
+#define F_1_175 9633 /* FIX(1.175875602) */
+#define F_1_501 12299 /* FIX(1.501321110) */
+#define F_1_847 15137 /* FIX(1.847759065) */
+#define F_1_961 16069 /* FIX(1.961570560) */
+#define F_2_053 16819 /* FIX(2.053119869) */
+#define F_2_562 20995 /* FIX(2.562915447) */
+#define F_3_072 25172 /* FIX(3.072711026) */
+
+/*
+ * Twelve 16-bit coefficients followed by four zero halfwords, so the table
+ * occupies exactly 32 bytes.  Entries written with a leading minus sign are
+ * stored already negated.  The first entries correspond to the lane aliases
+ * XFIX_P_0_298 (v0.h[0]), XFIX_N_0_390 (v0.h[1]), XFIX_P_0_541 (v0.h[2])
+ * defined next to jsimd_idct_islow_neon.
+ */
+.balign 16
+Ljsimd_idct_islow_neon_consts:
+ .short F_0_298
+ .short -F_0_390
+ .short F_0_541
+ .short F_0_765
+ .short - F_0_899
+ .short F_1_175
+ .short F_1_501
+ .short - F_1_847
+ .short - F_1_961
+ .short F_2_053
+ .short - F_2_562
+ .short F_3_072
+ .short 0 /* padding */
+ .short 0
+ .short 0
+ .short 0
+
+#undef F_0_298
+#undef F_0_390
+#undef F_0_541
+#undef F_0_765
+#undef F_0_899
+#undef F_1_175
+#undef F_1_501
+#undef F_1_847
+#undef F_1_961
+#undef F_2_053
+#undef F_2_562
+#undef F_3_072
+
+/* Constants for jsimd_idct_ifast_neon() */
+
+.balign 16
+Ljsimd_idct_ifast_neon_consts:
+ .short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */
+ .short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */
+ .short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */
+ .short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */
+
+/* Constants for jsimd_idct_4x4_neon() and jsimd_idct_2x2_neon() */
+
+#define CONST_BITS 13
+
+#define FIX_0_211164243 (1730) /* FIX(0.211164243) */
+#define FIX_0_509795579 (4176) /* FIX(0.509795579) */
+#define FIX_0_601344887 (4926) /* FIX(0.601344887) */
+#define FIX_0_720959822 (5906) /* FIX(0.720959822) */
+#define FIX_0_765366865 (6270) /* FIX(0.765366865) */
+#define FIX_0_850430095 (6967) /* FIX(0.850430095) */
+#define FIX_0_899976223 (7373) /* FIX(0.899976223) */
+#define FIX_1_061594337 (8697) /* FIX(1.061594337) */
+#define FIX_1_272758580 (10426) /* FIX(1.272758580) */
+#define FIX_1_451774981 (11893) /* FIX(1.451774981) */
+#define FIX_1_847759065 (15137) /* FIX(1.847759065) */
+#define FIX_2_172734803 (17799) /* FIX(2.172734803) */
+#define FIX_2_562915447 (20995) /* FIX(2.562915447) */
+#define FIX_3_624509785 (29692) /* FIX(3.624509785) */
+
+/*
+ * jsimd_idct_4x4_neon loads this table with
+ *   ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4]
+ * i.e. 32 bytes, while only 12 halfwords (24 bytes) are defined here; the
+ * load site documents v3.4h as padding.  With the layout below, those extra
+ * 8 bytes are the 2x2 table that immediately follows (offset 24 is already
+ * 8-byte aligned).  Trailing comments name the lane each halfword lands in.
+ */
+.balign 16
+Ljsimd_idct_4x4_neon_consts:
+ .short FIX_1_847759065 /* v0.h[0] */
+ .short -FIX_0_765366865 /* v0.h[1] */
+ .short -FIX_0_211164243 /* v0.h[2] */
+ .short FIX_1_451774981 /* v0.h[3] */
+ .short -FIX_2_172734803 /* v1.h[0] */
+ .short FIX_1_061594337 /* v1.h[1] */
+ .short -FIX_0_509795579 /* v1.h[2] */
+ .short -FIX_0_601344887 /* v1.h[3] */
+ .short FIX_0_899976223 /* v2.h[0] */
+ .short FIX_2_562915447 /* v2.h[1] */
+ .short 1 << (CONST_BITS + 1) /* v2.h[2] */
+ .short 0 /* v2.h[3] */
+
+/* Loaded by jsimd_idct_2x2_neon with a single ld1 {v14.4h}, [TMP2]. */
+.balign 8
+Ljsimd_idct_2x2_neon_consts:
+ .short -FIX_0_720959822 /* v14.h[0] */
+ .short FIX_0_850430095 /* v14.h[1] */
+ .short -FIX_1_272758580 /* v14.h[2] */
+ .short FIX_3_624509785 /* v14.h[3] */
+
+/* Constants for jsimd_ycc_*_neon() */
+
+/*
+ * A single copy shared by every function generated from the ycc -> rgb
+ * conversion macro (all colorid values, both fast_st3 settings); each one
+ * takes this label's address in x15.  The load site notes that the first
+ * row (loaded into v0.4h) is only padding.
+ */
+.balign 16
+Ljsimd_ycc_rgb_neon_consts:
+ .short 0, 0, 0, 0
+ .short 22971, -11277, -23401, 29033
+ .short -128, -128, -128, -128
+ .short -128, -128, -128, -128
+
+/* Constants for jsimd_*_ycc_neon() */
+
+/*
+ * A single copy shared by every function generated from the rgb -> ycc
+ * conversion macro (all colorid values, both fast_ld3 settings).  Read as
+ * two 8-halfword vectors: ld1 {v0.8h, v1.8h}, [x13].
+ */
+.balign 16
+Ljsimd_rgb_ycc_neon_consts:
+ .short 19595, 38470, 7471, 11059
+ .short 21709, 32768, 27439, 5329
+ .short 32767, 128, 32767, 128
+ .short 32767, 128, 32767, 128
+
+/* Constants for jsimd_fdct_islow_neon() */
+
+#define F_0_298 2446 /* FIX(0.298631336) */
+#define F_0_390 3196 /* FIX(0.390180644) */
+#define F_0_541 4433 /* FIX(0.541196100) */
+#define F_0_765 6270 /* FIX(0.765366865) */
+#define F_0_899 7373 /* FIX(0.899976223) */
+#define F_1_175 9633 /* FIX(1.175875602) */
+#define F_1_501 12299 /* FIX(1.501321110) */
+#define F_1_847 15137 /* FIX(1.847759065) */
+#define F_1_961 16069 /* FIX(1.961570560) */
+#define F_2_053 16819 /* FIX(2.053119869) */
+#define F_2_562 20995 /* FIX(2.562915447) */
+#define F_3_072 25172 /* FIX(3.072711026) */
+
+/*
+ * jsimd_fdct_islow_neon reads this table as ld1 {v0.8h, v1.8h}, [TMP], i.e.
+ * 16 halfwords; the four zero entries pad the twelve coefficients to that
+ * size.  Entries written with a leading minus sign are stored already
+ * negated.
+ */
+.balign 16
+Ljsimd_fdct_islow_neon_consts:
+ .short F_0_298
+ .short -F_0_390
+ .short F_0_541
+ .short F_0_765
+ .short - F_0_899
+ .short F_1_175
+ .short F_1_501
+ .short - F_1_847
+ .short - F_1_961
+ .short F_2_053
+ .short - F_2_562
+ .short F_3_072
+ .short 0 /* padding */
+ .short 0
+ .short 0
+ .short 0
+
+#undef F_0_298
+#undef F_0_390
+#undef F_0_541
+#undef F_0_765
+#undef F_0_899
+#undef F_1_175
+#undef F_1_501
+#undef F_1_847
+#undef F_1_961
+#undef F_2_053
+#undef F_2_562
+#undef F_3_072
+
+/* Constants for jsimd_fdct_ifast_neon() */
+
+.balign 16
+Ljsimd_fdct_ifast_neon_consts:
+ .short (98 * 128) /* XFIX_0_382683433 */
+ .short (139 * 128) /* XFIX_0_541196100 */
+ .short (181 * 128) /* XFIX_0_707106781 */
+ .short (334 * 128 - 256 * 128) /* XFIX_1_306562965 */
+
+/* Constants for jsimd_h2*_downsample_neon() */
+
+/*
+ * Sixteen rows of 16 byte indices.  jsimd_h2v1_downsample_neon and
+ * jsimd_h2v2_downsample_neon both compute the row address as
+ *   table + ((BLOCK_WIDTH << 4) - IMAGE_WIDTH) * 16
+ * (h2v1 then loads the row into v18.16b).  Row "diff k" is the identity
+ * permutation 0..15 with its last k positions replaced by index 15 - k.
+ * NOTE(review): presumably used as a tbl index vector that replicates the
+ * last valid sample into the padding columns -- confirm in the function body.
+ */
+.balign 16
+Ljsimd_h2_downsample_neon_consts:
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F /* diff 0 */
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0E /* diff 1 */
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0D, 0x0D /* diff 2 */
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0C, 0x0C, 0x0C /* diff 3 */
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
+ 0x08, 0x09, 0x0A, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B /* diff 4 */
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
+ 0x08, 0x09, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A /* diff 5 */
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
+ 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09 /* diff 6 */
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 /* diff 7 */
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07 /* diff 8 */
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x06, \
+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06 /* diff 9 */
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x05, \
+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05 /* diff 10 */
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x04, \
+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 /* diff 11 */
+ .byte 0x00, 0x01, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, \
+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 /* diff 12 */
+ .byte 0x00, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, \
+ 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02 /* diff 13 */
+ .byte 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, \
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 /* diff 14 */
+ .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* diff 15 */
+
+/* Constants for jsimd_huff_encode_one_block_neon() */
+
+/*
+ * Shared by both functions generated by generate_jsimd_huff_encode_one_block
+ * (the fast_tbl and slowtbl variants), which take this address in x15.
+ * Layout: 16 bytes holding the single-bit masks 0x01..0x80 twice, followed
+ * by twelve rows of 16 byte indices.
+ * NOTE(review): the index rows look like tbl permutation vectors, with 255
+ * marking slots that receive no source byte -- confirm in the function body.
+ */
+.balign 16
+Ljsimd_huff_encode_one_block_neon_consts:
+ .byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, \
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
+ .byte 0, 1, 2, 3, 16, 17, 32, 33, \
+ 18, 19, 4, 5, 6, 7, 20, 21 /* L0 => L3 : 4 lines OK */
+ .byte 34, 35, 48, 49, 255, 255, 50, 51, \
+ 36, 37, 22, 23, 8, 9, 10, 11 /* L0 => L3 : 4 lines OK */
+ .byte 8, 9, 22, 23, 36, 37, 50, 51, \
+ 255, 255, 255, 255, 255, 255, 52, 53 /* L1 => L4 : 4 lines OK */
+ .byte 54, 55, 40, 41, 26, 27, 12, 13, \
+ 14, 15, 28, 29, 42, 43, 56, 57 /* L0 => L3 : 4 lines OK */
+ .byte 6, 7, 20, 21, 34, 35, 48, 49, \
+ 50, 51, 36, 37, 22, 23, 8, 9 /* L4 => L7 : 4 lines OK */
+ .byte 42, 43, 28, 29, 14, 15, 30, 31, \
+ 44, 45, 58, 59, 255, 255, 255, 255 /* L1 => L4 : 4 lines OK */
+ .byte 255, 255, 255, 255, 56, 57, 42, 43, \
+ 28, 29, 14, 15, 30, 31, 44, 45 /* L3 => L6 : 4 lines OK */
+ .byte 26, 27, 40, 41, 42, 43, 28, 29, \
+ 14, 15, 30, 31, 44, 45, 46, 47 /* L5 => L7 : 3 lines OK */
+ .byte 255, 255, 255, 255, 0, 1, 255, 255, \
+ 255, 255, 255, 255, 255, 255, 255, 255 /* L4 : 1 line OK */
+ .byte 255, 255, 255, 255, 255, 255, 255, 255, \
+ 0, 1, 16, 17, 2, 3, 255, 255 /* L5 => L6 : 2 lines OK */
+ .byte 255, 255, 255, 255, 255, 255, 255, 255, \
+ 255, 255, 255, 255, 8, 9, 22, 23 /* L5 => L6 : 2 lines OK */
+ .byte 4, 5, 6, 7, 255, 255, 255, 255, \
+ 255, 255, 255, 255, 255, 255, 255, 255 /* L7 : 1 line OK */
+
.text
@@ -55,6 +300,17 @@ _\fname:
#endif
.endm
+/*
+ * get_symbol_loc reg, symbol
+ *
+ * Load the address of \symbol into \reg with an adrp/add pair: adrp yields
+ * the address of the 4 KB page containing the symbol and add supplies the
+ * offset within that page.  Apple's assembler spells the two relocations
+ * \symbol@PAGE and \symbol@PAGEOFF; other assemblers take the bare symbol
+ * for adrp and the :lo12: modifier for add.
+ * The routines in this file use it instead of a plain adr to reach the
+ * constant tables, which are placed in a data section rather than in .text.
+ */
+.macro get_symbol_loc reg, symbol
+#ifdef __APPLE__
+ adrp \reg, \symbol@PAGE
+ add \reg, \reg, \symbol@PAGEOFF
+#else
+ adrp \reg, \symbol
+ add \reg, \reg, :lo12:\symbol
+#endif
+.endm
+
/* Transpose elements of single 128 bit registers */
.macro transpose_single x0, x1, xi, xilen, literal
ins \xi\xilen[0], \x0\xilen[0]
@@ -139,51 +395,6 @@ _\fname:
#define CONST_BITS 13
#define PASS1_BITS 2
-#define F_0_298 2446 /* FIX(0.298631336) */
-#define F_0_390 3196 /* FIX(0.390180644) */
-#define F_0_541 4433 /* FIX(0.541196100) */
-#define F_0_765 6270 /* FIX(0.765366865) */
-#define F_0_899 7373 /* FIX(0.899976223) */
-#define F_1_175 9633 /* FIX(1.175875602) */
-#define F_1_501 12299 /* FIX(1.501321110) */
-#define F_1_847 15137 /* FIX(1.847759065) */
-#define F_1_961 16069 /* FIX(1.961570560) */
-#define F_2_053 16819 /* FIX(2.053119869) */
-#define F_2_562 20995 /* FIX(2.562915447) */
-#define F_3_072 25172 /* FIX(3.072711026) */
-
-.balign 16
-Ljsimd_idct_islow_neon_consts:
- .short F_0_298
- .short -F_0_390
- .short F_0_541
- .short F_0_765
- .short - F_0_899
- .short F_1_175
- .short F_1_501
- .short - F_1_847
- .short - F_1_961
- .short F_2_053
- .short - F_2_562
- .short F_3_072
- .short 0 /* padding */
- .short 0
- .short 0
- .short 0
-
-#undef F_0_298
-#undef F_0_390
-#undef F_0_541
-#undef F_0_765
-#undef F_0_899
-#undef F_1_175
-#undef F_1_501
-#undef F_1_847
-#undef F_1_961
-#undef F_2_053
-#undef F_2_562
-#undef F_3_072
-
#define XFIX_P_0_298 v0.h[0]
#define XFIX_N_0_390 v0.h[1]
#define XFIX_P_0_541 v0.h[2]
@@ -217,7 +428,7 @@ asm_function jsimd_idct_islow_neon
uxtw x3, w3
sub sp, sp, #64
- adr x15, Ljsimd_idct_islow_neon_consts
+ get_symbol_loc x15, Ljsimd_idct_islow_neon_consts
mov x10, sp
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x10], #32
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x10], #32
@@ -791,13 +1002,6 @@ asm_function jsimd_idct_islow_neon
#define XFIX_1_847759065 v0.h[2]
#define XFIX_2_613125930 v0.h[3]
-.balign 16
-Ljsimd_idct_ifast_neon_consts:
- .short (277 * 128 - 256 * 128) /* XFIX_1_082392200 */
- .short (362 * 128 - 256 * 128) /* XFIX_1_414213562 */
- .short (473 * 128 - 256 * 128) /* XFIX_1_847759065 */
- .short (669 * 128 - 512 * 128) /* XFIX_2_613125930 */
-
asm_function jsimd_idct_ifast_neon
DCT_TABLE .req x0
@@ -832,7 +1036,7 @@ asm_function jsimd_idct_ifast_neon
* 7 | d30 | d31 ( v23.8h )
*/
/* Save NEON registers used in fast IDCT */
- adr TMP5, Ljsimd_idct_ifast_neon_consts
+ get_symbol_loc TMP5, Ljsimd_idct_ifast_neon_consts
ld1 {v16.8h, v17.8h}, [COEF_BLOCK], 32
ld1 {v0.8h, v1.8h}, [DCT_TABLE], 32
ld1 {v18.8h, v19.8h}, [COEF_BLOCK], 32
@@ -1023,38 +1227,6 @@ asm_function jsimd_idct_ifast_neon
* but readability will suffer somewhat.
*/
-#define CONST_BITS 13
-
-#define FIX_0_211164243 (1730) /* FIX(0.211164243) */
-#define FIX_0_509795579 (4176) /* FIX(0.509795579) */
-#define FIX_0_601344887 (4926) /* FIX(0.601344887) */
-#define FIX_0_720959822 (5906) /* FIX(0.720959822) */
-#define FIX_0_765366865 (6270) /* FIX(0.765366865) */
-#define FIX_0_850430095 (6967) /* FIX(0.850430095) */
-#define FIX_0_899976223 (7373) /* FIX(0.899976223) */
-#define FIX_1_061594337 (8697) /* FIX(1.061594337) */
-#define FIX_1_272758580 (10426) /* FIX(1.272758580) */
-#define FIX_1_451774981 (11893) /* FIX(1.451774981) */
-#define FIX_1_847759065 (15137) /* FIX(1.847759065) */
-#define FIX_2_172734803 (17799) /* FIX(2.172734803) */
-#define FIX_2_562915447 (20995) /* FIX(2.562915447) */
-#define FIX_3_624509785 (29692) /* FIX(3.624509785) */
-
-.balign 16
-Ljsimd_idct_4x4_neon_consts:
- .short FIX_1_847759065 /* v0.h[0] */
- .short -FIX_0_765366865 /* v0.h[1] */
- .short -FIX_0_211164243 /* v0.h[2] */
- .short FIX_1_451774981 /* v0.h[3] */
- .short -FIX_2_172734803 /* d1[0] */
- .short FIX_1_061594337 /* d1[1] */
- .short -FIX_0_509795579 /* d1[2] */
- .short -FIX_0_601344887 /* d1[3] */
- .short FIX_0_899976223 /* v2.h[0] */
- .short FIX_2_562915447 /* v2.h[1] */
- .short 1 << (CONST_BITS + 1) /* v2.h[2] */
- .short 0 /* v2.h[3] */
-
.macro idct_helper x4, x6, x8, x10, x12, x14, x16, shift, y26, y27, y28, y29
smull v28.4s, \x4, v2.h[2]
smlal v28.4s, \x8, v0.h[0]
@@ -1121,7 +1293,7 @@ asm_function jsimd_idct_4x4_neon
sub sp, sp, 64
mov x9, sp
/* Load constants (v3.4h is just used for padding) */
- adr TMP4, Ljsimd_idct_4x4_neon_consts
+ get_symbol_loc TMP4, Ljsimd_idct_4x4_neon_consts
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32
ld1 {v0.4h, v1.4h, v2.4h, v3.4h}, [TMP4]
@@ -1264,13 +1436,6 @@ asm_function jsimd_idct_4x4_neon
* bit exact compatibility with jpeg-6b.
*/
-.balign 8
-Ljsimd_idct_2x2_neon_consts:
- .short -FIX_0_720959822 /* v14[0] */
- .short FIX_0_850430095 /* v14[1] */
- .short -FIX_1_272758580 /* v14[2] */
- .short FIX_3_624509785 /* v14[3] */
-
.macro idct_helper x4, x6, x10, x12, x16, shift, y26, y27
sshll v15.4s, \x4, #15
smull v26.4s, \x6, v14.h[3]
@@ -1311,7 +1476,7 @@ asm_function jsimd_idct_2x2_neon
mov x9, sp
/* Load constants */
- adr TMP2, Ljsimd_idct_2x2_neon_consts
+ get_symbol_loc TMP2, Ljsimd_idct_2x2_neon_consts
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32
st1 {v12.8b, v13.8b, v14.8b, v15.8b}, [x9], 32
ld1 {v14.4h}, [TMP2]
@@ -1663,21 +1828,6 @@ asm_function jsimd_idct_2x2_neon
do_yuv_to_rgb_stage2
.endm
-/* Apple gas crashes on adrl, work around that by using adr.
- * But this requires a copy of these constants for each function.
- */
-
-.balign 16
-.if \fast_st3 == 1
-Ljsimd_ycc_\colorid\()_neon_consts:
-.else
-Ljsimd_ycc_\colorid\()_neon_slowst3_consts:
-.endif
- .short 0, 0, 0, 0
- .short 22971, -11277, -23401, 29033
- .short -128, -128, -128, -128
- .short -128, -128, -128, -128
-
.if \fast_st3 == 1
asm_function jsimd_ycc_\colorid\()_convert_neon
.else
@@ -1703,11 +1853,7 @@ asm_function jsimd_ycc_\colorid\()_convert_neon_slowst3
mov x9, sp
/* Load constants to d1, d2, d3 (v0.4h is just used for padding) */
- .if \fast_st3 == 1
- adr x15, Ljsimd_ycc_\colorid\()_neon_consts
- .else
- adr x15, Ljsimd_ycc_\colorid\()_neon_slowst3_consts
- .endif
+ get_symbol_loc x15, Ljsimd_ycc_rgb_neon_consts
/* Save NEON registers */
st1 {v8.8b, v9.8b, v10.8b, v11.8b}, [x9], 32
@@ -2004,17 +2150,6 @@ generate_jsimd_ycc_rgb_convert_neon extbgr, 24, 2, .4h, 1, .4h, 0, .4h, .8b,
do_rgb_to_yuv_stage1
.endm
-.balign 16
-.if \fast_ld3 == 1
-Ljsimd_\colorid\()_ycc_neon_consts:
-.else
-Ljsimd_\colorid\()_ycc_neon_slowld3_consts:
-.endif
- .short 19595, 38470, 7471, 11059
- .short 21709, 32768, 27439, 5329
- .short 32767, 128, 32767, 128
- .short 32767, 128, 32767, 128
-
.if \fast_ld3 == 1
asm_function jsimd_\colorid\()_ycc_convert_neon
.else
@@ -2037,11 +2172,7 @@ asm_function jsimd_\colorid\()_ycc_convert_neon_slowld3
N .req w12
/* Load constants to d0, d1, d2, d3 */
- .if \fast_ld3 == 1
- adr x13, Ljsimd_\colorid\()_ycc_neon_consts
- .else
- adr x13, Ljsimd_\colorid\()_ycc_neon_slowld3_consts
- .endif
+ get_symbol_loc x13, Ljsimd_rgb_ycc_neon_consts
ld1 {v0.8h, v1.8h}, [x13]
ldr OUTPUT_BUF0, [OUTPUT_BUF]
@@ -2241,50 +2372,6 @@ asm_function jsimd_convsamp_neon
#define DESCALE_P1 (CONST_BITS - PASS1_BITS)
#define DESCALE_P2 (CONST_BITS + PASS1_BITS)
-#define F_0_298 2446 /* FIX(0.298631336) */
-#define F_0_390 3196 /* FIX(0.390180644) */
-#define F_0_541 4433 /* FIX(0.541196100) */
-#define F_0_765 6270 /* FIX(0.765366865) */
-#define F_0_899 7373 /* FIX(0.899976223) */
-#define F_1_175 9633 /* FIX(1.175875602) */
-#define F_1_501 12299 /* FIX(1.501321110) */
-#define F_1_847 15137 /* FIX(1.847759065) */
-#define F_1_961 16069 /* FIX(1.961570560) */
-#define F_2_053 16819 /* FIX(2.053119869) */
-#define F_2_562 20995 /* FIX(2.562915447) */
-#define F_3_072 25172 /* FIX(3.072711026) */
-
-.balign 16
-Ljsimd_fdct_islow_neon_consts:
- .short F_0_298
- .short -F_0_390
- .short F_0_541
- .short F_0_765
- .short - F_0_899
- .short F_1_175
- .short F_1_501
- .short - F_1_847
- .short - F_1_961
- .short F_2_053
- .short - F_2_562
- .short F_3_072
- .short 0 /* padding */
- .short 0
- .short 0
- .short 0
-
-#undef F_0_298
-#undef F_0_390
-#undef F_0_541
-#undef F_0_765
-#undef F_0_899
-#undef F_1_175
-#undef F_1_501
-#undef F_1_847
-#undef F_1_961
-#undef F_2_053
-#undef F_2_562
-#undef F_3_072
#define XFIX_P_0_298 v0.h[0]
#define XFIX_N_0_390 v0.h[1]
#define XFIX_P_0_541 v0.h[2]
@@ -2304,7 +2391,7 @@ asm_function jsimd_fdct_islow_neon
TMP .req x9
/* Load constants */
- adr TMP, Ljsimd_fdct_islow_neon_consts
+ get_symbol_loc TMP, Ljsimd_fdct_islow_neon_consts
ld1 {v0.8h, v1.8h}, [TMP]
/* Save NEON registers */
@@ -2583,20 +2670,13 @@ asm_function jsimd_fdct_islow_neon
#define XFIX_0_707106781 v0.h[2]
#define XFIX_1_306562965 v0.h[3]
-.balign 16
-Ljsimd_fdct_ifast_neon_consts:
- .short (98 * 128) /* XFIX_0_382683433 */
- .short (139 * 128) /* XFIX_0_541196100 */
- .short (181 * 128) /* XFIX_0_707106781 */
- .short (334 * 128 - 256 * 128) /* XFIX_1_306562965 */
-
asm_function jsimd_fdct_ifast_neon
DATA .req x0
TMP .req x9
/* Load constants */
- adr TMP, Ljsimd_fdct_ifast_neon_consts
+ get_symbol_loc TMP, Ljsimd_fdct_ifast_neon_consts
ld1 {v0.4h}, [TMP]
/* Load all DATA into NEON registers with the following allocation:
@@ -2775,41 +2855,6 @@ asm_function jsimd_quantize_neon
* JSAMPARRAY input_data, JSAMPARRAY output_data);
*/
-.balign 16
-Ljsimd_h2_downsample_neon_consts:
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
- 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F /* diff 0 */
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
- 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0E /* diff 1 */
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
- 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0D, 0x0D /* diff 2 */
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
- 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0C, 0x0C, 0x0C /* diff 3 */
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
- 0x08, 0x09, 0x0A, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B /* diff 4 */
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
- 0x08, 0x09, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A /* diff 5 */
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
- 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09 /* diff 6 */
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 /* diff 7 */
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \
- 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07 /* diff 8 */
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x06, \
- 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06 /* diff 9 */
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x05, \
- 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05 /* diff 10 */
- .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x04, \
- 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04 /* diff 11 */
- .byte 0x00, 0x01, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, \
- 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 /* diff 12 */
- .byte 0x00, 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, \
- 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02 /* diff 13 */
- .byte 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, \
- 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 /* diff 14 */
- .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /* diff 15 */
-
asm_function jsimd_h2v1_downsample_neon
IMAGE_WIDTH .req x0
MAX_V_SAMP .req x1
@@ -2827,7 +2872,7 @@ asm_function jsimd_h2v1_downsample_neon
mov TMPDUP, #0x10000
lsl TMP2, BLOCK_WIDTH, #4
sub TMP2, TMP2, IMAGE_WIDTH
- adr TMP3, Ljsimd_h2_downsample_neon_consts
+ get_symbol_loc TMP3, Ljsimd_h2_downsample_neon_consts
add TMP3, TMP3, TMP2, lsl #4
dup v16.4s, TMPDUP
ld1 {v18.16b}, [TMP3]
@@ -2906,7 +2951,7 @@ asm_function jsimd_h2v2_downsample_neon
lsl TMP2, BLOCK_WIDTH, #4
lsl TMPDUP, TMPDUP, #17
sub TMP2, TMP2, IMAGE_WIDTH
- adr TMP3, Ljsimd_h2_downsample_neon_consts
+ get_symbol_loc TMP3, Ljsimd_h2_downsample_neon_consts
orr TMPDUP, TMPDUP, #1
add TMP3, TMP3, TMP2, lsl #4
dup v16.4s, TMPDUP
@@ -3012,41 +3057,6 @@ asm_function jsimd_h2v2_downsample_neon
.macro generate_jsimd_huff_encode_one_block fast_tbl
-.balign 16
-.if \fast_tbl == 1
-Ljsimd_huff_encode_one_block_neon_consts:
-.else
-Ljsimd_huff_encode_one_block_neon_slowtbl_consts:
-.endif
- .byte 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, \
- 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80
-.if \fast_tbl == 1
- .byte 0, 1, 2, 3, 16, 17, 32, 33, \
- 18, 19, 4, 5, 6, 7, 20, 21 /* L0 => L3 : 4 lines OK */
- .byte 34, 35, 48, 49, 255, 255, 50, 51, \
- 36, 37, 22, 23, 8, 9, 10, 11 /* L0 => L3 : 4 lines OK */
- .byte 8, 9, 22, 23, 36, 37, 50, 51, \
- 255, 255, 255, 255, 255, 255, 52, 53 /* L1 => L4 : 4 lines OK */
- .byte 54, 55, 40, 41, 26, 27, 12, 13, \
- 14, 15, 28, 29, 42, 43, 56, 57 /* L0 => L3 : 4 lines OK */
- .byte 6, 7, 20, 21, 34, 35, 48, 49, \
- 50, 51, 36, 37, 22, 23, 8, 9 /* L4 => L7 : 4 lines OK */
- .byte 42, 43, 28, 29, 14, 15, 30, 31, \
- 44, 45, 58, 59, 255, 255, 255, 255 /* L1 => L4 : 4 lines OK */
- .byte 255, 255, 255, 255, 56, 57, 42, 43, \
- 28, 29, 14, 15, 30, 31, 44, 45 /* L3 => L6 : 4 lines OK */
- .byte 26, 27, 40, 41, 42, 43, 28, 29, \
- 14, 15, 30, 31, 44, 45, 46, 47 /* L5 => L7 : 3 lines OK */
- .byte 255, 255, 255, 255, 0, 1, 255, 255, \
- 255, 255, 255, 255, 255, 255, 255, 255 /* L4 : 1 lines OK */
- .byte 255, 255, 255, 255, 255, 255, 255, 255, \
- 0, 1, 16, 17, 2, 3, 255, 255 /* L5 => L6 : 2 lines OK */
- .byte 255, 255, 255, 255, 255, 255, 255, 255, \
- 255, 255, 255, 255, 8, 9, 22, 23 /* L5 => L6 : 2 lines OK */
- .byte 4, 5, 6, 7, 255, 255, 255, 255, \
- 255, 255, 255, 255, 255, 255, 255, 255 /* L7 : 1 line OK */
-.endif
-
.if \fast_tbl == 1
asm_function jsimd_huff_encode_one_block_neon
.else
@@ -3056,11 +3066,7 @@ asm_function jsimd_huff_encode_one_block_neon_slowtbl
sub BUFFER, BUFFER, #0x1 /* BUFFER=buffer-- */
/* Save ARM registers */
stp x19, x20, [sp]
-.if \fast_tbl == 1
- adr x15, Ljsimd_huff_encode_one_block_neon_consts
-.else
- adr x15, Ljsimd_huff_encode_one_block_neon_slowtbl_consts
-.endif
+ get_symbol_loc x15, Ljsimd_huff_encode_one_block_neon_consts
ldr PUT_BUFFER, [x0, #0x10]
ldr PUT_BITSw, [x0, #0x18]
ldrsh w12, [x2] /* load DC coeff in w12 */
diff --git a/jpegturbo/simd/i386/jccolext-avx2.asm b/jpegturbo/simd/i386/jccolext-avx2.asm
index 7a8d784a..c46d6844 100644
--- a/jpegturbo/simd/i386/jccolext-avx2.asm
+++ b/jpegturbo/simd/i386/jccolext-avx2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -110,12 +108,12 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
test cl, SIZEOF_BYTE
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
- movzx eax, BYTE [esi+ecx]
+ movzx eax, byte [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
- movzx edx, WORD [esi+ecx]
+ movzx edx, word [esi+ecx]
shl eax, WORD_BIT
or eax, edx
.column_ld4:
diff --git a/jpegturbo/simd/i386/jccolext-mmx.asm b/jpegturbo/simd/i386/jccolext-mmx.asm
index 9a2c30e2..6357a42b 100644
--- a/jpegturbo/simd/i386/jccolext-mmx.asm
+++ b/jpegturbo/simd/i386/jccolext-mmx.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -111,13 +109,13 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
xor eax, eax
- mov al, BYTE [esi+ecx]
+ mov al, byte [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
xor edx, edx
- mov dx, WORD [esi+ecx]
+ mov dx, word [esi+ecx]
shl eax, WORD_BIT
or eax, edx
.column_ld4:
@@ -127,7 +125,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
test cl, SIZEOF_DWORD
jz short .column_ld8
sub ecx, byte SIZEOF_DWORD
- movd mmG, DWORD [esi+ecx]
+ movd mmG, dword [esi+ecx]
psllq mmA, DWORD_BIT
por mmA, mmG
.column_ld8:
@@ -197,7 +195,7 @@ EXTN(jsimd_rgb_ycc_convert_mmx):
test cl, SIZEOF_MMWORD/8
jz short .column_ld2
sub ecx, byte SIZEOF_MMWORD/8
- movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
+ movd mmA, dword [esi+ecx*RGB_PIXELSIZE]
.column_ld2:
test cl, SIZEOF_MMWORD/4
jz short .column_ld4
diff --git a/jpegturbo/simd/i386/jccolext-sse2.asm b/jpegturbo/simd/i386/jccolext-sse2.asm
index e830562c..c6c80852 100644
--- a/jpegturbo/simd/i386/jccolext-sse2.asm
+++ b/jpegturbo/simd/i386/jccolext-sse2.asm
@@ -12,8 +12,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -109,12 +107,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
test cl, SIZEOF_BYTE
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
- movzx eax, BYTE [esi+ecx]
+ movzx eax, byte [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
- movzx edx, WORD [esi+ecx]
+ movzx edx, word [esi+ecx]
shl eax, WORD_BIT
or eax, edx
.column_ld4:
diff --git a/jpegturbo/simd/i386/jccolor-avx2.asm b/jpegturbo/simd/i386/jccolor-avx2.asm
index 958517f3..14944e95 100644
--- a/jpegturbo/simd/i386/jccolor-avx2.asm
+++ b/jpegturbo/simd/i386/jccolor-avx2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jccolor-mmx.asm b/jpegturbo/simd/i386/jccolor-mmx.asm
index 47be9e1e..8cb399bd 100644
--- a/jpegturbo/simd/i386/jccolor-mmx.asm
+++ b/jpegturbo/simd/i386/jccolor-mmx.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jccolor-sse2.asm b/jpegturbo/simd/i386/jccolor-sse2.asm
index c0d5d45e..686d222f 100644
--- a/jpegturbo/simd/i386/jccolor-sse2.asm
+++ b/jpegturbo/simd/i386/jccolor-sse2.asm
@@ -12,8 +12,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jcgray-avx2.asm b/jpegturbo/simd/i386/jcgray-avx2.asm
index 4d66242c..560ee0c7 100644
--- a/jpegturbo/simd/i386/jcgray-avx2.asm
+++ b/jpegturbo/simd/i386/jcgray-avx2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jcgray-mmx.asm b/jpegturbo/simd/i386/jcgray-mmx.asm
index 07c7ea6c..79fdf082 100644
--- a/jpegturbo/simd/i386/jcgray-mmx.asm
+++ b/jpegturbo/simd/i386/jcgray-mmx.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jcgray-sse2.asm b/jpegturbo/simd/i386/jcgray-sse2.asm
index 4b8c7971..cb4b28e8 100644
--- a/jpegturbo/simd/i386/jcgray-sse2.asm
+++ b/jpegturbo/simd/i386/jcgray-sse2.asm
@@ -12,8 +12,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jcgryext-avx2.asm b/jpegturbo/simd/i386/jcgryext-avx2.asm
index 52e99a84..3fa7973d 100644
--- a/jpegturbo/simd/i386/jcgryext-avx2.asm
+++ b/jpegturbo/simd/i386/jcgryext-avx2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -102,12 +100,12 @@ EXTN(jsimd_rgb_gray_convert_avx2):
test cl, SIZEOF_BYTE
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
- movzx eax, BYTE [esi+ecx]
+ movzx eax, byte [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
- movzx edx, WORD [esi+ecx]
+ movzx edx, word [esi+ecx]
shl eax, WORD_BIT
or eax, edx
.column_ld4:
diff --git a/jpegturbo/simd/i386/jcgryext-mmx.asm b/jpegturbo/simd/i386/jcgryext-mmx.asm
index 4a9ab0da..8af42e5a 100644
--- a/jpegturbo/simd/i386/jcgryext-mmx.asm
+++ b/jpegturbo/simd/i386/jcgryext-mmx.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -103,13 +101,13 @@ EXTN(jsimd_rgb_gray_convert_mmx):
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
xor eax, eax
- mov al, BYTE [esi+ecx]
+ mov al, byte [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
xor edx, edx
- mov dx, WORD [esi+ecx]
+ mov dx, word [esi+ecx]
shl eax, WORD_BIT
or eax, edx
.column_ld4:
@@ -119,7 +117,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
test cl, SIZEOF_DWORD
jz short .column_ld8
sub ecx, byte SIZEOF_DWORD
- movd mmG, DWORD [esi+ecx]
+ movd mmG, dword [esi+ecx]
psllq mmA, DWORD_BIT
por mmA, mmG
.column_ld8:
@@ -189,7 +187,7 @@ EXTN(jsimd_rgb_gray_convert_mmx):
test cl, SIZEOF_MMWORD/8
jz short .column_ld2
sub ecx, byte SIZEOF_MMWORD/8
- movd mmA, DWORD [esi+ecx*RGB_PIXELSIZE]
+ movd mmA, dword [esi+ecx*RGB_PIXELSIZE]
.column_ld2:
test cl, SIZEOF_MMWORD/4
jz short .column_ld4
diff --git a/jpegturbo/simd/i386/jcgryext-sse2.asm b/jpegturbo/simd/i386/jcgryext-sse2.asm
index 04d891cd..c9d6ff1e 100644
--- a/jpegturbo/simd/i386/jcgryext-sse2.asm
+++ b/jpegturbo/simd/i386/jcgryext-sse2.asm
@@ -12,8 +12,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -101,12 +99,12 @@ EXTN(jsimd_rgb_gray_convert_sse2):
test cl, SIZEOF_BYTE
jz short .column_ld2
sub ecx, byte SIZEOF_BYTE
- movzx eax, BYTE [esi+ecx]
+ movzx eax, byte [esi+ecx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub ecx, byte SIZEOF_WORD
- movzx edx, WORD [esi+ecx]
+ movzx edx, word [esi+ecx]
shl eax, WORD_BIT
or eax, edx
.column_ld4:
diff --git a/jpegturbo/simd/i386/jchuff-sse2.asm b/jpegturbo/simd/i386/jchuff-sse2.asm
index 6ea69f6e..79f0ca52 100644
--- a/jpegturbo/simd/i386/jchuff-sse2.asm
+++ b/jpegturbo/simd/i386/jchuff-sse2.asm
@@ -17,8 +17,6 @@
; This file contains an SSE2 implementation for Huffman coding of one block.
; The following code is based directly on jchuff.c; see jchuff.c for more
; details.
-;
-; [TAB8]
%include "jsimdext.inc"
@@ -197,8 +195,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
push ebp
mov esi, POINTER [eax+8] ; (working_state *state)
- mov put_buffer, DWORD [esi+8] ; put_buffer = state->cur.put_buffer;
- mov put_bits, DWORD [esi+12] ; put_bits = state->cur.put_bits;
+ mov put_buffer, dword [esi+8] ; put_buffer = state->cur.put_buffer;
+ mov put_bits, dword [esi+12] ; put_bits = state->cur.put_bits;
push esi ; esi is now scratch
get_GOT edx ; get GOT address
@@ -214,7 +212,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
; Encode the DC coefficient difference per section F.1.2.1
mov esi, POINTER [esp+block] ; block
movsx ecx, word [esi] ; temp = temp2 = block[0] - last_dc_val;
- sub ecx, DWORD [eax+20]
+ sub ecx, dword [eax+20]
mov esi, ecx
; This is a well-known technique for obtaining the absolute value
@@ -229,12 +227,12 @@ EXTN(jsimd_huff_encode_one_block_sse2):
; For a negative input, want temp2 = bitwise complement of abs(input)
; This code assumes we are on a two's complement machine
add esi, edx ; temp2 += temp3;
- mov DWORD [esp+temp], esi ; backup temp2 in temp
+ mov dword [esp+temp], esi ; backup temp2 in temp
; Find the number of bits needed for the magnitude of the coefficient
movpic ebp, POINTER [esp+gotptr] ; load GOT address (ebp)
movzx edx, byte [GOTOFF(ebp, jpeg_nbits_table + ecx)] ; nbits = JPEG_NBITS(temp);
- mov DWORD [esp+temp2], edx ; backup nbits in temp2
+ mov dword [esp+temp2], edx ; backup nbits in temp2
; Emit the Huffman-coded symbol for the number of bits
mov ebp, POINTER [eax+24] ; After this point, arguments are not accessible anymore
@@ -242,13 +240,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
movzx ecx, byte [ebp + edx + 1024] ; size = dctbl->ehufsi[nbits];
EMIT_BITS eax ; EMIT_BITS(code, size)
- mov ecx, DWORD [esp+temp2] ; restore nbits
+ mov ecx, dword [esp+temp2] ; restore nbits
; Mask off any extra bits in code
mov eax, 1
shl eax, cl
dec eax
- and eax, DWORD [esp+temp] ; temp2 &= (((JLONG)1)<>= r;
- mov DWORD [esp+temp3], edx
+ mov dword [esp+temp3], edx
.BRLOOP:
cmp ecx, 16 ; while (r > 15) {
jl near .ERLOOP
sub ecx, 16 ; r -= 16;
- mov DWORD [esp+temp], ecx
+ mov dword [esp+temp], ecx
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];
movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0];
EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0)
- mov ecx, DWORD [esp+temp]
+ mov ecx, dword [esp+temp]
jmp .BRLOOP
.ERLOOP:
movsx eax, word [esi] ; temp = t1[k];
movpic edx, POINTER [esp+gotptr] ; load GOT address (edx)
movzx eax, byte [GOTOFF(edx, jpeg_nbits_table + eax)] ; nbits = JPEG_NBITS(temp);
- mov DWORD [esp+temp2], eax
+ mov dword [esp+temp2], eax
; Emit Huffman symbol for run length / number of bits
shl ecx, 4 ; temp3 = (r << 4) + nbits;
add ecx, eax
@@ -316,13 +314,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k];
; Mask off any extra bits in code
- mov ecx, DWORD [esp+temp2]
+ mov ecx, dword [esp+temp2]
mov eax, 1
shl eax, cl
dec eax
and eax, edx ; temp2 &= (((JLONG)1)<>= 1;
@@ -352,29 +350,29 @@ EXTN(jsimd_huff_encode_one_block_sse2):
shr edx, cl ; index >>= r;
add ecx, eax
lea esi, [esi+ecx*2] ; k += r;
- mov DWORD [esp+temp3], edx
+ mov dword [esp+temp3], edx
jmp .BRLOOP2
.BLOOP2:
bsf ecx, edx ; r = __builtin_ctzl(index);
jz near .ELOOP2
lea esi, [esi+ecx*2] ; k += r;
shr edx, cl ; index >>= r;
- mov DWORD [esp+temp3], edx
+ mov dword [esp+temp3], edx
.BRLOOP2:
cmp ecx, 16 ; while (r > 15) {
jl near .ERLOOP2
sub ecx, 16 ; r -= 16;
- mov DWORD [esp+temp], ecx
+ mov dword [esp+temp], ecx
mov eax, INT [ebp + 240 * 4] ; code_0xf0 = actbl->ehufco[0xf0];
movzx ecx, byte [ebp + 1024 + 240] ; size_0xf0 = actbl->ehufsi[0xf0];
EMIT_BITS eax ; EMIT_BITS(code_0xf0, size_0xf0)
- mov ecx, DWORD [esp+temp]
+ mov ecx, dword [esp+temp]
jmp .BRLOOP2
.ERLOOP2:
movsx eax, word [esi] ; temp = t1[k];
bsr eax, eax ; nbits = 32 - __builtin_clz(temp);
inc eax
- mov DWORD [esp+temp2], eax
+ mov dword [esp+temp2], eax
; Emit Huffman symbol for run length / number of bits
shl ecx, 4 ; temp3 = (r << 4) + nbits;
add ecx, eax
@@ -384,13 +382,13 @@ EXTN(jsimd_huff_encode_one_block_sse2):
movsx edx, word [esi+DCTSIZE2*2] ; temp2 = t2[k];
; Mask off any extra bits in code
- mov ecx, DWORD [esp+temp2]
+ mov ecx, dword [esp+temp2]
mov eax, 1
shl eax, cl
dec eax
and eax, edx ; temp2 &= (((JLONG)1)<>= 1;
@@ -407,8 +405,8 @@ EXTN(jsimd_huff_encode_one_block_sse2):
mov eax, [esp+buffer]
pop esi
; Save put_buffer & put_bits
- mov DWORD [esi+8], put_buffer ; state->cur.put_buffer = put_buffer;
- mov DWORD [esi+12], put_bits ; state->cur.put_bits = put_bits;
+ mov dword [esi+8], put_buffer ; state->cur.put_buffer = put_buffer;
+ mov dword [esi+12], put_bits ; state->cur.put_bits = put_bits;
pop ebp
pop edi
diff --git a/jpegturbo/simd/i386/jcphuff-sse2.asm b/jpegturbo/simd/i386/jcphuff-sse2.asm
index e35a7d8b..8b731783 100644
--- a/jpegturbo/simd/i386/jcphuff-sse2.asm
+++ b/jpegturbo/simd/i386/jcphuff-sse2.asm
@@ -15,8 +15,6 @@
;
; This file contains an SSE2 implementation of data preparation for progressive
; Huffman encoding. See jcphuff.c for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jcsample-avx2.asm b/jpegturbo/simd/i386/jcsample-avx2.asm
index 5bcdefd5..0a20802d 100644
--- a/jpegturbo/simd/i386/jcsample-avx2.asm
+++ b/jpegturbo/simd/i386/jcsample-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jcsample-mmx.asm b/jpegturbo/simd/i386/jcsample-mmx.asm
index faf42345..2c223eeb 100644
--- a/jpegturbo/simd/i386/jcsample-mmx.asm
+++ b/jpegturbo/simd/i386/jcsample-mmx.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jcsample-sse2.asm b/jpegturbo/simd/i386/jcsample-sse2.asm
index b10fa836..4fea60d2 100644
--- a/jpegturbo/simd/i386/jcsample-sse2.asm
+++ b/jpegturbo/simd/i386/jcsample-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jdcolext-avx2.asm b/jpegturbo/simd/i386/jdcolext-avx2.asm
index 46de9b9d..015be041 100644
--- a/jpegturbo/simd/i386/jdcolext-avx2.asm
+++ b/jpegturbo/simd/i386/jdcolext-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -348,7 +346,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
vmovd eax, xmmA
cmp ecx, byte SIZEOF_WORD
jb short .column_st1
- mov WORD [edi], ax
+ mov word [edi], ax
add edi, byte SIZEOF_WORD
sub ecx, byte SIZEOF_WORD
shr eax, 16
@@ -357,7 +355,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
; space.
test ecx, ecx
jz short .nextrow
- mov BYTE [edi], al
+ mov byte [edi], al
%else ; RGB_PIXELSIZE == 4 ; -----------
diff --git a/jpegturbo/simd/i386/jdcolext-mmx.asm b/jpegturbo/simd/i386/jdcolext-mmx.asm
index cd2cb3f9..5813cfcb 100644
--- a/jpegturbo/simd/i386/jdcolext-mmx.asm
+++ b/jpegturbo/simd/i386/jdcolext-mmx.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -280,7 +278,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
movd eax, mmA
cmp ecx, byte SIZEOF_DWORD
jb short .column_st2
- mov DWORD [edi+0*SIZEOF_DWORD], eax
+ mov dword [edi+0*SIZEOF_DWORD], eax
psrlq mmA, DWORD_BIT
movd eax, mmA
sub ecx, byte SIZEOF_DWORD
@@ -288,14 +286,14 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
.column_st2:
cmp ecx, byte SIZEOF_WORD
jb short .column_st1
- mov WORD [edi+0*SIZEOF_WORD], ax
+ mov word [edi+0*SIZEOF_WORD], ax
shr eax, WORD_BIT
sub ecx, byte SIZEOF_WORD
add edi, byte SIZEOF_WORD
.column_st1:
cmp ecx, byte SIZEOF_BYTE
jb short .nextrow
- mov BYTE [edi+0*SIZEOF_BYTE], al
+ mov byte [edi+0*SIZEOF_BYTE], al
%else ; RGB_PIXELSIZE == 4 ; -----------
@@ -367,7 +365,7 @@ EXTN(jsimd_ycc_rgb_convert_mmx):
.column_st4:
cmp ecx, byte SIZEOF_MMWORD/8
jb short .nextrow
- movd DWORD [edi+0*SIZEOF_DWORD], mmA
+ movd dword [edi+0*SIZEOF_DWORD], mmA
%endif ; RGB_PIXELSIZE ; ---------------
diff --git a/jpegturbo/simd/i386/jdcolext-sse2.asm b/jpegturbo/simd/i386/jdcolext-sse2.asm
index 0fcb0067..d5572b32 100644
--- a/jpegturbo/simd/i386/jdcolext-sse2.asm
+++ b/jpegturbo/simd/i386/jdcolext-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -320,7 +318,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
movd eax, xmmA
cmp ecx, byte SIZEOF_WORD
jb short .column_st1
- mov WORD [edi], ax
+ mov word [edi], ax
add edi, byte SIZEOF_WORD
sub ecx, byte SIZEOF_WORD
shr eax, 16
@@ -329,7 +327,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
; space.
test ecx, ecx
jz short .nextrow
- mov BYTE [edi], al
+ mov byte [edi], al
%else ; RGB_PIXELSIZE == 4 ; -----------
diff --git a/jpegturbo/simd/i386/jdcolor-avx2.asm b/jpegturbo/simd/i386/jdcolor-avx2.asm
index d2f86e65..e05b60d0 100644
--- a/jpegturbo/simd/i386/jdcolor-avx2.asm
+++ b/jpegturbo/simd/i386/jdcolor-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jdcolor-mmx.asm b/jpegturbo/simd/i386/jdcolor-mmx.asm
index 8f5a3b3f..fb7e7bcc 100644
--- a/jpegturbo/simd/i386/jdcolor-mmx.asm
+++ b/jpegturbo/simd/i386/jdcolor-mmx.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jdcolor-sse2.asm b/jpegturbo/simd/i386/jdcolor-sse2.asm
index ae553dba..b7362553 100644
--- a/jpegturbo/simd/i386/jdcolor-sse2.asm
+++ b/jpegturbo/simd/i386/jdcolor-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jdmerge-avx2.asm b/jpegturbo/simd/i386/jdmerge-avx2.asm
index 17318440..711e6792 100644
--- a/jpegturbo/simd/i386/jdmerge-avx2.asm
+++ b/jpegturbo/simd/i386/jdmerge-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jdmerge-mmx.asm b/jpegturbo/simd/i386/jdmerge-mmx.asm
index 607bf39f..6e8311d4 100644
--- a/jpegturbo/simd/i386/jdmerge-mmx.asm
+++ b/jpegturbo/simd/i386/jdmerge-mmx.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jdmerge-sse2.asm b/jpegturbo/simd/i386/jdmerge-sse2.asm
index ddb1d5e7..e32f90aa 100644
--- a/jpegturbo/simd/i386/jdmerge-sse2.asm
+++ b/jpegturbo/simd/i386/jdmerge-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jdmrgext-avx2.asm b/jpegturbo/simd/i386/jdmrgext-avx2.asm
index cde48653..e35f7282 100644
--- a/jpegturbo/simd/i386/jdmrgext-avx2.asm
+++ b/jpegturbo/simd/i386/jdmrgext-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -354,7 +352,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
vmovd eax, xmmA
cmp ecx, byte SIZEOF_WORD
jb short .column_st1
- mov WORD [edi], ax
+ mov word [edi], ax
add edi, byte SIZEOF_WORD
sub ecx, byte SIZEOF_WORD
shr eax, 16
@@ -363,7 +361,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
; space.
test ecx, ecx
jz short .endcolumn
- mov BYTE [edi], al
+ mov byte [edi], al
%else ; RGB_PIXELSIZE == 4 ; -----------
diff --git a/jpegturbo/simd/i386/jdmrgext-mmx.asm b/jpegturbo/simd/i386/jdmrgext-mmx.asm
index 4b9e35d8..eb3e36b4 100644
--- a/jpegturbo/simd/i386/jdmrgext-mmx.asm
+++ b/jpegturbo/simd/i386/jdmrgext-mmx.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -283,7 +281,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
movd eax, mmA
cmp ecx, byte SIZEOF_DWORD
jb short .column_st2
- mov DWORD [edi+0*SIZEOF_DWORD], eax
+ mov dword [edi+0*SIZEOF_DWORD], eax
psrlq mmA, DWORD_BIT
movd eax, mmA
sub ecx, byte SIZEOF_DWORD
@@ -291,14 +289,14 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
.column_st2:
cmp ecx, byte SIZEOF_WORD
jb short .column_st1
- mov WORD [edi+0*SIZEOF_WORD], ax
+ mov word [edi+0*SIZEOF_WORD], ax
shr eax, WORD_BIT
sub ecx, byte SIZEOF_WORD
add edi, byte SIZEOF_WORD
.column_st1:
cmp ecx, byte SIZEOF_BYTE
jb short .endcolumn
- mov BYTE [edi+0*SIZEOF_BYTE], al
+ mov byte [edi+0*SIZEOF_BYTE], al
%else ; RGB_PIXELSIZE == 4 ; -----------
@@ -373,7 +371,7 @@ EXTN(jsimd_h2v1_merged_upsample_mmx):
.column_st4:
cmp ecx, byte SIZEOF_MMWORD/8
jb short .endcolumn
- movd DWORD [edi+0*SIZEOF_DWORD], mmA
+ movd dword [edi+0*SIZEOF_DWORD], mmA
%endif ; RGB_PIXELSIZE ; ---------------
diff --git a/jpegturbo/simd/i386/jdmrgext-sse2.asm b/jpegturbo/simd/i386/jdmrgext-sse2.asm
index ac4697ef..c113dc4d 100644
--- a/jpegturbo/simd/i386/jdmrgext-sse2.asm
+++ b/jpegturbo/simd/i386/jdmrgext-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -325,7 +323,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
movd eax, xmmA
cmp ecx, byte SIZEOF_WORD
jb short .column_st1
- mov WORD [edi], ax
+ mov word [edi], ax
add edi, byte SIZEOF_WORD
sub ecx, byte SIZEOF_WORD
shr eax, 16
@@ -334,7 +332,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
; space.
test ecx, ecx
jz short .endcolumn
- mov BYTE [edi], al
+ mov byte [edi], al
%else ; RGB_PIXELSIZE == 4 ; -----------
diff --git a/jpegturbo/simd/i386/jdsample-avx2.asm b/jpegturbo/simd/i386/jdsample-avx2.asm
index 61ce511c..a800c35e 100644
--- a/jpegturbo/simd/i386/jdsample-avx2.asm
+++ b/jpegturbo/simd/i386/jdsample-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jdsample-mmx.asm b/jpegturbo/simd/i386/jdsample-mmx.asm
index 1f810fab..12c49f0e 100644
--- a/jpegturbo/simd/i386/jdsample-mmx.asm
+++ b/jpegturbo/simd/i386/jdsample-mmx.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jdsample-sse2.asm b/jpegturbo/simd/i386/jdsample-sse2.asm
index f0da6269..4e28d2f4 100644
--- a/jpegturbo/simd/i386/jdsample-sse2.asm
+++ b/jpegturbo/simd/i386/jdsample-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/i386/jfdctflt-3dn.asm b/jpegturbo/simd/i386/jfdctflt-3dn.asm
index 1d45865b..322ab163 100644
--- a/jpegturbo/simd/i386/jfdctflt-3dn.asm
+++ b/jpegturbo/simd/i386/jfdctflt-3dn.asm
@@ -17,8 +17,6 @@
; This file contains a floating-point implementation of the forward DCT
; (Discrete Cosine Transform). The following code is based directly on
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/i386/jfdctflt-sse.asm b/jpegturbo/simd/i386/jfdctflt-sse.asm
index 1faf8352..86952c64 100644
--- a/jpegturbo/simd/i386/jfdctflt-sse.asm
+++ b/jpegturbo/simd/i386/jfdctflt-sse.asm
@@ -17,8 +17,6 @@
; This file contains a floating-point implementation of the forward DCT
; (Discrete Cosine Transform). The following code is based directly on
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/i386/jfdctfst-mmx.asm b/jpegturbo/simd/i386/jfdctfst-mmx.asm
index 0271901c..80645a50 100644
--- a/jpegturbo/simd/i386/jfdctfst-mmx.asm
+++ b/jpegturbo/simd/i386/jfdctfst-mmx.asm
@@ -18,8 +18,6 @@
; the forward DCT (Discrete Cosine Transform). The following code is
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
; for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/i386/jfdctfst-sse2.asm b/jpegturbo/simd/i386/jfdctfst-sse2.asm
index f09dadd9..446fa7a6 100644
--- a/jpegturbo/simd/i386/jfdctfst-sse2.asm
+++ b/jpegturbo/simd/i386/jfdctfst-sse2.asm
@@ -18,8 +18,6 @@
; the forward DCT (Discrete Cosine Transform). The following code is
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
; for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/i386/jfdctint-avx2.asm b/jpegturbo/simd/i386/jfdctint-avx2.asm
index ae258eee..97de2302 100644
--- a/jpegturbo/simd/i386/jfdctint-avx2.asm
+++ b/jpegturbo/simd/i386/jfdctint-avx2.asm
@@ -18,8 +18,6 @@
; forward DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
; more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/i386/jfdctint-mmx.asm b/jpegturbo/simd/i386/jfdctint-mmx.asm
index c6bd9597..3ade9d49 100644
--- a/jpegturbo/simd/i386/jfdctint-mmx.asm
+++ b/jpegturbo/simd/i386/jfdctint-mmx.asm
@@ -18,8 +18,6 @@
; forward DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
; more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/i386/jfdctint-sse2.asm b/jpegturbo/simd/i386/jfdctint-sse2.asm
index d67dcc1b..71b684c4 100644
--- a/jpegturbo/simd/i386/jfdctint-sse2.asm
+++ b/jpegturbo/simd/i386/jfdctint-sse2.asm
@@ -18,8 +18,6 @@
; forward DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
; more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/i386/jidctflt-3dn.asm b/jpegturbo/simd/i386/jidctflt-3dn.asm
index 73aa18d0..87951910 100644
--- a/jpegturbo/simd/i386/jidctflt-3dn.asm
+++ b/jpegturbo/simd/i386/jidctflt-3dn.asm
@@ -17,8 +17,6 @@
; This file contains a floating-point implementation of the inverse DCT
; (Discrete Cosine Transform). The following code is based directly on
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -92,23 +90,23 @@ EXTN(jsimd_idct_float_3dnow):
alignx 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_3DNOW
- mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz short .columnDCT
pushpic ebx ; save GOT address
- mov ebx, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
- mov eax, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
- or ebx, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
- or ebx, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
+ mov ebx, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
+ or ebx, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
+ or ebx, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
or eax, ebx
poppic ebx ; restore GOT address
jnz short .columnDCT
; -- AC terms all zero
- movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
+ movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
punpcklwd mm0, mm0
psrad mm0, (DWORD_BIT-WORD_BIT)
@@ -135,10 +133,10 @@ EXTN(jsimd_idct_float_3dnow):
; -- Even part
- movd mm0, DWORD [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
- movd mm1, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
- movd mm2, DWORD [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
- movd mm3, DWORD [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
+ movd mm0, dword [DWBLOCK(0,0,esi,SIZEOF_JCOEF)]
+ movd mm1, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+ movd mm2, dword [DWBLOCK(4,0,esi,SIZEOF_JCOEF)]
+ movd mm3, dword [DWBLOCK(6,0,esi,SIZEOF_JCOEF)]
punpcklwd mm0, mm0
punpcklwd mm1, mm1
@@ -182,10 +180,10 @@ EXTN(jsimd_idct_float_3dnow):
; -- Odd part
- movd mm2, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
- movd mm3, DWORD [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
- movd mm5, DWORD [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
- movd mm1, DWORD [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
+ movd mm2, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ movd mm3, dword [DWBLOCK(3,0,esi,SIZEOF_JCOEF)]
+ movd mm5, dword [DWBLOCK(5,0,esi,SIZEOF_JCOEF)]
+ movd mm1, dword [DWBLOCK(7,0,esi,SIZEOF_JCOEF)]
punpcklwd mm2, mm2
punpcklwd mm3, mm3
diff --git a/jpegturbo/simd/i386/jidctflt-sse.asm b/jpegturbo/simd/i386/jidctflt-sse.asm
index 386650f0..b27ecfdf 100644
--- a/jpegturbo/simd/i386/jidctflt-sse.asm
+++ b/jpegturbo/simd/i386/jidctflt-sse.asm
@@ -17,8 +17,6 @@
; This file contains a floating-point implementation of the inverse DCT
; (Discrete Cosine Transform). The following code is based directly on
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -102,8 +100,8 @@ EXTN(jsimd_idct_float_sse):
alignx 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
- mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz near .columnDCT
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
diff --git a/jpegturbo/simd/i386/jidctflt-sse2.asm b/jpegturbo/simd/i386/jidctflt-sse2.asm
index 9de71392..c646eaef 100644
--- a/jpegturbo/simd/i386/jidctflt-sse2.asm
+++ b/jpegturbo/simd/i386/jidctflt-sse2.asm
@@ -17,8 +17,6 @@
; This file contains a floating-point implementation of the inverse DCT
; (Discrete Cosine Transform). The following code is based directly on
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -102,8 +100,8 @@ EXTN(jsimd_idct_float_sse2):
alignx 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
- mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz near .columnDCT
movq xmm1, XMM_MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
diff --git a/jpegturbo/simd/i386/jidctfst-mmx.asm b/jpegturbo/simd/i386/jidctfst-mmx.asm
index d3e8a5d6..24622d43 100644
--- a/jpegturbo/simd/i386/jidctfst-mmx.asm
+++ b/jpegturbo/simd/i386/jidctfst-mmx.asm
@@ -18,8 +18,6 @@
; the inverse DCT (Discrete Cosine Transform). The following code is
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
; for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -123,8 +121,8 @@ EXTN(jsimd_idct_ifast_mmx):
alignx 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_IFAST_MMX
- mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz short .columnDCT
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
diff --git a/jpegturbo/simd/i386/jidctfst-sse2.asm b/jpegturbo/simd/i386/jidctfst-sse2.asm
index 83bc414a..19704ffa 100644
--- a/jpegturbo/simd/i386/jidctfst-sse2.asm
+++ b/jpegturbo/simd/i386/jidctfst-sse2.asm
@@ -18,8 +18,6 @@
; the inverse DCT (Discrete Cosine Transform). The following code is
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
; for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -118,8 +116,8 @@ EXTN(jsimd_idct_ifast_sse2):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
- mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz near .columnDCT
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
diff --git a/jpegturbo/simd/i386/jidctint-avx2.asm b/jpegturbo/simd/i386/jidctint-avx2.asm
index b3b7b14f..c371985c 100644
--- a/jpegturbo/simd/i386/jidctint-avx2.asm
+++ b/jpegturbo/simd/i386/jidctint-avx2.asm
@@ -18,8 +18,6 @@
; inverse DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jidctint.c; see the jidctint.c for
; more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -320,8 +318,8 @@ EXTN(jsimd_idct_islow_avx2):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2
- mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz near .columnDCT
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
diff --git a/jpegturbo/simd/i386/jidctint-mmx.asm b/jpegturbo/simd/i386/jidctint-mmx.asm
index 6ca6d060..4f07f567 100644
--- a/jpegturbo/simd/i386/jidctint-mmx.asm
+++ b/jpegturbo/simd/i386/jidctint-mmx.asm
@@ -18,8 +18,6 @@
; inverse DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jidctint.c; see the jidctint.c for
; more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -136,8 +134,8 @@ EXTN(jsimd_idct_islow_mmx):
alignx 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_MMX
- mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz short .columnDCT
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
diff --git a/jpegturbo/simd/i386/jidctint-sse2.asm b/jpegturbo/simd/i386/jidctint-sse2.asm
index a6bd00a9..e442fdd2 100644
--- a/jpegturbo/simd/i386/jidctint-sse2.asm
+++ b/jpegturbo/simd/i386/jidctint-sse2.asm
@@ -18,8 +18,6 @@
; inverse DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jidctint.c; see the jidctint.c for
; more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -131,8 +129,8 @@ EXTN(jsimd_idct_islow_sse2):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
- mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz near .columnDCT
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
diff --git a/jpegturbo/simd/i386/jidctred-mmx.asm b/jpegturbo/simd/i386/jidctred-mmx.asm
index 336ee3b9..e2307e1c 100644
--- a/jpegturbo/simd/i386/jidctred-mmx.asm
+++ b/jpegturbo/simd/i386/jidctred-mmx.asm
@@ -18,8 +18,6 @@
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
; The following code is based directly on the IJG's original jidctred.c;
; see the jidctred.c for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -144,8 +142,8 @@ EXTN(jsimd_idct_4x4_mmx):
alignx 16, 7
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_4X4_MMX
- mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz short .columnDCT
movq mm0, MMWORD [MMBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -464,16 +462,16 @@ EXTN(jsimd_idct_4x4_mmx):
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov esi, JSAMPROW [edi+2*SIZEOF_JSAMPROW]
- movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
- movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
+ movd dword [edx+eax*SIZEOF_JSAMPLE], mm1
+ movd dword [esi+eax*SIZEOF_JSAMPLE], mm0
psrlq mm1, 4*BYTE_BIT
psrlq mm0, 4*BYTE_BIT
mov edx, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
mov esi, JSAMPROW [edi+3*SIZEOF_JSAMPROW]
- movd DWORD [edx+eax*SIZEOF_JSAMPLE], mm1
- movd DWORD [esi+eax*SIZEOF_JSAMPLE], mm0
+ movd dword [edx+eax*SIZEOF_JSAMPLE], mm1
+ movd dword [esi+eax*SIZEOF_JSAMPLE], mm0
emms ; empty MMX state
@@ -688,8 +686,8 @@ EXTN(jsimd_idct_2x2_mmx):
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
- mov WORD [edx+eax*SIZEOF_JSAMPLE], bx
- mov WORD [esi+eax*SIZEOF_JSAMPLE], cx
+ mov word [edx+eax*SIZEOF_JSAMPLE], bx
+ mov word [esi+eax*SIZEOF_JSAMPLE], cx
emms ; empty MMX state
diff --git a/jpegturbo/simd/i386/jidctred-sse2.asm b/jpegturbo/simd/i386/jidctred-sse2.asm
index 97838baf..6e56494e 100644
--- a/jpegturbo/simd/i386/jidctred-sse2.asm
+++ b/jpegturbo/simd/i386/jidctred-sse2.asm
@@ -18,8 +18,6 @@
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
; The following code is based directly on the IJG's original jidctred.c;
; see the jidctred.c for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -139,8 +137,8 @@ EXTN(jsimd_idct_4x4_sse2):
mov esi, JCOEFPTR [coef_block(eax)] ; inptr
%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
- mov eax, DWORD [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,esi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,esi,SIZEOF_JCOEF)]
jnz short .columnDCT
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,esi,SIZEOF_JCOEF)]
@@ -578,8 +576,8 @@ EXTN(jsimd_idct_2x2_sse2):
mov edx, JSAMPROW [edi+0*SIZEOF_JSAMPROW]
mov esi, JSAMPROW [edi+1*SIZEOF_JSAMPROW]
- mov WORD [edx+eax*SIZEOF_JSAMPLE], bx
- mov WORD [esi+eax*SIZEOF_JSAMPLE], cx
+ mov word [edx+eax*SIZEOF_JSAMPLE], bx
+ mov word [esi+eax*SIZEOF_JSAMPLE], cx
pop edi
pop esi
diff --git a/jpegturbo/simd/i386/jquant-3dn.asm b/jpegturbo/simd/i386/jquant-3dn.asm
index 1767f444..5cb60caa 100644
--- a/jpegturbo/simd/i386/jquant-3dn.asm
+++ b/jpegturbo/simd/i386/jquant-3dn.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/i386/jquant-mmx.asm b/jpegturbo/simd/i386/jquant-mmx.asm
index 98932db6..61305c62 100644
--- a/jpegturbo/simd/i386/jquant-mmx.asm
+++ b/jpegturbo/simd/i386/jquant-mmx.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/i386/jquant-sse.asm b/jpegturbo/simd/i386/jquant-sse.asm
index cc244c4b..218adc97 100644
--- a/jpegturbo/simd/i386/jquant-sse.asm
+++ b/jpegturbo/simd/i386/jquant-sse.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/i386/jquantf-sse2.asm b/jpegturbo/simd/i386/jquantf-sse2.asm
index 8d1201c0..a881ab50 100644
--- a/jpegturbo/simd/i386/jquantf-sse2.asm
+++ b/jpegturbo/simd/i386/jquantf-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/i386/jquanti-avx2.asm b/jpegturbo/simd/i386/jquanti-avx2.asm
index ea8e1a1e..5ed6bec2 100644
--- a/jpegturbo/simd/i386/jquanti-avx2.asm
+++ b/jpegturbo/simd/i386/jquanti-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/i386/jquanti-sse2.asm b/jpegturbo/simd/i386/jquanti-sse2.asm
index 2a69494b..0a509408 100644
--- a/jpegturbo/simd/i386/jquanti-sse2.asm
+++ b/jpegturbo/simd/i386/jquanti-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/i386/jsimdcpu.asm b/jpegturbo/simd/i386/jsimdcpu.asm
index 0af4eecf..ddcafa9e 100644
--- a/jpegturbo/simd/i386/jsimdcpu.asm
+++ b/jpegturbo/simd/i386/jsimdcpu.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/nasm/jcolsamp.inc b/jpegturbo/simd/nasm/jcolsamp.inc
index a2d5b494..6f6d7f29 100644
--- a/jpegturbo/simd/nasm/jcolsamp.inc
+++ b/jpegturbo/simd/nasm/jcolsamp.inc
@@ -7,8 +7,6 @@
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
-;
-; [TAB8]
; --------------------------------------------------------------------------
diff --git a/jpegturbo/simd/nasm/jdct.inc b/jpegturbo/simd/nasm/jdct.inc
index 79d51460..9192f66f 100644
--- a/jpegturbo/simd/nasm/jdct.inc
+++ b/jpegturbo/simd/nasm/jdct.inc
@@ -7,8 +7,6 @@
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
-;
-; [TAB8]
; Each IDCT routine is responsible for range-limiting its results and
; converting them to unsigned form (0..MAXJSAMPLE). The raw outputs could
diff --git a/jpegturbo/simd/nasm/jsimdext.inc b/jpegturbo/simd/nasm/jsimdext.inc
index b40901f0..9930d80c 100644
--- a/jpegturbo/simd/nasm/jsimdext.inc
+++ b/jpegturbo/simd/nasm/jsimdext.inc
@@ -2,7 +2,7 @@
; jsimdext.inc - common declarations
;
; Copyright 2009 Pierre Ossman for Cendio AB
-; Copyright (C) 2010, 2016, D. R. Commander.
+; Copyright (C) 2010, 2016, 2019, D. R. Commander.
; Copyright (C) 2018, Matthieu Darbois.
;
; Based on the x86 SIMD extension for IJG JPEG library - version 1.02
@@ -24,8 +24,6 @@
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
-;
-; [TAB8]
; ==========================================================================
; System-dependent configurations
@@ -167,19 +165,19 @@ section .note.GNU-stack noalloc noexec nowrite progbits
%define XMM_DWORD
%define XMM_MMWORD
-%define SIZEOF_BYTE 1 ; sizeof(BYTE)
-%define SIZEOF_WORD 2 ; sizeof(WORD)
-%define SIZEOF_DWORD 4 ; sizeof(DWORD)
-%define SIZEOF_QWORD 8 ; sizeof(QWORD)
-%define SIZEOF_OWORD 16 ; sizeof(OWORD)
-%define SIZEOF_YWORD 32 ; sizeof(YWORD)
+%define SIZEOF_BYTE 1 ; sizeof(byte)
+%define SIZEOF_WORD 2 ; sizeof(word)
+%define SIZEOF_DWORD 4 ; sizeof(dword)
+%define SIZEOF_QWORD 8 ; sizeof(qword)
+%define SIZEOF_OWORD 16 ; sizeof(oword)
+%define SIZEOF_YWORD 32 ; sizeof(yword)
%define BYTE_BIT 8 ; CHAR_BIT in C
-%define WORD_BIT 16 ; sizeof(WORD)*BYTE_BIT
-%define DWORD_BIT 32 ; sizeof(DWORD)*BYTE_BIT
-%define QWORD_BIT 64 ; sizeof(QWORD)*BYTE_BIT
-%define OWORD_BIT 128 ; sizeof(OWORD)*BYTE_BIT
-%define YWORD_BIT 256 ; sizeof(YWORD)*BYTE_BIT
+%define WORD_BIT 16 ; sizeof(word)*BYTE_BIT
+%define DWORD_BIT 32 ; sizeof(dword)*BYTE_BIT
+%define QWORD_BIT 64 ; sizeof(qword)*BYTE_BIT
+%define OWORD_BIT 128 ; sizeof(oword)*BYTE_BIT
+%define YWORD_BIT 256 ; sizeof(yword)*BYTE_BIT
; --------------------------------------------------------------------------
; External Symbol Name
@@ -198,6 +196,11 @@ section .note.GNU-stack noalloc noexec nowrite progbits
%ifdef __YASM_VER__
%define GLOBAL_FUNCTION(name) global EXTN(name):private_extern
%define GLOBAL_DATA(name) global EXTN(name):private_extern
+%else
+%if __NASM_VERSION_ID__ >= 0x020E0000
+%define GLOBAL_FUNCTION(name) global EXTN(name):private_extern
+%define GLOBAL_DATA(name) global EXTN(name):private_extern
+%endif
%endif
%endif
diff --git a/jpegturbo/simd/x86_64/jccolext-avx2.asm b/jpegturbo/simd/x86_64/jccolext-avx2.asm
index 5fa3848c..10d28348 100644
--- a/jpegturbo/simd/x86_64/jccolext-avx2.asm
+++ b/jpegturbo/simd/x86_64/jccolext-avx2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -96,12 +94,12 @@ EXTN(jsimd_rgb_ycc_convert_avx2):
test cl, SIZEOF_BYTE
jz short .column_ld2
sub rcx, byte SIZEOF_BYTE
- movzx rax, BYTE [rsi+rcx]
+ movzx rax, byte [rsi+rcx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub rcx, byte SIZEOF_WORD
- movzx rdx, WORD [rsi+rcx]
+ movzx rdx, word [rsi+rcx]
shl rax, WORD_BIT
or rax, rdx
.column_ld4:
diff --git a/jpegturbo/simd/x86_64/jccolext-sse2.asm b/jpegturbo/simd/x86_64/jccolext-sse2.asm
index b1486c0b..2c914d31 100644
--- a/jpegturbo/simd/x86_64/jccolext-sse2.asm
+++ b/jpegturbo/simd/x86_64/jccolext-sse2.asm
@@ -12,8 +12,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -95,12 +93,12 @@ EXTN(jsimd_rgb_ycc_convert_sse2):
test cl, SIZEOF_BYTE
jz short .column_ld2
sub rcx, byte SIZEOF_BYTE
- movzx rax, BYTE [rsi+rcx]
+ movzx rax, byte [rsi+rcx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub rcx, byte SIZEOF_WORD
- movzx rdx, WORD [rsi+rcx]
+ movzx rdx, word [rsi+rcx]
shl rax, WORD_BIT
or rax, rdx
.column_ld4:
diff --git a/jpegturbo/simd/x86_64/jccolor-avx2.asm b/jpegturbo/simd/x86_64/jccolor-avx2.asm
index f9f4be06..16b78298 100644
--- a/jpegturbo/simd/x86_64/jccolor-avx2.asm
+++ b/jpegturbo/simd/x86_64/jccolor-avx2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/x86_64/jccolor-sse2.asm b/jpegturbo/simd/x86_64/jccolor-sse2.asm
index 3e46601d..e2955c21 100644
--- a/jpegturbo/simd/x86_64/jccolor-sse2.asm
+++ b/jpegturbo/simd/x86_64/jccolor-sse2.asm
@@ -12,8 +12,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/x86_64/jcgray-avx2.asm b/jpegturbo/simd/x86_64/jcgray-avx2.asm
index 0ec24104..591255bb 100644
--- a/jpegturbo/simd/x86_64/jcgray-avx2.asm
+++ b/jpegturbo/simd/x86_64/jcgray-avx2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/x86_64/jcgray-sse2.asm b/jpegturbo/simd/x86_64/jcgray-sse2.asm
index edf9222e..e389904f 100644
--- a/jpegturbo/simd/x86_64/jcgray-sse2.asm
+++ b/jpegturbo/simd/x86_64/jcgray-sse2.asm
@@ -12,8 +12,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/x86_64/jcgryext-avx2.asm b/jpegturbo/simd/x86_64/jcgryext-avx2.asm
index 79e2aa01..175b60de 100644
--- a/jpegturbo/simd/x86_64/jcgryext-avx2.asm
+++ b/jpegturbo/simd/x86_64/jcgryext-avx2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -88,12 +86,12 @@ EXTN(jsimd_rgb_gray_convert_avx2):
test cl, SIZEOF_BYTE
jz short .column_ld2
sub rcx, byte SIZEOF_BYTE
- movzx rax, BYTE [rsi+rcx]
+ movzx rax, byte [rsi+rcx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub rcx, byte SIZEOF_WORD
- movzx rdx, WORD [rsi+rcx]
+ movzx rdx, word [rsi+rcx]
shl rax, WORD_BIT
or rax, rdx
.column_ld4:
diff --git a/jpegturbo/simd/x86_64/jcgryext-sse2.asm b/jpegturbo/simd/x86_64/jcgryext-sse2.asm
index 9c3ae5ef..873be805 100644
--- a/jpegturbo/simd/x86_64/jcgryext-sse2.asm
+++ b/jpegturbo/simd/x86_64/jcgryext-sse2.asm
@@ -12,8 +12,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -87,12 +85,12 @@ EXTN(jsimd_rgb_gray_convert_sse2):
test cl, SIZEOF_BYTE
jz short .column_ld2
sub rcx, byte SIZEOF_BYTE
- movzx rax, BYTE [rsi+rcx]
+ movzx rax, byte [rsi+rcx]
.column_ld2:
test cl, SIZEOF_WORD
jz short .column_ld4
sub rcx, byte SIZEOF_WORD
- movzx rdx, WORD [rsi+rcx]
+ movzx rdx, word [rsi+rcx]
shl rax, WORD_BIT
or rax, rdx
.column_ld4:
diff --git a/jpegturbo/simd/x86_64/jchuff-sse2.asm b/jpegturbo/simd/x86_64/jchuff-sse2.asm
index 1b091ad1..aa78fd5c 100644
--- a/jpegturbo/simd/x86_64/jchuff-sse2.asm
+++ b/jpegturbo/simd/x86_64/jchuff-sse2.asm
@@ -17,8 +17,6 @@
; This file contains an SSE2 implementation for Huffman coding of one block.
; The following code is based directly on jchuff.c; see jchuff.c for more
; details.
-;
-; [TAB8]
%include "jsimdext.inc"
@@ -200,7 +198,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
mov buffer, r11 ; r11 is now sratch
mov put_buffer, MMWORD [r10+16] ; put_buffer = state->cur.put_buffer;
- mov put_bits, DWORD [r10+24] ; put_bits = state->cur.put_bits;
+ mov put_bits, dword [r10+24] ; put_bits = state->cur.put_bits;
push r10 ; r10 is now scratch
; Encode the DC coefficient difference per section F.1.2.1
@@ -333,7 +331,7 @@ EXTN(jsimd_huff_encode_one_block_sse2):
pop r10
; Save put_buffer & put_bits
mov MMWORD [r10+16], put_buffer ; state->cur.put_buffer = put_buffer;
- mov DWORD [r10+24], put_bits ; state->cur.put_bits = put_bits;
+ mov dword [r10+24], put_bits ; state->cur.put_bits = put_bits;
pop rbx
uncollect_args 6
diff --git a/jpegturbo/simd/x86_64/jcphuff-sse2.asm b/jpegturbo/simd/x86_64/jcphuff-sse2.asm
index a9446b7a..8ed44728 100644
--- a/jpegturbo/simd/x86_64/jcphuff-sse2.asm
+++ b/jpegturbo/simd/x86_64/jcphuff-sse2.asm
@@ -16,8 +16,6 @@
;
; This file contains an SSE2 implementation of data preparation for progressive
; Huffman encoding. See jcphuff.c for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/x86_64/jcsample-avx2.asm b/jpegturbo/simd/x86_64/jcsample-avx2.asm
index 9d5a8618..d9922bb4 100644
--- a/jpegturbo/simd/x86_64/jcsample-avx2.asm
+++ b/jpegturbo/simd/x86_64/jcsample-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/x86_64/jcsample-sse2.asm b/jpegturbo/simd/x86_64/jcsample-sse2.asm
index 1b315364..0f107e9a 100644
--- a/jpegturbo/simd/x86_64/jcsample-sse2.asm
+++ b/jpegturbo/simd/x86_64/jcsample-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/x86_64/jdcolext-avx2.asm b/jpegturbo/simd/x86_64/jdcolext-avx2.asm
index e2b96c73..677b8ed8 100644
--- a/jpegturbo/simd/x86_64/jdcolext-avx2.asm
+++ b/jpegturbo/simd/x86_64/jdcolext-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -334,7 +332,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
vmovd eax, xmmA
cmp rcx, byte SIZEOF_WORD
jb short .column_st1
- mov WORD [rdi], ax
+ mov word [rdi], ax
add rdi, byte SIZEOF_WORD
sub rcx, byte SIZEOF_WORD
shr rax, 16
@@ -343,7 +341,7 @@ EXTN(jsimd_ycc_rgb_convert_avx2):
; space.
test rcx, rcx
jz short .nextrow
- mov BYTE [rdi], al
+ mov byte [rdi], al
%else ; RGB_PIXELSIZE == 4 ; -----------
diff --git a/jpegturbo/simd/x86_64/jdcolext-sse2.asm b/jpegturbo/simd/x86_64/jdcolext-sse2.asm
index a94954bb..071aa629 100644
--- a/jpegturbo/simd/x86_64/jdcolext-sse2.asm
+++ b/jpegturbo/simd/x86_64/jdcolext-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -306,7 +304,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
movd eax, xmmA
cmp rcx, byte SIZEOF_WORD
jb short .column_st1
- mov WORD [rdi], ax
+ mov word [rdi], ax
add rdi, byte SIZEOF_WORD
sub rcx, byte SIZEOF_WORD
shr rax, 16
@@ -315,7 +313,7 @@ EXTN(jsimd_ycc_rgb_convert_sse2):
; space.
test rcx, rcx
jz short .nextrow
- mov BYTE [rdi], al
+ mov byte [rdi], al
%else ; RGB_PIXELSIZE == 4 ; -----------
diff --git a/jpegturbo/simd/x86_64/jdcolor-avx2.asm b/jpegturbo/simd/x86_64/jdcolor-avx2.asm
index abad1766..43de9db0 100644
--- a/jpegturbo/simd/x86_64/jdcolor-avx2.asm
+++ b/jpegturbo/simd/x86_64/jdcolor-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/x86_64/jdcolor-sse2.asm b/jpegturbo/simd/x86_64/jdcolor-sse2.asm
index e7079f60..b3f1fec0 100644
--- a/jpegturbo/simd/x86_64/jdcolor-sse2.asm
+++ b/jpegturbo/simd/x86_64/jdcolor-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/x86_64/jdmerge-avx2.asm b/jpegturbo/simd/x86_64/jdmerge-avx2.asm
index ca3f063c..9515a170 100644
--- a/jpegturbo/simd/x86_64/jdmerge-avx2.asm
+++ b/jpegturbo/simd/x86_64/jdmerge-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/x86_64/jdmerge-sse2.asm b/jpegturbo/simd/x86_64/jdmerge-sse2.asm
index f3e09fa8..aedccc20 100644
--- a/jpegturbo/simd/x86_64/jdmerge-sse2.asm
+++ b/jpegturbo/simd/x86_64/jdmerge-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/x86_64/jdmrgext-avx2.asm b/jpegturbo/simd/x86_64/jdmrgext-avx2.asm
index 04e8a945..bb733c58 100644
--- a/jpegturbo/simd/x86_64/jdmrgext-avx2.asm
+++ b/jpegturbo/simd/x86_64/jdmrgext-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -339,7 +337,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
vmovd eax, xmmA
cmp rcx, byte SIZEOF_WORD
jb short .column_st1
- mov WORD [rdi], ax
+ mov word [rdi], ax
add rdi, byte SIZEOF_WORD
sub rcx, byte SIZEOF_WORD
shr rax, 16
@@ -348,7 +346,7 @@ EXTN(jsimd_h2v1_merged_upsample_avx2):
; space.
test rcx, rcx
jz short .endcolumn
- mov BYTE [rdi], al
+ mov byte [rdi], al
%else ; RGB_PIXELSIZE == 4 ; -----------
diff --git a/jpegturbo/simd/x86_64/jdmrgext-sse2.asm b/jpegturbo/simd/x86_64/jdmrgext-sse2.asm
index 1cc33455..b176a4cd 100644
--- a/jpegturbo/simd/x86_64/jdmrgext-sse2.asm
+++ b/jpegturbo/simd/x86_64/jdmrgext-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jcolsamp.inc"
@@ -310,7 +308,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
movd eax, xmmA
cmp rcx, byte SIZEOF_WORD
jb short .column_st1
- mov WORD [rdi], ax
+ mov word [rdi], ax
add rdi, byte SIZEOF_WORD
sub rcx, byte SIZEOF_WORD
shr rax, 16
@@ -319,7 +317,7 @@ EXTN(jsimd_h2v1_merged_upsample_sse2):
; space.
test rcx, rcx
jz short .endcolumn
- mov BYTE [rdi], al
+ mov byte [rdi], al
%else ; RGB_PIXELSIZE == 4 ; -----------
diff --git a/jpegturbo/simd/x86_64/jdsample-avx2.asm b/jpegturbo/simd/x86_64/jdsample-avx2.asm
index 10fa5c45..fc274a95 100644
--- a/jpegturbo/simd/x86_64/jdsample-avx2.asm
+++ b/jpegturbo/simd/x86_64/jdsample-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/x86_64/jdsample-sse2.asm b/jpegturbo/simd/x86_64/jdsample-sse2.asm
index d8ccda9b..20e07670 100644
--- a/jpegturbo/simd/x86_64/jdsample-sse2.asm
+++ b/jpegturbo/simd/x86_64/jdsample-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/simd/x86_64/jfdctflt-sse.asm b/jpegturbo/simd/x86_64/jfdctflt-sse.asm
index 26f9fb6a..ef279664 100644
--- a/jpegturbo/simd/x86_64/jfdctflt-sse.asm
+++ b/jpegturbo/simd/x86_64/jfdctflt-sse.asm
@@ -17,8 +17,6 @@
; This file contains a floating-point implementation of the forward DCT
; (Discrete Cosine Transform). The following code is based directly on
; the IJG's original jfdctflt.c; see the jfdctflt.c for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/x86_64/jfdctfst-sse2.asm b/jpegturbo/simd/x86_64/jfdctfst-sse2.asm
index aaf8b9e3..2e1bfe6e 100644
--- a/jpegturbo/simd/x86_64/jfdctfst-sse2.asm
+++ b/jpegturbo/simd/x86_64/jfdctfst-sse2.asm
@@ -18,8 +18,6 @@
; the forward DCT (Discrete Cosine Transform). The following code is
; based directly on the IJG's original jfdctfst.c; see the jfdctfst.c
; for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/x86_64/jfdctint-avx2.asm b/jpegturbo/simd/x86_64/jfdctint-avx2.asm
index 448f47d4..6ad4cf0b 100644
--- a/jpegturbo/simd/x86_64/jfdctint-avx2.asm
+++ b/jpegturbo/simd/x86_64/jfdctint-avx2.asm
@@ -18,8 +18,6 @@
; forward DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
; more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/x86_64/jfdctint-sse2.asm b/jpegturbo/simd/x86_64/jfdctint-sse2.asm
index ef16a52a..5d0de3cf 100644
--- a/jpegturbo/simd/x86_64/jfdctint-sse2.asm
+++ b/jpegturbo/simd/x86_64/jfdctint-sse2.asm
@@ -18,8 +18,6 @@
; forward DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jfdctint.c; see the jfdctint.c for
; more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/x86_64/jidctflt-sse2.asm b/jpegturbo/simd/x86_64/jidctflt-sse2.asm
index b676ef39..ab95e1a6 100644
--- a/jpegturbo/simd/x86_64/jidctflt-sse2.asm
+++ b/jpegturbo/simd/x86_64/jidctflt-sse2.asm
@@ -17,8 +17,6 @@
; This file contains a floating-point implementation of the inverse DCT
; (Discrete Cosine Transform). The following code is based directly on
; the IJG's original jidctflt.c; see the jidctflt.c for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -95,8 +93,8 @@ EXTN(jsimd_idct_float_sse2):
mov rcx, DCTSIZE/4 ; ctr
.columnloop:
%ifndef NO_ZERO_COLUMN_TEST_FLOAT_SSE
- mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
jnz near .columnDCT
movq xmm1, XMM_MMWORD [MMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
diff --git a/jpegturbo/simd/x86_64/jidctfst-sse2.asm b/jpegturbo/simd/x86_64/jidctfst-sse2.asm
index c6c42f9b..a66a6811 100644
--- a/jpegturbo/simd/x86_64/jidctfst-sse2.asm
+++ b/jpegturbo/simd/x86_64/jidctfst-sse2.asm
@@ -18,8 +18,6 @@
; the inverse DCT (Discrete Cosine Transform). The following code is
; based directly on the IJG's original jidctfst.c; see the jidctfst.c
; for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -111,8 +109,8 @@ EXTN(jsimd_idct_ifast_sse2):
mov rsi, r11 ; inptr
%ifndef NO_ZERO_COLUMN_TEST_IFAST_SSE2
- mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
jnz near .columnDCT
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
diff --git a/jpegturbo/simd/x86_64/jidctint-avx2.asm b/jpegturbo/simd/x86_64/jidctint-avx2.asm
index b60b44f2..50270f47 100644
--- a/jpegturbo/simd/x86_64/jidctint-avx2.asm
+++ b/jpegturbo/simd/x86_64/jidctint-avx2.asm
@@ -18,8 +18,6 @@
; inverse DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jidctint.c; see the jidctint.c for
; more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -292,8 +290,8 @@ EXTN(jsimd_idct_islow_avx2):
; ---- Pass 1: process columns.
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_AVX2
- mov eax, DWORD [DWBLOCK(1,0,r11,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,r11,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,r11,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,r11,SIZEOF_JCOEF)]
jnz near .columnDCT
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,r11,SIZEOF_JCOEF)]
diff --git a/jpegturbo/simd/x86_64/jidctint-sse2.asm b/jpegturbo/simd/x86_64/jidctint-sse2.asm
index 83fc344b..034530c2 100644
--- a/jpegturbo/simd/x86_64/jidctint-sse2.asm
+++ b/jpegturbo/simd/x86_64/jidctint-sse2.asm
@@ -18,8 +18,6 @@
; inverse DCT (Discrete Cosine Transform). The following code is based
; directly on the IJG's original jidctint.c; see the jidctint.c for
; more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -124,8 +122,8 @@ EXTN(jsimd_idct_islow_sse2):
mov rsi, r11 ; inptr
%ifndef NO_ZERO_COLUMN_TEST_ISLOW_SSE2
- mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
jnz near .columnDCT
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
diff --git a/jpegturbo/simd/x86_64/jidctred-sse2.asm b/jpegturbo/simd/x86_64/jidctred-sse2.asm
index af64fdc2..7fbfcc51 100644
--- a/jpegturbo/simd/x86_64/jidctred-sse2.asm
+++ b/jpegturbo/simd/x86_64/jidctred-sse2.asm
@@ -18,8 +18,6 @@
; output: either 4x4 or 2x2 pixels from an 8x8 DCT block.
; The following code is based directly on the IJG's original jidctred.c;
; see the jidctred.c for more details.
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
@@ -132,8 +130,8 @@ EXTN(jsimd_idct_4x4_sse2):
mov rsi, r11 ; inptr
%ifndef NO_ZERO_COLUMN_TEST_4X4_SSE2
- mov eax, DWORD [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
- or eax, DWORD [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
+ mov eax, dword [DWBLOCK(1,0,rsi,SIZEOF_JCOEF)]
+ or eax, dword [DWBLOCK(2,0,rsi,SIZEOF_JCOEF)]
jnz short .columnDCT
movdqa xmm0, XMMWORD [XMMBLOCK(1,0,rsi,SIZEOF_JCOEF)]
@@ -562,8 +560,8 @@ EXTN(jsimd_idct_2x2_sse2):
mov rdx, JSAMPROW [rdi+0*SIZEOF_JSAMPROW]
mov rsi, JSAMPROW [rdi+1*SIZEOF_JSAMPROW]
- mov WORD [rdx+rax*SIZEOF_JSAMPLE], bx
- mov WORD [rsi+rax*SIZEOF_JSAMPLE], cx
+ mov word [rdx+rax*SIZEOF_JSAMPLE], bx
+ mov word [rsi+rax*SIZEOF_JSAMPLE], cx
pop rbx
uncollect_args 4
diff --git a/jpegturbo/simd/x86_64/jquantf-sse2.asm b/jpegturbo/simd/x86_64/jquantf-sse2.asm
index 4600eecc..83596a91 100644
--- a/jpegturbo/simd/x86_64/jquantf-sse2.asm
+++ b/jpegturbo/simd/x86_64/jquantf-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/x86_64/jquanti-avx2.asm b/jpegturbo/simd/x86_64/jquanti-avx2.asm
index b7243e43..5f04d223 100644
--- a/jpegturbo/simd/x86_64/jquanti-avx2.asm
+++ b/jpegturbo/simd/x86_64/jquanti-avx2.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/x86_64/jquanti-sse2.asm b/jpegturbo/simd/x86_64/jquanti-sse2.asm
index 7ff7275f..bb6fa69e 100644
--- a/jpegturbo/simd/x86_64/jquanti-sse2.asm
+++ b/jpegturbo/simd/x86_64/jquanti-sse2.asm
@@ -13,8 +13,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
%include "jdct.inc"
diff --git a/jpegturbo/simd/x86_64/jsimdcpu.asm b/jpegturbo/simd/x86_64/jsimdcpu.asm
index a905282a..705f813d 100644
--- a/jpegturbo/simd/x86_64/jsimdcpu.asm
+++ b/jpegturbo/simd/x86_64/jsimdcpu.asm
@@ -14,8 +14,6 @@
; assembler (including Borland's Turbo Assembler).
; NASM is available from http://nasm.sourceforge.net/ or
; http://sourceforge.net/project/showfiles.php?group_id=6208
-;
-; [TAB8]
%include "jsimdext.inc"
diff --git a/jpegturbo/tjbench.c b/jpegturbo/tjbench.c
index be6d23ca..13a5bde6 100644
--- a/jpegturbo/tjbench.c
+++ b/jpegturbo/tjbench.c
@@ -171,7 +171,7 @@ static int decomp(unsigned char *srcBuf, unsigned char **jpegBuf,
}
/* Set the destination buffer to gray so we know whether the decompressor
attempted to write to it */
- memset(dstBuf, 127, pitch * scaledh);
+ memset(dstBuf, 127, (size_t)pitch * scaledh);
if (doYUV) {
int width = doTile ? tilew : scaledw;
@@ -193,7 +193,7 @@ static int decomp(unsigned char *srcBuf, unsigned char **jpegBuf,
double start = getTime();
for (row = 0, dstPtr = dstBuf; row < ntilesh;
- row++, dstPtr += pitch * tileh) {
+ row++, dstPtr += (size_t)pitch * tileh) {
for (col = 0, dstPtr2 = dstPtr; col < ntilesw;
col++, tile++, dstPtr2 += ps * tilew) {
int width = doTile ? min(tilew, w - col * tilew) : scaledw;
@@ -486,7 +486,7 @@ static int fullTest(unsigned char *srcBuf, int w, int h, int subsamp,
if (decomp(srcBuf, jpegBuf, jpegSize, tmpBuf, w, h, subsamp, jpegQual,
fileName, tilew, tileh) == -1)
goto bailout;
- }
+ } else if (quiet == 1) printf("N/A\n");
for (i = 0; i < ntilesw * ntilesh; i++) {
if (jpegBuf[i]) tjFree(jpegBuf[i]);
diff --git a/jpegturbo/tjbenchtest.in b/jpegturbo/tjbenchtest.in
old mode 100755
new mode 100644
diff --git a/jpegturbo/tjbenchtest.java.in b/jpegturbo/tjbenchtest.java.in
old mode 100755
new mode 100644
diff --git a/jpegturbo/tjexampletest.in b/jpegturbo/tjexampletest.in
old mode 100755
new mode 100644
diff --git a/jpegturbo/turbojpeg-mapfile b/jpegturbo/turbojpeg-mapfile
old mode 100755
new mode 100644
diff --git a/jpegturbo/turbojpeg-mapfile.jni b/jpegturbo/turbojpeg-mapfile.jni
old mode 100755
new mode 100644
diff --git a/jpegturbo/turbojpeg.c b/jpegturbo/turbojpeg.c
index 3a1e3a98..7f607d14 100644
--- a/jpegturbo/turbojpeg.c
+++ b/jpegturbo/turbojpeg.c
@@ -368,9 +368,9 @@ static int getSubsamp(j_decompress_ptr dinfo)
D_MAX_BLOCKS_IN_MCU / pixelsize[i] && i == TJSAMP_444) {
int match = 0;
for (k = 1; k < dinfo->num_components; k++) {
- if (dinfo->comp_info[i].h_samp_factor ==
+ if (dinfo->comp_info[k].h_samp_factor ==
dinfo->comp_info[0].h_samp_factor &&
- dinfo->comp_info[i].v_samp_factor ==
+ dinfo->comp_info[k].v_samp_factor ==
dinfo->comp_info[0].v_samp_factor)
match++;
if (match == dinfo->num_components - 1) {
@@ -1648,10 +1648,8 @@ DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle,
iw[i] = compptr->width_in_blocks * dctsize;
ih = compptr->height_in_blocks * dctsize;
- pw[i] = PAD(dinfo->output_width, dinfo->max_h_samp_factor) *
- compptr->h_samp_factor / dinfo->max_h_samp_factor;
- ph[i] = PAD(dinfo->output_height, dinfo->max_v_samp_factor) *
- compptr->v_samp_factor / dinfo->max_v_samp_factor;
+ pw[i] = tjPlaneWidth(i, dinfo->output_width, jpegSubsamp);
+ ph[i] = tjPlaneHeight(i, dinfo->output_height, jpegSubsamp);
if (iw[i] != pw[i] || ih != ph[i]) usetmpbuf = 1;
th[i] = compptr->v_samp_factor * dctsize;
tmpbufsize += iw[i] * th[i];
@@ -1908,10 +1906,11 @@ DLLEXPORT int tjTransform(tjhandle handle, const unsigned char *jpegBuf,
if (xinfo[i].crop) {
if ((t[i].r.x % xinfo[i].iMCU_sample_width) != 0 ||
(t[i].r.y % xinfo[i].iMCU_sample_height) != 0) {
- snprintf(errStr, JMSG_LENGTH_MAX,
+ snprintf(this->errStr, JMSG_LENGTH_MAX,
"To crop this JPEG image, x must be a multiple of %d\n"
"and y must be a multiple of %d.\n",
xinfo[i].iMCU_sample_width, xinfo[i].iMCU_sample_height);
+ this->isInstanceError = TRUE;
retval = -1; goto bailout;
}
}
diff --git a/jpegturbo/win/jpeg62-memsrcdst.def b/jpegturbo/win/jpeg62-memsrcdst.def
old mode 100755
new mode 100644
diff --git a/jpegturbo/win/jpeg62.def b/jpegturbo/win/jpeg62.def
old mode 100755
new mode 100644