Use more portable hqx code from googlecode hqx

Conflicts:
	src/CMakeLists.txt
This commit is contained in:
galtgendo 2014-03-02 11:32:45 +01:00
parent 4a23b97963
commit 4909aa750f
12 changed files with 13347 additions and 14998 deletions

View File

@ -624,16 +624,6 @@ if( WIN32 )
set( SYSTEM_SOURCES ${SYSTEM_SOURCES} win32/zdoom.rc )
endif( "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" )
# [BB] Maxim Stepin's hq2x/3x/4x pixel upsampling algorithm as library. Currently only used with VC++.
if( MSVC AND NOT X64 )
set( SYSTEM_SOURCES
${SYSTEM_SOURCES}
gl/hqnx/Image.cpp
gl/hqnx/hq2x.cpp
gl/hqnx/hq3x.cpp
gl/hqnx/hq4x.cpp
)
endif( MSVC AND NOT X64)
else( WIN32 )
set( SYSTEM_SOURCES_DIR sdl )
set( SYSTEM_SOURCES ${PLAT_SDL_SOURCES} )
@ -1057,6 +1047,10 @@ add_executable( zdoom WIN32
gl/renderer/gl_renderer.cpp
gl/renderer/gl_renderstate.cpp
gl/renderer/gl_lightdata.cpp
gl/hqnx/init.cpp
gl/hqnx/hq2x.cpp
gl/hqnx/hq3x.cpp
gl/hqnx/hq4x.cpp
gl/textures/gl_hwtexture.cpp
gl/textures/gl_texture.cpp
gl/textures/gl_material.cpp

File diff suppressed because it is too large Load Diff

View File

@ -1,145 +0,0 @@
//CImage class - loading and saving BMP and TGA files
//----------------------------------------------------------
//Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
//
//This program is free software; you can redistribute it and/or
//modify it under the terms of the GNU Lesser General Public
//License as published by the Free Software Foundation; either
//version 2.1 of the License, or (at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//Lesser General Public License for more details.
//
//You should have received a copy of the GNU Lesser General Public
//License along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//#ifdef WIN32
//#define DLL __declspec(dllexport)
//#else
#define DLL
//#endif
#include <stdio.h>
#pragma once
#pragma warning(disable: 4103)
#pragma pack(1)
typedef struct { unsigned char b, g, r; } _BGR;
typedef struct { unsigned char b, g, r, a; } _BGRA;
// Image container class. Per the file header it covers loading and saving
// BMP and TGA files; it also declares bit-depth conversion entry points.
// Only declarations are visible in this header, so the notes below describe
// the declared interface, not the implementation.
// The class is declared between "#pragma pack(1)" and "#pragma pack(8)", so
// the nested header structs are laid out without padding.
class CImage
{
public:
DLL CImage();
DLL ~CImage();
// Error codes, numbered in blocks of ten per operation: 1x conversion,
// 2x BMP save, 3x BMP load, 4x TGA save, 5x TGA load, 6x filename handling.
// NOTE(review): presumably these are the values returned by the
// int-returning methods below - confirm against the implementation.
enum CImageErrors
{
eConvUnknownFormat = 10,
eConvSourceMemory = 11,
eConvDestMemory = 12,
eSaveBmpFileOpen = 20,
eSaveBmpFileWrite = 21,
eSaveBmpSourceMemory = 22,
eSaveBmpColorDepth = 23,
eLoadBmpFileOpen = 30,
eLoadBmpFileRead = 31,
eLoadBmpBadFormat = 32,
eLoadBmpInit = 33,
eLoadBmpColorDepth = 34,
eSaveTgaFileOpen = 40,
eSaveTgaFileWrite = 41,
eSaveTgaSourceMemory = 42,
eSaveTgaColorDepth = 43,
eLoadTgaFileOpen = 50,
eLoadTgaFileRead = 51,
eLoadTgaBadFormat = 52,
eLoadTgaInit = 53,
eLoadTgaColorDepth = 54,
eLoadFilename = 60,
eSaveFilename = 61,
};
// BMP file header fields.
// NOTE(review): "long int" is 32 bits with MSVC but 64 bits on LP64
// platforms; if these structs are read/written directly as on-disk headers
// the layout only matches where long is 32 bits - confirm before reuse.
struct _BMPFILEHEADER
{
unsigned short bfType;
long int bfSize, bfRes1, bfOffBits;
};
// BMP image header variant with 16-bit width/height fields.
struct _BMPIMAGEHEADEROLD
{
long int biSize;
unsigned short biWidth, biHeight;
unsigned short biPlanes, biBitCount;
};
// BMP image header variant with long width/height plus compression,
// resolution and palette-usage fields.
struct _BMPIMAGEHEADER
{
long int biSize, biWidth, biHeight;
unsigned short biPlanes, biBitCount;
long int biCompression, biSizeImage;
long int biXPelsPerMeter, biYPelsPerMeter;
long int biClrUsed, biClrImportant;
};
// TGA header fields (identification size, palette description, image
// origin, resolution, bits per pixel and attribute bits).
struct _TGAHEADER
{
unsigned char tiIdentSize;
unsigned char tiPaletteIncluded;
unsigned char tiImageType;
unsigned short tiPaletteStart;
unsigned short tiPaletteSize;
unsigned char tiPaletteBpp;
unsigned short tiX0;
unsigned short tiY0;
unsigned short tiXres;
unsigned short tiYres;
unsigned char tiBitPerPixel;
unsigned char tiAttrBits;
};
public:
// Set-up / tear-down of the image and bit-depth conversions. All return int.
int DLL Init( int Xres, int Yres, unsigned short BitPerPixel );
int DLL SetImage(unsigned char *img, int width, int height, int bpp);
int DLL Destroy();
int DLL ConvertTo32( void );
int DLL ConvertTo24( void );
int DLL ConvertTo16( void );
int DLL Convert8To17( int transindex );
int DLL Convert32To17( void );
// Format-specific file I/O, followed by the generic Load/Save entry points.
// NOTE(review): the filename parameters are non-const char*; presumably they
// are not modified - confirm against the implementation.
int SaveBmp(char *szFilename);
int LoadBmp(char *szFilename);
int SaveTga(char *szFilename, bool bCompressed );
int LoadTga(char *szFilename);
int DLL Load(char *szFilename);
int DLL Save(char *szFilename);
private:
// Private I/O helpers.
// NOTE(review): presumably buffered access to the FILE* member f through
// m_cBuf / m_nCount - confirm against the implementation.
void Output( char * pcData, int nSize );
void Output( char c );
void Output( void );
unsigned char Input( void );
public:
// Image dimensions, colour depth, pixel storage and a 256-entry BGR palette.
int m_Xres, m_Yres;
unsigned short m_BitPerPixel;
unsigned short m_BytePerPixel;
unsigned char * m_pBitmap;
_BGR m_Pal[256];
private:
// Internal state: pixel count, file handle, and a 32768-byte buffer with
// its counter.
int m_NumPixel;
FILE * f;
int m_nCount;
char m_cBuf[32768];
};
#pragma pack(8)

141
src/gl/hqnx/common.h Normal file
View File

@ -0,0 +1,141 @@
/*
* Copyright (C) 2003 Maxim Stepin ( maxst@hiend3d.com )
*
* Copyright (C) 2010 Cameron Zemek ( grom@zeminvaders.net)
* Copyright (C) 2011 Francois Gannaz <mytskine@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __HQX_COMMON_H_
#define __HQX_COMMON_H_
#include <stdlib.h>
#include <stdint.h>
#define MASK_2 0x0000FF00
#define MASK_13 0x00FF00FF
#define MASK_RGB 0x00FFFFFF
#define MASK_ALPHA 0xFF000000
#define Ymask 0x00FF0000
#define Umask 0x0000FF00
#define Vmask 0x000000FF
#define trY 0x00300000
#define trU 0x00000700
#define trV 0x00000006
/* RGB to YUV lookup table */
extern uint32_t RGBtoYUV[16777216];
/*
 * Translate a packed 32-bit pixel into its packed YUV value via the
 * RGBtoYUV lookup table. The table has one entry per 24-bit colour
 * (16777216 entries), so the top (alpha) byte is stripped before indexing.
 */
static inline uint32_t rgb_to_yuv(uint32_t c)
{
    const uint32_t rgb_index = c & MASK_RGB;

    return RGBtoYUV[rgb_index];
}
/*
 * Test whether two packed YUV values differ noticeably.
 *
 * Each channel is isolated with its mask (Ymask / Umask / Vmask) and the
 * absolute difference of the masked values is compared against the matching
 * threshold (trY / trU / trV), which is expressed in the same bit position.
 *
 * Returns 1 if any channel differs by more than its threshold, 0 otherwise.
 *
 * The absolute difference is computed on unsigned values directly instead of
 * calling abs() on an unsigned subtraction: that relied on wrap-around plus
 * an implementation-defined unsigned-to-int conversion, and abs(unsigned) is
 * an ambiguous overload when this header is compiled as C++.
 */
static inline int yuv_diff(uint32_t yuv1, uint32_t yuv2)
{
    const uint32_t y1 = yuv1 & Ymask;
    const uint32_t y2 = yuv2 & Ymask;
    const uint32_t u1 = yuv1 & Umask;
    const uint32_t u2 = yuv2 & Umask;
    const uint32_t v1 = yuv1 & Vmask;
    const uint32_t v2 = yuv2 & Vmask;

    /* Subtract the smaller from the larger so the result never wraps. */
    const uint32_t dy = (y1 > y2) ? (y1 - y2) : (y2 - y1);
    const uint32_t du = (u1 > u2) ? (u1 - u2) : (u2 - u1);
    const uint32_t dv = (v1 > v2) ? (v1 - v2) : (v2 - v1);

    return (dy > (uint32_t)trY) ||
           (du > (uint32_t)trU) ||
           (dv > (uint32_t)trV);
}
/*
 * Compare two packed pixels in YUV space: both are converted with
 * rgb_to_yuv() and the results are handed to yuv_diff().
 * Returns whatever yuv_diff() returns (non-zero when the colours differ).
 */
static inline int Diff(uint32_t c1, uint32_t c2)
{
    const uint32_t first_yuv = rgb_to_yuv(c1);
    const uint32_t second_yuv = rgb_to_yuv(c2);

    return yuv_diff(first_yuv, second_yuv);
}
/* Interpolate functions */
/*
 * Weighted blend of two packed 32-bit pixels.
 *
 * Each channel is computed as (c1 * w1 + c2 * w2) >> s. The top byte is
 * shifted down before weighting and shifted back afterwards; the MASK_2
 * byte and the two MASK_13 bytes are weighted in place. Callers pass
 * weights that sum to 1 << s.
 *
 * Identical inputs are returned unchanged without doing any arithmetic.
 */
static inline uint32_t Interpolate_2(uint32_t c1, int w1, uint32_t c2, int w2, int s)
{
    uint32_t top_byte;
    uint32_t second_byte;
    uint32_t outer_bytes;

    if (c1 == c2) {
        return c1;
    }

    top_byte = ((((c1 & MASK_ALPHA) >> 24) * w1 +
                 ((c2 & MASK_ALPHA) >> 24) * w2) << (24 - s)) & MASK_ALPHA;
    second_byte = (((c1 & MASK_2) * w1 + (c2 & MASK_2) * w2) >> s) & MASK_2;
    outer_bytes = (((c1 & MASK_13) * w1 + (c2 & MASK_13) * w2) >> s) & MASK_13;

    /* The three masks do not overlap, so OR-ing equals adding. */
    return top_byte | second_byte | outer_bytes;
}
/*
 * Weighted blend of three packed 32-bit pixels.
 *
 * Each channel is computed as (c1 * w1 + c2 * w2 + c3 * w3) >> s. The top
 * byte is shifted down before weighting and shifted back afterwards; the
 * MASK_2 byte and the two MASK_13 bytes are weighted in place. Callers pass
 * weights that sum to 1 << s.
 */
static inline uint32_t Interpolate_3(uint32_t c1, int w1, uint32_t c2, int w2, uint32_t c3, int w3, int s)
{
    const uint32_t top_byte =
        ((((c1 & MASK_ALPHA) >> 24) * w1 +
          ((c2 & MASK_ALPHA) >> 24) * w2 +
          ((c3 & MASK_ALPHA) >> 24) * w3) << (24 - s)) & MASK_ALPHA;
    const uint32_t second_byte =
        (((c1 & MASK_2) * w1 + (c2 & MASK_2) * w2 + (c3 & MASK_2) * w3) >> s) & MASK_2;
    const uint32_t outer_bytes =
        (((c1 & MASK_13) * w1 + (c2 & MASK_13) * w2 + (c3 & MASK_13) * w3) >> s) & MASK_13;

    /* The three masks do not overlap, so OR-ing equals adding. */
    return top_byte | second_byte | outer_bytes;
}
/* 3:1 blend of c1 and c2 - per channel (c1*3 + c2) >> 2. */
static inline uint32_t Interp1(uint32_t c1, uint32_t c2)
{
    const int weight1 = 3;
    const int weight2 = 1;
    const int shift = 2;

    return Interpolate_2(c1, weight1, c2, weight2, shift);
}
/* 2:1:1 blend of c1, c2 and c3 - per channel (c1*2 + c2 + c3) >> 2. */
static inline uint32_t Interp2(uint32_t c1, uint32_t c2, uint32_t c3)
{
    const int weight1 = 2;
    const int weight2 = 1;
    const int weight3 = 1;
    const int shift = 2;

    return Interpolate_3(c1, weight1, c2, weight2, c3, weight3, shift);
}
/* 7:1 blend of c1 and c2 - per channel (c1*7 + c2) >> 3. */
static inline uint32_t Interp3(uint32_t c1, uint32_t c2)
{
    const int weight1 = 7;
    const int weight2 = 1;
    const int shift = 3;

    return Interpolate_2(c1, weight1, c2, weight2, shift);
}
/* 2:7:7 blend of c1, c2 and c3 - per channel (c1*2 + (c2 + c3)*7) >> 4. */
static inline uint32_t Interp4(uint32_t c1, uint32_t c2, uint32_t c3)
{
    const int weight1 = 2;
    const int weight2 = 7;
    const int weight3 = 7;
    const int shift = 4;

    return Interpolate_3(c1, weight1, c2, weight2, c3, weight3, shift);
}
/* Even 1:1 blend of c1 and c2 - per channel (c1 + c2) >> 1. */
static inline uint32_t Interp5(uint32_t c1, uint32_t c2)
{
    const int weight1 = 1;
    const int weight2 = 1;
    const int shift = 1;

    return Interpolate_2(c1, weight1, c2, weight2, shift);
}
/* 5:2:1 blend of c1, c2 and c3 - per channel (c1*5 + c2*2 + c3) >> 3. */
static inline uint32_t Interp6(uint32_t c1, uint32_t c2, uint32_t c3)
{
    const int weight1 = 5;
    const int weight2 = 2;
    const int weight3 = 1;
    const int shift = 3;

    return Interpolate_3(c1, weight1, c2, weight2, c3, weight3, shift);
}
/* 6:1:1 blend of c1, c2 and c3 - per channel (c1*6 + c2 + c3) >> 3. */
static inline uint32_t Interp7(uint32_t c1, uint32_t c2, uint32_t c3)
{
    const int weight1 = 6;
    const int weight2 = 1;
    const int weight3 = 1;
    const int shift = 3;

    return Interpolate_3(c1, weight1, c2, weight2, c3, weight3, shift);
}
/* 5:3 blend of c1 and c2 - per channel (c1*5 + c2*3) >> 3. */
static inline uint32_t Interp8(uint32_t c1, uint32_t c2)
{
    const int weight1 = 5;
    const int weight2 = 3;
    const int shift = 3;

    return Interpolate_2(c1, weight1, c2, weight2, shift);
}
/* 2:3:3 blend of c1, c2 and c3 - per channel (c1*2 + (c2 + c3)*3) >> 3. */
static inline uint32_t Interp9(uint32_t c1, uint32_t c2, uint32_t c3)
{
    const int weight1 = 2;
    const int weight2 = 3;
    const int weight3 = 3;
    const int shift = 3;

    return Interpolate_3(c1, weight1, c2, weight2, c3, weight3, shift);
}
/* 14:1:1 blend of c1, c2 and c3 - per channel (c1*14 + c2 + c3) >> 4. */
static inline uint32_t Interp10(uint32_t c1, uint32_t c2, uint32_t c3)
{
    const int weight1 = 14;
    const int weight2 = 1;
    const int weight3 = 1;
    const int shift = 4;

    return Interpolate_3(c1, weight1, c2, weight2, c3, weight3, shift);
}
#endif

View File

@ -1,232 +1,86 @@
//hq2x filter demo program
//----------------------------------------------------------
//Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
//
//This program is free software; you can redistribute it and/or
//modify it under the terms of the GNU Lesser General Public
//License as published by the Free Software Foundation; either
//version 2.1 of the License, or (at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//Lesser General Public License for more details.
//
//You should have received a copy of the GNU Lesser General Public
//License along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/*
* Copyright (C) 2003 Maxim Stepin ( maxst@hiend3d.com )
*
* Copyright (C) 2010 Cameron Zemek ( grom@zeminvaders.net)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "hqnx.h"
#include <stdint.h>
#include "common.h"
#include "hqx.h"
extern int LUT16to32[65536*2];
extern int RGBtoYUV[65536*2];
/*
 * Output-pixel macros for the 2x kernel.
 *
 * PIXELrc_<mode> writes one pixel of the 2x2 output block addressed by dp:
 *   PIXEL00 -> *dp            PIXEL01 -> *(dp+1)
 *   PIXEL10 -> *(dp+dpL)      PIXEL11 -> *(dp+dpL+1)
 * <mode> selects how the value is produced from the w[] window, always with
 * w[5] as the first operand:
 *   0        plain copy of w[5]
 *   10/11/12 Interp1      20/21/22 Interp2      60/61 Interp6
 *   70       Interp7      90       Interp9      100   Interp10
 * Each macro expands to a complete statement (note the trailing ';') and
 * relies on w, dp and dpL being in scope where it is used.
 */
#define PIXEL00_0 *dp = w[5];
#define PIXEL00_10 *dp = Interp1(w[5], w[1]);
#define PIXEL00_11 *dp = Interp1(w[5], w[4]);
#define PIXEL00_12 *dp = Interp1(w[5], w[2]);
#define PIXEL00_20 *dp = Interp2(w[5], w[4], w[2]);
#define PIXEL00_21 *dp = Interp2(w[5], w[1], w[2]);
#define PIXEL00_22 *dp = Interp2(w[5], w[1], w[4]);
#define PIXEL00_60 *dp = Interp6(w[5], w[2], w[4]);
#define PIXEL00_61 *dp = Interp6(w[5], w[4], w[2]);
#define PIXEL00_70 *dp = Interp7(w[5], w[4], w[2]);
#define PIXEL00_90 *dp = Interp9(w[5], w[4], w[2]);
#define PIXEL00_100 *dp = Interp10(w[5], w[4], w[2]);
#define PIXEL01_0 *(dp+1) = w[5];
#define PIXEL01_10 *(dp+1) = Interp1(w[5], w[3]);
#define PIXEL01_11 *(dp+1) = Interp1(w[5], w[2]);
#define PIXEL01_12 *(dp+1) = Interp1(w[5], w[6]);
#define PIXEL01_20 *(dp+1) = Interp2(w[5], w[2], w[6]);
#define PIXEL01_21 *(dp+1) = Interp2(w[5], w[3], w[6]);
#define PIXEL01_22 *(dp+1) = Interp2(w[5], w[3], w[2]);
#define PIXEL01_60 *(dp+1) = Interp6(w[5], w[6], w[2]);
#define PIXEL01_61 *(dp+1) = Interp6(w[5], w[2], w[6]);
#define PIXEL01_70 *(dp+1) = Interp7(w[5], w[2], w[6]);
#define PIXEL01_90 *(dp+1) = Interp9(w[5], w[2], w[6]);
#define PIXEL01_100 *(dp+1) = Interp10(w[5], w[2], w[6]);
#define PIXEL10_0 *(dp+dpL) = w[5];
#define PIXEL10_10 *(dp+dpL) = Interp1(w[5], w[7]);
#define PIXEL10_11 *(dp+dpL) = Interp1(w[5], w[8]);
#define PIXEL10_12 *(dp+dpL) = Interp1(w[5], w[4]);
#define PIXEL10_20 *(dp+dpL) = Interp2(w[5], w[8], w[4]);
#define PIXEL10_21 *(dp+dpL) = Interp2(w[5], w[7], w[4]);
#define PIXEL10_22 *(dp+dpL) = Interp2(w[5], w[7], w[8]);
#define PIXEL10_60 *(dp+dpL) = Interp6(w[5], w[4], w[8]);
#define PIXEL10_61 *(dp+dpL) = Interp6(w[5], w[8], w[4]);
#define PIXEL10_70 *(dp+dpL) = Interp7(w[5], w[8], w[4]);
#define PIXEL10_90 *(dp+dpL) = Interp9(w[5], w[8], w[4]);
#define PIXEL10_100 *(dp+dpL) = Interp10(w[5], w[8], w[4]);
#define PIXEL11_0 *(dp+dpL+1) = w[5];
#define PIXEL11_10 *(dp+dpL+1) = Interp1(w[5], w[9]);
#define PIXEL11_11 *(dp+dpL+1) = Interp1(w[5], w[6]);
#define PIXEL11_12 *(dp+dpL+1) = Interp1(w[5], w[8]);
#define PIXEL11_20 *(dp+dpL+1) = Interp2(w[5], w[6], w[8]);
#define PIXEL11_21 *(dp+dpL+1) = Interp2(w[5], w[9], w[8]);
#define PIXEL11_22 *(dp+dpL+1) = Interp2(w[5], w[9], w[6]);
#define PIXEL11_60 *(dp+dpL+1) = Interp6(w[5], w[8], w[6]);
#define PIXEL11_61 *(dp+dpL+1) = Interp6(w[5], w[6], w[8]);
#define PIXEL11_70 *(dp+dpL+1) = Interp7(w[5], w[6], w[8]);
#define PIXEL11_90 *(dp+dpL+1) = Interp9(w[5], w[6], w[8]);
#define PIXEL11_100 *(dp+dpL+1) = Interp10(w[5], w[6], w[8]);
// 64-bit memory operands for the MMX inline-assembly blend routines below.
// __int64 is a Microsoft-specific type.
// reg_blank is all zero: it is the second operand of punpcklbw/packuswb.
static const __int64 reg_blank = 0;
// constN holds the value N in each of its four 16-bit lanes; used with pmullw
// to multiply all channels of a widened pixel by N.
static const __int64 const3 = 0x0003000300030003;
static const __int64 const5 = 0x0005000500050005;
static const __int64 const6 = 0x0006000600060006;
static const __int64 const14 = 0x000E000E000E000E;
// Not referenced by any code visible here.
// NOTE(review): the bytes 0x30 / 0x07 / 0x06 look like packed Y/U/V
// difference thresholds - confirm against the Diff() implementation.
static const __int64 treshold = 0x0000000000300706;
// Blend two packed 32-bit pixels 3:1 and store the 32-bit result at pc.
// Each of the four 8-bit channels is computed as (c1*3 + c2) >> 2.
// Written as Microsoft-style inline assembly using MMX registers mm1/mm2;
// the function does not execute emms itself.
inline void Interp1(unsigned char * pc, int c1, int c2)
{
//*((int*)pc) = (c1*3+c2)/4;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
// Widen every byte to a 16-bit word by interleaving with zero.
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
// mm1 = (c1 * 3 + c2) >> 2, per 16-bit lane.
pmullw mm1, const3
paddw mm1, mm2
psrlw mm1, 2
// Narrow the words back to bytes and store the low 32 bits at [pc].
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Blend three packed 32-bit pixels 2:1:1 and store the 32-bit result at pc.
// Each of the four 8-bit channels is computed as (c1*2 + c2 + c3) >> 2.
// Written as Microsoft-style inline assembly using MMX registers mm1-mm3;
// the function does not execute emms itself.
inline void Interp2(unsigned char * pc, int c1, int c2, int c3)
{
//*((int*)pc) = (c1*2+c2+c3) >> 2;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
movd mm3, c3
// Widen every byte to a 16-bit word by interleaving with zero.
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
punpcklbw mm3, reg_blank
// mm1 = (c1 * 2 + c2 + c3) >> 2, per 16-bit lane.
psllw mm1, 1
paddw mm1, mm2
paddw mm1, mm3
psrlw mm1, 2
// Narrow the words back to bytes and store the low 32 bits at [pc].
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Average two packed 32-bit pixels and store the 32-bit result at pc.
// Each of the four 8-bit channels is computed as (c1 + c2) >> 1.
// Written as Microsoft-style inline assembly using MMX registers mm1/mm2;
// the function does not execute emms itself.
inline void Interp5(unsigned char * pc, int c1, int c2)
{
//*((int*)pc) = (c1+c2)/2;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
// Widen every byte to a 16-bit word by interleaving with zero.
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
// mm1 = (c1 + c2) >> 1, per 16-bit lane.
paddw mm1, mm2
psrlw mm1, 1
// Narrow the words back to bytes and store the low 32 bits at [pc].
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Blend three packed 32-bit pixels 5:2:1 and store the 32-bit result at pc.
// Each of the four 8-bit channels is computed as (c1*5 + c2*2 + c3) >> 3.
// Written as Microsoft-style inline assembly using MMX registers mm1-mm3;
// the function does not execute emms itself.
inline void Interp6(unsigned char * pc, int c1, int c2, int c3)
{
//*((int*)pc) = (c1*5+c2*2+c3)/8;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
movd mm3, c3
// Widen every byte to a 16-bit word by interleaving with zero.
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
punpcklbw mm3, reg_blank
// mm1 = (c1 * 5 + c3 + c2 * 2) >> 3, per 16-bit lane.
pmullw mm1, const5
psllw mm2, 1
paddw mm1, mm3
paddw mm1, mm2
psrlw mm1, 3
// Narrow the words back to bytes and store the low 32 bits at [pc].
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Blend three packed 32-bit pixels 6:1:1 and store the 32-bit result at pc.
// Each of the four 8-bit channels is computed as (c1*6 + c2 + c3) >> 3.
// Written as Microsoft-style inline assembly using MMX registers mm1-mm3;
// the function does not execute emms itself.
inline void Interp7(unsigned char * pc, int c1, int c2, int c3)
{
//*((int*)pc) = (c1*6+c2+c3)/8;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
movd mm3, c3
// Widen every byte to a 16-bit word by interleaving with zero.
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
punpcklbw mm3, reg_blank
// mm1 = (c1 * 6 + (c2 + c3)) >> 3, per 16-bit lane.
pmullw mm1, const6
paddw mm2, mm3
paddw mm1, mm2
psrlw mm1, 3
// Narrow the words back to bytes and store the low 32 bits at [pc].
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Blend three packed 32-bit pixels 2:3:3 and store the 32-bit result at pc.
// Each of the four 8-bit channels is computed as (c1*2 + (c2 + c3)*3) >> 3.
// Written as Microsoft-style inline assembly using MMX registers mm1-mm3;
// the function does not execute emms itself.
inline void Interp9(unsigned char * pc, int c1, int c2, int c3)
{
//*((int*)pc) = (c1*2+(c2+c3)*3)/8;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
movd mm3, c3
// Widen every byte to a 16-bit word by interleaving with zero.
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
punpcklbw mm3, reg_blank
// mm1 = (c1 * 2 + (c2 + c3) * 3) >> 3, per 16-bit lane.
psllw mm1, 1
paddw mm2, mm3
pmullw mm2, const3
paddw mm1, mm2
psrlw mm1, 3
// Narrow the words back to bytes and store the low 32 bits at [pc].
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Blend three packed 32-bit pixels 14:1:1 and store the 32-bit result at pc.
// Each of the four 8-bit channels is computed as (c1*14 + c2 + c3) >> 4.
// Written as Microsoft-style inline assembly using MMX registers mm1-mm3;
// the function does not execute emms itself.
inline void Interp10(unsigned char * pc, int c1, int c2, int c3)
{
//*((int*)pc) = (c1*14+c2+c3)/16;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
movd mm3, c3
// Widen every byte to a 16-bit word by interleaving with zero.
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
punpcklbw mm3, reg_blank
// mm1 = (c1 * 14 + (c2 + c3)) >> 4, per 16-bit lane.
pmullw mm1, const14
paddw mm2, mm3
paddw mm1, mm2
psrlw mm1, 4
// Narrow the words back to bytes and store the low 32 bits at [pc].
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Output-pixel macros for the 2x kernel (pointer-writing variant).
//
// PIXELrc_<mode> writes one pixel of the 2x2 output block addressed by the
// byte pointer pOut; offsets are in bytes:
//   PIXEL00 -> pOut          PIXEL01 -> pOut+4
//   PIXEL10 -> pOut+BpL      PIXEL11 -> pOut+BpL+4
// <mode> selects how the value is produced from the c[] window, always with
// c[5] as the first colour operand:
//   0        plain 32-bit store of c[5]
//   10/11/12 Interp1      20/21/22 Interp2      60/61 Interp6
//   70       Interp7      90       Interp9      100   Interp10
// Each macro expands to a complete statement (note the trailing ';') and
// relies on c, pOut and BpL being in scope where it is used.
#define PIXEL00_0 *((int*)(pOut)) = c[5];
#define PIXEL00_10 Interp1(pOut, c[5], c[1]);
#define PIXEL00_11 Interp1(pOut, c[5], c[4]);
#define PIXEL00_12 Interp1(pOut, c[5], c[2]);
#define PIXEL00_20 Interp2(pOut, c[5], c[4], c[2]);
#define PIXEL00_21 Interp2(pOut, c[5], c[1], c[2]);
#define PIXEL00_22 Interp2(pOut, c[5], c[1], c[4]);
#define PIXEL00_60 Interp6(pOut, c[5], c[2], c[4]);
#define PIXEL00_61 Interp6(pOut, c[5], c[4], c[2]);
#define PIXEL00_70 Interp7(pOut, c[5], c[4], c[2]);
#define PIXEL00_90 Interp9(pOut, c[5], c[4], c[2]);
#define PIXEL00_100 Interp10(pOut, c[5], c[4], c[2]);
#define PIXEL01_0 *((int*)(pOut+4)) = c[5];
#define PIXEL01_10 Interp1(pOut+4, c[5], c[3]);
#define PIXEL01_11 Interp1(pOut+4, c[5], c[2]);
#define PIXEL01_12 Interp1(pOut+4, c[5], c[6]);
#define PIXEL01_20 Interp2(pOut+4, c[5], c[2], c[6]);
#define PIXEL01_21 Interp2(pOut+4, c[5], c[3], c[6]);
#define PIXEL01_22 Interp2(pOut+4, c[5], c[3], c[2]);
#define PIXEL01_60 Interp6(pOut+4, c[5], c[6], c[2]);
#define PIXEL01_61 Interp6(pOut+4, c[5], c[2], c[6]);
#define PIXEL01_70 Interp7(pOut+4, c[5], c[2], c[6]);
#define PIXEL01_90 Interp9(pOut+4, c[5], c[2], c[6]);
#define PIXEL01_100 Interp10(pOut+4, c[5], c[2], c[6]);
#define PIXEL10_0 *((int*)(pOut+BpL)) = c[5];
#define PIXEL10_10 Interp1(pOut+BpL, c[5], c[7]);
#define PIXEL10_11 Interp1(pOut+BpL, c[5], c[8]);
#define PIXEL10_12 Interp1(pOut+BpL, c[5], c[4]);
#define PIXEL10_20 Interp2(pOut+BpL, c[5], c[8], c[4]);
#define PIXEL10_21 Interp2(pOut+BpL, c[5], c[7], c[4]);
#define PIXEL10_22 Interp2(pOut+BpL, c[5], c[7], c[8]);
#define PIXEL10_60 Interp6(pOut+BpL, c[5], c[4], c[8]);
#define PIXEL10_61 Interp6(pOut+BpL, c[5], c[8], c[4]);
#define PIXEL10_70 Interp7(pOut+BpL, c[5], c[8], c[4]);
#define PIXEL10_90 Interp9(pOut+BpL, c[5], c[8], c[4]);
#define PIXEL10_100 Interp10(pOut+BpL, c[5], c[8], c[4]);
#define PIXEL11_0 *((int*)(pOut+BpL+4)) = c[5];
#define PIXEL11_10 Interp1(pOut+BpL+4, c[5], c[9]);
#define PIXEL11_11 Interp1(pOut+BpL+4, c[5], c[6]);
#define PIXEL11_12 Interp1(pOut+BpL+4, c[5], c[8]);
#define PIXEL11_20 Interp2(pOut+BpL+4, c[5], c[6], c[8]);
#define PIXEL11_21 Interp2(pOut+BpL+4, c[5], c[9], c[8]);
#define PIXEL11_22 Interp2(pOut+BpL+4, c[5], c[9], c[6]);
#define PIXEL11_60 Interp6(pOut+BpL+4, c[5], c[8], c[6]);
#define PIXEL11_61 Interp6(pOut+BpL+4, c[5], c[6], c[8]);
#define PIXEL11_70 Interp7(pOut+BpL+4, c[5], c[6], c[8]);
#define PIXEL11_90 Interp9(pOut+BpL+4, c[5], c[6], c[8]);
#define PIXEL11_100 Interp10(pOut+BpL+4, c[5], c[6], c[8]);
int Diff(unsigned int w5, unsigned int w1);
void DLL hq2x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL )
HQX_API void HQX_CALLCONV hq2x_32_rb( uint32_t * sp, uint32_t srb, uint32_t * dp, uint32_t drb, int Xres, int Yres )
{
int i, j, k;
int w[10];
unsigned int c[10];
int prevline, nextline;
uint32_t w[10];
int dpL = (drb >> 2);
int spL = (srb >> 2);
uint8_t *sRowP = (uint8_t *) sp;
uint8_t *dRowP = (uint8_t *) dp;
uint32_t yuv1, yuv2;
// +----+----+----+
// | | | |
@ -241,98 +95,57 @@ void DLL hq2x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL )
for (j=0; j<Yres; j++)
{
if (j>0) prevline = -spL; else prevline = 0;
if (j<Yres-1) nextline = spL; else nextline = 0;
for (i=0; i<Xres; i++)
{
if (j==0)
{
w[1] = 0;
w[2] = 0;
w[3] = 0;
}
else
{
if (i>0)
{
w[1] = *(pIn - Xres - 1);
}
else
{
w[1] = 0;
}
w[2] = *(pIn - Xres);
if (i<Xres-1)
{
w[3] = *(pIn - Xres + 1);
}
else
{
w[3] = 0;
}
}
w[2] = *(sp + prevline);
w[5] = *sp;
w[8] = *(sp + nextline);
if (i>0)
{
w[4] = *(pIn - 1);
w[1] = *(sp + prevline - 1);
w[4] = *(sp - 1);
w[7] = *(sp + nextline - 1);
}
else
{
w[4] = 0;
w[1] = w[2];
w[4] = w[5];
w[7] = w[8];
}
w[5] = *(pIn);
if (i<Xres-1)
{
w[6] = *(pIn + 1);
w[3] = *(sp + prevline + 1);
w[6] = *(sp + 1);
w[9] = *(sp + nextline + 1);
}
else
{
w[6] = 0;
}
if (j==Yres-1)
{
w[7] = 0;
w[8] = 0;
w[9] = 0;
}
else
{
if (i>0)
{
w[7] = *(pIn + Xres - 1);
}
else
{
w[7] = 0;
}
w[8] = *(pIn + Xres);
if (i<Xres-1)
{
w[9] = *(pIn + Xres + 1);
}
else
{
w[9] = 0;
}
w[3] = w[2];
w[6] = w[5];
w[9] = w[8];
}
int pattern = 0;
int flag = 1;
if ( Diff(w[5],w[1]) ) pattern |= 0x0001;
if ( Diff(w[5],w[2]) ) pattern |= 0x0002;
if ( Diff(w[5],w[3]) ) pattern |= 0x0004;
if ( Diff(w[5],w[4]) ) pattern |= 0x0008;
if ( Diff(w[5],w[6]) ) pattern |= 0x0010;
if ( Diff(w[5],w[7]) ) pattern |= 0x0020;
if ( Diff(w[5],w[8]) ) pattern |= 0x0040;
if ( Diff(w[5],w[9]) ) pattern |= 0x0080;
yuv1 = rgb_to_yuv(w[5]);
for (k=1; k<=9; k++)
{
c[k] = LUT16to32[w[k]];
if (k==5) continue;
if ( w[k] != w[5] )
{
yuv2 = rgb_to_yuv(w[k]);
if (yuv_diff(yuv1, yuv2))
pattern |= flag;
}
flag <<= 1;
}
switch (pattern)
@ -2977,11 +2790,20 @@ void DLL hq2x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL )
break;
}
}
pIn++;
pOut+=8;
}
pOut+=BpL;
}
__asm emms
sp++;
dp += 2;
}
sRowP += srb;
sp = (uint32_t *) sRowP;
dRowP += drb * 2;
dp = (uint32_t *) dRowP;
}
}
/*
 * Convenience entry point for the 2x filter on tightly packed buffers.
 *
 * Forwards to hq2x_32_rb() with a source stride of Xres * 4 bytes and a
 * destination stride of twice that, i.e. both buffers are treated as having
 * no padding between rows of 32-bit pixels.
 *
 * sp   - source pixels, Xres x Yres
 * dp   - destination pixels
 * Xres - source width in pixels
 * Yres - source height in pixels
 */
HQX_API void HQX_CALLCONV hq2x_32( uint32_t * sp, uint32_t * dp, int Xres, int Yres )
{
    const uint32_t src_row_bytes = Xres * 4;
    const uint32_t dst_row_bytes = src_row_bytes * 2;

    hq2x_32_rb(sp, src_row_bytes, dp, dst_row_bytes, Xres, Yres);
}

View File

@ -1,188 +1,91 @@
//hq3x filter demo program
//----------------------------------------------------------
//Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
//
//This program is free software; you can redistribute it and/or
//modify it under the terms of the GNU Lesser General Public
//License as published by the Free Software Foundation; either
//version 2.1 of the License, or (at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//Lesser General Public License for more details.
//
//You should have received a copy of the GNU Lesser General Public
//License along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/*
* Copyright (C) 2003 Maxim Stepin ( maxst@hiend3d.com )
*
* Copyright (C) 2010 Cameron Zemek ( grom@zeminvaders.net)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "hqnx.h"
#include <stdint.h>
#include "common.h"
#include "hqx.h"
extern int LUT16to32[65536*2];
extern int RGBtoYUV[65536*2];
#define PIXEL00_1M *dp = Interp1(w[5], w[1]);
#define PIXEL00_1U *dp = Interp1(w[5], w[2]);
#define PIXEL00_1L *dp = Interp1(w[5], w[4]);
#define PIXEL00_2 *dp = Interp2(w[5], w[4], w[2]);
#define PIXEL00_4 *dp = Interp4(w[5], w[4], w[2]);
#define PIXEL00_5 *dp = Interp5(w[4], w[2]);
#define PIXEL00_C *dp = w[5];
// 64-bit memory operands for the MMX inline-assembly blend routines in this
// file. __int64 is a Microsoft-specific type.
// reg_blank is all zero: it is the second operand of punpcklbw/packuswb.
static const __int64 reg_blank = 0;
// constN holds the value N in each of its four 16-bit lanes; used with pmullw
// to multiply all channels of a widened pixel by N.
static const __int64 const3 = 0x0003000300030003;
static const __int64 const7 = 0x0007000700070007;
// Not referenced by any code visible here.
// NOTE(review): the bytes 0x30 / 0x07 / 0x06 look like packed Y/U/V
// difference thresholds - confirm against the Diff() implementation.
static const __int64 treshold = 0x0000000000300706;
#define PIXEL01_1 *(dp+1) = Interp1(w[5], w[2]);
#define PIXEL01_3 *(dp+1) = Interp3(w[5], w[2]);
#define PIXEL01_6 *(dp+1) = Interp1(w[2], w[5]);
#define PIXEL01_C *(dp+1) = w[5];
// Blend two packed 32-bit pixels 3:1 and store the 32-bit result at pc.
// Each of the four 8-bit channels is computed as (c1*3 + c2) >> 2.
// Written as Microsoft-style inline assembly using MMX registers mm1/mm2;
// the function does not execute emms itself.
inline void Interp1(unsigned char * pc, int c1, int c2)
{
//*((int*)pc) = (c1*3+c2)/4;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
// Widen every byte to a 16-bit word by interleaving with zero.
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
// mm1 = (c1 * 3 + c2) >> 2, per 16-bit lane.
pmullw mm1, const3
paddw mm1, mm2
psrlw mm1, 2
// Narrow the words back to bytes and store the low 32 bits at [pc].
packuswb mm1, reg_blank
movd [eax], mm1
}
}
#define PIXEL02_1M *(dp+2) = Interp1(w[5], w[3]);
#define PIXEL02_1U *(dp+2) = Interp1(w[5], w[2]);
#define PIXEL02_1R *(dp+2) = Interp1(w[5], w[6]);
#define PIXEL02_2 *(dp+2) = Interp2(w[5], w[2], w[6]);
#define PIXEL02_4 *(dp+2) = Interp4(w[5], w[2], w[6]);
#define PIXEL02_5 *(dp+2) = Interp5(w[2], w[6]);
#define PIXEL02_C *(dp+2) = w[5];
// Blend three packed 32-bit pixels 2:1:1 and store the 32-bit result at pc.
// Each of the four 8-bit channels is computed as (c1*2 + c2 + c3) >> 2.
// Written as Microsoft-style inline assembly using MMX registers mm1-mm3;
// the function does not execute emms itself.
inline void Interp2(unsigned char * pc, int c1, int c2, int c3)
{
// *((int*)pc) = (c1*2+c2+c3)/4;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
movd mm3, c3
// Widen every byte to a 16-bit word by interleaving with zero.
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
punpcklbw mm3, reg_blank
// mm1 = (c1 * 2 + c2 + c3) >> 2, per 16-bit lane.
psllw mm1, 1
paddw mm1, mm2
paddw mm1, mm3
psrlw mm1, 2
// Narrow the words back to bytes and store the low 32 bits at [pc].
packuswb mm1, reg_blank
movd [eax], mm1
}
}
#define PIXEL10_1 *(dp+dpL) = Interp1(w[5], w[4]);
#define PIXEL10_3 *(dp+dpL) = Interp3(w[5], w[4]);
#define PIXEL10_6 *(dp+dpL) = Interp1(w[4], w[5]);
#define PIXEL10_C *(dp+dpL) = w[5];
// Blend two packed 32-bit pixels 7:1 and store the 32-bit result at pc.
// Each of the four 8-bit channels is computed as (c1*7 + c2) >> 3.
// Written as Microsoft-style inline assembly using MMX registers mm1/mm2;
// the function does not execute emms itself.
inline void Interp3(unsigned char * pc, int c1, int c2)
{
//*((int*)pc) = (c1*7+c2)/8;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
// Widen every byte to a 16-bit word by interleaving with zero.
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
// mm1 = (c1 * 7 + c2) >> 3, per 16-bit lane.
pmullw mm1, const7
paddw mm1, mm2
psrlw mm1, 3
// Narrow the words back to bytes and store the low 32 bits at [pc].
packuswb mm1, reg_blank
movd [eax], mm1
}
}
#define PIXEL11 *(dp+dpL+1) = w[5];
// Blend three packed 32-bit pixels 2:7:7 and store the 32-bit result at pc.
// Each of the four 8-bit channels is computed as (c1*2 + (c2 + c3)*7) >> 4.
// Written as Microsoft-style inline assembly using MMX registers mm1-mm3;
// the function does not execute emms itself.
inline void Interp4(unsigned char * pc, int c1, int c2, int c3)
{
//*((int*)pc) = (c1*2+(c2+c3)*7)/16;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
movd mm3, c3
// Widen every byte to a 16-bit word by interleaving with zero.
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
punpcklbw mm3, reg_blank
// mm1 = (c1 * 2 + (c2 + c3) * 7) >> 4, per 16-bit lane.
psllw mm1, 1
paddw mm2, mm3
pmullw mm2, const7
paddw mm1, mm2
psrlw mm1, 4
// Narrow the words back to bytes and store the low 32 bits at [pc].
packuswb mm1, reg_blank
movd [eax], mm1
}
}
#define PIXEL12_1 *(dp+dpL+2) = Interp1(w[5], w[6]);
#define PIXEL12_3 *(dp+dpL+2) = Interp3(w[5], w[6]);
#define PIXEL12_6 *(dp+dpL+2) = Interp1(w[6], w[5]);
#define PIXEL12_C *(dp+dpL+2) = w[5];
// Average two packed 32-bit pixels and store the 32-bit result at pc.
// Each of the four 8-bit channels is computed as (c1 + c2) >> 1.
// Written as Microsoft-style inline assembly using MMX registers mm1/mm2;
// the function does not execute emms itself.
inline void Interp5(unsigned char * pc, int c1, int c2)
{
//*((int*)pc) = (c1+c2)/2;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
// Widen every byte to a 16-bit word by interleaving with zero.
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
// mm1 = (c1 + c2) >> 1, per 16-bit lane.
paddw mm1, mm2
psrlw mm1, 1
// Narrow the words back to bytes and store the low 32 bits at [pc].
packuswb mm1, reg_blank
movd [eax], mm1
}
}
#define PIXEL20_1M *(dp+dpL+dpL) = Interp1(w[5], w[7]);
#define PIXEL20_1D *(dp+dpL+dpL) = Interp1(w[5], w[8]);
#define PIXEL20_1L *(dp+dpL+dpL) = Interp1(w[5], w[4]);
#define PIXEL20_2 *(dp+dpL+dpL) = Interp2(w[5], w[8], w[4]);
#define PIXEL20_4 *(dp+dpL+dpL) = Interp4(w[5], w[8], w[4]);
#define PIXEL20_5 *(dp+dpL+dpL) = Interp5(w[8], w[4]);
#define PIXEL20_C *(dp+dpL+dpL) = w[5];
#define PIXEL00_1M Interp1(pOut, c[5], c[1]);
#define PIXEL00_1U Interp1(pOut, c[5], c[2]);
#define PIXEL00_1L Interp1(pOut, c[5], c[4]);
#define PIXEL00_2 Interp2(pOut, c[5], c[4], c[2]);
#define PIXEL00_4 Interp4(pOut, c[5], c[4], c[2]);
#define PIXEL00_5 Interp5(pOut, c[4], c[2]);
#define PIXEL00_C *((int*)(pOut)) = c[5];
#define PIXEL21_1 *(dp+dpL+dpL+1) = Interp1(w[5], w[8]);
#define PIXEL21_3 *(dp+dpL+dpL+1) = Interp3(w[5], w[8]);
#define PIXEL21_6 *(dp+dpL+dpL+1) = Interp1(w[8], w[5]);
#define PIXEL21_C *(dp+dpL+dpL+1) = w[5];
#define PIXEL01_1 Interp1(pOut+4, c[5], c[2]);
#define PIXEL01_3 Interp3(pOut+4, c[5], c[2]);
#define PIXEL01_6 Interp1(pOut+4, c[2], c[5]);
#define PIXEL01_C *((int*)(pOut+4)) = c[5];
#define PIXEL22_1M *(dp+dpL+dpL+2) = Interp1(w[5], w[9]);
#define PIXEL22_1D *(dp+dpL+dpL+2) = Interp1(w[5], w[8]);
#define PIXEL22_1R *(dp+dpL+dpL+2) = Interp1(w[5], w[6]);
#define PIXEL22_2 *(dp+dpL+dpL+2) = Interp2(w[5], w[6], w[8]);
#define PIXEL22_4 *(dp+dpL+dpL+2) = Interp4(w[5], w[6], w[8]);
#define PIXEL22_5 *(dp+dpL+dpL+2) = Interp5(w[6], w[8]);
#define PIXEL22_C *(dp+dpL+dpL+2) = w[5];
#define PIXEL02_1M Interp1(pOut+8, c[5], c[3]);
#define PIXEL02_1U Interp1(pOut+8, c[5], c[2]);
#define PIXEL02_1R Interp1(pOut+8, c[5], c[6]);
#define PIXEL02_2 Interp2(pOut+8, c[5], c[2], c[6]);
#define PIXEL02_4 Interp4(pOut+8, c[5], c[2], c[6]);
#define PIXEL02_5 Interp5(pOut+8, c[2], c[6]);
#define PIXEL02_C *((int*)(pOut+8)) = c[5];
#define PIXEL10_1 Interp1(pOut+BpL, c[5], c[4]);
#define PIXEL10_3 Interp3(pOut+BpL, c[5], c[4]);
#define PIXEL10_6 Interp1(pOut+BpL, c[4], c[5]);
#define PIXEL10_C *((int*)(pOut+BpL)) = c[5];
#define PIXEL11 *((int*)(pOut+BpL+4)) = c[5];
#define PIXEL12_1 Interp1(pOut+BpL+8, c[5], c[6]);
#define PIXEL12_3 Interp3(pOut+BpL+8, c[5], c[6]);
#define PIXEL12_6 Interp1(pOut+BpL+8, c[6], c[5]);
#define PIXEL12_C *((int*)(pOut+BpL+8)) = c[5];
#define PIXEL20_1M Interp1(pOut+BpL+BpL, c[5], c[7]);
#define PIXEL20_1D Interp1(pOut+BpL+BpL, c[5], c[8]);
#define PIXEL20_1L Interp1(pOut+BpL+BpL, c[5], c[4]);
#define PIXEL20_2 Interp2(pOut+BpL+BpL, c[5], c[8], c[4]);
#define PIXEL20_4 Interp4(pOut+BpL+BpL, c[5], c[8], c[4]);
#define PIXEL20_5 Interp5(pOut+BpL+BpL, c[8], c[4]);
#define PIXEL20_C *((int*)(pOut+BpL+BpL)) = c[5];
#define PIXEL21_1 Interp1(pOut+BpL+BpL+4, c[5], c[8]);
#define PIXEL21_3 Interp3(pOut+BpL+BpL+4, c[5], c[8]);
#define PIXEL21_6 Interp1(pOut+BpL+BpL+4, c[8], c[5]);
#define PIXEL21_C *((int*)(pOut+BpL+BpL+4)) = c[5];
#define PIXEL22_1M Interp1(pOut+BpL+BpL+8, c[5], c[9]);
#define PIXEL22_1D Interp1(pOut+BpL+BpL+8, c[5], c[8]);
#define PIXEL22_1R Interp1(pOut+BpL+BpL+8, c[5], c[6]);
#define PIXEL22_2 Interp2(pOut+BpL+BpL+8, c[5], c[6], c[8]);
#define PIXEL22_4 Interp4(pOut+BpL+BpL+8, c[5], c[6], c[8]);
#define PIXEL22_5 Interp5(pOut+BpL+BpL+8, c[6], c[8]);
#define PIXEL22_C *((int*)(pOut+BpL+BpL+8)) = c[5];
int Diff(unsigned int w5, unsigned int w1);
void DLL hq3x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL )
HQX_API void HQX_CALLCONV hq3x_32_rb( uint32_t * sp, uint32_t srb, uint32_t * dp, uint32_t drb, int Xres, int Yres )
{
int i, j, k;
int w[10];
unsigned int c[10];
int prevline, nextline;
uint32_t w[10];
int dpL = (drb >> 2);
int spL = (srb >> 2);
uint8_t *sRowP = (uint8_t *) sp;
uint8_t *dRowP = (uint8_t *) dp;
uint32_t yuv1, yuv2;
// +----+----+----+
// | | | |
@ -197,51 +100,58 @@ void DLL hq3x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL )
for (j=0; j<Yres; j++)
{
if (j>0) prevline = -spL; else prevline = 0;
if (j<Yres-1) nextline = spL; else nextline = 0;
for (i=0; i<Xres; i++)
{
if (j==0)
w[2] = *(sp + prevline);
w[5] = *sp;
w[8] = *(sp + nextline);
if (i>0)
{
w[1] = 0;
w[2] = 0;
w[3] = 0;
w[1] = *(sp + prevline - 1);
w[4] = *(sp - 1);
w[7] = *(sp + nextline - 1);
}
else
{
if (i>0) w[1] = *(pIn - Xres - 1); else w[1] = 0;
w[2] = *(pIn - Xres);
if (i<Xres-1) w[3] = *(pIn - Xres + 1); else w[3] = 0;
w[1] = w[2];
w[4] = w[5];
w[7] = w[8];
}
if (i>0) w[4] = *(pIn - 1); else w[4] = 0;
w[5] = *(pIn);
if (i<Xres-1) w[6] = *(pIn + 1); else w[6] = 0;
if (j==Yres-1)
if (i<Xres-1)
{
w[7] = 0;
w[8] = 0;
w[9] = 0;
w[3] = *(sp + prevline + 1);
w[6] = *(sp + 1);
w[9] = *(sp + nextline + 1);
}
else
{
if (i>0) w[7] = *(pIn + Xres - 1); else w[7] = 0;
w[8] = *(pIn + Xres);
if (i<Xres-1) w[9] = *(pIn + Xres + 1); else w[9] = 0;
w[3] = w[2];
w[6] = w[5];
w[9] = w[8];
}
int pattern = 0;
int flag = 1;
if ( Diff(w[5],w[1]) ) pattern |= 0x0001;
if ( Diff(w[5],w[2]) ) pattern |= 0x0002;
if ( Diff(w[5],w[3]) ) pattern |= 0x0004;
if ( Diff(w[5],w[4]) ) pattern |= 0x0008;
if ( Diff(w[5],w[6]) ) pattern |= 0x0010;
if ( Diff(w[5],w[7]) ) pattern |= 0x0020;
if ( Diff(w[5],w[8]) ) pattern |= 0x0040;
if ( Diff(w[5],w[9]) ) pattern |= 0x0080;
yuv1 = rgb_to_yuv(w[5]);
for (k=1; k<=9; k++)
c[k] = LUT16to32[w[k]];
{
if (k==5) continue;
if ( w[k] != w[5] )
{
yuv2 = rgb_to_yuv(w[k]);
if (yuv_diff(yuv1, yuv2))
pattern |= flag;
}
flag <<= 1;
}
switch (pattern)
{
@ -3858,11 +3768,20 @@ void DLL hq3x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL )
break;
}
}
pIn++;
pOut+=12;
sp++;
dp += 3;
}
pOut+=BpL;
pOut+=BpL;
sRowP += srb;
sp = (uint32_t *) sRowP;
dRowP += drb * 3;
dp = (uint32_t *) dRowP;
}
__asm emms
}
/*
 * Convenience entry point for the 3x filter on tightly packed buffers.
 *
 * Forwards to hq3x_32_rb() with a source stride of Xres * 4 bytes and a
 * destination stride of three times that, i.e. both buffers are treated as
 * having no padding between rows of 32-bit pixels.
 *
 * sp   - source pixels, Xres x Yres
 * dp   - destination pixels
 * Xres - source width in pixels
 * Yres - source height in pixels
 */
HQX_API void HQX_CALLCONV hq3x_32( uint32_t * sp, uint32_t * dp, int Xres, int Yres )
{
    const uint32_t src_row_bytes = Xres * 4;
    const uint32_t dst_row_bytes = src_row_bytes * 3;

    hq3x_32_rb(sp, src_row_bytes, dp, dst_row_bytes, Xres, Yres);
}

View File

@ -1,349 +1,178 @@
//hq4x filter demo program
//----------------------------------------------------------
//Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
//
//This program is free software; you can redistribute it and/or
//modify it under the terms of the GNU Lesser General Public
//License as published by the Free Software Foundation; either
//version 2.1 of the License, or (at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//Lesser General Public License for more details.
//
//You should have received a copy of the GNU Lesser General Public
//License along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
/*
* Copyright (C) 2003 Maxim Stepin ( maxst@hiend3d.com )
*
* Copyright (C) 2010 Cameron Zemek ( grom@zeminvaders.net)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdint.h>
#include "common.h"
#include "hqx.h"
#include <stdio.h>
#include <stdlib.h>
#include <conio.h>
#include <string.h>
#include "hqnx.h"
/*
 * Output-pixel macros for the 4x scaler.
 *
 * Naming: PIXEL<r><c>_<op> stores one pixel of the 4x4 output cell, where
 * <r> is the output row and <c> the output column. The macros address the
 * cell through `dp` (column offset +c, row offset +r*dpL), so `dp` and `dpL`
 * must be in scope at the point of use, as must the 3x3 source window
 * `w[1..9]` with `w[5]` as the centre pixel.
 *
 * <op> selects the blend: `0` copies w[5] unchanged; otherwise the first
 * digit is the InterpN helper that is called and the second digit only
 * distinguishes which neighbours are passed to it.
 *
 * Each macro already ends in ';'.
 */
#define PIXEL00_0 *dp = w[5];
#define PIXEL00_11 *dp = Interp1(w[5], w[4]);
#define PIXEL00_12 *dp = Interp1(w[5], w[2]);
#define PIXEL00_20 *dp = Interp2(w[5], w[2], w[4]);
#define PIXEL00_50 *dp = Interp5(w[2], w[4]);
#define PIXEL00_80 *dp = Interp8(w[5], w[1]);
#define PIXEL00_81 *dp = Interp8(w[5], w[4]);
#define PIXEL00_82 *dp = Interp8(w[5], w[2]);
#define PIXEL01_0 *(dp+1) = w[5];
#define PIXEL01_10 *(dp+1) = Interp1(w[5], w[1]);
#define PIXEL01_12 *(dp+1) = Interp1(w[5], w[2]);
#define PIXEL01_14 *(dp+1) = Interp1(w[2], w[5]);
#define PIXEL01_21 *(dp+1) = Interp2(w[2], w[5], w[4]);
#define PIXEL01_31 *(dp+1) = Interp3(w[5], w[4]);
#define PIXEL01_50 *(dp+1) = Interp5(w[2], w[5]);
#define PIXEL01_60 *(dp+1) = Interp6(w[5], w[2], w[4]);
#define PIXEL01_61 *(dp+1) = Interp6(w[5], w[2], w[1]);
#define PIXEL01_82 *(dp+1) = Interp8(w[5], w[2]);
#define PIXEL01_83 *(dp+1) = Interp8(w[2], w[4]);
#define PIXEL02_0 *(dp+2) = w[5];
#define PIXEL02_10 *(dp+2) = Interp1(w[5], w[3]);
#define PIXEL02_11 *(dp+2) = Interp1(w[5], w[2]);
#define PIXEL02_13 *(dp+2) = Interp1(w[2], w[5]);
#define PIXEL02_21 *(dp+2) = Interp2(w[2], w[5], w[6]);
#define PIXEL02_32 *(dp+2) = Interp3(w[5], w[6]);
#define PIXEL02_50 *(dp+2) = Interp5(w[2], w[5]);
#define PIXEL02_60 *(dp+2) = Interp6(w[5], w[2], w[6]);
#define PIXEL02_61 *(dp+2) = Interp6(w[5], w[2], w[3]);
#define PIXEL02_81 *(dp+2) = Interp8(w[5], w[2]);
#define PIXEL02_83 *(dp+2) = Interp8(w[2], w[6]);
#define PIXEL03_0 *(dp+3) = w[5];
#define PIXEL03_11 *(dp+3) = Interp1(w[5], w[2]);
#define PIXEL03_12 *(dp+3) = Interp1(w[5], w[6]);
#define PIXEL03_20 *(dp+3) = Interp2(w[5], w[2], w[6]);
#define PIXEL03_50 *(dp+3) = Interp5(w[2], w[6]);
#define PIXEL03_80 *(dp+3) = Interp8(w[5], w[3]);
#define PIXEL03_81 *(dp+3) = Interp8(w[5], w[2]);
#define PIXEL03_82 *(dp+3) = Interp8(w[5], w[6]);
#define PIXEL10_0 *(dp+dpL) = w[5];
#define PIXEL10_10 *(dp+dpL) = Interp1(w[5], w[1]);
#define PIXEL10_11 *(dp+dpL) = Interp1(w[5], w[4]);
#define PIXEL10_13 *(dp+dpL) = Interp1(w[4], w[5]);
#define PIXEL10_21 *(dp+dpL) = Interp2(w[4], w[5], w[2]);
#define PIXEL10_32 *(dp+dpL) = Interp3(w[5], w[2]);
#define PIXEL10_50 *(dp+dpL) = Interp5(w[4], w[5]);
#define PIXEL10_60 *(dp+dpL) = Interp6(w[5], w[4], w[2]);
#define PIXEL10_61 *(dp+dpL) = Interp6(w[5], w[4], w[1]);
#define PIXEL10_81 *(dp+dpL) = Interp8(w[5], w[4]);
#define PIXEL10_83 *(dp+dpL) = Interp8(w[4], w[2]);
#define PIXEL11_0 *(dp+dpL+1) = w[5];
#define PIXEL11_30 *(dp+dpL+1) = Interp3(w[5], w[1]);
#define PIXEL11_31 *(dp+dpL+1) = Interp3(w[5], w[4]);
#define PIXEL11_32 *(dp+dpL+1) = Interp3(w[5], w[2]);
#define PIXEL11_70 *(dp+dpL+1) = Interp7(w[5], w[4], w[2]);
#define PIXEL12_0 *(dp+dpL+2) = w[5];
#define PIXEL12_30 *(dp+dpL+2) = Interp3(w[5], w[3]);
#define PIXEL12_31 *(dp+dpL+2) = Interp3(w[5], w[2]);
#define PIXEL12_32 *(dp+dpL+2) = Interp3(w[5], w[6]);
#define PIXEL12_70 *(dp+dpL+2) = Interp7(w[5], w[6], w[2]);
#define PIXEL13_0 *(dp+dpL+3) = w[5];
#define PIXEL13_10 *(dp+dpL+3) = Interp1(w[5], w[3]);
#define PIXEL13_12 *(dp+dpL+3) = Interp1(w[5], w[6]);
#define PIXEL13_14 *(dp+dpL+3) = Interp1(w[6], w[5]);
#define PIXEL13_21 *(dp+dpL+3) = Interp2(w[6], w[5], w[2]);
#define PIXEL13_31 *(dp+dpL+3) = Interp3(w[5], w[2]);
#define PIXEL13_50 *(dp+dpL+3) = Interp5(w[6], w[5]);
#define PIXEL13_60 *(dp+dpL+3) = Interp6(w[5], w[6], w[2]);
#define PIXEL13_61 *(dp+dpL+3) = Interp6(w[5], w[6], w[3]);
#define PIXEL13_82 *(dp+dpL+3) = Interp8(w[5], w[6]);
#define PIXEL13_83 *(dp+dpL+3) = Interp8(w[6], w[2]);
#define PIXEL20_0 *(dp+dpL+dpL) = w[5];
#define PIXEL20_10 *(dp+dpL+dpL) = Interp1(w[5], w[7]);
#define PIXEL20_12 *(dp+dpL+dpL) = Interp1(w[5], w[4]);
#define PIXEL20_14 *(dp+dpL+dpL) = Interp1(w[4], w[5]);
#define PIXEL20_21 *(dp+dpL+dpL) = Interp2(w[4], w[5], w[8]);
#define PIXEL20_31 *(dp+dpL+dpL) = Interp3(w[5], w[8]);
#define PIXEL20_50 *(dp+dpL+dpL) = Interp5(w[4], w[5]);
#define PIXEL20_60 *(dp+dpL+dpL) = Interp6(w[5], w[4], w[8]);
#define PIXEL20_61 *(dp+dpL+dpL) = Interp6(w[5], w[4], w[7]);
#define PIXEL20_82 *(dp+dpL+dpL) = Interp8(w[5], w[4]);
#define PIXEL20_83 *(dp+dpL+dpL) = Interp8(w[4], w[8]);
#define PIXEL21_0 *(dp+dpL+dpL+1) = w[5];
#define PIXEL21_30 *(dp+dpL+dpL+1) = Interp3(w[5], w[7]);
#define PIXEL21_31 *(dp+dpL+dpL+1) = Interp3(w[5], w[8]);
#define PIXEL21_32 *(dp+dpL+dpL+1) = Interp3(w[5], w[4]);
#define PIXEL21_70 *(dp+dpL+dpL+1) = Interp7(w[5], w[4], w[8]);
#define PIXEL22_0 *(dp+dpL+dpL+2) = w[5];
#define PIXEL22_30 *(dp+dpL+dpL+2) = Interp3(w[5], w[9]);
#define PIXEL22_31 *(dp+dpL+dpL+2) = Interp3(w[5], w[6]);
#define PIXEL22_32 *(dp+dpL+dpL+2) = Interp3(w[5], w[8]);
#define PIXEL22_70 *(dp+dpL+dpL+2) = Interp7(w[5], w[6], w[8]);
#define PIXEL23_0 *(dp+dpL+dpL+3) = w[5];
#define PIXEL23_10 *(dp+dpL+dpL+3) = Interp1(w[5], w[9]);
#define PIXEL23_11 *(dp+dpL+dpL+3) = Interp1(w[5], w[6]);
#define PIXEL23_13 *(dp+dpL+dpL+3) = Interp1(w[6], w[5]);
#define PIXEL23_21 *(dp+dpL+dpL+3) = Interp2(w[6], w[5], w[8]);
#define PIXEL23_32 *(dp+dpL+dpL+3) = Interp3(w[5], w[8]);
#define PIXEL23_50 *(dp+dpL+dpL+3) = Interp5(w[6], w[5]);
#define PIXEL23_60 *(dp+dpL+dpL+3) = Interp6(w[5], w[6], w[8]);
#define PIXEL23_61 *(dp+dpL+dpL+3) = Interp6(w[5], w[6], w[9]);
#define PIXEL23_81 *(dp+dpL+dpL+3) = Interp8(w[5], w[6]);
#define PIXEL23_83 *(dp+dpL+dpL+3) = Interp8(w[6], w[8]);
#define PIXEL30_0 *(dp+dpL+dpL+dpL) = w[5];
#define PIXEL30_11 *(dp+dpL+dpL+dpL) = Interp1(w[5], w[8]);
#define PIXEL30_12 *(dp+dpL+dpL+dpL) = Interp1(w[5], w[4]);
#define PIXEL30_20 *(dp+dpL+dpL+dpL) = Interp2(w[5], w[8], w[4]);
#define PIXEL30_50 *(dp+dpL+dpL+dpL) = Interp5(w[8], w[4]);
#define PIXEL30_80 *(dp+dpL+dpL+dpL) = Interp8(w[5], w[7]);
#define PIXEL30_81 *(dp+dpL+dpL+dpL) = Interp8(w[5], w[8]);
#define PIXEL30_82 *(dp+dpL+dpL+dpL) = Interp8(w[5], w[4]);
#define PIXEL31_0 *(dp+dpL+dpL+dpL+1) = w[5];
#define PIXEL31_10 *(dp+dpL+dpL+dpL+1) = Interp1(w[5], w[7]);
#define PIXEL31_11 *(dp+dpL+dpL+dpL+1) = Interp1(w[5], w[8]);
#define PIXEL31_13 *(dp+dpL+dpL+dpL+1) = Interp1(w[8], w[5]);
#define PIXEL31_21 *(dp+dpL+dpL+dpL+1) = Interp2(w[8], w[5], w[4]);
#define PIXEL31_32 *(dp+dpL+dpL+dpL+1) = Interp3(w[5], w[4]);
#define PIXEL31_50 *(dp+dpL+dpL+dpL+1) = Interp5(w[8], w[5]);
#define PIXEL31_60 *(dp+dpL+dpL+dpL+1) = Interp6(w[5], w[8], w[4]);
#define PIXEL31_61 *(dp+dpL+dpL+dpL+1) = Interp6(w[5], w[8], w[7]);
#define PIXEL31_81 *(dp+dpL+dpL+dpL+1) = Interp8(w[5], w[8]);
#define PIXEL31_83 *(dp+dpL+dpL+dpL+1) = Interp8(w[8], w[4]);
#define PIXEL32_0 *(dp+dpL+dpL+dpL+2) = w[5];
#define PIXEL32_10 *(dp+dpL+dpL+dpL+2) = Interp1(w[5], w[9]);
#define PIXEL32_12 *(dp+dpL+dpL+dpL+2) = Interp1(w[5], w[8]);
#define PIXEL32_14 *(dp+dpL+dpL+dpL+2) = Interp1(w[8], w[5]);
#define PIXEL32_21 *(dp+dpL+dpL+dpL+2) = Interp2(w[8], w[5], w[6]);
#define PIXEL32_31 *(dp+dpL+dpL+dpL+2) = Interp3(w[5], w[6]);
#define PIXEL32_50 *(dp+dpL+dpL+dpL+2) = Interp5(w[8], w[5]);
#define PIXEL32_60 *(dp+dpL+dpL+dpL+2) = Interp6(w[5], w[8], w[6]);
#define PIXEL32_61 *(dp+dpL+dpL+dpL+2) = Interp6(w[5], w[8], w[9]);
#define PIXEL32_82 *(dp+dpL+dpL+dpL+2) = Interp8(w[5], w[8]);
#define PIXEL32_83 *(dp+dpL+dpL+dpL+2) = Interp8(w[8], w[6]);
#define PIXEL33_0 *(dp+dpL+dpL+dpL+3) = w[5];
#define PIXEL33_11 *(dp+dpL+dpL+dpL+3) = Interp1(w[5], w[6]);
#define PIXEL33_12 *(dp+dpL+dpL+dpL+3) = Interp1(w[5], w[8]);
#define PIXEL33_20 *(dp+dpL+dpL+dpL+3) = Interp2(w[5], w[8], w[6]);
#define PIXEL33_50 *(dp+dpL+dpL+dpL+3) = Interp5(w[8], w[6]);
#define PIXEL33_80 *(dp+dpL+dpL+dpL+3) = Interp8(w[5], w[9]);
#define PIXEL33_81 *(dp+dpL+dpL+dpL+3) = Interp8(w[5], w[6]);
#define PIXEL33_82 *(dp+dpL+dpL+dpL+3) = Interp8(w[5], w[8]);
// Lookup tables filled by InitLUTs(); both hold two banks of 65536 entries.
// LUT16to32: expands a 16-bit 5:6:5 colour to 32 bits; the second bank holds
// the same colours with 0xFF000000 added.
int LUT16to32[65536*2];
// RGBtoYUV: first bank is set to 0xFF000000; the second bank maps a 5:6:5
// colour to (Y<<16) + (u<<8) + v.
int RGBtoYUV[65536*2];
// 64-bit memory operands for the MMX code below.
// reg_blank: all-zero operand used to widen bytes to words and to pack back.
static const __int64 reg_blank = 0;
// constN: the weight N replicated into each of the four 16-bit lanes.
static const __int64 const3 = 0x0003000300030003;
static const __int64 const5 = 0x0005000500050005;
static const __int64 const6 = 0x0006000600060006;
static const __int64 const7 = 0x0007000700070007;
// Per-byte thresholds subtracted in Diff(): 0x30 for Y, 0x07 for u, 0x06 for v
// (byte positions match the (Y<<16)+(u<<8)+v packing of RGBtoYUV).
static const __int64 treshold = 0x0000000000300706;
// Interp1: writes a 3:1 blend of c1 and c2 to the 4 bytes at pc.
// Each of the four 8-bit channels is computed independently as
// (c1*3 + c2) / 4 using MSVC inline MMX assembly.
// Leaves the MMX state dirty (no emms here).
inline void Interp1(unsigned char * pc, int c1, int c2)
{
//*((int*)pc) = (c1*3+c2)/4;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
// widen the four bytes of each colour to 16-bit lanes
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
// mm1 = c1*3 + c2, per lane
pmullw mm1, const3
paddw mm1, mm2
// divide by 4
psrlw mm1, 2
// narrow back to bytes and store 32 bits at pc
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Interp2: writes a 2:1:1 blend of c1, c2 and c3 to the 4 bytes at pc.
// Each 8-bit channel is computed independently as (c1*2 + c2 + c3) / 4
// using MSVC inline MMX assembly. Leaves the MMX state dirty (no emms here).
inline void Interp2(unsigned char * pc, int c1, int c2, int c3)
{
// *((int*)pc) = (c1*2+c2+c3)/4;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
movd mm3, c3
// widen bytes to 16-bit lanes
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
punpcklbw mm3, reg_blank
// c1*2 via shift, then add the other two colours
psllw mm1, 1
paddw mm1, mm2
paddw mm1, mm3
// divide by 4, narrow and store
psrlw mm1, 2
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Interp3: writes a 7:1 blend of c1 and c2 to the 4 bytes at pc.
// Each 8-bit channel is computed independently as (c1*7 + c2) / 8
// using MSVC inline MMX assembly. Leaves the MMX state dirty (no emms here).
inline void Interp3(unsigned char * pc, int c1, int c2)
{
//*((int*)pc) = (c1*7+c2)/8;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
// widen bytes to 16-bit lanes
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
// mm1 = c1*7 + c2, per lane
pmullw mm1, const7
paddw mm1, mm2
// divide by 8, narrow and store
psrlw mm1, 3
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Interp5: writes the average of c1 and c2 to the 4 bytes at pc.
// Each 8-bit channel is computed independently as (c1 + c2) / 2
// using MSVC inline MMX assembly. Leaves the MMX state dirty (no emms here).
inline void Interp5(unsigned char * pc, int c1, int c2)
{
//*((int*)pc) = (c1+c2)/2;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
// widen bytes to 16-bit lanes so the sum cannot overflow a byte
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
paddw mm1, mm2
// divide by 2, narrow and store
psrlw mm1, 1
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Interp6: writes a 5:2:1 blend of c1, c2 and c3 to the 4 bytes at pc.
// Each 8-bit channel is computed independently as (c1*5 + c2*2 + c3) / 8
// using MSVC inline MMX assembly. Leaves the MMX state dirty (no emms here).
inline void Interp6(unsigned char * pc, int c1, int c2, int c3)
{
//*((int*)pc) = (c1*5+c2*2+c3)/8;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
movd mm3, c3
// widen bytes to 16-bit lanes
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
punpcklbw mm3, reg_blank
// c1*5, c2*2 (shift), then sum all three
pmullw mm1, const5
psllw mm2, 1
paddw mm1, mm3
paddw mm1, mm2
// divide by 8, narrow and store
psrlw mm1, 3
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Interp7: writes a 6:1:1 blend of c1, c2 and c3 to the 4 bytes at pc.
// Each 8-bit channel is computed independently as (c1*6 + c2 + c3) / 8
// using MSVC inline MMX assembly. Leaves the MMX state dirty (no emms here).
inline void Interp7(unsigned char * pc, int c1, int c2, int c3)
{
//*((int*)pc) = (c1*6+c2+c3)/8;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
movd mm3, c3
// widen bytes to 16-bit lanes
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
punpcklbw mm3, reg_blank
// c1*6 + (c2 + c3)
pmullw mm1, const6
paddw mm2, mm3
paddw mm1, mm2
// divide by 8, narrow and store
psrlw mm1, 3
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Interp8: writes a 5:3 blend of c1 and c2 to the 4 bytes at pc.
// Each 8-bit channel is computed independently as (c1*5 + c2*3) / 8
// using MSVC inline MMX assembly. Leaves the MMX state dirty (no emms here).
inline void Interp8(unsigned char * pc, int c1, int c2)
{
//*((int*)pc) = (c1*5+c2*3)/8;
__asm
{
mov eax, pc
movd mm1, c1
movd mm2, c2
// widen bytes to 16-bit lanes
punpcklbw mm1, reg_blank
punpcklbw mm2, reg_blank
// c1*5 + c2*3
pmullw mm1, const5
pmullw mm2, const3
paddw mm1, mm2
// divide by 8, narrow and store
psrlw mm1, 3
packuswb mm1, reg_blank
movd [eax], mm1
}
}
// Output-pixel macros for the byte-addressed 4x scaler.
//
// Naming: PIXEL<r><c>_<op> stores one pixel of the 4x4 output cell, where
// <r> is the output row and <c> the output column. The cell is addressed
// through the byte pointer `pOut`: +4 bytes per column, +BpL bytes per row,
// so `pOut` and `BpL` must be in scope at the point of use, as must the
// 32-bit colours `c[1..9]` of the 3x3 source window (c[5] is the centre).
//
// <op> selects the blend: `0` stores c[5] unchanged; otherwise the first
// digit is the InterpN routine that is called and the second digit only
// distinguishes which neighbours are passed to it.
//
// Each macro already ends in ';'.
#define PIXEL00_0 *((int*)(pOut)) = c[5];
#define PIXEL00_11 Interp1(pOut, c[5], c[4]);
#define PIXEL00_12 Interp1(pOut, c[5], c[2]);
#define PIXEL00_20 Interp2(pOut, c[5], c[2], c[4]);
#define PIXEL00_50 Interp5(pOut, c[2], c[4]);
#define PIXEL00_80 Interp8(pOut, c[5], c[1]);
#define PIXEL00_81 Interp8(pOut, c[5], c[4]);
#define PIXEL00_82 Interp8(pOut, c[5], c[2]);
#define PIXEL01_0 *((int*)(pOut+4)) = c[5];
#define PIXEL01_10 Interp1(pOut+4, c[5], c[1]);
#define PIXEL01_12 Interp1(pOut+4, c[5], c[2]);
#define PIXEL01_14 Interp1(pOut+4, c[2], c[5]);
#define PIXEL01_21 Interp2(pOut+4, c[2], c[5], c[4]);
#define PIXEL01_31 Interp3(pOut+4, c[5], c[4]);
#define PIXEL01_50 Interp5(pOut+4, c[2], c[5]);
#define PIXEL01_60 Interp6(pOut+4, c[5], c[2], c[4]);
#define PIXEL01_61 Interp6(pOut+4, c[5], c[2], c[1]);
#define PIXEL01_82 Interp8(pOut+4, c[5], c[2]);
#define PIXEL01_83 Interp8(pOut+4, c[2], c[4]);
#define PIXEL02_0 *((int*)(pOut+8)) = c[5];
#define PIXEL02_10 Interp1(pOut+8, c[5], c[3]);
#define PIXEL02_11 Interp1(pOut+8, c[5], c[2]);
#define PIXEL02_13 Interp1(pOut+8, c[2], c[5]);
#define PIXEL02_21 Interp2(pOut+8, c[2], c[5], c[6]);
#define PIXEL02_32 Interp3(pOut+8, c[5], c[6]);
#define PIXEL02_50 Interp5(pOut+8, c[2], c[5]);
#define PIXEL02_60 Interp6(pOut+8, c[5], c[2], c[6]);
#define PIXEL02_61 Interp6(pOut+8, c[5], c[2], c[3]);
#define PIXEL02_81 Interp8(pOut+8, c[5], c[2]);
#define PIXEL02_83 Interp8(pOut+8, c[2], c[6]);
#define PIXEL03_0 *((int*)(pOut+12)) = c[5];
#define PIXEL03_11 Interp1(pOut+12, c[5], c[2]);
#define PIXEL03_12 Interp1(pOut+12, c[5], c[6]);
#define PIXEL03_20 Interp2(pOut+12, c[5], c[2], c[6]);
#define PIXEL03_50 Interp5(pOut+12, c[2], c[6]);
#define PIXEL03_80 Interp8(pOut+12, c[5], c[3]);
#define PIXEL03_81 Interp8(pOut+12, c[5], c[2]);
#define PIXEL03_82 Interp8(pOut+12, c[5], c[6]);
#define PIXEL10_0 *((int*)(pOut+BpL)) = c[5];
#define PIXEL10_10 Interp1(pOut+BpL, c[5], c[1]);
#define PIXEL10_11 Interp1(pOut+BpL, c[5], c[4]);
#define PIXEL10_13 Interp1(pOut+BpL, c[4], c[5]);
#define PIXEL10_21 Interp2(pOut+BpL, c[4], c[5], c[2]);
#define PIXEL10_32 Interp3(pOut+BpL, c[5], c[2]);
#define PIXEL10_50 Interp5(pOut+BpL, c[4], c[5]);
#define PIXEL10_60 Interp6(pOut+BpL, c[5], c[4], c[2]);
#define PIXEL10_61 Interp6(pOut+BpL, c[5], c[4], c[1]);
#define PIXEL10_81 Interp8(pOut+BpL, c[5], c[4]);
#define PIXEL10_83 Interp8(pOut+BpL, c[4], c[2]);
#define PIXEL11_0 *((int*)(pOut+BpL+4)) = c[5];
#define PIXEL11_30 Interp3(pOut+BpL+4, c[5], c[1]);
#define PIXEL11_31 Interp3(pOut+BpL+4, c[5], c[4]);
#define PIXEL11_32 Interp3(pOut+BpL+4, c[5], c[2]);
#define PIXEL11_70 Interp7(pOut+BpL+4, c[5], c[4], c[2]);
#define PIXEL12_0 *((int*)(pOut+BpL+8)) = c[5];
#define PIXEL12_30 Interp3(pOut+BpL+8, c[5], c[3]);
#define PIXEL12_31 Interp3(pOut+BpL+8, c[5], c[2]);
#define PIXEL12_32 Interp3(pOut+BpL+8, c[5], c[6]);
#define PIXEL12_70 Interp7(pOut+BpL+8, c[5], c[6], c[2]);
#define PIXEL13_0 *((int*)(pOut+BpL+12)) = c[5];
#define PIXEL13_10 Interp1(pOut+BpL+12, c[5], c[3]);
#define PIXEL13_12 Interp1(pOut+BpL+12, c[5], c[6]);
#define PIXEL13_14 Interp1(pOut+BpL+12, c[6], c[5]);
#define PIXEL13_21 Interp2(pOut+BpL+12, c[6], c[5], c[2]);
#define PIXEL13_31 Interp3(pOut+BpL+12, c[5], c[2]);
#define PIXEL13_50 Interp5(pOut+BpL+12, c[6], c[5]);
#define PIXEL13_60 Interp6(pOut+BpL+12, c[5], c[6], c[2]);
#define PIXEL13_61 Interp6(pOut+BpL+12, c[5], c[6], c[3]);
#define PIXEL13_82 Interp8(pOut+BpL+12, c[5], c[6]);
#define PIXEL13_83 Interp8(pOut+BpL+12, c[6], c[2]);
#define PIXEL20_0 *((int*)(pOut+BpL+BpL)) = c[5];
#define PIXEL20_10 Interp1(pOut+BpL+BpL, c[5], c[7]);
#define PIXEL20_12 Interp1(pOut+BpL+BpL, c[5], c[4]);
#define PIXEL20_14 Interp1(pOut+BpL+BpL, c[4], c[5]);
#define PIXEL20_21 Interp2(pOut+BpL+BpL, c[4], c[5], c[8]);
#define PIXEL20_31 Interp3(pOut+BpL+BpL, c[5], c[8]);
#define PIXEL20_50 Interp5(pOut+BpL+BpL, c[4], c[5]);
#define PIXEL20_60 Interp6(pOut+BpL+BpL, c[5], c[4], c[8]);
#define PIXEL20_61 Interp6(pOut+BpL+BpL, c[5], c[4], c[7]);
#define PIXEL20_82 Interp8(pOut+BpL+BpL, c[5], c[4]);
#define PIXEL20_83 Interp8(pOut+BpL+BpL, c[4], c[8]);
#define PIXEL21_0 *((int*)(pOut+BpL+BpL+4)) = c[5];
#define PIXEL21_30 Interp3(pOut+BpL+BpL+4, c[5], c[7]);
#define PIXEL21_31 Interp3(pOut+BpL+BpL+4, c[5], c[8]);
#define PIXEL21_32 Interp3(pOut+BpL+BpL+4, c[5], c[4]);
#define PIXEL21_70 Interp7(pOut+BpL+BpL+4, c[5], c[4], c[8]);
#define PIXEL22_0 *((int*)(pOut+BpL+BpL+8)) = c[5];
#define PIXEL22_30 Interp3(pOut+BpL+BpL+8, c[5], c[9]);
#define PIXEL22_31 Interp3(pOut+BpL+BpL+8, c[5], c[6]);
#define PIXEL22_32 Interp3(pOut+BpL+BpL+8, c[5], c[8]);
#define PIXEL22_70 Interp7(pOut+BpL+BpL+8, c[5], c[6], c[8]);
#define PIXEL23_0 *((int*)(pOut+BpL+BpL+12)) = c[5];
#define PIXEL23_10 Interp1(pOut+BpL+BpL+12, c[5], c[9]);
#define PIXEL23_11 Interp1(pOut+BpL+BpL+12, c[5], c[6]);
#define PIXEL23_13 Interp1(pOut+BpL+BpL+12, c[6], c[5]);
#define PIXEL23_21 Interp2(pOut+BpL+BpL+12, c[6], c[5], c[8]);
#define PIXEL23_32 Interp3(pOut+BpL+BpL+12, c[5], c[8]);
#define PIXEL23_50 Interp5(pOut+BpL+BpL+12, c[6], c[5]);
#define PIXEL23_60 Interp6(pOut+BpL+BpL+12, c[5], c[6], c[8]);
#define PIXEL23_61 Interp6(pOut+BpL+BpL+12, c[5], c[6], c[9]);
#define PIXEL23_81 Interp8(pOut+BpL+BpL+12, c[5], c[6]);
#define PIXEL23_83 Interp8(pOut+BpL+BpL+12, c[6], c[8]);
#define PIXEL30_0 *((int*)(pOut+BpL+BpL+BpL)) = c[5];
#define PIXEL30_11 Interp1(pOut+BpL+BpL+BpL, c[5], c[8]);
#define PIXEL30_12 Interp1(pOut+BpL+BpL+BpL, c[5], c[4]);
#define PIXEL30_20 Interp2(pOut+BpL+BpL+BpL, c[5], c[8], c[4]);
#define PIXEL30_50 Interp5(pOut+BpL+BpL+BpL, c[8], c[4]);
#define PIXEL30_80 Interp8(pOut+BpL+BpL+BpL, c[5], c[7]);
#define PIXEL30_81 Interp8(pOut+BpL+BpL+BpL, c[5], c[8]);
#define PIXEL30_82 Interp8(pOut+BpL+BpL+BpL, c[5], c[4]);
#define PIXEL31_0 *((int*)(pOut+BpL+BpL+BpL+4)) = c[5];
#define PIXEL31_10 Interp1(pOut+BpL+BpL+BpL+4, c[5], c[7]);
#define PIXEL31_11 Interp1(pOut+BpL+BpL+BpL+4, c[5], c[8]);
#define PIXEL31_13 Interp1(pOut+BpL+BpL+BpL+4, c[8], c[5]);
#define PIXEL31_21 Interp2(pOut+BpL+BpL+BpL+4, c[8], c[5], c[4]);
#define PIXEL31_32 Interp3(pOut+BpL+BpL+BpL+4, c[5], c[4]);
#define PIXEL31_50 Interp5(pOut+BpL+BpL+BpL+4, c[8], c[5]);
#define PIXEL31_60 Interp6(pOut+BpL+BpL+BpL+4, c[5], c[8], c[4]);
#define PIXEL31_61 Interp6(pOut+BpL+BpL+BpL+4, c[5], c[8], c[7]);
#define PIXEL31_81 Interp8(pOut+BpL+BpL+BpL+4, c[5], c[8]);
#define PIXEL31_83 Interp8(pOut+BpL+BpL+BpL+4, c[8], c[4]);
#define PIXEL32_0 *((int*)(pOut+BpL+BpL+BpL+8)) = c[5];
#define PIXEL32_10 Interp1(pOut+BpL+BpL+BpL+8, c[5], c[9]);
#define PIXEL32_12 Interp1(pOut+BpL+BpL+BpL+8, c[5], c[8]);
#define PIXEL32_14 Interp1(pOut+BpL+BpL+BpL+8, c[8], c[5]);
#define PIXEL32_21 Interp2(pOut+BpL+BpL+BpL+8, c[8], c[5], c[6]);
#define PIXEL32_31 Interp3(pOut+BpL+BpL+BpL+8, c[5], c[6]);
#define PIXEL32_50 Interp5(pOut+BpL+BpL+BpL+8, c[8], c[5]);
#define PIXEL32_60 Interp6(pOut+BpL+BpL+BpL+8, c[5], c[8], c[6]);
#define PIXEL32_61 Interp6(pOut+BpL+BpL+BpL+8, c[5], c[8], c[9]);
#define PIXEL32_82 Interp8(pOut+BpL+BpL+BpL+8, c[5], c[8]);
#define PIXEL32_83 Interp8(pOut+BpL+BpL+BpL+8, c[8], c[6]);
#define PIXEL33_0 *((int*)(pOut+BpL+BpL+BpL+12)) = c[5];
#define PIXEL33_11 Interp1(pOut+BpL+BpL+BpL+12, c[5], c[6]);
#define PIXEL33_12 Interp1(pOut+BpL+BpL+BpL+12, c[5], c[8]);
#define PIXEL33_20 Interp2(pOut+BpL+BpL+BpL+12, c[5], c[8], c[6]);
#define PIXEL33_50 Interp5(pOut+BpL+BpL+BpL+12, c[8], c[6]);
#define PIXEL33_80 Interp8(pOut+BpL+BpL+BpL+12, c[5], c[9]);
#define PIXEL33_81 Interp8(pOut+BpL+BpL+BpL+12, c[5], c[6]);
#define PIXEL33_82 Interp8(pOut+BpL+BpL+BpL+12, c[5], c[8]);
// The function below has no C-level return statement; its result is left in
// eax by the inline assembly, so MSVC's "no return value" warning (C4035) is
// silenced around it.
#pragma warning(disable: 4035)
// Diff: tells whether two source pixels differ by more than the YUV threshold.
//   w5, w1 - indices into RGBtoYUV (each entry is 4 bytes).
// Returns 0 when w5 == w1 or when every byte of the two table entries differs
// by no more than the matching byte of `treshold`; otherwise returns a
// non-zero value.
// Uses MMX registers and does not execute emms.
int Diff(unsigned int w5, unsigned int w1)
{
__asm
{
// eax = 0 is the "no difference" result used by the early exit
xor eax,eax
mov ebx,w5
mov edx,w1
cmp ebx,edx
je FIN
mov ecx,offset RGBtoYUV
movd mm1,[ecx + ebx*4]
movq mm5,mm1
movd mm2,[ecx + edx*4]
// per-byte |a - b|: saturating subtract in both directions, then OR
psubusb mm1,mm2
psubusb mm2,mm5
por mm1,mm2
// bytes within the threshold saturate to 0; anything left is a difference
psubusb mm1,treshold
movd eax,mm1
FIN:
}
}
// returns result in eax register
#pragma warning(default: 4035)
void DLL hq4x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL )
HQX_API void HQX_CALLCONV hq4x_32_rb( uint32_t * sp, uint32_t srb, uint32_t * dp, uint32_t drb, int Xres, int Yres )
{
int i, j, k;
int w[10];
int c[10];
int prevline, nextline;
uint32_t w[10];
int dpL = (drb >> 2);
int spL = (srb >> 2);
uint8_t *sRowP = (uint8_t *) sp;
uint8_t *dRowP = (uint8_t *) dp;
uint32_t yuv1, yuv2;
// +----+----+----+
// | | | |
@ -358,75 +187,58 @@ void DLL hq4x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL )
for (j=0; j<Yres; j++)
{
if (j>0) prevline = -spL; else prevline = 0;
if (j<Yres-1) nextline = spL; else nextline = 0;
for (i=0; i<Xres; i++)
{
if (j == 0)
w[2] = *(sp + prevline);
w[5] = *sp;
w[8] = *(sp + nextline);
if (i>0)
{
w[1] = 0;
w[2] = 0;
w[3] = 0;
w[1] = *(sp + prevline - 1);
w[4] = *(sp - 1);
w[7] = *(sp + nextline - 1);
}
else
{
if (i > 0)
w[1] = *(pIn - Xres - 1);
else
w[1] = 0;
w[2] = *(pIn - Xres);
if (i < Xres - 1)
w[3] = *(pIn - Xres + 1);
else
w[3] = 0;
w[1] = w[2];
w[4] = w[5];
w[7] = w[8];
}
if (i > 0)
w[4] = *(pIn - 1);
else
w[4] = 0;
w[5] = *(pIn);
if (i<Xres-1)
w[6] = *(pIn + 1);
else
w[6] = 0;
if (j == Yres - 1)
{
w[7] = 0;
w[8] = 0;
w[9] = 0;
w[3] = *(sp + prevline + 1);
w[6] = *(sp + 1);
w[9] = *(sp + nextline + 1);
}
else
{
if (i > 0)
w[7] = *(pIn + Xres - 1);
else
w[7] = 0;
w[8] = *(pIn + Xres);
if (i < Xres-1)
w[9] = *(pIn + Xres + 1);
else
w[9] = 0;
w[3] = w[2];
w[6] = w[5];
w[9] = w[8];
}
int pattern = 0;
int flag = 1;
if ( Diff(w[5],w[1]) ) pattern |= 0x0001;
if ( Diff(w[5],w[2]) ) pattern |= 0x0002;
if ( Diff(w[5],w[3]) ) pattern |= 0x0004;
if ( Diff(w[5],w[4]) ) pattern |= 0x0008;
if ( Diff(w[5],w[6]) ) pattern |= 0x0010;
if ( Diff(w[5],w[7]) ) pattern |= 0x0020;
if ( Diff(w[5],w[8]) ) pattern |= 0x0040;
if ( Diff(w[5],w[9]) ) pattern |= 0x0080;
yuv1 = rgb_to_yuv(w[5]);
for (k=1; k<=9; k++)
c[k] = LUT16to32[w[k]];
{
if (k==5) continue;
if ( w[k] != w[5] )
{
yuv2 = rgb_to_yuv(w[k]);
if (yuv_diff(yuv1, yuv2))
pattern |= flag;
}
flag <<= 1;
}
switch (pattern)
{
@ -5402,68 +5214,20 @@ void DLL hq4x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL )
break;
}
}
pIn++; // next source pixel (just increment since it's an int*)
pOut += 16; // skip 4 pixels (4 bytes * 4 pixels)
}
pOut += BpL; // skip next 3 rows
pOut += BpL;
pOut += BpL;
}
__asm emms
sp++;
dp += 4;
}
void DLL InitLUTs()
sRowP += srb;
sp = (uint32_t *) sRowP;
dRowP += drb * 4;
dp = (uint32_t *) dRowP;
}
}
HQX_API void HQX_CALLCONV hq4x_32( uint32_t * sp, uint32_t * dp, int Xres, int Yres )
{
int i, j, k, r, g, b, Y, u, v;
#if 0 // colorOutlines() after hqresize
for (i=0; i<65536; i++)
LUT16to32[i] = 0x00404040;
for (i=0; i<65536; i++)
LUT16to32[i+65536] = 0xFF000000 + ((i & 0xF800) << 8) + ((i & 0x07E0) << 5) + ((i & 0x001F) << 3);
#else // colorOutlines() before hqresize
for (i=0; i<65536; i++)
LUT16to32[i] = ((i & 0xF800) << 8) + ((i & 0x07E0) << 5) + ((i & 0x001F) << 3);
for (i=0; i<65536; i++)
LUT16to32[i+65536] = 0xFF000000 + LUT16to32[i];
#endif
for (i=0; i<65536; i++)
RGBtoYUV[i] = 0xFF000000;
for (i=0; i<32; i++)
for (j=0; j<64; j++)
for (k=0; k<32; k++)
{
r = i << 3;
g = j << 2;
b = k << 3;
Y = (r + g + b) >> 2;
u = 128 + ((r - b) >> 2);
v = 128 + ((-r + 2*g -b)>>3);
RGBtoYUV[ 65536 + (i << 11) + (j << 5) + k ] = (Y<<16) + (u<<8) + v;
uint32_t rowBytesL = Xres * 4;
hq4x_32_rb(sp, rowBytesL, dp, rowBytesL * 4, Xres, Yres);
}
}
/*
int DLL hq4x_32 ( CImage &ImageIn, CImage &ImageOut )
{
if ( ImageIn.Convert32To17() != 0 )
{
printf( "ERROR: conversion to 17 bit failed\n" );
return 1;
}
if ( ImageOut.Init( ImageIn.m_Xres*4, ImageIn.m_Yres*4, 32 ) != 0 )
{
printf( "ERROR: ImageOut.Init()\n" );
return 1;
};
InitLUTs();
hq4x_32( (int*)ImageIn.m_pBitmap, ImageOut.m_pBitmap, ImageIn.m_Xres, ImageIn.m_Yres, ImageOut.m_Xres*4 );
printf( "\nOK\n" );
return 0;
}
*/

View File

@ -1,35 +0,0 @@
//hqnx filter library
//----------------------------------------------------------
//Copyright (C) 2003 MaxSt ( maxst@hiend3d.com )
//Copyright (C) 2009 Benjamin Berkels
//
//This program is free software; you can redistribute it and/or
//modify it under the terms of the GNU Lesser General Public
//License as published by the Free Software Foundation; either
//version 2.1 of the License, or (at your option) any later version.
//
//This program is distributed in the hope that it will be useful,
//but WITHOUT ANY WARRANTY; without even the implied warranty of
//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
//Lesser General Public License for more details.
//
//You should have received a copy of the GNU Lesser General Public
//License along with this program; if not, write to the Free Software
//Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#ifndef __HQNX_H__
#define __HQNX_H__
#pragma warning(disable:4799)
#include "Image.h"
void DLL hq2x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL );
void DLL hq3x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL );
void DLL hq4x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL );
int DLL hq4x_32 ( CImage &ImageIn, CImage &ImageOut );
void DLL InitLUTs();
#endif //__HQNX_H__

55
src/gl/hqnx/hqx.h Normal file
View File

@ -0,0 +1,55 @@
/*
* Copyright (C) 2003 Maxim Stepin ( maxst@hiend3d.com )
*
* Copyright (C) 2010 Cameron Zemek ( grom@zeminvaders.net)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
 * Public interface of the hqx scalers (hq2x / hq3x / hq4x, 32-bit pixels).
 *
 * NOTE(review): the guard name starts with a double underscore, which is
 * reserved for the implementation in C and C++.
 */
#ifndef __HQX_H_
#define __HQX_H_
#include <stdint.h>
/*
 * HQX_CALLCONV: calling convention of the exported functions.
 * __stdcall under MinGW, empty everywhere else.
 * NOTE(review): callers that store these functions in plain function
 * pointers must use a matching convention on MinGW - confirm at call sites.
 */
#if defined( __GNUC__ )
#ifdef __MINGW32__
#define HQX_CALLCONV __stdcall
#else
#define HQX_CALLCONV
#endif
#else
#define HQX_CALLCONV
#endif
/*
 * HQX_API: import/export decoration.
 * On Windows it is dllexport when DLL_EXPORT is defined and dllimport
 * otherwise; empty on other platforms.
 * NOTE(review): the dllimport default assumes hqx is consumed as a DLL.
 * If the sources are compiled directly into the program, confirm that the
 * toolchain accepts dllimport on the function definitions.
 */
#if defined(_WIN32)
#ifdef DLL_EXPORT
#define HQX_API __declspec(dllexport)
#else
#define HQX_API __declspec(dllimport)
#endif
#else
#define HQX_API
#endif
/* Fills the global RGB -> YUV lookup table; call once before scaling. */
HQX_API void HQX_CALLCONV hqxInit(void);
/*
 * Scale a tightly packed width x height image of 32-bit pixels by 2, 3 or 4.
 * hq3x_32 and hq4x_32 forward to the matching *_rb function with
 * src_rowBytes = width * 4 and dest_rowBytes = src_rowBytes * scale;
 * hq2x_32 presumably does the same (its body is not in this header).
 */
HQX_API void HQX_CALLCONV hq2x_32( uint32_t * src, uint32_t * dest, int width, int height );
HQX_API void HQX_CALLCONV hq3x_32( uint32_t * src, uint32_t * dest, int width, int height );
HQX_API void HQX_CALLCONV hq4x_32( uint32_t * src, uint32_t * dest, int width, int height );
/*
 * Same scalers with explicit row strides.
 * src_rowBytes / dest_rowBytes are the distances in bytes between the starts
 * of consecutive rows in the source and destination buffers.
 */
HQX_API void HQX_CALLCONV hq2x_32_rb( uint32_t * src, uint32_t src_rowBytes, uint32_t * dest, uint32_t dest_rowBytes, int width, int height );
HQX_API void HQX_CALLCONV hq3x_32_rb( uint32_t * src, uint32_t src_rowBytes, uint32_t * dest, uint32_t dest_rowBytes, int width, int height );
HQX_API void HQX_CALLCONV hq4x_32_rb( uint32_t * src, uint32_t src_rowBytes, uint32_t * dest, uint32_t dest_rowBytes, int width, int height );
#endif

38
src/gl/hqnx/init.cpp Normal file
View File

@ -0,0 +1,38 @@
/*
* Copyright (C) 2010 Cameron Zemek ( grom@zeminvaders.net)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdint.h>
#include "hqx.h"
/*
 * RGB -> YUV lookup table, indexed by a 24-bit 0xRRGGBB value.
 * Each entry is packed as (Y << 16) + (U << 8) + V. Filled by hqxInit().
 */
uint32_t RGBtoYUV[16777216];
/* Not referenced by the code in this file; kept because it has external linkage. */
uint32_t YUV1, YUV2;

/*
 * Populate RGBtoYUV for every 24-bit RGB value, 0x000000 through 0xFFFFFF
 * inclusive. Must be called once before any of the scalers is used.
 *
 * Y is in 0..255. U and V are signed chroma values biased by +128 so that
 * they also fit in 0..255.
 */
HQX_API void HQX_CALLCONV hqxInit(void)
{
    /* Initialize RGB to YUV lookup table */
    const uint32_t entries = (uint32_t)(sizeof RGBtoYUV / sizeof RGBtoYUV[0]);
    uint32_t c, r, g, b, y, u, v;

    /* Iterate over the whole table so that 0xFFFFFF (white) is filled too. */
    for (c = 0; c < entries; c++) {
        r = (c & 0xFF0000) >> 16;
        g = (c & 0x00FF00) >> 8;
        b = c & 0x0000FF;
        y = (uint32_t)(0.299*r + 0.587*g + 0.114*b);
        /*
         * The chroma terms can be negative. Converting a negative double
         * straight to an unsigned type is undefined behaviour, so truncate
         * to a signed integer first and apply the +128 bias before storing.
         */
        u = (uint32_t)((int32_t)(-0.169*r - 0.331*g + 0.5*b) + 128);
        v = (uint32_t)((int32_t)(0.5*r - 0.419*g - 0.081*b) + 128);
        RGBtoYUV[c] = (y << 16) + (u << 8) + v;
    }
}

View File

@ -59,21 +59,6 @@ CUSTOM_CVAR (Float, vid_contrast, 1.f, CVAR_ARCHIVE|CVAR_GLOBALCONFIG)
// when they are actually valid.
void gl_SetupMenu()
{
#ifndef _MSC_VER
FOptionValues **opt = OptionValues.CheckKey("HqResizeModes");
if (opt != NULL)
{
for(int i = (*opt)->mValues.Size()-1; i>=0; i--)
{
// Delete HQnX resize modes for non MSVC targets
if ((*opt)->mValues[i].Value >= 4.0)
{
(*opt)->mValues.Delete(i);
}
}
}
#endif
if (gl.shadermodel < 4)
{
// Radial fog and Doom lighting are not available in SM < 4 cards

View File

@ -39,18 +39,11 @@
#include "gl/renderer/gl_renderer.h"
#include "gl/textures/gl_texture.h"
#include "c_cvars.h"
// [BB] hqnx scaling is only supported with the MS compiler.
#if (defined _MSC_VER) && (!defined _WIN64)
#include "gl/hqnx/hqnx.h"
#endif
#include "gl/hqnx/hqx.h"
CUSTOM_CVAR(Int, gl_texture_hqresize, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL)
{
#ifdef _MSC_VER
if (self < 0 || self > 6)
#else
if (self < 0 || self > 3)
#endif
self = 0;
GLRenderer->FlushTextures();
}
@ -186,9 +179,7 @@ static unsigned char *scaleNxHelper( void (*scaleNxFunction) ( uint32* , uint32*
return newBuffer;
}
// [BB] hqnx scaling is only supported with the MS compiler.
#if (defined _MSC_VER) && (!defined _WIN64)
static unsigned char *hqNxHelper( void (*hqNxFunction) ( int*, unsigned char*, int, int, int ),
static unsigned char *hqNxHelper( void (*hqNxFunction) ( unsigned*, unsigned*, int, int ),
const int N,
unsigned char *inputBuffer,
const int inWidth,
@ -200,22 +191,17 @@ static unsigned char *hqNxHelper( void (*hqNxFunction) ( int*, unsigned char*, i
if (!initdone)
{
InitLUTs();
hqxInit();
initdone = true;
}
outWidth = N * inWidth;
outHeight = N *inHeight;
CImage cImageIn;
cImageIn.SetImage(inputBuffer, inWidth, inHeight, 32);
cImageIn.Convert32To17();
unsigned char * newBuffer = new unsigned char[outWidth*outHeight*4];
hqNxFunction( reinterpret_cast<int*>(cImageIn.m_pBitmap), newBuffer, cImageIn.m_Xres, cImageIn.m_Yres, outWidth*4 );
hqNxFunction( reinterpret_cast<unsigned*>(inputBuffer), reinterpret_cast<unsigned*>(newBuffer), inWidth, inHeight );
delete[] inputBuffer;
return newBuffer;
}
#endif
//===========================================================================
//
@ -263,11 +249,13 @@ unsigned char *gl_CreateUpsampledTextureBuffer ( const FTexture *inputTexture, u
outWidth = inWidth;
outHeight = inHeight;
int type = gl_texture_hqresize;
#if 0
// hqNx does not preserve the alpha channel so fall back to ScaleNx for such textures
if (hasAlpha && type > 3)
{
type -= 3;
}
#endif
switch (type)
{
@ -277,15 +265,12 @@ unsigned char *gl_CreateUpsampledTextureBuffer ( const FTexture *inputTexture, u
return scaleNxHelper( &scale3x, 3, inputBuffer, inWidth, inHeight, outWidth, outHeight );
case 3:
return scaleNxHelper( &scale4x, 4, inputBuffer, inWidth, inHeight, outWidth, outHeight );
// [BB] hqnx scaling is only supported with the MS compiler.
#if (defined _MSC_VER) && (!defined _WIN64)
case 4:
return hqNxHelper( &hq2x_32, 2, inputBuffer, inWidth, inHeight, outWidth, outHeight );
case 5:
return hqNxHelper( &hq3x_32, 3, inputBuffer, inWidth, inHeight, outWidth, outHeight );
case 6:
return hqNxHelper( &hq4x_32, 4, inputBuffer, inWidth, inHeight, outWidth, outHeight );
#endif
}
}
return inputBuffer;