//hq2x filter demo program //---------------------------------------------------------- //Copyright (C) 2003 MaxSt ( maxst@hiend3d.com ) // //This program is free software; you can redistribute it and/or //modify it under the terms of the GNU Lesser General Public //License as published by the Free Software Foundation; either //version 2.1 of the License, or (at your option) any later version. // //This program is distributed in the hope that it will be useful, //but WITHOUT ANY WARRANTY; without even the implied warranty of //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //Lesser General Public License for more details. // //You should have received a copy of the GNU Lesser General Public //License along with this program; if not, write to the Free Software //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "hqnx_asm.h" namespace HQnX_asm { extern int LUT16to32[65536*2]; extern int RGBtoYUV[65536*2]; static const __int64 reg_blank = 0; static const __int64 const3 = 0x0003000300030003; static const __int64 const5 = 0x0005000500050005; static const __int64 const6 = 0x0006000600060006; static const __int64 const14 = 0x000E000E000E000E; static const __int64 treshold = 0x0000000000300706; inline void Interp1(unsigned char * pc, int c1, int c2) { //*((int*)pc) = (c1*3+c2)/4; __asm { mov eax, pc movd mm1, c1 movd mm2, c2 punpcklbw mm1, reg_blank punpcklbw mm2, reg_blank pmullw mm1, const3 paddw mm1, mm2 psrlw mm1, 2 packuswb mm1, reg_blank movd [eax], mm1 } } inline void Interp2(unsigned char * pc, int c1, int c2, int c3) { //*((int*)pc) = (c1*2+c2+c3) >> 2; __asm { mov eax, pc movd mm1, c1 movd mm2, c2 movd mm3, c3 punpcklbw mm1, reg_blank punpcklbw mm2, reg_blank punpcklbw mm3, reg_blank psllw mm1, 1 paddw mm1, mm2 paddw mm1, mm3 psrlw mm1, 2 packuswb mm1, reg_blank movd [eax], mm1 } } inline void Interp5(unsigned char * pc, int c1, int c2) { //*((int*)pc) = (c1+c2)/2; __asm { mov eax, pc movd mm1, c1 movd mm2, c2 punpcklbw mm1, reg_blank punpcklbw mm2, reg_blank paddw mm1, mm2 psrlw mm1, 1 packuswb mm1, reg_blank movd [eax], mm1 } } inline void Interp6(unsigned char * pc, int c1, int c2, int c3) { //*((int*)pc) = (c1*5+c2*2+c3)/8; __asm { mov eax, pc movd mm1, c1 movd mm2, c2 movd mm3, c3 punpcklbw mm1, reg_blank punpcklbw mm2, reg_blank punpcklbw mm3, reg_blank pmullw mm1, const5 psllw mm2, 1 paddw mm1, mm3 paddw mm1, mm2 psrlw mm1, 3 packuswb mm1, reg_blank movd [eax], mm1 } } inline void Interp7(unsigned char * pc, int c1, int c2, int c3) { //*((int*)pc) = (c1*6+c2+c3)/8; __asm { mov eax, pc movd mm1, c1 movd mm2, c2 movd mm3, c3 punpcklbw mm1, reg_blank punpcklbw mm2, reg_blank punpcklbw mm3, reg_blank pmullw mm1, const6 paddw mm2, mm3 paddw mm1, mm2 psrlw mm1, 3 packuswb mm1, reg_blank movd [eax], mm1 } } inline void Interp9(unsigned char * pc, int c1, int c2, int c3) { //*((int*)pc) = (c1*2+(c2+c3)*3)/8; __asm { mov eax, pc movd mm1, c1 movd mm2, c2 movd mm3, c3 punpcklbw mm1, reg_blank punpcklbw mm2, reg_blank punpcklbw mm3, reg_blank psllw mm1, 1 paddw mm2, mm3 pmullw mm2, const3 paddw mm1, mm2 psrlw mm1, 3 packuswb mm1, reg_blank movd [eax], mm1 } } inline void Interp10(unsigned char * pc, int c1, int c2, int c3) { //*((int*)pc) = (c1*14+c2+c3)/16; __asm { mov eax, pc movd mm1, c1 movd mm2, c2 movd mm3, c3 punpcklbw mm1, reg_blank punpcklbw mm2, reg_blank punpcklbw mm3, reg_blank pmullw mm1, const14 paddw mm2, mm3 paddw mm1, mm2 psrlw mm1, 4 packuswb mm1, reg_blank movd [eax], mm1 } } #define PIXEL00_0 *((int*)(pOut)) = c[5]; #define PIXEL00_10 Interp1(pOut, c[5], c[1]); #define PIXEL00_11 Interp1(pOut, c[5], c[4]); #define PIXEL00_12 Interp1(pOut, c[5], c[2]); #define PIXEL00_20 Interp2(pOut, c[5], c[4], c[2]); #define PIXEL00_21 Interp2(pOut, c[5], c[1], c[2]); #define PIXEL00_22 Interp2(pOut, c[5], c[1], c[4]); #define PIXEL00_60 Interp6(pOut, c[5], c[2], c[4]); #define PIXEL00_61 Interp6(pOut, c[5], c[4], c[2]); #define PIXEL00_70 Interp7(pOut, c[5], c[4], c[2]); #define PIXEL00_90 Interp9(pOut, c[5], c[4], c[2]); #define PIXEL00_100 Interp10(pOut, c[5], c[4], c[2]); #define PIXEL01_0 *((int*)(pOut+4)) = c[5]; #define PIXEL01_10 Interp1(pOut+4, c[5], c[3]); #define PIXEL01_11 Interp1(pOut+4, c[5], c[2]); #define PIXEL01_12 Interp1(pOut+4, c[5], c[6]); #define PIXEL01_20 Interp2(pOut+4, c[5], c[2], c[6]); #define PIXEL01_21 Interp2(pOut+4, c[5], c[3], c[6]); #define PIXEL01_22 Interp2(pOut+4, c[5], c[3], c[2]); #define PIXEL01_60 Interp6(pOut+4, c[5], c[6], c[2]); #define PIXEL01_61 Interp6(pOut+4, c[5], c[2], c[6]); #define PIXEL01_70 Interp7(pOut+4, c[5], c[2], c[6]); #define PIXEL01_90 Interp9(pOut+4, c[5], c[2], c[6]); #define PIXEL01_100 Interp10(pOut+4, c[5], c[2], c[6]); #define PIXEL10_0 *((int*)(pOut+BpL)) = c[5]; #define PIXEL10_10 Interp1(pOut+BpL, c[5], c[7]); #define PIXEL10_11 Interp1(pOut+BpL, c[5], c[8]); #define PIXEL10_12 Interp1(pOut+BpL, c[5], c[4]); #define PIXEL10_20 Interp2(pOut+BpL, c[5], c[8], c[4]); #define PIXEL10_21 Interp2(pOut+BpL, c[5], c[7], c[4]); #define PIXEL10_22 Interp2(pOut+BpL, c[5], c[7], c[8]); #define PIXEL10_60 Interp6(pOut+BpL, c[5], c[4], c[8]); #define PIXEL10_61 Interp6(pOut+BpL, c[5], c[8], c[4]); #define PIXEL10_70 Interp7(pOut+BpL, c[5], c[8], c[4]); #define PIXEL10_90 Interp9(pOut+BpL, c[5], c[8], c[4]); #define PIXEL10_100 Interp10(pOut+BpL, c[5], c[8], c[4]); #define PIXEL11_0 *((int*)(pOut+BpL+4)) = c[5]; #define PIXEL11_10 Interp1(pOut+BpL+4, c[5], c[9]); #define PIXEL11_11 Interp1(pOut+BpL+4, c[5], c[6]); #define PIXEL11_12 Interp1(pOut+BpL+4, c[5], c[8]); #define PIXEL11_20 Interp2(pOut+BpL+4, c[5], c[6], c[8]); #define PIXEL11_21 Interp2(pOut+BpL+4, c[5], c[9], c[8]); #define PIXEL11_22 Interp2(pOut+BpL+4, c[5], c[9], c[6]); #define PIXEL11_60 Interp6(pOut+BpL+4, c[5], c[8], c[6]); #define PIXEL11_61 Interp6(pOut+BpL+4, c[5], c[6], c[8]); #define PIXEL11_70 Interp7(pOut+BpL+4, c[5], c[6], c[8]); #define PIXEL11_90 Interp9(pOut+BpL+4, c[5], c[6], c[8]); #define PIXEL11_100 Interp10(pOut+BpL+4, c[5], c[6], c[8]); int Diff(unsigned int w5, unsigned int w1); void DLL hq2x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL ) { int i, j, k; int w[10]; unsigned int c[10]; // +----+----+----+ // | | | | // | w1 | w2 | w3 | // +----+----+----+ // | | | | // | w4 | w5 | w6 | // +----+----+----+ // | | | | // | w7 | w8 | w9 | // +----+----+----+ for (j=0; j0) { w[1] = *(pIn - Xres - 1); } else { w[1] = 0; } w[2] = *(pIn - Xres); if (i0) { w[4] = *(pIn - 1); } else { w[4] = 0; } w[5] = *(pIn); if (i0) { w[7] = *(pIn + Xres - 1); } else { w[7] = 0; } w[8] = *(pIn + Xres); if (i