//hq3x filter demo program //---------------------------------------------------------- //Copyright (C) 2003 MaxSt ( maxst@hiend3d.com ) // //This program is free software; you can redistribute it and/or //modify it under the terms of the GNU Lesser General Public //License as published by the Free Software Foundation; either //version 2.1 of the License, or (at your option) any later version. // //This program is distributed in the hope that it will be useful, //but WITHOUT ANY WARRANTY; without even the implied warranty of //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //Lesser General Public License for more details. // //You should have received a copy of the GNU Lesser General Public //License along with this program; if not, write to the Free Software //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "hqnx_asm.h" namespace HQnX_asm { extern int LUT16to32[65536*2]; extern int RGBtoYUV[65536*2]; static const __int64 reg_blank = 0; static const __int64 const3 = 0x0003000300030003; static const __int64 const7 = 0x0007000700070007; static const __int64 treshold = 0x0000000000300706; inline void Interp1(unsigned char * pc, int c1, int c2) { //*((int*)pc) = (c1*3+c2)/4; __asm { mov eax, pc movd mm1, c1 movd mm2, c2 punpcklbw mm1, reg_blank punpcklbw mm2, reg_blank pmullw mm1, const3 paddw mm1, mm2 psrlw mm1, 2 packuswb mm1, reg_blank movd [eax], mm1 } } inline void Interp2(unsigned char * pc, int c1, int c2, int c3) { // *((int*)pc) = (c1*2+c2+c3)/4; __asm { mov eax, pc movd mm1, c1 movd mm2, c2 movd mm3, c3 punpcklbw mm1, reg_blank punpcklbw mm2, reg_blank punpcklbw mm3, reg_blank psllw mm1, 1 paddw mm1, mm2 paddw mm1, mm3 psrlw mm1, 2 packuswb mm1, reg_blank movd [eax], mm1 } } inline void Interp3(unsigned char * pc, int c1, int c2) { //*((int*)pc) = (c1*7+c2)/8; __asm { mov eax, pc movd mm1, c1 movd mm2, c2 punpcklbw mm1, reg_blank punpcklbw mm2, reg_blank pmullw mm1, const7 paddw mm1, mm2 psrlw mm1, 3 packuswb mm1, reg_blank movd [eax], mm1 } } inline void Interp4(unsigned char * pc, int c1, int c2, int c3) { //*((int*)pc) = (c1*2+(c2+c3)*7)/16; __asm { mov eax, pc movd mm1, c1 movd mm2, c2 movd mm3, c3 punpcklbw mm1, reg_blank punpcklbw mm2, reg_blank punpcklbw mm3, reg_blank psllw mm1, 1 paddw mm2, mm3 pmullw mm2, const7 paddw mm1, mm2 psrlw mm1, 4 packuswb mm1, reg_blank movd [eax], mm1 } } inline void Interp5(unsigned char * pc, int c1, int c2) { //*((int*)pc) = (c1+c2)/2; __asm { mov eax, pc movd mm1, c1 movd mm2, c2 punpcklbw mm1, reg_blank punpcklbw mm2, reg_blank paddw mm1, mm2 psrlw mm1, 1 packuswb mm1, reg_blank movd [eax], mm1 } } #define PIXEL00_1M Interp1(pOut, c[5], c[1]); #define PIXEL00_1U Interp1(pOut, c[5], c[2]); #define PIXEL00_1L Interp1(pOut, c[5], c[4]); #define PIXEL00_2 Interp2(pOut, c[5], c[4], c[2]); #define PIXEL00_4 Interp4(pOut, c[5], c[4], c[2]); #define PIXEL00_5 Interp5(pOut, c[4], c[2]); #define PIXEL00_C *((int*)(pOut)) = c[5]; #define PIXEL01_1 Interp1(pOut+4, c[5], c[2]); #define PIXEL01_3 Interp3(pOut+4, c[5], c[2]); #define PIXEL01_6 Interp1(pOut+4, c[2], c[5]); #define PIXEL01_C *((int*)(pOut+4)) = c[5]; #define PIXEL02_1M Interp1(pOut+8, c[5], c[3]); #define PIXEL02_1U Interp1(pOut+8, c[5], c[2]); #define PIXEL02_1R Interp1(pOut+8, c[5], c[6]); #define PIXEL02_2 Interp2(pOut+8, c[5], c[2], c[6]); #define PIXEL02_4 Interp4(pOut+8, c[5], c[2], c[6]); #define PIXEL02_5 Interp5(pOut+8, c[2], c[6]); #define PIXEL02_C *((int*)(pOut+8)) = c[5]; #define PIXEL10_1 Interp1(pOut+BpL, c[5], c[4]); #define PIXEL10_3 Interp3(pOut+BpL, c[5], c[4]); #define PIXEL10_6 Interp1(pOut+BpL, c[4], c[5]); #define PIXEL10_C *((int*)(pOut+BpL)) = c[5]; #define PIXEL11 *((int*)(pOut+BpL+4)) = c[5]; #define PIXEL12_1 Interp1(pOut+BpL+8, c[5], c[6]); #define PIXEL12_3 Interp3(pOut+BpL+8, c[5], c[6]); #define PIXEL12_6 Interp1(pOut+BpL+8, c[6], c[5]); #define PIXEL12_C *((int*)(pOut+BpL+8)) = c[5]; #define PIXEL20_1M Interp1(pOut+BpL+BpL, c[5], c[7]); #define PIXEL20_1D Interp1(pOut+BpL+BpL, c[5], c[8]); #define PIXEL20_1L Interp1(pOut+BpL+BpL, c[5], c[4]); #define PIXEL20_2 Interp2(pOut+BpL+BpL, c[5], c[8], c[4]); #define PIXEL20_4 Interp4(pOut+BpL+BpL, c[5], c[8], c[4]); #define PIXEL20_5 Interp5(pOut+BpL+BpL, c[8], c[4]); #define PIXEL20_C *((int*)(pOut+BpL+BpL)) = c[5]; #define PIXEL21_1 Interp1(pOut+BpL+BpL+4, c[5], c[8]); #define PIXEL21_3 Interp3(pOut+BpL+BpL+4, c[5], c[8]); #define PIXEL21_6 Interp1(pOut+BpL+BpL+4, c[8], c[5]); #define PIXEL21_C *((int*)(pOut+BpL+BpL+4)) = c[5]; #define PIXEL22_1M Interp1(pOut+BpL+BpL+8, c[5], c[9]); #define PIXEL22_1D Interp1(pOut+BpL+BpL+8, c[5], c[8]); #define PIXEL22_1R Interp1(pOut+BpL+BpL+8, c[5], c[6]); #define PIXEL22_2 Interp2(pOut+BpL+BpL+8, c[5], c[6], c[8]); #define PIXEL22_4 Interp4(pOut+BpL+BpL+8, c[5], c[6], c[8]); #define PIXEL22_5 Interp5(pOut+BpL+BpL+8, c[6], c[8]); #define PIXEL22_C *((int*)(pOut+BpL+BpL+8)) = c[5]; int Diff(unsigned int w5, unsigned int w1); void DLL hq3x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL ) { int i, j, k; int w[10]; unsigned int c[10]; // +----+----+----+ // | | | | // | w1 | w2 | w3 | // +----+----+----+ // | | | | // | w4 | w5 | w6 | // +----+----+----+ // | | | | // | w7 | w8 | w9 | // +----+----+----+ for (j=0; j0) w[1] = *(pIn - Xres - 1); else w[1] = 0; w[2] = *(pIn - Xres); if (i0) w[4] = *(pIn - 1); else w[4] = 0; w[5] = *(pIn); if (i0) w[7] = *(pIn + Xres - 1); else w[7] = 0; w[8] = *(pIn + Xres); if (i