//hq3x filter demo program //---------------------------------------------------------- //Copyright (C) 2003 MaxSt ( maxst@hiend3d.com ) //Copyright (C) 2012-2014 Alexey Lysiuk // //This program is free software; you can redistribute it and/or //modify it under the terms of the GNU Lesser General Public //License as published by the Free Software Foundation; either //version 2.1 of the License, or (at your option) any later version. // //This program is distributed in the hope that it will be useful, //but WITHOUT ANY WARRANTY; without even the implied warranty of //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //Lesser General Public License for more details. // //You should have received a copy of the GNU Lesser General Public //License along with this program; if not, write to the Free Software //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "hqnx_asm.h" namespace HQnX_asm { extern int LUT16to32[65536*2]; extern int RGBtoYUV[65536*2]; static const hq_vec const3 = hq_vec::expand(0x0003); static const hq_vec const7 = hq_vec::expand(0x0007); inline void Interp1(unsigned char * pc, int c1, int c2) { //*((int*)pc) = (c1*3+c2)/4; hq_vec result = hq_vec::load(c1); result *= const3; result += hq_vec::load(c2); result >> 2; result.store(pc); } inline void Interp2(unsigned char * pc, int c1, int c2, int c3) { // *((int*)pc) = (c1*2+c2+c3)/4; hq_vec result = hq_vec::load(c1); result << 1; result += hq_vec::load(c2); result += hq_vec::load(c3); result >> 2; result.store(pc); } inline void Interp3(unsigned char * pc, int c1, int c2) { //*((int*)pc) = (c1*7+c2)/8; hq_vec result = hq_vec::load(c1); result *= const7; result += hq_vec::load(c2); result >> 3; result.store(pc); } inline void Interp4(unsigned char * pc, int c1, int c2, int c3) { //*((int*)pc) = (c1*2+(c2+c3)*7)/16; hq_vec result = hq_vec::load(c2); result += hq_vec::load(c3); result *= const7; result += hq_vec::load(c1) << 1; result >> 4; result.store(pc); } 
// Blends c1 and c2 with equal weight and stores the result at pc.
//   pc - destination the blended pixel is stored to
//   c1 - first pixel
//   c2 - second pixel
// NOTE(review): the bare `result >> 1;` statement only has an effect if
// hq_vec::operator>> shifts its left operand in place - confirm in hqnx_asm.h.
inline void Interp5(unsigned char * pc, int c1, int c2)
{
	// Scalar form given by the original author:
	//*((int*)pc) = (c1+c2)/2;
	hq_vec result = hq_vec::load(c1);
	result += hq_vec::load(c2);
	result >> 1;
	result.store(pc);
}

// Output-pixel macros.
//
// PIXELrc_* writes one pixel of the output block: `r` counts how many times
// BpL is added to pOut and `c` selects the byte offset 0, 4 or 8 within that
// row. The suffix selects the operation:
//   _1* -> Interp1 with c[5] first and a neighbour second
//   _2  -> Interp2        _3 -> Interp3
//   _4  -> Interp4        _5 -> Interp5 (c[5] is not used)
//   _6  -> Interp1 with the neighbour first and c[5] second
//   _C  -> plain store of c[5]
// Every macro expects `pOut`, `BpL` and `c` to be in scope where it is
// expanded, and every expansion already ends with its own semicolon.
// NOTE(review): c[5] is presumably the centre pixel of the 3x3 neighbourhood
// and c[1]..c[9] its neighbours in row-major order - confirm against the
// code that fills c[].

// Row 0, byte offset 0
#define PIXEL00_1M Interp1(pOut, c[5], c[1]);
#define PIXEL00_1U Interp1(pOut, c[5], c[2]);
#define PIXEL00_1L Interp1(pOut, c[5], c[4]);
#define PIXEL00_2 Interp2(pOut, c[5], c[4], c[2]);
#define PIXEL00_4 Interp4(pOut, c[5], c[4], c[2]);
#define PIXEL00_5 Interp5(pOut, c[4], c[2]);
#define PIXEL00_C *((int*)(pOut)) = c[5];

// Row 0, byte offset 4
#define PIXEL01_1 Interp1(pOut+4, c[5], c[2]);
#define PIXEL01_3 Interp3(pOut+4, c[5], c[2]);
#define PIXEL01_6 Interp1(pOut+4, c[2], c[5]);
#define PIXEL01_C *((int*)(pOut+4)) = c[5];

// Row 0, byte offset 8
#define PIXEL02_1M Interp1(pOut+8, c[5], c[3]);
#define PIXEL02_1U Interp1(pOut+8, c[5], c[2]);
#define PIXEL02_1R Interp1(pOut+8, c[5], c[6]);
#define PIXEL02_2 Interp2(pOut+8, c[5], c[2], c[6]);
#define PIXEL02_4 Interp4(pOut+8, c[5], c[2], c[6]);
#define PIXEL02_5 Interp5(pOut+8, c[2], c[6]);
#define PIXEL02_C *((int*)(pOut+8)) = c[5];

// Row 1, byte offset 0
#define PIXEL10_1 Interp1(pOut+BpL, c[5], c[4]);
#define PIXEL10_3 Interp3(pOut+BpL, c[5], c[4]);
#define PIXEL10_6 Interp1(pOut+BpL, c[4], c[5]);
#define PIXEL10_C *((int*)(pOut+BpL)) = c[5];

// Row 1, byte offset 4: always a plain store of c[5]
#define PIXEL11 *((int*)(pOut+BpL+4)) = c[5];

// Row 1, byte offset 8
#define PIXEL12_1 Interp1(pOut+BpL+8, c[5], c[6]);
#define PIXEL12_3 Interp3(pOut+BpL+8, c[5], c[6]);
#define PIXEL12_6 Interp1(pOut+BpL+8, c[6], c[5]);
#define PIXEL12_C *((int*)(pOut+BpL+8)) = c[5];

// Row 2, byte offset 0
#define PIXEL20_1M Interp1(pOut+BpL+BpL, c[5], c[7]);
#define PIXEL20_1D Interp1(pOut+BpL+BpL, c[5], c[8]);
#define PIXEL20_1L Interp1(pOut+BpL+BpL, c[5], c[4]);
#define PIXEL20_2 Interp2(pOut+BpL+BpL, c[5], c[8], c[4]);
#define PIXEL20_4 Interp4(pOut+BpL+BpL, c[5], c[8], c[4]);
#define PIXEL20_5 Interp5(pOut+BpL+BpL, c[8], c[4]);
#define PIXEL20_C *((int*)(pOut+BpL+BpL)) = c[5];

// Row 2, byte offset 4
#define PIXEL21_1 Interp1(pOut+BpL+BpL+4, c[5], c[8]);
#define PIXEL21_3 Interp3(pOut+BpL+BpL+4, c[5], c[8]);
// NOTE(review): in this copy of the file the replacement text of PIXEL21_6
// (`Interp1(pOut+BpL+BpL+4, c[8], c[5]);`) sits at the start of the following
// line; it has to be rejoined onto this #define line for the macro to work.
#define PIXEL21_6
Interp1(pOut+BpL+BpL+4, c[8], c[5]); #define PIXEL21_C *((int*)(pOut+BpL+BpL+4)) = c[5]; #define PIXEL22_1M Interp1(pOut+BpL+BpL+8, c[5], c[9]); #define PIXEL22_1D Interp1(pOut+BpL+BpL+8, c[5], c[8]); #define PIXEL22_1R Interp1(pOut+BpL+BpL+8, c[5], c[6]); #define PIXEL22_2 Interp2(pOut+BpL+BpL+8, c[5], c[6], c[8]); #define PIXEL22_4 Interp4(pOut+BpL+BpL+8, c[5], c[6], c[8]); #define PIXEL22_5 Interp5(pOut+BpL+BpL+8, c[6], c[8]); #define PIXEL22_C *((int*)(pOut+BpL+BpL+8)) = c[5]; bool Diff(const unsigned int, const unsigned int); void DLL hq3x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL ) { int i, j, k; int w[10]; unsigned int c[10]; // +----+----+----+ // | | | | // | w1 | w2 | w3 | // +----+----+----+ // | | | | // | w4 | w5 | w6 | // +----+----+----+ // | | | | // | w7 | w8 | w9 | // +----+----+----+ for (j=0; j0) w[1] = *(pIn - Xres - 1); else w[1] = 0; w[2] = *(pIn - Xres); if (i0) w[4] = *(pIn - 1); else w[4] = 0; w[5] = *(pIn); if (i0) w[7] = *(pIn + Xres - 1); else w[7] = 0; w[8] = *(pIn + Xres); if (i