//hq2x filter demo program //---------------------------------------------------------- //Copyright (C) 2003 MaxSt ( maxst@hiend3d.com ) //Copyright (C) 2012-2014 Alexey Lysiuk // //This program is free software; you can redistribute it and/or //modify it under the terms of the GNU Lesser General Public //License as published by the Free Software Foundation; either //version 2.1 of the License, or (at your option) any later version. // //This program is distributed in the hope that it will be useful, //but WITHOUT ANY WARRANTY; without even the implied warranty of //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //Lesser General Public License for more details. // //You should have received a copy of the GNU Lesser General Public //License along with this program; if not, write to the Free Software //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA #include "hqnx_asm.h" namespace HQnX_asm { extern int LUT16to32[65536*2]; extern int RGBtoYUV[65536*2]; static const hq_vec const3 = hq_vec::expand(0x0003); static const hq_vec const5 = hq_vec::expand(0x0005); static const hq_vec const6 = hq_vec::expand(0x0006); static const hq_vec const14 = hq_vec::expand(0x000E); inline void Interp1(unsigned char * pc, int c1, int c2) { //*((int*)pc) = (c1*3+c2)/4; hq_vec result = hq_vec::load(c1); result *= const3; result += hq_vec::load(c2); result >> 2; result.store(pc); } inline void Interp2(unsigned char * pc, int c1, int c2, int c3) { //*((int*)pc) = (c1*2+c2+c3) >> 2; hq_vec result = hq_vec::load(c1); result << 1; result += hq_vec::load(c2); result += hq_vec::load(c3); result >> 2; result.store(pc); } inline void Interp5(unsigned char * pc, int c1, int c2) { //*((int*)pc) = (c1+c2)/2; hq_vec result = hq_vec::load(c1); result += hq_vec::load(c2); result >> 1; result.store(pc); } inline void Interp6(unsigned char * pc, int c1, int c2, int c3) { //*((int*)pc) = (c1*5+c2*2+c3)/8; hq_vec result = hq_vec::load(c1); result *= const5; result += hq_vec::load(c2) << 1; result += hq_vec::load(c3); result >> 3; result.store(pc); } inline void Interp7(unsigned char * pc, int c1, int c2, int c3) { //*((int*)pc) = (c1*6+c2+c3)/8; hq_vec result = hq_vec::load(c1); result *= const6; result += hq_vec::load(c2); result += hq_vec::load(c3); result >> 3; result.store(pc); } inline void Interp9(unsigned char * pc, int c1, int c2, int c3) { //*((int*)pc) = (c1*2+(c2+c3)*3)/8; hq_vec result = hq_vec::load(c2); result += hq_vec::load(c3); result *= const3; result += hq_vec::load(c1) << 1; result >> 3; result.store(pc); } inline void Interp10(unsigned char * pc, int c1, int c2, int c3) { //*((int*)pc) = (c1*14+c2+c3)/16; hq_vec result = hq_vec::load(c1); result *= const14; result += hq_vec::load(c2); result += hq_vec::load(c3); result >> 4; result.store(pc); } #define PIXEL00_0 *((int*)(pOut)) = c[5]; #define PIXEL00_10 Interp1(pOut, c[5], c[1]); #define PIXEL00_11 Interp1(pOut, c[5], c[4]); #define PIXEL00_12 Interp1(pOut, c[5], c[2]); #define PIXEL00_20 Interp2(pOut, c[5], c[4], c[2]); #define PIXEL00_21 Interp2(pOut, c[5], c[1], c[2]); #define PIXEL00_22 Interp2(pOut, c[5], c[1], c[4]); #define PIXEL00_60 Interp6(pOut, c[5], c[2], c[4]); #define PIXEL00_61 Interp6(pOut, c[5], c[4], c[2]); #define PIXEL00_70 Interp7(pOut, c[5], c[4], c[2]); #define PIXEL00_90 Interp9(pOut, c[5], c[4], c[2]); #define PIXEL00_100 Interp10(pOut, c[5], c[4], c[2]); #define PIXEL01_0 *((int*)(pOut+4)) = c[5]; #define PIXEL01_10 Interp1(pOut+4, c[5], c[3]); #define PIXEL01_11 Interp1(pOut+4, c[5], c[2]); #define PIXEL01_12 Interp1(pOut+4, c[5], c[6]); #define PIXEL01_20 Interp2(pOut+4, c[5], c[2], c[6]); #define PIXEL01_21 Interp2(pOut+4, c[5], c[3], c[6]); #define PIXEL01_22 Interp2(pOut+4, c[5], c[3], c[2]); #define PIXEL01_60 Interp6(pOut+4, c[5], c[6], c[2]); #define PIXEL01_61 Interp6(pOut+4, c[5], c[2], c[6]); #define PIXEL01_70 Interp7(pOut+4, c[5], c[2], c[6]); #define PIXEL01_90 Interp9(pOut+4, c[5], c[2], c[6]); #define PIXEL01_100 Interp10(pOut+4, c[5], c[2], c[6]); #define PIXEL10_0 *((int*)(pOut+BpL)) = c[5]; #define PIXEL10_10 Interp1(pOut+BpL, c[5], c[7]); #define PIXEL10_11 Interp1(pOut+BpL, c[5], c[8]); #define PIXEL10_12 Interp1(pOut+BpL, c[5], c[4]); #define PIXEL10_20 Interp2(pOut+BpL, c[5], c[8], c[4]); #define PIXEL10_21 Interp2(pOut+BpL, c[5], c[7], c[4]); #define PIXEL10_22 Interp2(pOut+BpL, c[5], c[7], c[8]); #define PIXEL10_60 Interp6(pOut+BpL, c[5], c[4], c[8]); #define PIXEL10_61 Interp6(pOut+BpL, c[5], c[8], c[4]); #define PIXEL10_70 Interp7(pOut+BpL, c[5], c[8], c[4]); #define PIXEL10_90 Interp9(pOut+BpL, c[5], c[8], c[4]); #define PIXEL10_100 Interp10(pOut+BpL, c[5], c[8], c[4]); #define PIXEL11_0 *((int*)(pOut+BpL+4)) = c[5]; #define PIXEL11_10 Interp1(pOut+BpL+4, c[5], c[9]); #define PIXEL11_11 Interp1(pOut+BpL+4, c[5], c[6]); #define PIXEL11_12 Interp1(pOut+BpL+4, c[5], c[8]); #define PIXEL11_20 Interp2(pOut+BpL+4, c[5], c[6], c[8]); #define PIXEL11_21 Interp2(pOut+BpL+4, c[5], c[9], c[8]); #define PIXEL11_22 Interp2(pOut+BpL+4, c[5], c[9], c[6]); #define PIXEL11_60 Interp6(pOut+BpL+4, c[5], c[8], c[6]); #define PIXEL11_61 Interp6(pOut+BpL+4, c[5], c[6], c[8]); #define PIXEL11_70 Interp7(pOut+BpL+4, c[5], c[6], c[8]); #define PIXEL11_90 Interp9(pOut+BpL+4, c[5], c[6], c[8]); #define PIXEL11_100 Interp10(pOut+BpL+4, c[5], c[6], c[8]); bool Diff(const unsigned int, const unsigned int); void DLL hq2x_32( int * pIn, unsigned char * pOut, int Xres, int Yres, int BpL ) { int i, j, k; int w[10]; unsigned int c[10]; // +----+----+----+ // | | | | // | w1 | w2 | w3 | // +----+----+----+ // | | | | // | w4 | w5 | w6 | // +----+----+----+ // | | | | // | w7 | w8 | w9 | // +----+----+----+ for (j=0; j0) { w[1] = *(pIn - Xres - 1); } else { w[1] = 0; } w[2] = *(pIn - Xres); if (i0) { w[4] = *(pIn - 1); } else { w[4] = 0; } w[5] = *(pIn); if (i0) { w[7] = *(pIn + Xres - 1); } else { w[7] = 0; } w[8] = *(pIn + Xres); if (i