mirror of
https://github.com/gnustep/libs-back.git
synced 2025-04-22 23:42:16 +00:00
Blitter optimizations and rounding fixes.
git-svn-id: svn+ssh://svn.gna.org/svn/gnustep/libs/back/trunk@18360 72102866-910b-0410-8b05-ffd578937521
This commit is contained in:
parent
27190e1de4
commit
2291618e83
3 changed files with 418 additions and 32 deletions
13
ChangeLog
13
ChangeLog
|
@ -1,3 +1,16 @@
|
|||
2004-01-10 16:25 Alexander Malmberg <alexander@malmberg.org>
|
||||
|
||||
* Source/art/blit.m: Replace uses of DI_16_B5G5R5A1 and DI_16_B5G6R5
|
||||
with uses of DI_16_B5_G5_R5_A1 and DI_16_B5_G6_R5.
|
||||
(sover_ao): Add an optimized version for the 16/15 bpp modes.
|
||||
(satop_aa): Simplify calculation of da'.
|
||||
(datop_aa): Simplify calculation of da'. Fix the rounding.
|
||||
(xor_aa): Fix the rounding.
|
||||
(DI_16_B5_G5_R5_A1, DI_16_B5_G6_R5): Unpack pixels in a more
|
||||
efficient way.
|
||||
|
||||
* Source/art/blit_scrapheap.m: New file.
|
||||
|
||||
2004-01-07 14:51 Alexander Malmberg <alexander@malmberg.org>
|
||||
|
||||
* Source/art/composite.m (-_composite_func::::::): If the source has
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
Copyright (C) 2002 Free Software Foundation, Inc.
|
||||
Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
|
||||
|
||||
Author: Alexander Malmberg <alexander@malmberg.org>
|
||||
|
||||
|
@ -47,6 +47,13 @@ still need to fix all the remaining spots
|
|||
rounding is way off (but not more than 1, hopefully) for near-boundary
|
||||
cases. at least pure black stays pure black and pure white stays pure white
|
||||
|
||||
2004-01-10: Test suite now says that all composite functions are 'correct',
|
||||
in the sense that the difference from the correct result is never larger
|
||||
than 0.5, and that's the best I can get with finite precision. :)
|
||||
|
||||
(However, should probably check whether the results here always match the
|
||||
correctly rounded correct result.)
|
||||
|
||||
TODO: (optional?) proper gamma handling?
|
||||
|
||||
|
||||
|
@ -275,7 +282,7 @@ static void MPRE(blit_mono_a) (unsigned char *adst, unsigned char *dsta,
|
|||
nb = (b * a + nb * (255 - a) + 0xff) >> 8;
|
||||
BLEND_WRITE(dst, nr, ng, nb)
|
||||
BLEND_INC(dst)
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
BLEND_INC(dst)
|
||||
|
@ -336,7 +343,7 @@ static void MPRE(blit_subpixel) (unsigned char *adst, const unsigned char *asrc,
|
|||
|
||||
static void MPRE(run_opaque) (render_run_t *ri, int num)
|
||||
{
|
||||
#if FORMAT_HOW == DI_16_B5G5R5A1 || FORMAT_HOW == DI_16_B5G6R5
|
||||
#if FORMAT_HOW == DI_16_B5_G5_R5_A1 || FORMAT_HOW == DI_16_B5_G6_R5
|
||||
unsigned int v;
|
||||
unsigned short *dst = (unsigned short *)ri->dst;
|
||||
|
||||
|
@ -495,6 +502,141 @@ static void MPRE(sover_aa) (composite_run_t *c, int num)
|
|||
|
||||
static void MPRE(sover_ao) (composite_run_t *c, int num)
|
||||
{
|
||||
#if FORMAT_HOW == DI_16_B5_G6_R5 || FORMAT_HOW == DI_16_B5_G5_R5_A1
|
||||
BLEND_TYPE *s = (BLEND_TYPE *)c->src, *d = (BLEND_TYPE *)c->dst;
|
||||
unsigned char *src_alpha = c->srca;
|
||||
int sr, sg, sb, sa;
|
||||
#undef i386
|
||||
#ifndef i386
|
||||
int dr, dg, db;
|
||||
#endif
|
||||
|
||||
unsigned int temp;
|
||||
|
||||
for (; num; num--)
|
||||
{
|
||||
ALPHA_READ(s, src_alpha, sa)
|
||||
if (!sa)
|
||||
{
|
||||
ALPHA_INC(s, src_alpha)
|
||||
BLEND_INC(d)
|
||||
continue;
|
||||
}
|
||||
if (sa == 255)
|
||||
{
|
||||
BLEND_READ(s, sr, sg, sb)
|
||||
BLEND_WRITE(d, sr, sg, sb)
|
||||
ALPHA_INC(s, src_alpha)
|
||||
BLEND_INC(d)
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
#ifdef i386
|
||||
/*
|
||||
The basic idea here is to scale all components using one multiply,
|
||||
and to do so without losing any accuracy. To do this, we move the
|
||||
components around so we get 8 empty bits above each component. In
|
||||
a 32-bit word, we don't have enough space for this, but by moving
|
||||
green to the top 6 bits and using ix86 'mul', we'll get the top 32
|
||||
bits of the multiplication in another register and can later recombine
|
||||
the components relatively easily.
|
||||
|
||||
Mostly equivalent C (16bpp case):
|
||||
|
||||
unsigned long long int temp;
|
||||
|
||||
sa = 255-sa;
|
||||
|
||||
temp = d[0];
|
||||
temp = ((temp|(temp<<21))&0xfc00001f)|((temp&0xf800)<<2);
|
||||
temp = temp*sa;
|
||||
temp = temp+0x000000020003e01fLL;
|
||||
temp = temp>>8;
|
||||
temp = (temp&0x1f) | ((temp&0x3e000)>>2) | ((temp&0xfc000000)>>21);
|
||||
d[0]=temp + s[0];
|
||||
|
||||
|
||||
16bpp:
|
||||
original: 0000 0000 0000 0000 bbbb bggg gggr rrrr
|
||||
after unpacking: gggg gg00 0000 00bb bbb0 0000 000r rrrr
|
||||
after 'mul': .... 0000 0000 gggg gggg gggg ggbb bbbb bbbb bbbr rrrr rrrr rrrr
|
||||
|
||||
15bpp:
|
||||
original: 0000 0000 0000 0000 0bbb bbgg gggr rrrr
|
||||
after unpacking: gggg g000 0000 00bb bbb0 0000 000r rrrr
|
||||
after 'mul': .... 0000 0000 gggg gggg gggg g0bb bbbb bbbb bbbr rrrr rrrr rrrr
|
||||
|
||||
*/
|
||||
temp = d[0];
|
||||
sa = 255 - sa;
|
||||
asm (
|
||||
"movl %0,%%eax\n"
|
||||
#if FORMAT_HOW == DI_16_B5_G6_R5
|
||||
"shll $21,%%eax\n"
|
||||
#else
|
||||
"shll $22,%%eax\n"
|
||||
#endif
|
||||
"orl %0,%%eax\n"
|
||||
#if FORMAT_HOW == DI_16_B5_G6_R5
|
||||
"andl $0xfc00001f,%%eax\n"
|
||||
"andl $0xf800,%0\n"
|
||||
"shll $2,%0\n"
|
||||
#else
|
||||
"andl $0xf800001f,%%eax\n"
|
||||
"andl $0x7c00,%0\n"
|
||||
"shll $3,%0\n"
|
||||
#endif
|
||||
"orl %0,%%eax\n"
|
||||
"mul %2\n"
|
||||
"addl $0x0003e01f,%%eax\n"
|
||||
#if FORMAT_HOW == DI_16_B5_G6_R5
|
||||
"addl $0x02,%%edx\n"
|
||||
"andl $0xfc,%%edx\n"
|
||||
"shll $3,%%edx\n"
|
||||
#else
|
||||
"addl $0x01,%%edx\n"
|
||||
"andl $0xf8,%%edx\n"
|
||||
"shll $2,%%edx\n"
|
||||
#endif
|
||||
"shrl $8,%%eax\n"
|
||||
"movl %%eax,%0\n"
|
||||
"andl $0x1f,%0\n"
|
||||
"orl %%edx,%0\n"
|
||||
#if FORMAT_HOW == DI_16_B5_G6_R5
|
||||
"shrl $2,%%eax\n"
|
||||
"andl $0xf800,%%eax\n"
|
||||
#else
|
||||
"shrl $3,%%eax\n"
|
||||
"andl $0x7c00,%%eax\n"
|
||||
#endif
|
||||
"orl %%eax,%0\n"
|
||||
: "=r" (temp)
|
||||
: "0" (temp), "g" (sa)
|
||||
: "eax", "edx");
|
||||
d[0] = temp + s[0];
|
||||
#else
|
||||
/*
|
||||
Generic, non-ix86 code. Can't use the really optimized path, but
|
||||
we can still add in the entire source pixel instead of unpacking
|
||||
it.
|
||||
*/
|
||||
BLEND_READ(d, dr, dg, db)
|
||||
|
||||
sa = 255 - sa;
|
||||
dr = ((dr * sa + 0xff) >> 8);
|
||||
dg = ((dg * sa + 0xff) >> 8);
|
||||
db = ((db * sa + 0xff) >> 8);
|
||||
|
||||
COPY_ASSEMBLE_PIXEL(temp, dr, dg, db)
|
||||
|
||||
d[0] = temp + s[0];
|
||||
#endif
|
||||
|
||||
ALPHA_INC(s, src_alpha)
|
||||
BLEND_INC(d)
|
||||
}
|
||||
#else
|
||||
BLEND_TYPE *s = (BLEND_TYPE *)c->src, *d = (BLEND_TYPE *)c->dst;
|
||||
#ifndef INLINE_ALPHA
|
||||
unsigned char *src_alpha = c->srca;
|
||||
|
@ -532,6 +674,7 @@ static void MPRE(sover_ao) (composite_run_t *c, int num)
|
|||
ALPHA_INC(s, src_alpha)
|
||||
BLEND_INC(d)
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* dsta : 0 */
|
||||
|
@ -749,7 +892,7 @@ static void MPRE(satop_aa) (composite_run_t *c, int num)
|
|||
dr = (sr * da + dr * sa + 0xff) >> 8;
|
||||
dg = (sg * da + dg * sa + 0xff) >> 8;
|
||||
db = (sb * da + db * sa + 0xff) >> 8;
|
||||
da = ((255 - sa) * da + da * sa + 0xff) >> 8;
|
||||
/* For alpha, satop simplifies to da' = da. */
|
||||
|
||||
BLEND_WRITE_ALPHA(d, dst_alpha, dr, dg, db, da)
|
||||
|
||||
|
@ -977,10 +1120,11 @@ static void MPRE(datop_aa) (composite_run_t *c, int num)
|
|||
|
||||
da = 255 - da;
|
||||
|
||||
dr = (dr * sa + sr * da + 0x80) >> 8;
|
||||
dg = (dg * sa + sg * da + 0x80) >> 8;
|
||||
db = (db * sa + sb * da + 0x80) >> 8;
|
||||
da = ((255 - da) * sa + sa * da + 0x80) >> 8;
|
||||
dr = (dr * sa + sr * da + 0xff) >> 8;
|
||||
dg = (dg * sa + sg * da + 0xff) >> 8;
|
||||
db = (db * sa + sb * da + 0xff) >> 8;
|
||||
/* For alpha, datop simplifies to da' = sa. */
|
||||
da = sa;
|
||||
|
||||
BLEND_WRITE_ALPHA(d, dst_alpha, dr, dg, db, da)
|
||||
|
||||
|
@ -1040,10 +1184,10 @@ static void MPRE(xor_aa) (composite_run_t *c, int num)
|
|||
|
||||
da = 255 - da;
|
||||
sa = 255 - sa;
|
||||
dr = ((dr * sa + sr * da + 0x80) >> 8);
|
||||
dg = ((dg * sa + sg * da + 0x80) >> 8);
|
||||
db = ((db * sa + sb * da + 0x80) >> 8);
|
||||
da = ((da * (255 - sa) + sa * (255 - da) + 0x80) >> 8);
|
||||
dr = ((dr * sa + sr * da + 0xff) >> 8);
|
||||
dg = ((dg * sa + sg * da + 0xff) >> 8);
|
||||
db = ((db * sa + sb * da + 0xff) >> 8);
|
||||
da = ((da * (255 - sa) + sa * (255 - da) + 0xff) >> 8);
|
||||
|
||||
BLEND_WRITE_ALPHA(d, dst_alpha, dr, dg, db, da)
|
||||
|
||||
|
@ -1532,15 +1676,15 @@ ourself.
|
|||
|
||||
/* 16-bit 5 bits blue, 6 bits green, 5 bits red */
|
||||
#define FORMAT_INSTANCE b5g6r5
|
||||
#define FORMAT_HOW DI_16_B5G6R5
|
||||
#define FORMAT_HOW DI_16_B5_G6_R5
|
||||
#warning B5G6R5
|
||||
|
||||
#define BLEND_TYPE unsigned short
|
||||
#define BLEND_READ(p,nr,ng,nb) \
|
||||
{ \
|
||||
unsigned short _s=p[0]; \
|
||||
nr=(_s>>11)<<3; \
|
||||
ng=((_s>>5)<<2)&0xff; \
|
||||
nr=(_s>>8); \
|
||||
ng=(_s>>3)&0xff; \
|
||||
nb=(_s<<3)&0xff; \
|
||||
}
|
||||
#define BLEND_READ_ALPHA(p,pa,nr,ng,nb,na) \
|
||||
|
@ -1570,15 +1714,15 @@ ourself.
|
|||
|
||||
/* 16-bit 5 bits blue, 5 bits green, 5 bits red */
|
||||
#define FORMAT_INSTANCE b5g5r5a1
|
||||
#define FORMAT_HOW DI_16_B5G5R5A1
|
||||
#define FORMAT_HOW DI_16_B5_G5_R5_A1
|
||||
#warning B5G5R5A1
|
||||
|
||||
#define BLEND_TYPE unsigned short
|
||||
#define BLEND_READ(p,nr,ng,nb) \
|
||||
{ \
|
||||
unsigned short _s=p[0]; \
|
||||
nr=(_s>>10)<<3; \
|
||||
ng=((_s>>5)<<3)&0xff; \
|
||||
nr=(_s>>7); \
|
||||
ng=(_s>>2)&0xff; \
|
||||
nb=(_s<<3)&0xff; \
|
||||
}
|
||||
#define BLEND_READ_ALPHA(p,pa,nr,ng,nb,na) \
|
||||
|
@ -1834,20 +1978,7 @@ Xor 1 - dstA 1 - srcA noop noop
|
|||
|
||||
|
||||
PlusL dst=src+dst , clamp to 1.0; dsta=srca+dsta, clamp to 1.0
|
||||
PlisD dst=src+dst-1, clamp to 0.0; dsta=srca+dsta, clamp to 1.0
|
||||
|
||||
these are incorrect:
|
||||
|
||||
PlusD
|
||||
[PlusD does not follow the general equation. The equation is dst'=(1-dst)+(1-src).
|
||||
If the result is less than 0 (black), then the result is 0.]
|
||||
N/A
|
||||
N/A
|
||||
|
||||
PlusL
|
||||
[For PlusL, the addition saturates. That is, if (src+dst) > white), the result is white.]
|
||||
1
|
||||
1
|
||||
PlusD dst=src+dst-1, clamp to 0.0; dsta=srca+dsta, clamp to 1.0
|
||||
|
||||
*/
|
||||
|
||||
|
|
242
Source/art/blit_scrapheap.m
Normal file
242
Source/art/blit_scrapheap.m
Normal file
|
@ -0,0 +1,242 @@
|
|||
/*
|
||||
Copyright (C) 2004 Free Software Foundation, Inc.
|
||||
|
||||
Author: Alexander Malmberg <alexander@malmberg.org>
|
||||
|
||||
This file is part of GNUstep.
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Library General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Library General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public
|
||||
License along with this library; if not, write to the Free
|
||||
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
This file is never compiled. It just contains alternate and/or partial
|
||||
implementations of some functions in blit.m that are interesting but not
|
||||
(in their current form) better than the implementations in blit.m .
|
||||
*/
|
||||
|
||||
/**** blit_alpha_opaque for 16/15 bpp ****/
|
||||
|
||||
/*
|
||||
Do two pixels at once. Turned out to be slower than a straight loop.
|
||||
*/
|
||||
|
||||
const unsigned char *src = asrc;
|
||||
BLEND_TYPE *dst = (BLEND_TYPE *)adst;
|
||||
unsigned int nr, ng, nb, a;
|
||||
|
||||
/*
|
||||
|
||||
bbbb bggg gggr rrrr a0
|
||||
b bbbb gggg ggrr rrr a1
|
||||
bb bbbg gggg grrr rr a2
|
||||
bbb bbgg gggg rrrr r a3
|
||||
bbbb bggg gggr rrrr a4
|
||||
b bbbb gggg ggrr rrr a5
|
||||
bb bbbg gggg grrr rr a6
|
||||
bbb bbgg gggg rrrr r a7
|
||||
|
||||
R rrrr rrrr rrrr
|
||||
Ggg gggg gggg ggg
|
||||
Bbbb bbbb bbbb b
|
||||
|
||||
bbbb b000 0000 0000 bbbb b000 0000 0000 a0
|
||||
b bbbb 0000 0000 000b bbbb 0000 0000 000 a1
|
||||
bb bbb0 0000 0000 00bb bbb0 0000 0000 00 a2
|
||||
bbb bb00 0000 0000 0bbb bb00 0000 0000 0 a3
|
||||
bbbb b000 0000 0000 bbbb b000 0000 0000 a4
|
||||
b bbbb 0000 0000 000b bbbb 0000 0000 000 a5
|
||||
bb bbb0 0000 0000 00bb bbb0 0000 0000 00 a6
|
||||
bbb bb00 0000 0000 0bbb bb00 0000 0000 0 a7
|
||||
bbbb b000 0000 0000 bbbb b000 0000 0000 b0
|
||||
b bbbb 0000 0000 000b bbbb 0000 0000 000 b1
|
||||
bb bbb0 0000 0000 00bb bbb0 0000 0000 00 b2
|
||||
bbb bb00 0000 0000 0bbb bb00 0000 0000 0 b3
|
||||
bbbb b000 0000 0000 bbbb b000 0000 0000 b4
|
||||
b bbbb 0000 0000 000b bbbb 0000 0000 000 b5
|
||||
bb bbb0 0000 0000 00bb bbb0 0000 0000 00 b6
|
||||
bbb bb00 0000 0000 0bbb bb00 0000 0000 0 b7
|
||||
BBBB BBBB BBBB B000 XXXX XXXX XXXX XXXX XXXX X000 AAAA AAAA AAAA A000 0000 0000
|
||||
|
||||
|
||||
bbbb bbbb 0000 0000 0000 0000 aaaa aaaa
|
||||
|
||||
*/
|
||||
|
||||
//printf("call with color=%02x %02x %02x\n",r,g,b);
|
||||
while (num >= 2)
|
||||
{
|
||||
unsigned int v1,v2;
|
||||
unsigned int a1;
|
||||
unsigned int temp;
|
||||
|
||||
a = *((unsigned short *)src);
|
||||
// printf("alpha=%04x\n",a);
|
||||
a = a&0xffff;
|
||||
/* if (!a)
|
||||
{
|
||||
num -= 2;
|
||||
src += 2;
|
||||
dst += 2;
|
||||
continue;
|
||||
}*/
|
||||
a = (a|(a<<8))&0xff00ff;
|
||||
// printf("unpack to %08x\n",a);
|
||||
|
||||
v1 = b*a;
|
||||
v1 = (v1>>11)&0x001f001f;
|
||||
// printf("blue: %08x\n",v1);
|
||||
|
||||
v2 = g*a;
|
||||
v2 = (v2>> 5)&0x07e007e0;
|
||||
// printf("green: %08x\n",v2);
|
||||
v1 = v1|v2;
|
||||
|
||||
v2 = r*a;
|
||||
v2 = v2 &0xf800f800;
|
||||
// printf("red: %08x\n",v2);
|
||||
v1 = v1|v2;
|
||||
// printf("result: %08x\n",v1);
|
||||
|
||||
a = 0xff00ff - a;
|
||||
|
||||
temp = dst[0];
|
||||
// printf("p1: %04x\n",temp);
|
||||
asm (
|
||||
"movl %0,%%eax\n"
|
||||
#if FORMAT_HOW == DI_16_B5_G6_R5
|
||||
"shll $21,%%eax\n"
|
||||
#else
|
||||
"shll $22,%%eax\n"
|
||||
#endif
|
||||
"orl %0,%%eax\n"
|
||||
#if FORMAT_HOW == DI_16_B5_G6_R5
|
||||
"andl $0xfc00001f,%%eax\n"
|
||||
"andl $0xf800,%0\n"
|
||||
"shll $2,%0\n"
|
||||
#else
|
||||
"andl $0xf800001f,%%eax\n"
|
||||
"andl $0x7c00,%0\n"
|
||||
"shll $3,%0\n"
|
||||
#endif
|
||||
"orl %0,%%eax\n"
|
||||
"mul %2\n"
|
||||
"addl $0x0003e01f,%%eax\n"
|
||||
#if FORMAT_HOW == DI_16_B5_G6_R5
|
||||
"addl $0x02,%%edx\n"
|
||||
"andl $0xfc,%%edx\n"
|
||||
"shll $3,%%edx\n"
|
||||
#else
|
||||
"addl $0x01,%%edx\n"
|
||||
"andl $0xf8,%%edx\n"
|
||||
"shll $2,%%edx\n"
|
||||
#endif
|
||||
"shrl $8,%%eax\n"
|
||||
"movl %%eax,%0\n"
|
||||
"andl $0x1f,%0\n"
|
||||
"orl %%edx,%0\n"
|
||||
#if FORMAT_HOW == DI_16_B5_G6_R5
|
||||
"shrl $2,%%eax\n"
|
||||
"andl $0xf800,%%eax\n"
|
||||
#else
|
||||
"shrl $3,%%eax\n"
|
||||
"andl $0x7c00,%%eax\n"
|
||||
#endif
|
||||
"orl %%eax,%0\n"
|
||||
: "=r" (temp)
|
||||
: "0" (temp), "c" (a&0xff)
|
||||
: "eax", "edx");
|
||||
// printf("to: %04x\n",temp);
|
||||
v1 += temp;
|
||||
// printf("add in gives: %08x\n",v1);
|
||||
|
||||
temp = dst[1];
|
||||
// printf("p2: %04x\n",temp);
|
||||
asm (
|
||||
"movl %0,%%eax\n"
|
||||
#if FORMAT_HOW == DI_16_B5_G6_R5
|
||||
"shll $21,%%eax\n"
|
||||
#else
|
||||
"shll $22,%%eax\n"
|
||||
#endif
|
||||
"orl %0,%%eax\n"
|
||||
#if FORMAT_HOW == DI_16_B5_G6_R5
|
||||
"andl $0xfc00001f,%%eax\n"
|
||||
"andl $0xf800,%0\n"
|
||||
"shll $2,%0\n"
|
||||
#else
|
||||
"andl $0xf800001f,%%eax\n"
|
||||
"andl $0x7c00,%0\n"
|
||||
"shll $3,%0\n"
|
||||
#endif
|
||||
"orl %0,%%eax\n"
|
||||
"mul %2\n"
|
||||
"addl $0x0003e01f,%%eax\n"
|
||||
#if FORMAT_HOW == DI_16_B5_G6_R5
|
||||
"addl $0x02,%%edx\n"
|
||||
"andl $0xfc,%%edx\n"
|
||||
"shll $3,%%edx\n"
|
||||
#else
|
||||
"addl $0x01,%%edx\n"
|
||||
"andl $0xf8,%%edx\n"
|
||||
"shll $2,%%edx\n"
|
||||
#endif
|
||||
"shrl $8,%%eax\n"
|
||||
"movl %%eax,%0\n"
|
||||
"andl $0x1f,%0\n"
|
||||
"orl %%edx,%0\n"
|
||||
#if FORMAT_HOW == DI_16_B5_G6_R5
|
||||
"shrl $2,%%eax\n"
|
||||
"andl $0xf800,%%eax\n"
|
||||
#else
|
||||
"shrl $3,%%eax\n"
|
||||
"andl $0x7c00,%%eax\n"
|
||||
#endif
|
||||
"orl %%eax,%0\n"
|
||||
: "=r" (temp)
|
||||
: "0" (temp), "c" (a>>16)
|
||||
: "eax", "edx");
|
||||
// printf("to: %04x\n",temp);
|
||||
v1 += (temp)<<16;
|
||||
// printf("add in gives: %08x\n",v1);
|
||||
|
||||
*((unsigned int *)dst)=v1;
|
||||
|
||||
num -= 2;
|
||||
dst += 2;
|
||||
src += 2;
|
||||
}
|
||||
|
||||
if (num)
|
||||
{
|
||||
a = *src;
|
||||
// if (a)
|
||||
{
|
||||
BLEND_READ(dst, nr, ng, nb)
|
||||
/* nr = inv_gamma_table[nr];
|
||||
ng = inv_gamma_table[ng];
|
||||
nb = inv_gamma_table[nb];*/
|
||||
nr = (r * a + nr * (255 - a) + 0xff) >> 8;
|
||||
ng = (g * a + ng * (255 - a) + 0xff) >> 8;
|
||||
nb = (b * a + nb * (255 - a) + 0xff) >> 8;
|
||||
/* nr = gamma_table[nr];
|
||||
ng = gamma_table[ng];
|
||||
nb = gamma_table[nb];*/
|
||||
BLEND_WRITE(dst, nr, ng, nb)
|
||||
BLEND_INC(dst)
|
||||
}
|
||||
}
|
||||
|
||||
/**** ****/
|
||||
|
Loading…
Reference in a new issue