mirror of
https://github.com/nzp-team/fteqw.git
synced 2024-11-25 21:31:37 +00:00
813c8c54ff
git-svn-id: https://svn.code.sf.net/p/fteqw/code/trunk@5687 fc73d0e0-1445-4013-8a0c-d673dee63da5
1749 lines
51 KiB
C
1749 lines
51 KiB
C
//Note: this code does not claim to be bit-correct.
|
|
//It doesn't support volume textures.
|
|
//It doesn't validate block extents (and is generally unaware of more than one block anyway)
|
|
//It doesn't implement all validation checks, either.
|
|
//Do NOT use this code to validate any encoders...
|
|
|
|
//Based upon documentation here: https://www.khronos.org/registry/OpenGL/extensions/OES/OES_texture_compression_astc.txt
|
|
|
|
#ifndef ASTC_PUBLIC
|
|
#define ASTC_PUBLIC
|
|
#endif
|
|
|
|
#define ASTC_WITH_LDR //comment out this line to disable pure-LDR decoding (the hdr code can still be used).
|
|
#define ASTC_WITH_HDR //comment out this line to disable HDR decoding.
|
|
#define ASTC_WITH_HDRTEST //comment out this line to disable checking for which profile is needed.
|
|
//#define ASTC_WITH_3D
|
|
|
|
#ifdef ASTC_WITH_LDR
|
|
ASTC_PUBLIC void ASTC_Decode_LDR8(unsigned char *in, unsigned char *out, int pixstride/*outwidth*/, int layerstride/*outwidth*outheight*/, int bw,int bh,int bd); //generates RGBA8 data (gives error colour for hdr blocks!)
|
|
#endif
|
|
#ifdef ASTC_WITH_HDR
|
|
ASTC_PUBLIC void ASTC_Decode_HDR(unsigned char *in, unsigned short *out, int pixstride/*outwidth*/, int layerstride/*outwidth*outheight*/, int bw,int bh,int bd); //generates RGBA16F data.
|
|
#endif
|
|
#ifdef ASTC_WITH_HDRTEST
|
|
ASTC_PUBLIC int ASTC_BlocksAreHDR(unsigned char *in, size_t datasize, int bw, int bh, int bd); //returns true if n consecutive blocks require the HDR profile (ie: detects when you need to soft-decode for drivers with partial support, as opposed to just always decompressing).
|
|
#endif
|
|
|
|
|
|
|
|
#include <math.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#ifndef Vector4Set
|
|
#define Vector4Set(r,x,y,z,w) {(r)[0] = x; (r)[1] = y;(r)[2] = z;(r)[3]=w;}
|
|
#endif
|
|
#ifndef countof
|
|
#define countof(array) (sizeof(array)/sizeof(array[0]))
|
|
#endif
|
|
#if defined(ASTC_WITH_LDR) || defined(ASTC_WITH_HDR)
|
|
#define ASTC_WITH_DECODE
|
|
#endif
|
|
enum astc_status_e
|
|
{
|
|
//valid blocks
|
|
ASTC_OKAY, //we can decode at least part of this normally (hdr endpoints may still result in per-endpoint errors).
|
|
ASTC_VOID_LDR, //not an error - the block is a single LDR colour, with an RGBA16 colour in the last 8 bytes.
|
|
ASTC_VOID_HDR, //not an error - the block is a single HDR colour, with an RGBA16F colour in the last 8 bytes.
|
|
|
|
//invalid blocks
|
|
ASTC_ERROR, //validation errors
|
|
ASTC_UNSUPPORTED, //basically just volume textures
|
|
ASTC_RESERVED, //reserved bits. basically an error but might not be in the future.
|
|
};
|
|
struct astc_block_info
|
|
{
|
|
unsigned char *in; //the 16 bytes of the block
|
|
unsigned char blocksize[3]; //block width, height, depth(1 for 2d).
|
|
|
|
enum astc_status_e status; //block status/type.
|
|
unsigned char dualplane; //two sets of weights instead of one.
|
|
unsigned char ccs; //second set applies to this component
|
|
|
|
unsigned char precision; //defines the precision of the weights
|
|
|
|
int wcount[4]; //x,y,z,total weight counts
|
|
int weight_bits; //size of weights section.
|
|
int config_bits; //size of header before the endpoint bits
|
|
int ep_bits; //size available to endpoints
|
|
unsigned char weights[64]; //official limit to the number of weights stored
|
|
|
|
unsigned char partitions; //number of active partitions to select from (and number of endpoints to read)
|
|
unsigned short partindex; //used for deciding which partition each pixel belongs in
|
|
struct astc_part
|
|
{
|
|
unsigned char mode; //endpoint modes
|
|
#ifdef ASTC_WITH_HDR
|
|
unsigned char hdr; //endpoint colour mode - &1=rgb, &2=alpha
|
|
#endif
|
|
int ep[2][4];
|
|
} part[4];
|
|
};
|
|
|
|
static unsigned char ASTC_readbits(unsigned char *in, unsigned int offset, unsigned int count)
|
|
{ //only reads up to 9 bits, because offset 7 with 10 bits needs to read more than two bytes
|
|
unsigned short s;
|
|
in += offset>>3;
|
|
offset &= 7;
|
|
s = in[0];
|
|
if (offset+count>8)
|
|
s |= (in[1]<<8);
|
|
s>>=offset;
|
|
return s & ((1u<<count)-1);
|
|
}
|
|
static unsigned int ASTC_readmanybits(unsigned char *in, unsigned int offset, unsigned int count)
|
|
{
|
|
unsigned int r = 0;
|
|
while(count > 8)
|
|
{
|
|
count -= 8;
|
|
r |= ASTC_readbits(in, offset+count, 8)<<count;
|
|
}
|
|
r |= ASTC_readbits(in, offset, count);
|
|
return r;
|
|
}
|
|
|
|
//weights cover a range of 0-64 inclusive
|
|
//>32 is +1 (otherwise it would be 0-63)
|
|
//high bits are folded over
|
|
static unsigned char dequant_weight_1b[1<<1] = {0x00,0x40};
|
|
static unsigned char dequant_weight_2b[1<<2] = {0x00,0x15,0x2b,0x40};
|
|
static unsigned char dequant_weight_3b[1<<3] = {0x00,0x09,0x12,0x1b,0x25,0x2e,0x37,0x40};
|
|
static unsigned char dequant_weight_4b[1<<4] = {0x00,0x04,0x08,0x0c,0x11,0x15,0x19,0x1d,0x23,0x27,0x2b,0x2f,0x34,0x38,0x3c,0x40};
|
|
static unsigned char dequant_weight_5b[1<<5] = {0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,0x40};
|
|
static unsigned char dequant_weight_0t[3] = {0,32,64};
|
|
static unsigned char dequant_weight_1t[6] = {0x00,0x40,0x0c,0x34,0x19,0x27};
|
|
static unsigned char dequant_weight_2t[12] = {0x00,0x40,0x11,0x2f,0x06,0x3a,0x17,0x29,0x0c,0x34,0x1d,0x23};
|
|
static unsigned char dequant_weight_3t[24] = {0x00,0x40,0x08,0x38,0x10,0x30,0x18,0x28,0x02,0x3e,0x0b,0x35,0x13,0x2d,0x1b,0x25,0x05,0x3b,0x0d,0x33,0x16,0x2a,0x1e,0x22};
|
|
static unsigned char dequant_weight_0q[5] = {0,16,32,48,64};
|
|
static unsigned char dequant_weight_1q[10] = {0x00,0x40,0x05,0x3b,0x0b,0x35,0x11,0x2f,0x17,0x29};
|
|
static unsigned char dequant_weight_2q[20] = {0x00,0x40,0x10,0x30,0x03,0x3d,0x13,0x2d,0x06,0x3a,0x17,0x29,0x09,0x37,0x1a,0x26,0x0d,0x33,0x1d,0x23};
|
|
static const struct
|
|
{
|
|
unsigned char extra, bits, *dequant;
|
|
} astc_weightmode[] =
|
|
{
|
|
{0,0, NULL}, //invalid
|
|
{0,0, NULL}, //invalid
|
|
{0,1, dequant_weight_1b}, //2
|
|
{1,0, dequant_weight_0t}, //3
|
|
{0,2, dequant_weight_2b}, //4
|
|
{2,0, dequant_weight_0q}, //5
|
|
{1,1, dequant_weight_1t}, //6
|
|
{0,3, dequant_weight_3b}, //8
|
|
{0,0, NULL}, //invalid
|
|
{0,0, NULL}, //invalid
|
|
{2,1, dequant_weight_1q}, //10
|
|
{1,2, dequant_weight_2t}, //12
|
|
{0,4, dequant_weight_4b}, //16
|
|
{2,2, dequant_weight_2q}, //20
|
|
{1,3, dequant_weight_3t}, //24
|
|
{0,5, dequant_weight_5b}, //32
|
|
};
|
|
static unsigned int ASTC_DecodeSize(unsigned int count, unsigned int bits, unsigned char extra)
|
|
{
|
|
return ((extra==1)?((count*8)+4)/5:0) +
|
|
((extra==2)?((count*7)+2)/3:0) +
|
|
count*bits;
|
|
}
|
|
|
|
|
|
static void ASTC_ReadBlockMode(struct astc_block_info *b)
|
|
{
|
|
unsigned char *in = b->in;
|
|
unsigned short s = ASTC_readmanybits(in, 0, 13);//in[0] | (in[1]<<8);
|
|
b->config_bits = 13;
|
|
|
|
if ((s&0x1ff)==0x1fc)
|
|
{ //void extent
|
|
if (s&0x200)
|
|
b->status = ASTC_VOID_HDR;
|
|
else
|
|
b->status = ASTC_VOID_LDR;
|
|
b->dualplane = b->precision = b->wcount[0] = b->wcount[1] = b->wcount[2] = b->partitions = 0;
|
|
return;
|
|
}
|
|
b->status = ASTC_OKAY;
|
|
b->dualplane = (s>>10)&1; //Dp
|
|
b->precision = (s>>(9-3))&(1<<3);//P
|
|
b->precision |= (s>>4)&1; //p0
|
|
if (b->blocksize[2] != 1)
|
|
{ //3d blocks have a different header layout
|
|
#ifdef ASTC_WITH_3D
|
|
if (s&3)
|
|
{
|
|
b->precision|=(s&3)<<1; //p2, p1
|
|
b->wcount[0] = ((s>>5)&3)+2, b->wcount[1] = ((s>>7)&3)+2, b->wcount[2] = ((s>>2)&3)+2;
|
|
}
|
|
else
|
|
{
|
|
b->precision|=(s&0xc)>>1; //p2, p1
|
|
if ((s&0x180)!=0x180)
|
|
{
|
|
b->dualplane = 0; //always single plane.
|
|
b->precision &= 7; //clear the high precision bit (reused for 'b')
|
|
if (!(s&0x180))
|
|
b->wcount[0] = 6, b->wcount[1] = ((s>>9)&3)+2, b->wcount[2] = ((s>>5)&3)+2;
|
|
else if (!(s&0x80))
|
|
b->wcount[0] = ((s>>5)&3)+2, b->wcount[1] = 6, b->wcount[2] = ((s>>9)&3)+2;
|
|
else
|
|
b->wcount[0] = ((s>>5)&3)+2, b->wcount[1] = ((s>>9)&3)+2, b->wcount[2] = 6;
|
|
}
|
|
else if ((s&0x60)!=0x60)
|
|
{
|
|
if (!(s&0x60))
|
|
b->wcount[0] = 6, b->wcount[1] = 2, b->wcount[2] = 2;
|
|
else if (!(s&0x20))
|
|
b->wcount[0] = 2, b->wcount[1] = 6, b->wcount[2] = 2;
|
|
else //40
|
|
b->wcount[0] = 2, b->wcount[1] = 2, b->wcount[2] = 6;
|
|
}
|
|
else
|
|
b->status = ASTC_RESERVED; //reserved (or void extent, but those were handled above)
|
|
}
|
|
#else
|
|
b->status = ASTC_UNSUPPORTED;
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
b->wcount[2] = 1;
|
|
if (s&3)
|
|
{ //one of the first 5 layouts...
|
|
b->precision|=(s&3)<<1; //p2, p1
|
|
if (!(s&8))
|
|
{ //first two layouts...
|
|
if (!(s&4))
|
|
{ //layout0
|
|
b->wcount[0] = ((s>>7)&3)+4;
|
|
b->wcount[1] = ((s>>5)&3)+2;
|
|
}
|
|
else
|
|
{ //layout1
|
|
b->wcount[0] = ((s>>7)&3)+8;
|
|
b->wcount[1] = ((s>>5)&3)+2;
|
|
}
|
|
}
|
|
else if (!(s&4))
|
|
{ //layout2
|
|
b->wcount[0] = ((s>>5)&3)+2;
|
|
b->wcount[1] = ((s>>7)&3)+8;
|
|
}
|
|
else if (!(s&256))
|
|
{ //layout3
|
|
b->wcount[0] = ((s>>5)&3)+2;
|
|
b->wcount[1] = ((s>>7)&1)+6;
|
|
}
|
|
else
|
|
{ //layout4
|
|
b->wcount[0] = ((s>>7)&1)+2;
|
|
b->wcount[1] = ((s>>5)&3)+2;
|
|
}
|
|
}
|
|
else
|
|
{ //one of the later layouts
|
|
b->precision|=(s&0xc)>>1; //p2, p1
|
|
if (!(s&384))
|
|
{
|
|
b->wcount[0] = 12;
|
|
b->wcount[1] = ((s>>5)&3)+2;
|
|
}
|
|
else if ((s&384)==128)
|
|
{
|
|
b->wcount[0] = ((s>>5)&3)+2;
|
|
b->wcount[1] = 12;
|
|
}
|
|
else if ((s&480)==384)
|
|
{
|
|
b->wcount[0] = 6;
|
|
b->wcount[1] = 10;
|
|
}
|
|
else if ((s&480)==416)
|
|
{
|
|
b->wcount[0] = 10;
|
|
b->wcount[1] = 6;
|
|
}
|
|
else if ((s&384)==256)
|
|
{
|
|
b->wcount[0] = ((s>>5)&3)+6;
|
|
b->wcount[1] = ((s>>9)&3)+6;
|
|
b->dualplane = 0; //forget the Dp bit, its reused in this layout
|
|
b->precision &= 7; //forget the P bit, too
|
|
}
|
|
else
|
|
b->status = ASTC_RESERVED; //reserved
|
|
}
|
|
}
|
|
b->partitions = ((s>>11)&3)+1;
|
|
|
|
if (b->partitions > 3 && b->dualplane)
|
|
b->status = ASTC_ERROR; //apparently.
|
|
|
|
if (b->wcount[0] > b->blocksize[0] || b->wcount[1] > b->blocksize[1] || b->wcount[2] > b->blocksize[2])
|
|
b->status = ASTC_ERROR; //invalid weight counts.
|
|
|
|
b->wcount[3] = b->wcount[0] * b->wcount[1] * b->wcount[2];
|
|
b->wcount[3]<<=b->dualplane; //dual-plane has twice the weights - interleaved.
|
|
if (b->wcount[3] > countof(b->weights))
|
|
b->status = ASTC_ERROR; //more than 64 weights are banned, for some reason
|
|
b->weight_bits = ASTC_DecodeSize(b->wcount[3], astc_weightmode[b->precision].bits, astc_weightmode[b->precision].extra);
|
|
}
|
|
|
|
static void ASTC_ReadPartitions(struct astc_block_info *b)
|
|
{
|
|
int sel;
|
|
int i;
|
|
unsigned char *in = b->in;
|
|
int weight_bits = b->weight_bits;
|
|
|
|
if (b->partitions == 1)
|
|
{ //single-partition mode, simple CEM
|
|
b->partindex = 0;
|
|
b->part[0].mode = ASTC_readbits(in, b->config_bits, 4);
|
|
b->config_bits += 4;
|
|
}
|
|
else
|
|
{ //multi
|
|
b->partindex = ASTC_readmanybits(in, b->config_bits, 10);
|
|
b->config_bits += 10;
|
|
sel = ASTC_readbits(in, b->config_bits, 6);
|
|
b->config_bits += 6;
|
|
if (!(sel&3))
|
|
{
|
|
sel = (sel>>2)&0xf;
|
|
for (i = 0; i < b->partitions; i++)
|
|
b->part[i].mode = sel; //all the same
|
|
}
|
|
else
|
|
{
|
|
int shift = 2;
|
|
int highbits = b->partitions*3 - 4;
|
|
|
|
weight_bits += highbits;
|
|
sel |= ASTC_readbits(in, 128-weight_bits, highbits)<<6; //I don't know why this is separate. it seems like an unnecessary complication to me.
|
|
|
|
for (i = 0; i < b->partitions; i++, shift++)
|
|
{
|
|
b->part[i].mode = ((sel&3)-1)<<2; //class groups
|
|
b->part[i].mode += ((sel>>shift)&1)<<2;//class
|
|
}
|
|
for (i = 0; i < b->partitions; i++, shift+=2)
|
|
b->part[i].mode += (sel>>shift)&3; //specific mode info
|
|
}
|
|
}
|
|
if (b->dualplane)
|
|
{
|
|
weight_bits += 2;
|
|
b->ccs = ASTC_readbits(in, 128-weight_bits, 2);
|
|
}
|
|
else
|
|
b->ccs = 0;
|
|
|
|
b->ep_bits = 128 - weight_bits - b->config_bits;
|
|
//weights are at 128-weight_bits to 128
|
|
//epdata is at config_bits to config_bits+ep_bits
|
|
}
|
|
|
|
#ifdef ASTC_WITH_HDRTEST
|
|
ASTC_PUBLIC int ASTC_BlocksAreHDR(unsigned char *in, size_t datasize, int bw, int bh, int bd)
|
|
{
|
|
struct astc_block_info b;
|
|
int i;
|
|
size_t blocks = datasize/16;
|
|
b.in = in;
|
|
b.blocksize[0] = bw;
|
|
b.blocksize[1] = bh;
|
|
b.blocksize[2] = bd;
|
|
while(blocks --> 0)
|
|
{
|
|
ASTC_ReadBlockMode(&b);
|
|
if (b.status == ASTC_VOID_HDR)
|
|
return 1; //if we're getting hdr blocks then we can decode properly only with hdr
|
|
if (b.status == ASTC_VOID_LDR)
|
|
return 0; //if we're getting ldr blocks, then its unlikely that there's any hdr blocks in there.
|
|
if (b.status != ASTC_OKAY)
|
|
continue;
|
|
ASTC_ReadPartitions(&b);
|
|
for (i = 0; i < b.partitions; i++)
|
|
{
|
|
switch(b.part[i].mode)
|
|
{
|
|
case 2:
|
|
case 3:
|
|
case 7:
|
|
case 11:
|
|
case 14:
|
|
case 15:
|
|
return 1;
|
|
}
|
|
}
|
|
b.in += 16;
|
|
}
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#ifdef ASTC_WITH_DECODE
|
|
static unsigned char ASTC_readbits2(unsigned char *in, unsigned int *offset, unsigned int count)
|
|
{ //only reads up to 9 bits, because offset 7 with 10 bits needs to read more than two bytes
|
|
unsigned char r = ASTC_readbits(in, *offset, count);
|
|
*offset += count;
|
|
return r;
|
|
}
|
|
static void ASTC_Decode(unsigned char *in, unsigned char *out, int count, unsigned int offset, int bits, int extra, unsigned char *dequant)
|
|
{
|
|
unsigned char block[5];
|
|
int j;
|
|
|
|
//unfortunately these trits depend upon the values of the later bits in each block.
|
|
//if only it were a nice simple modulo...
|
|
if (extra==1)
|
|
{
|
|
//read it 5 samples at a time
|
|
while(count > 0)
|
|
{
|
|
unsigned int t, c;
|
|
|
|
block[0] = ASTC_readbits2(in, &offset, bits);
|
|
t = ASTC_readbits2(in, &offset, 2);
|
|
if (count > 1)
|
|
{
|
|
block[1] = ASTC_readbits2(in, &offset, bits);
|
|
t |= ASTC_readbits2(in, &offset, 2)<<2;
|
|
}
|
|
else
|
|
block[1] = 0;
|
|
if (count > 2)
|
|
{
|
|
block[2] = ASTC_readbits2(in, &offset, bits);
|
|
t |= ASTC_readbits2(in, &offset, 1)<<4;
|
|
}
|
|
else
|
|
block[2] = 0;
|
|
if (count > 3)
|
|
{
|
|
block[3] = ASTC_readbits2(in, &offset, bits);
|
|
t |= ASTC_readbits2(in, &offset, 2)<<5;
|
|
}
|
|
else
|
|
block[3] = 0;
|
|
if (count > 4)
|
|
{
|
|
block[4] = ASTC_readbits2(in, &offset, bits);
|
|
t |= ASTC_readbits2(in, &offset, 1)<<7;
|
|
}
|
|
else
|
|
block[4] = 0;
|
|
|
|
//okay, we read the block, now figure out the trits and pack them into the high part of the result
|
|
if ((t&0x1c) == 0x1c)
|
|
{
|
|
c = ((t>>3)&0x1c) | (t&3);
|
|
block[4] |= 2<<bits;
|
|
block[3] |= 2<<bits;
|
|
}
|
|
else
|
|
{
|
|
c = t&0x1f;
|
|
if ((t&0x60) == 0x60)
|
|
{
|
|
block[4] |= 2<<bits;
|
|
block[3] |= (t>>7)<<bits;
|
|
}
|
|
else
|
|
{
|
|
block[4] |= (t>>7)<<bits;
|
|
block[3] |= ((t>>5)&3)<<bits;
|
|
}
|
|
}
|
|
if ((c&3)==3)
|
|
{
|
|
block[2] |= 2<<bits;
|
|
block[1] |= ((c>>4)&1)<<bits;
|
|
block[0] |= (((c>>2)&2) | ((c>>2)&~(c>>3)&1))<<bits;
|
|
}
|
|
else if ((c&0xc)==0xc)
|
|
{
|
|
block[2] |= 2<<bits;
|
|
block[1] |= 2<<bits;
|
|
block[0] |= (c&3)<<bits;
|
|
}
|
|
else
|
|
{
|
|
block[2] |= ((c>>4)&1)<<bits;
|
|
block[1] |= ((c>>2)&3)<<bits;
|
|
block[0] |= ((c&2)|(c&1&~(c>>1)))<<bits;
|
|
}
|
|
|
|
//spit out the result
|
|
for (j = 0; j < 5 && j < count; j++)
|
|
*out++ = dequant[block[j]];
|
|
count -= 5;
|
|
}
|
|
}
|
|
else if (extra == 2)
|
|
{
|
|
//read it 3 samples at a time
|
|
while(count > 0)
|
|
{
|
|
unsigned int t, c;
|
|
|
|
block[0] = ASTC_readbits2(in, &offset, bits);
|
|
t = ASTC_readbits2(in, &offset, 3);
|
|
if (count > 1)
|
|
{
|
|
block[1] = ASTC_readbits2(in, &offset, bits);
|
|
t |= ASTC_readbits2(in, &offset, 2)<<3;
|
|
}
|
|
else
|
|
block[1] = 0;
|
|
if (count > 2)
|
|
{
|
|
block[2] = ASTC_readbits2(in, &offset, bits);
|
|
t |= ASTC_readbits2(in, &offset, 2)<<5;
|
|
}
|
|
else
|
|
block[2] = 0;
|
|
|
|
//okay, we read the block, now figure out the trits and pack them into the high part of the result
|
|
if ((t&6)==6 && !(t&0x60))
|
|
{
|
|
block[2] |= (((t&1)<<2) | (((t>>4)&~t&1)<<1) | ((t>>3)&~t&1))<<bits;
|
|
block[1] |= 4<<bits;
|
|
block[0] |= 4<<bits;
|
|
}
|
|
else
|
|
{
|
|
if ((t&6) == 6)
|
|
{
|
|
block[2] |= 4<<bits;
|
|
c = ((t>>3)&3)<<3;
|
|
c |= (~(t>>5)&3)<<1;
|
|
c |= t&1;
|
|
}
|
|
else
|
|
{
|
|
block[2] |= ((t>>5)&3)<<bits;
|
|
c = t&0x1f;
|
|
}
|
|
|
|
if ((c&7) == 5)
|
|
{
|
|
block[1] |= 4<<bits;
|
|
block[0] |= ((c>>3)&3)<<bits;
|
|
}
|
|
else
|
|
{
|
|
block[1] |= ((c>>3)&3)<<bits;
|
|
block[0] |= (c&7)<<bits;
|
|
}
|
|
}
|
|
|
|
//spit out the result
|
|
for (j = 0; j < 3 && j < count; j++)
|
|
*out++ = dequant[block[j]];
|
|
count -= 3;
|
|
}
|
|
}
|
|
else while(count --> 0) //pure bits, nice and simple
|
|
{
|
|
unsigned char val = ASTC_readbits2(in, &offset, bits);
|
|
|
|
*out++ = dequant[val];
|
|
}
|
|
}
|
|
|
|
//endpoints have a logical value between 0 and 255.
|
|
//bit replication is used to fill in missing precision
|
|
static unsigned char dequant_ep_1b[1<<1] = {0,255};
|
|
static unsigned char dequant_ep_2b[1<<2] = {0x00,0x55,0xaa,0xff};
|
|
static unsigned char dequant_ep_3b[1<<3] = {0x00,0x24,0x49,0x6d,0x92,0xb6,0xdb,0xff};
|
|
static unsigned char dequant_ep_4b[1<<4] = {
|
|
0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff};
|
|
static unsigned char dequant_ep_5b[1<<5] = {
|
|
0x00,0x08,0x10,0x18,0x21,0x29,0x31,0x39,0x42,0x4a,0x52,0x5a,0x63,0x6b,0x73,0x7b,
|
|
0x84,0x8c,0x94,0x9c,0xa5,0xad,0xb5,0xbd,0xc6,0xce,0xd6,0xde,0xe7,0xef,0xf7,0xff};
|
|
static unsigned char dequant_ep_6b[1<<6] = {
|
|
0x00,0x04,0x08,0x0c,0x10,0x14,0x18,0x1c,0x20,0x24,0x28,0x2c,0x30,0x34,0x38,0x3c,
|
|
0x41,0x45,0x49,0x4d,0x51,0x55,0x59,0x5d,0x61,0x65,0x69,0x6d,0x71,0x75,0x79,0x7d,
|
|
0x82,0x86,0x8a,0x8e,0x92,0x96,0x9a,0x9e,0xa2,0xa6,0xaa,0xae,0xb2,0xb6,0xba,0xbe,
|
|
0xc3,0xc7,0xcb,0xcf,0xd3,0xd7,0xdb,0xdf,0xe3,0xe7,0xeb,0xef,0xf3,0xf7,0xfb,0xff};
|
|
static unsigned char dequant_ep_7b[1<<7] = {
|
|
0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,
|
|
0x20,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,
|
|
0x40,0x42,0x44,0x46,0x48,0x4a,0x4c,0x4e,0x50,0x52,0x54,0x56,0x58,0x5a,0x5c,0x5e,
|
|
0x60,0x62,0x64,0x66,0x68,0x6a,0x6c,0x6e,0x70,0x72,0x74,0x76,0x78,0x7a,0x7c,0x7e,
|
|
0x81,0x83,0x85,0x87,0x89,0x8b,0x8d,0x8f,0x91,0x93,0x95,0x97,0x99,0x9b,0x9d,0x9f,
|
|
0xa1,0xa3,0xa5,0xa7,0xa9,0xab,0xad,0xaf,0xb1,0xb3,0xb5,0xb7,0xb9,0xbb,0xbd,0xbf,
|
|
0xc1,0xc3,0xc5,0xc7,0xc9,0xcb,0xcd,0xcf,0xd1,0xd3,0xd5,0xd7,0xd9,0xdb,0xdd,0xdf,
|
|
0xe1,0xe3,0xe5,0xe7,0xe9,0xeb,0xed,0xef,0xf1,0xf3,0xf5,0xf7,0xf9,0xfb,0xfd,0xff};
|
|
static unsigned char dequant_ep_8b[1<<8] = {
|
|
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
|
|
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
|
|
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
|
|
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
|
|
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
|
|
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
|
|
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
|
|
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
|
|
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
|
|
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
|
|
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
|
|
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
|
|
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
|
|
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
|
|
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
|
|
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff};
|
|
static unsigned char dequant_ep_0t[3] = {0,128,255};
|
|
static unsigned char dequant_ep_1t[6] = {0x00,0xff,0x33,0xcc,0x66,0x99};
|
|
static unsigned char dequant_ep_2t[12] = {0x00,0xff,0x45,0xba,0x17,0xe8,0x5c,0xa3,0x2e,0xd1,0x74,0x8b};
|
|
static unsigned char dequant_ep_3t[24] = {0x00,0xff,0x21,0xde,0x42,0xbd,0x63,0x9c,0x0b,0xf4,0x2c,0xd3,0x4d,0xb2,0x6e,0x91,0x16,0xe9,0x37,0xc8,0x58,0xa7,0x79,0x86};
|
|
static unsigned char dequant_ep_4t[48] = {0x00,0xff,0x10,0xef,0x20,0xdf,0x30,0xcf,0x41,0xbe,0x51,0xae,0x61,0x9e,0x71,0x8e,0x05,0xfa,0x15,0xea,0x26,0xd9,0x36,0xc9,0x46,0xb9,0x56,0xa9,0x67,0x98,0x77,0x88,0x0b,0xf4,0x1b,0xe4,0x2b,0xd4,0x3b,0xc4,0x4c,0xb3,0x5c,0xa3,0x6c,0x93,0x7c,0x83};
|
|
static unsigned char dequant_ep_5t[96] = {0x00,0xff,0x08,0xf7,0x10,0xef,0x18,0xe7,0x20,0xdf,0x28,0xd7,0x30,0xcf,0x38,0xc7,0x40,0xbf,0x48,0xb7,0x50,0xaf,0x58,0xa7,0x60,0x9f,0x68,0x97,0x70,0x8f,0x78,0x87,0x02,0xfd,0x0a,0xf5,0x12,0xed,0x1a,0xe5,0x23,0xdc,0x2b,0xd4,0x33,0xcc,0x3b,0xc4,0x43,0xbc,0x4b,0xb4,0x53,0xac,0x5b,0xa4,0x63,0x9c,0x6b,0x94,0x73,0x8c,0x7b,0x84,0x05,0xfa,0x0d,0xf2,0x15,0xea,0x1d,0xe2,0x25,0xda,0x2d,0xd2,0x35,0xca,0x3d,0xc2,0x46,0xb9,0x4e,0xb1,0x56,0xa9,0x5e,0xa1,0x66,0x99,0x6e,0x91,0x76,0x89,0x7e,0x81};
|
|
static unsigned char dequant_ep_6t[192]= {0x00,0xff,0x04,0xfb,0x08,0xf7,0x0c,0xf3,0x10,0xef,0x14,0xeb,0x18,0xe7,0x1c,0xe3,0x20,0xdf,0x24,0xdb,0x28,0xd7,0x2c,0xd3,0x30,0xcf,0x34,0xcb,0x38,0xc7,0x3c,0xc3,0x40,0xbf,0x44,0xbb,0x48,0xb7,0x4c,0xb3,0x50,0xaf,0x54,0xab,0x58,0xa7,0x5c,0xa3,0x60,0x9f,0x64,0x9b,0x68,0x97,0x6c,0x93,0x70,0x8f,0x74,0x8b,0x78,0x87,0x7c,0x83,0x01,0xfe,0x05,0xfa,0x09,0xf6,0x0d,0xf2,0x11,0xee,0x15,0xea,0x19,0xe6,0x1d,0xe2,0x21,0xde,0x25,0xda,0x29,0xd6,0x2d,0xd2,0x31,0xce,0x35,0xca,0x39,0xc6,0x3d,0xc2,0x41,0xbe,0x45,0xba,0x49,0xb6,0x4d,0xb2,0x51,0xae,0x55,0xaa,0x59,0xa6,0x5d,0xa2,0x61,0x9e,0x65,0x9a,0x69,0x96,0x6d,0x92,0x71,0x8e,0x75,0x8a,0x79,0x86,0x7d,0x82,0x02,0xfd,0x06,0xf9,0x0a,0xf5,0x0e,0xf1,0x12,0xed,0x16,0xe9,0x1a,0xe5,0x1e,0xe1,0x22,0xdd,0x26,0xd9,0x2a,0xd5,0x2e,0xd1,0x32,0xcd,0x36,0xc9,0x3a,0xc5,0x3e,0xc1,0x42,0xbd,0x46,0xb9,0x4a,0xb5,0x4e,0xb1,0x52,0xad,0x56,0xa9,0x5a,0xa5,0x5e,0xa1,0x62,0x9d,0x66,0x99,0x6a,0x95,0x6e,0x91,0x72,0x8d,0x76,0x89,0x7a,0x85,0x7e,0x81};
|
|
static unsigned char dequant_ep_0q[5] = {0,64,128,192,255};
|
|
static unsigned char dequant_ep_1q[10] = {0x00,0xff,0x1c,0xe3,0x38,0xc7,0x54,0xab,0x71,0x8e};
|
|
static unsigned char dequant_ep_2q[20] = {0x00,0xff,0x43,0xbc,0x0d,0xf2,0x50,0xaf,0x1b,0xe4,0x5e,0xa1,0x28,0xd7,0x6b,0x94,0x36,0xc9,0x79,0x86};
|
|
static unsigned char dequant_ep_3q[40] = {0x00,0xff,0x20,0xdf,0x41,0xbe,0x61,0x9e,0x06,0xf9,0x27,0xd8,0x47,0xb8,0x68,0x97,0x0d,0xf2,0x2d,0xd2,0x4e,0xb1,0x6e,0x91,0x13,0xec,0x34,0xcb,0x54,0xab,0x75,0x8a,0x1a,0xe5,0x3a,0xc5,0x5b,0xa4,0x7b,0x84};
|
|
static unsigned char dequant_ep_4q[80] = {0x00,0xff,0x10,0xef,0x20,0xdf,0x30,0xcf,0x40,0xbf,0x50,0xaf,0x60,0x9f,0x70,0x8f,0x03,0xfc,0x13,0xec,0x23,0xdc,0x33,0xcc,0x43,0xbc,0x53,0xac,0x64,0x9b,0x74,0x8b,0x06,0xf9,0x16,0xe9,0x26,0xd9,0x36,0xc9,0x47,0xb8,0x57,0xa8,0x67,0x98,0x77,0x88,0x09,0xf6,0x19,0xe6,0x2a,0xd5,0x3a,0xc5,0x4a,0xb5,0x5a,0xa5,0x6a,0x95,0x7a,0x85,0x0d,0xf2,0x1d,0xe2,0x2d,0xd2,0x3d,0xc2,0x4d,0xb2,0x5d,0xa2,0x6d,0x92,0x7d,0x82};
|
|
static unsigned char dequant_ep_5q[160]= {0x00,0xff,0x08,0xf7,0x10,0xef,0x18,0xe7,0x20,0xdf,0x28,0xd7,0x30,0xcf,0x38,0xc7,0x40,0xbf,0x48,0xb7,0x50,0xaf,0x58,0xa7,0x60,0x9f,0x68,0x97,0x70,0x8f,0x78,0x87,0x01,0xfe,0x09,0xf6,0x11,0xee,0x19,0xe6,0x21,0xde,0x29,0xd6,0x31,0xce,0x39,0xc6,0x41,0xbe,0x49,0xb6,0x51,0xae,0x59,0xa6,0x61,0x9e,0x69,0x96,0x71,0x8e,0x79,0x86,0x03,0xfc,0x0b,0xf4,0x13,0xec,0x1b,0xe4,0x23,0xdc,0x2b,0xd4,0x33,0xcc,0x3b,0xc4,0x43,0xbc,0x4b,0xb4,0x53,0xac,0x5b,0xa4,0x63,0x9c,0x6b,0x94,0x73,0x8c,0x7b,0x84,0x04,0xfb,0x0c,0xf3,0x14,0xeb,0x1c,0xe3,0x24,0xdb,0x2c,0xd3,0x34,0xcb,0x3c,0xc3,0x44,0xbb,0x4c,0xb3,0x54,0xab,0x5c,0xa3,0x64,0x9b,0x6c,0x93,0x74,0x8b,0x7c,0x83,0x06,0xf9,0x0e,0xf1,0x16,0xe9,0x1e,0xe1,0x26,0xd9,0x2e,0xd1,0x36,0xc9,0x3e,0xc1,0x46,0xb9,0x4e,0xb1,0x56,0xa9,0x5e,0xa1,0x66,0x99,0x6e,0x91,0x76,0x89,0x7e,0x81};
|
|
static const struct
|
|
{
|
|
unsigned char extra, bits, *dequant;
|
|
} astc_epvmode[] =
|
|
{
|
|
{0,1, dequant_ep_1b}, //2
|
|
{1,0, dequant_ep_0t}, //3
|
|
{0,2, dequant_ep_2b}, //4
|
|
{2,0, dequant_ep_0q}, //5
|
|
{1,1, dequant_ep_1t}, //6
|
|
{0,3, dequant_ep_3b}, //8
|
|
{2,1, dequant_ep_1q}, //10
|
|
{1,2, dequant_ep_2t}, //12
|
|
{0,4, dequant_ep_4b}, //16
|
|
{2,2, dequant_ep_2q}, //20
|
|
{1,3, dequant_ep_3t}, //24
|
|
{0,5, dequant_ep_5b}, //32
|
|
{2,3, dequant_ep_3q}, //40
|
|
{1,4, dequant_ep_4t}, //48
|
|
{0,6, dequant_ep_6b}, //64
|
|
{2,4, dequant_ep_4q}, //80
|
|
{1,5, dequant_ep_5t}, //96
|
|
{0,7, dequant_ep_7b}, //128
|
|
{2,5, dequant_ep_5q}, //160
|
|
{1,6, dequant_ep_6t}, //192
|
|
{0,8, dequant_ep_8b}, //256
|
|
//other modes don't make any sense
|
|
};
|
|
/*static void ASTC_CalcDequant(void)
|
|
{
|
|
int i;
|
|
|
|
int extra = 0;
|
|
int bits = 1;
|
|
int isweight = 1;
|
|
int targbits = isweight?6:8;
|
|
int v;
|
|
|
|
static qboolean nospam;
|
|
if (nospam)
|
|
return;
|
|
nospam = true;
|
|
|
|
//binary:
|
|
if (!extra)
|
|
{
|
|
for (bits = 1; bits <= (isweight?5:8); bits++)
|
|
{
|
|
Con_Printf("table: %s_%ib", isweight?"weight":"ep", bits);
|
|
for (i = 0; i < (1<<bits); i++)
|
|
{
|
|
v = i;
|
|
v<<=(targbits-bits);
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
|
|
if (isweight && v > 32)
|
|
v++; //0-64 instead of 0-63
|
|
|
|
Con_Printf("0x%02x,", v);
|
|
}
|
|
Con_Printf("\n");
|
|
}
|
|
}
|
|
else if (extra == 1)
|
|
{
|
|
int A,B,C,D;
|
|
|
|
for (bits = 1; bits <= (isweight?3:6); bits++)
|
|
{
|
|
Con_Printf("table: %s_%it:\n", isweight?"weight":"ep", bits);
|
|
for (i = 0; i < ((2<<bits)|(1<<bits)); i++)
|
|
{
|
|
switch(bits)
|
|
{
|
|
case 1:
|
|
A = (i&1)*(isweight?0x7f:0x1ff);
|
|
B = 0;
|
|
C = isweight?50:204;
|
|
D = i>>bits;
|
|
break;
|
|
case 2:
|
|
A = (i&1)*(isweight?0x7f:0x1ff);
|
|
B = ((i>>1)&1) * (isweight?0b1000101:0b100010110);
|
|
C = isweight?25:93;
|
|
D = i>>bits;
|
|
break;
|
|
case 3:
|
|
A = (i&1)*(isweight?0x7f:0x1ff);
|
|
B = ((i>>1)&1) * (isweight?0b0100001:0b010000101); //b
|
|
B|= ((i>>2)&1) * (isweight?0b1000010:0b100001010); //c
|
|
C = isweight?11:44;
|
|
D = i>>bits;
|
|
break;
|
|
case 4:
|
|
A = (i&1)*0x1ff;
|
|
B = ((i>>1)&1) * 0b001000001; //b
|
|
B|= ((i>>2)&1) * 0b010000010; //c
|
|
B|= ((i>>3)&1) * 0b100000100; //d
|
|
C = 22;
|
|
D = i>>bits;
|
|
break;
|
|
case 5:
|
|
A = (i&1)*0x1ff;
|
|
B = ((i>>1)&1) * 0b000100000; //b
|
|
B|= ((i>>2)&1) * 0b001000000; //c
|
|
B|= ((i>>3)&1) * 0b010000001; //d
|
|
B|= ((i>>4)&1) * 0b100000010; //e
|
|
C = 11;
|
|
D = i>>bits;
|
|
break;
|
|
case 6:
|
|
A = (i&1)*0x1ff;
|
|
B = ((i>>1)&1) * 0b000010000; //b
|
|
B|= ((i>>2)&1) * 0b000100000; //c
|
|
B|= ((i>>3)&1) * 0b001000000; //d
|
|
B|= ((i>>4)&1) * 0b010000000; //e
|
|
B|= ((i>>5)&1) * 0b100000001; //f
|
|
C = 5;
|
|
D = i>>bits;
|
|
break;
|
|
}
|
|
v = D * C + B;
|
|
v = v ^ A;
|
|
v = (A & (isweight?0x20:0x80)) | (v >> 2);
|
|
|
|
if (isweight && v > 32)
|
|
v++; //0-64 instead of 0-63
|
|
|
|
Con_Printf("0x%02x,", v);
|
|
}
|
|
Con_Printf("\n");
|
|
}
|
|
}
|
|
else if (extra == 2)
|
|
{
|
|
int A,B,C,D;
|
|
|
|
for (bits = 1; bits <= (isweight?2:5); bits++)
|
|
{
|
|
Con_Printf("table: %s_%iq:\n", isweight?"weight":"ep", bits);
|
|
for (i = 0; i < ((4<<bits)|(1<<bits)); i++)
|
|
{
|
|
switch(bits)
|
|
{
|
|
case 1:
|
|
A = (i&1)*(isweight?0x7f:0x1ff);
|
|
B = 0;
|
|
C = isweight?23:113;
|
|
D = i>>bits;
|
|
break;
|
|
case 2:
|
|
A = (i&1)*(isweight?0x7f:0x1ff);
|
|
B = ((i>>1)&1) * (isweight?0b1000010:0b100001100);
|
|
C = isweight?13:54;
|
|
D = i>>bits;
|
|
break;
|
|
case 3:
|
|
A = (i&1)*0x1ff;
|
|
B = ((i>>1)&1) * 0b010000010; //b
|
|
B|= ((i>>2)&1) * 0b100000101; //c
|
|
C = 26;
|
|
D = i>>bits;
|
|
break;
|
|
case 4:
|
|
A = (i&1)*0x1ff;
|
|
B = ((i>>1)&1) * 0b001000000; //b
|
|
B|= ((i>>2)&1) * 0b010000001; //c
|
|
B|= ((i>>3)&1) * 0b100000010; //d
|
|
C = 13;
|
|
D = i>>bits;
|
|
break;
|
|
case 5:
|
|
A = (i&1)*0x1ff;
|
|
B = ((i>>1)&1) * 0b000100000; //b
|
|
B|= ((i>>2)&1) * 0b001000000; //c
|
|
B|= ((i>>3)&1) * 0b010000000; //d
|
|
B|= ((i>>4)&1) * 0b100000001; //e
|
|
C = 6;
|
|
D = i>>bits;
|
|
break;
|
|
}
|
|
v = D * C + B;
|
|
v = v ^ A;
|
|
v = (A & (isweight?0x20:0x80)) | (v >> 2);
|
|
|
|
if (isweight && v > 32)
|
|
v++; //0-64 instead of 0-63
|
|
|
|
Con_Printf("0x%02x,", v);
|
|
}
|
|
Con_Printf("\n");
|
|
}
|
|
}
|
|
}*/
|
|
|
|
static void ASTC_blue_contract(int *out, int r, int g, int b, int a)
|
|
{
|
|
out[0] = (r+b) >> 1;
|
|
out[1] = (g+b) >> 1;
|
|
out[2] = b;
|
|
out[3] = a;
|
|
}
|
|
static int ASTC_bit_transfer_signed(int a, unsigned char *b) //returns new value for a.
|
|
{
|
|
*b >>= 1;
|
|
*b |= a & 0x80;
|
|
a >>= 1;
|
|
a &= 0x3F;
|
|
if((a&0x20)!=0)
|
|
a=a-0x40;
|
|
return a;
|
|
}
|
|
static void ASTC_clamp_unorm8(int *c)
|
|
{
|
|
c[0] = bound(0, c[0], 255);
|
|
c[1] = bound(0, c[1], 255);
|
|
c[2] = bound(0, c[2], 255);
|
|
c[3] = bound(0, c[3], 255);
|
|
}
|
|
|
|
#ifdef ASTC_WITH_HDR
|
|
static void ASTC_HDR_Mode_2(struct astc_part *p, unsigned char *v)
|
|
{
|
|
int y0,y1;
|
|
if(v[1] >= v[0])
|
|
{
|
|
y0 = (v[0] << 4);
|
|
y1 = (v[1] << 4);
|
|
}
|
|
else
|
|
{
|
|
y0 = (v[1] << 4) + 8;
|
|
y1 = (v[0] << 4) - 8;
|
|
}
|
|
Vector4Set(p->ep[0], y0, y0, y0, 0x780);
|
|
Vector4Set(p->ep[1], y1, y1, y1, 0x780);
|
|
p->hdr = 0xf;
|
|
}
|
|
static void ASTC_HDR_Mode_3(struct astc_part *p, unsigned char *v)
|
|
{
|
|
int y0, y1, d;
|
|
if((v[0]&0x80) != 0)
|
|
{
|
|
y0 = ((v[1] & 0xE0) << 4) | ((v[0] & 0x7F) << 2);
|
|
d = (v[1] & 0x1F) << 2;
|
|
}
|
|
else
|
|
{
|
|
y0 = ((v[1] & 0xF0) << 4) | ((v[0] & 0x7F) << 1);
|
|
d = (v[1] & 0x0F) << 1;
|
|
}
|
|
|
|
y1 = y0 + d;
|
|
if(y1 > 0xFFF)
|
|
y1 = 0xFFF;
|
|
|
|
Vector4Set(p->ep[0], y0, y0, y0, 0x780);
|
|
Vector4Set(p->ep[1], y1, y1, y1, 0x780);
|
|
p->hdr = 0xf;
|
|
}
|
|
static void ASTC_HDR_Mode_7(struct astc_part *p, unsigned char *v)
|
|
{
|
|
int modeval = ((v[0]&0xC0)>>6) | ((v[1]&0x80)>>5) | ((v[2]&0x80)>>4);
|
|
int majcomp;
|
|
int mode;
|
|
static const int shamts[6] = { 1,1,2,3,4,5 };
|
|
int shamt,t;
|
|
|
|
int red, green, blue, scale;
|
|
int x0,x1,x2,x3,x4,x5,x6,ohm;
|
|
|
|
if( (modeval & 0xC ) != 0xC )
|
|
{
|
|
majcomp = modeval >> 2;
|
|
mode = modeval & 3;
|
|
}
|
|
else if( modeval != 0xF )
|
|
{
|
|
majcomp = modeval & 3;
|
|
mode = 4;
|
|
}
|
|
else
|
|
{
|
|
majcomp = 0; mode = 5;
|
|
}
|
|
|
|
red = v[0] & 0x3f;
|
|
green = v[1] & 0x1f;
|
|
blue = v[2] & 0x1f;
|
|
scale = v[3] & 0x1f;
|
|
|
|
x0 = (v[1] >> 6) & 1; x1 = (v[1] >> 5) & 1;
|
|
x2 = (v[2] >> 6) & 1; x3 = (v[2] >> 5) & 1;
|
|
x4 = (v[3] >> 7) & 1; x5 = (v[3] >> 6) & 1;
|
|
x6 = (v[3] >> 5) & 1;
|
|
|
|
ohm = 1 << mode;
|
|
if( ohm & 0x30 ) green |= x0 << 6;
|
|
if( ohm & 0x3A ) green |= x1 << 5;
|
|
if( ohm & 0x30 ) blue |= x2 << 6;
|
|
if( ohm & 0x3A ) blue |= x3 << 5;
|
|
if( ohm & 0x3D ) scale |= x6 << 5;
|
|
if( ohm & 0x2D ) scale |= x5 << 6;
|
|
if( ohm & 0x04 ) scale |= x4 << 7;
|
|
if( ohm & 0x3B ) red |= x4 << 6;
|
|
if( ohm & 0x04 ) red |= x3 << 6;
|
|
if( ohm & 0x10 ) red |= x5 << 7;
|
|
if( ohm & 0x0F ) red |= x2 << 7;
|
|
if( ohm & 0x05 ) red |= x1 << 8;
|
|
if( ohm & 0x0A ) red |= x0 << 8;
|
|
if( ohm & 0x05 ) red |= x0 << 9;
|
|
if( ohm & 0x02 ) red |= x6 << 9;
|
|
if( ohm & 0x01 ) red |= x3 << 10;
|
|
if( ohm & 0x02 ) red |= x5 << 10;
|
|
|
|
shamt = shamts[mode];
|
|
red <<= shamt; green <<= shamt; blue <<= shamt; scale <<= shamt;
|
|
|
|
if( mode != 5 ) { green = red - green; blue = red - blue; }
|
|
|
|
if( majcomp == 1 )
|
|
{
|
|
t = red;
|
|
red = green;
|
|
green = t;
|
|
}
|
|
if( majcomp == 2 )
|
|
{
|
|
t = red;
|
|
red = blue;
|
|
blue = t;
|
|
}
|
|
|
|
p->ep[1][0] = bound( 0, red, 0xFFF );
|
|
p->ep[1][1] = bound( 0, green, 0xFFF );
|
|
p->ep[1][2] = bound( 0, blue, 0xFFF );
|
|
|
|
p->ep[0][0] = bound( 0, red - scale, 0xFFF );
|
|
p->ep[0][1] = bound( 0, green - scale, 0xFFF );
|
|
p->ep[0][2] = bound( 0, blue - scale, 0xFFF );
|
|
|
|
p->ep[1][3] = p->ep[0][3] = 0x780;
|
|
|
|
p->hdr = 0xf;
|
|
}
|
|
static void ASTC_HDR_Mode_11(struct astc_part *p, unsigned char *v)
|
|
{
|
|
static const int dbitstab[8] = {7,6,7,6,5,6,5,6};
|
|
int shamt;
|
|
int majcomp = ((v[4] & 0x80) >> 7) | ((v[5] & 0x80) >> 6);
|
|
int mode,va,vb0,vb1,vc,vd0,vd1;
|
|
int x0,x1,x2,x3,x4,x5,ohm;
|
|
|
|
if( majcomp == 3 )
|
|
{
|
|
Vector4Set(p->ep[0], v[0] << 4, v[2] << 4, (v[4] & 0x7f) << 5, 0x780);
|
|
Vector4Set(p->ep[1], v[1] << 4, v[3] << 4, (v[5] & 0x7f) << 5, 0x780);
|
|
p->hdr = 0xf;
|
|
return;
|
|
}
|
|
|
|
mode = ((v[1]&0x80)>>7) | ((v[2]&0x80)>>6) | ((v[3]&0x80)>>5);
|
|
va = v[0] | ((v[1] & 0x40) << 2);
|
|
vb0 = v[2] & 0x3f;
|
|
vb1 = v[3] & 0x3f;
|
|
vc = v[1] & 0x3f;
|
|
vd0 = v[4] & 0x7f;
|
|
vd1 = v[5] & 0x7f;
|
|
|
|
if (vd0 & (1<<(dbitstab[mode]-1)))
|
|
vd0 |= -1 & ~((1u<<dbitstab[mode])-1);
|
|
if (vd1 & (1<<(dbitstab[mode]-1)))
|
|
vd1 |= -1 & ~((1u<<dbitstab[mode])-1);
|
|
|
|
x0 = (v[2] >> 6) & 1;
|
|
x1 = (v[3] >> 6) & 1;
|
|
x2 = (v[4] >> 6) & 1;
|
|
x3 = (v[5] >> 6) & 1;
|
|
x4 = (v[4] >> 5) & 1;
|
|
x5 = (v[5] >> 5) & 1;
|
|
|
|
ohm = 1 << mode;
|
|
if( ohm & 0xA4 ) va |= x0 << 9;
|
|
if( ohm & 0x08 ) va |= x2 << 9;
|
|
if( ohm & 0x50 ) va |= x4 << 9;
|
|
if( ohm & 0x50 ) va |= x5 << 10;
|
|
if( ohm & 0xA0 ) va |= x1 << 10;
|
|
if( ohm & 0xC0 ) va |= x2 << 11;
|
|
if( ohm & 0x04 ) vc |= x1 << 6;
|
|
if( ohm & 0xE8 ) vc |= x3 << 6;
|
|
if( ohm & 0x20 ) vc |= x2 << 7;
|
|
if( ohm & 0x5B ) vb0 |= x0 << 6;
|
|
if( ohm & 0x5B ) vb1 |= x1 << 6;
|
|
if( ohm & 0x12 ) vb0 |= x2 << 7;
|
|
if( ohm & 0x12 ) vb1 |= x3 << 7;
|
|
|
|
// Now shift up so that major component is at top of 12-bit value
|
|
shamt = (mode >> 1) ^ 3;
|
|
va <<= shamt; vb0 <<= shamt; vb1 <<= shamt;
|
|
vc <<= shamt; vd0 <<= shamt; vd1 <<= shamt;
|
|
|
|
p->ep[1][0] = bound( 0, va, 0xFFF );
|
|
p->ep[1][1] = bound( 0, va - vb0, 0xFFF );
|
|
p->ep[1][2] = bound( 0, va - vb1, 0xFFF );
|
|
|
|
p->ep[0][0] = bound( 0, va - vc, 0xFFF );
|
|
p->ep[0][1] = bound( 0, va - vb0 - vc - vd0, 0xFFF );
|
|
p->ep[0][2] = bound( 0, va - vb1 - vc - vd1, 0xFFF );
|
|
|
|
if( majcomp == 1 )
|
|
{
|
|
p->ep[0][3] = p->ep[0][0];
|
|
p->ep[0][0] = p->ep[0][1];
|
|
p->ep[0][1] = p->ep[0][3];
|
|
p->ep[1][3] = p->ep[1][0];
|
|
p->ep[1][0] = p->ep[1][1];
|
|
p->ep[1][1] = p->ep[1][3];
|
|
}
|
|
else if( majcomp == 2 )
|
|
{
|
|
p->ep[0][3] = p->ep[0][0];
|
|
p->ep[0][0] = p->ep[0][2];
|
|
p->ep[0][2] = p->ep[0][3];
|
|
p->ep[1][3] = p->ep[1][0];
|
|
p->ep[1][0] = p->ep[1][2];
|
|
p->ep[1][2] = p->ep[1][3];
|
|
}
|
|
|
|
p->ep[0][3] = p->ep[1][3] = 0x780;
|
|
|
|
p->hdr = 0xf;
|
|
}
|
|
static void ASTC_HDR_Mode_14(struct astc_part *p, unsigned char *v)
|
|
{
|
|
ASTC_HDR_Mode_11(p, v);
|
|
|
|
p->ep[0][3] = v[6];
|
|
p->ep[1][3] = v[7];
|
|
p->hdr &= 0x7;
|
|
}
|
|
static void ASTC_HDR_Mode_15(struct astc_part *p, unsigned char *v)
|
|
{
|
|
int v6=v[6], v7=v[7];
|
|
int mode;
|
|
ASTC_HDR_Mode_11(p,v);
|
|
|
|
mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
|
|
v6 &= 0x7F;
|
|
v7 &= 0x7F;
|
|
|
|
if(mode==3)
|
|
{
|
|
p->ep[0][3] = v6 << 5;
|
|
p->ep[1][3] = v7 << 5;
|
|
}
|
|
else
|
|
{
|
|
v6 |= (v7 << (mode+1)) & 0x780;
|
|
v7 &= (0x3F >> mode);
|
|
v7 ^= 0x20 >> mode;
|
|
v7 -= 0x20 >> mode;
|
|
v6 <<= (4-mode);
|
|
v7 <<= (4-mode);
|
|
|
|
v7 += v6;
|
|
v7 = bound(0, v7, 0xFFF);
|
|
p->ep[0][3] = v6;
|
|
p->ep[1][3] = v7;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
static void ASTC_DecodeEndpoints(struct astc_block_info *b, unsigned char *v)
|
|
{
|
|
int i, t0, t1, t3, t5, t7;
|
|
|
|
for (i = 0; i < b->partitions; i++)
|
|
{
|
|
#ifdef ASTC_WITH_HDR
|
|
b->part[i].hdr = 0;
|
|
#endif
|
|
switch (b->part[i].mode & 15)
|
|
{
|
|
#ifdef ASTC_WITH_HDR
|
|
case 2: //HDR Luminance, large range
|
|
ASTC_HDR_Mode_2(&b->part[i], v);
|
|
break;
|
|
case 3: //HDR Luminance, small range
|
|
ASTC_HDR_Mode_3(&b->part[i], v);
|
|
break;
|
|
case 7: //HDR RGB, base+scale
|
|
ASTC_HDR_Mode_7(&b->part[i], v);
|
|
break;
|
|
case 11: //HDR RGB
|
|
ASTC_HDR_Mode_11(&b->part[i], v);
|
|
break;
|
|
case 14: //HDR RGB + LDR Alpha
|
|
ASTC_HDR_Mode_14(&b->part[i], v);
|
|
break;
|
|
case 15: //HDR RGB + HDR Alpha
|
|
ASTC_HDR_Mode_15(&b->part[i], v);
|
|
break;
|
|
#endif
|
|
default: //the error colour - for unsupported hdr endpoints. unreachable when hdr is enabled. just fill it with the error colour.
|
|
Vector4Set(b->part[i].ep[0], 0xff, 0, 0xff, 0xff);
|
|
Vector4Set(b->part[i].ep[1], 0xff, 0, 0xff, 0xff);
|
|
break;
|
|
|
|
case 0: //LDR Luminance, direct
|
|
Vector4Set(b->part[i].ep[0], v[0], v[0], v[0], 0xff);
|
|
Vector4Set(b->part[i].ep[1], v[1], v[1], v[1], 0xff);
|
|
break;
|
|
case 1: //LDR Luminance, base+offset
|
|
t0 = (v[0]>>2)|(v[1]&0xc0);
|
|
t1 = t0+(v[1]&0x3f);
|
|
if (t1>0xff)
|
|
t1=0xff;
|
|
Vector4Set(b->part[i].ep[0], t0, t0, t0, 0xff);
|
|
Vector4Set(b->part[i].ep[1], t1, t1, t1, 0xff);
|
|
break;
|
|
case 4: //LDR Luminance+Alpha,direct
|
|
Vector4Set(b->part[i].ep[0], v[0], v[0], v[0], v[2]);
|
|
Vector4Set(b->part[i].ep[1], v[1], v[1], v[1], v[3]);
|
|
break;
|
|
case 5: //LDR Luminance+Alpha, base+offset
|
|
t1 = ASTC_bit_transfer_signed(v[1],&v[0]);
|
|
t3 = ASTC_bit_transfer_signed(v[3],&v[2]);
|
|
Vector4Set(b->part[i].ep[0],v[0],v[0],v[0],v[2]);
|
|
Vector4Set(b->part[i].ep[1],v[0]+t1,v[0]+t1,v[0]+t1,v[2]+t3);
|
|
ASTC_clamp_unorm8(b->part[i].ep[0]);
|
|
ASTC_clamp_unorm8(b->part[i].ep[1]);
|
|
break;
|
|
case 6: //LDR RGB, base+scale
|
|
Vector4Set(b->part[i].ep[0], ((int)v[0]*(int)v[3])>>8, ((int)v[1]*(int)v[3])>>8, ((int)v[2]*(int)v[3])>>8, 0xff);
|
|
Vector4Set(b->part[i].ep[1], v[0], v[1], v[2], 0xff);
|
|
break;
|
|
case 8: //LDR RGB, Direct
|
|
t0 = (int)v[0]+(int)v[2]+(int)v[4];
|
|
t1 = (int)v[1]+(int)v[3]+(int)v[5];
|
|
if (t1>=t0)
|
|
{
|
|
Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],0xff);
|
|
Vector4Set(b->part[i].ep[1], v[1],v[3],v[5],0xff);
|
|
}
|
|
else
|
|
{
|
|
ASTC_blue_contract(b->part[i].ep[0], v[1],v[3],v[5], 0xff);
|
|
ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4], 0xff);
|
|
}
|
|
break;
|
|
case 9: //LDR RGB, base+offset
|
|
t1 = ASTC_bit_transfer_signed(v[1],&v[0]);
|
|
t3 = ASTC_bit_transfer_signed(v[3],&v[2]);
|
|
t5 = ASTC_bit_transfer_signed(v[5],&v[4]);
|
|
if(t1+t3+t5 >= 0)
|
|
{
|
|
Vector4Set(b->part[i].ep[0],v[0],v[2],v[4],0xff);
|
|
Vector4Set(b->part[i].ep[1],v[0]+t1,v[2]+t3,v[4]+t5,0xff);
|
|
}
|
|
else
|
|
{
|
|
ASTC_blue_contract(b->part[i].ep[0], v[0]+t1,v[2]+t3,v[4]+t5, 0xff);
|
|
ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4], 0xff);
|
|
}
|
|
ASTC_clamp_unorm8(b->part[i].ep[0]);
|
|
ASTC_clamp_unorm8(b->part[i].ep[1]);
|
|
break;
|
|
case 10: //LDR RGB, base+scale plus two A
|
|
Vector4Set(b->part[i].ep[0], ((int)v[0]*v[3])>>8, ((int)v[1]*v[3])>>8, ((int)v[2]*v[3])>>8, v[4]);
|
|
Vector4Set(b->part[i].ep[1], v[0], v[1], v[2], v[5]);
|
|
break;
|
|
case 12: //LDR RGBA, direct
|
|
if (v[1]+(int)v[3]+v[5]>=v[0]+(int)v[2]+v[4])
|
|
{
|
|
Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],v[6]);
|
|
Vector4Set(b->part[i].ep[1], v[1],v[3],v[5],v[7]);
|
|
}
|
|
else
|
|
{
|
|
ASTC_blue_contract(b->part[i].ep[0], v[1],v[3],v[5],v[7]);
|
|
ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4],v[6]);
|
|
}
|
|
break;
|
|
case 13: //LDR RGBA, base+offset
|
|
t1 = ASTC_bit_transfer_signed(v[1],&v[0]);
|
|
t3 = ASTC_bit_transfer_signed(v[3],&v[2]);
|
|
t5 = ASTC_bit_transfer_signed(v[5],&v[4]);
|
|
t7 = ASTC_bit_transfer_signed(v[7],&v[6]);
|
|
if(t1+t3+t5>=0)
|
|
{
|
|
Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],v[6]);
|
|
Vector4Set(b->part[i].ep[1], v[0]+t1,v[2]+t3,v[4]+t5,v[6]+t7);
|
|
}
|
|
else
|
|
{
|
|
ASTC_blue_contract(b->part[i].ep[0], v[0]+t1,v[2]+t3,v[4]+t5,v[6]+t7);
|
|
ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4],v[6]);
|
|
}
|
|
ASTC_clamp_unorm8(b->part[i].ep[0]);
|
|
ASTC_clamp_unorm8(b->part[i].ep[1]);
|
|
break;
|
|
}
|
|
v += ((b->part[i].mode>>2)+1)<<1;
|
|
}
|
|
}
|
|
static void ASTC_ReadEndpoints(struct astc_block_info *b)
|
|
{
|
|
int i;
|
|
int cembits;
|
|
|
|
unsigned char epv[18]; //maximum raw endpoint values,
|
|
char epvalues;
|
|
unsigned char gahffs[16], t;
|
|
|
|
//figure out how many raw values we need
|
|
epvalues = 0;
|
|
for (i = 0; i < b->partitions; i++)
|
|
epvalues += ((b->part[i].mode>>2)+1)<<1;
|
|
if (epvalues > countof(epv))
|
|
{
|
|
b->status = ASTC_ERROR;
|
|
return;
|
|
}
|
|
|
|
//the endpoint bits are encoded using the largest size available that'll still fit, yielding raw values between 0-255.
|
|
for(i = countof(astc_epvmode)-1; i >= 0; i--)
|
|
{
|
|
cembits = ASTC_DecodeSize(epvalues, astc_epvmode[i].bits, astc_epvmode[i].extra);
|
|
if(cembits <= b->ep_bits)
|
|
{
|
|
//read the values.
|
|
ASTC_Decode(b->in, epv, epvalues, b->config_bits, astc_epvmode[i].bits, astc_epvmode[i].extra, astc_epvmode[i].dequant);
|
|
//and decode them.
|
|
ASTC_DecodeEndpoints(b, epv);
|
|
|
|
//weight bits are backwards (gah! ffs!)
|
|
//so swap them around so our decode function doesn't need to care
|
|
for (i = 0; i < countof(gahffs); i++)
|
|
{
|
|
t = b->in[i];
|
|
t = (t>>4)|(t<<4);
|
|
t = ((t&0xcc)>>2)|((t&0x33)<<2);
|
|
t = ((t&0xaa)>>1)|((t&0x55)<<1);
|
|
gahffs[15-i] = t;
|
|
}
|
|
//weights are aligned at the end... now the start. gah! ffs!
|
|
ASTC_Decode(gahffs, b->weights, b->wcount[3], 0, astc_weightmode[b->precision].bits, astc_weightmode[b->precision].extra, astc_weightmode[b->precision].dequant);
|
|
return;
|
|
}
|
|
}
|
|
b->status = ASTC_ERROR;
|
|
}
|
|
|
|
static unsigned int hash52(unsigned int p)
|
|
{
|
|
p ^= p >> 15; p -= p << 17; p += p << 7; p += p << 4;
|
|
p ^= p >> 5; p += p << 16; p ^= p >> 7; p ^= p >> 3;
|
|
p ^= p << 6; p ^= p >> 17;
|
|
return p;
|
|
}
|
|
static int ASTC_ChoosePartition(int seed, int x, int y, int z, int partitions, int smallblock)
|
|
{
|
|
int sh1, sh2, sh3, a,b,c,d;
|
|
unsigned int rnum;
|
|
unsigned char seed1,seed2,seed3,seed4,seed5,seed6,seed7,seed8,seed9,seed10,seed11,seed12;
|
|
if (partitions==1)
|
|
return 0;
|
|
if (smallblock)
|
|
{
|
|
x <<= 1;
|
|
y <<= 1;
|
|
z <<= 1;
|
|
}
|
|
seed += (partitions-1) * 1024;
|
|
rnum = hash52(seed);
|
|
seed1 = rnum & 0xF;
|
|
seed2 = (rnum >> 4) & 0xF;
|
|
seed3 = (rnum >> 8) & 0xF;
|
|
seed4 = (rnum >> 12) & 0xF;
|
|
seed5 = (rnum >> 16) & 0xF;
|
|
seed6 = (rnum >> 20) & 0xF;
|
|
seed7 = (rnum >> 24) & 0xF;
|
|
seed8 = (rnum >> 28) & 0xF;
|
|
seed9 = (rnum >> 18) & 0xF;
|
|
seed10 = (rnum >> 22) & 0xF;
|
|
seed11 = (rnum >> 26) & 0xF;
|
|
seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
|
|
|
|
seed1 *= seed1; seed2 *= seed2;
|
|
seed3 *= seed3; seed4 *= seed4;
|
|
seed5 *= seed5; seed6 *= seed6;
|
|
seed7 *= seed7; seed8 *= seed8;
|
|
seed9 *= seed9; seed10 *= seed10;
|
|
seed11 *= seed11; seed12 *= seed12;
|
|
|
|
|
|
if (seed & 1)
|
|
{
|
|
sh1 = ((seed&2) ? 4:5);
|
|
sh2 = ((partitions==3) ? 6:5);
|
|
}
|
|
else
|
|
{
|
|
sh1 = ((partitions==3) ? 6:5);
|
|
sh2 = ((seed&2) ? 4:5);
|
|
}
|
|
sh3 = (seed & 0x10) ? sh1 : sh2;
|
|
|
|
seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2;
|
|
seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2;
|
|
seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3;
|
|
|
|
a = seed1*x + seed2*y + seed11*z + (rnum >> 14);
|
|
b = seed3*x + seed4*y + seed12*z + (rnum >> 10);
|
|
c = seed5*x + seed6*y + seed9 *z + (rnum >> 6);
|
|
d = seed7*x + seed8*y + seed10*z + (rnum >> 2);
|
|
|
|
a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F;
|
|
|
|
if (partitions < 4)
|
|
d = 0;
|
|
if (partitions < 3)
|
|
c = 0;
|
|
|
|
if (a >= b && a >= c && a >= d)
|
|
return 0;
|
|
else if (b >= c && b >= d)
|
|
return 1;
|
|
else if (c >= d)
|
|
return 2;
|
|
else
|
|
return 3;
|
|
}
|
|
#endif
|
|
|
|
#ifdef ASTC_WITH_LDR
|
|
//Spits out 8-bit RGBA data for a single block. Any HDR blocks will result in the error colour.
|
|
//sRGB can be applied by the caller, if needed.
|
|
ASTC_PUBLIC void ASTC_Decode_LDR8(unsigned char *in, unsigned char *out, int pixstride, int layerstride, int bw, int bh, int bd)
|
|
{
|
|
struct astc_block_info b;
|
|
int x, y;
|
|
int stride = pixstride*4;
|
|
#ifdef ASTC_WITH_3D
|
|
int z;
|
|
layerstride = layerstride*4-(stride*bh);
|
|
#else
|
|
if (bd != 1)
|
|
return; //error!
|
|
#endif
|
|
b.in = in;
|
|
b.blocksize[0] = bw;
|
|
b.blocksize[1] = bh;
|
|
b.blocksize[2] = bd;
|
|
ASTC_ReadBlockMode(&b);
|
|
|
|
if (b.status == ASTC_VOID_LDR)
|
|
{ //void extent
|
|
//Note: we don't validate the extents.
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
for (x = 0; x < bw; x++)
|
|
{
|
|
out[(x<<2)+0] = in[9];
|
|
out[(x<<2)+1] = in[11];
|
|
out[(x<<2)+2] = in[13];
|
|
out[(x<<2)+3] = in[15];
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (b.status == ASTC_OKAY)
|
|
ASTC_ReadPartitions(&b);
|
|
if (b.status == ASTC_OKAY)
|
|
ASTC_ReadEndpoints(&b);
|
|
|
|
if (b.status == ASTC_OKAY)
|
|
{
|
|
#define N b.wcount[0]
|
|
#define M b.wcount[1]
|
|
int s1=1<<b.dualplane,s2=N<<b.dualplane; //values for 2d blocks (3d blocks will override)
|
|
int s3=((bd!=1?N*M:0)+N+1)<<b.dualplane; //small variation for 3d blocks.
|
|
|
|
int smallblock = (b.blocksize[0]*b.blocksize[1]*b.blocksize[2])<31;
|
|
int fs, s, ds = (1024+b.blocksize[0]/2)/(b.blocksize[0]-1);
|
|
int ft, t, dt = (1024+b.blocksize[1]/2)/(b.blocksize[1]-1);
|
|
#ifdef ASTC_WITH_3D
|
|
int fr, r, dr = (1024+b.blocksize[2]/2)/(b.blocksize[2]-1);
|
|
#endif
|
|
int v0, w, w00,w01,w10,w11;
|
|
struct astc_part *p;
|
|
|
|
#ifdef ASTC_WITH_HDR
|
|
for (x = 0; x < b.partitions; x++)
|
|
{ //the LDR profile treats HDR endpoints as the error colour. this is per-partition rather than per-block.
|
|
if (b.part[x].hdr)
|
|
{
|
|
Vector4Set(b.part[x].ep[0], 0xff, 0, 0xff, 0xff);
|
|
Vector4Set(b.part[x].ep[1], 0xff, 0, 0xff, 0xff);
|
|
}
|
|
//else FIXME: when spitting out 8bit, we're meant to have an extra 9th bit which is always set, in order to avoid round-to-zero biasing the result of the final 8 bits.
|
|
}
|
|
#endif
|
|
|
|
#ifdef ASTC_WITH_3D
|
|
for (z = 0; z < bd; z++, out += layerstride-stride*bh)
|
|
#endif
|
|
{
|
|
#ifdef ASTC_WITH_3D
|
|
r = ((dr*z)*(b.wcount[2]-1)+32)>>6;
|
|
fr=r&0xf;
|
|
#endif
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
{
|
|
t = ((dt*y)*(b.wcount[1]-1)+32)>>6;
|
|
ft=t&0xf;
|
|
for (x = 0; x < bw; x++)
|
|
{
|
|
p = &b.part[ASTC_ChoosePartition(b.partindex, x,y,0, b.partitions, smallblock)];
|
|
s = ((ds*x)*(b.wcount[0]-1)+32)>>6;
|
|
fs=s&0xf;
|
|
#ifdef ASTC_WITH_3D
|
|
if (bd != 1)
|
|
{ //3d blocks use simplex interpolation instead of 8-way interpolation. its easier for hardware but more cycles for us.
|
|
if (fs>fr)
|
|
{ //figure out which weights/factors to use.
|
|
if (ft>fr)
|
|
{
|
|
if (fs>ft)
|
|
s1=1, s2=N, w00=16-fs, w01=fs-ft, w10=ft-fr, w11=fr;
|
|
else
|
|
s1=N, s2=1, w00=16-ft, w01=ft-fs, w10=fs-fr, w11=fr;
|
|
}
|
|
else
|
|
s1=1, s2=N*M, w00=16-fs, w01=fs-fr, w10=fr-ft, w11=ft;
|
|
}
|
|
else
|
|
{
|
|
if (fs>ft)
|
|
s1=N*M, s2=1, w00=16-fr, w01=fr-fs, w10=fs-ft, w11=ft;
|
|
else
|
|
{
|
|
if (ft>fr)
|
|
s1=N, s2=N*M, w00=16-ft, w01=ft-fr, w10=fr-fs, w11=fs;
|
|
else
|
|
s1=N*M, s2=N, w00=16-fr, w01=fr-ft, w10=ft-fs, w11=fs;
|
|
}
|
|
}
|
|
|
|
s1 <<= b.dualplane;
|
|
s2 <<= b.dualplane;
|
|
s2+=s1;
|
|
//s3 = (N*M+N+1)<<b.dualplane;
|
|
v0 = ((s>>4)+(t>>4)*N+(r>>4)*N*M) << b.dualplane;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
//s1 = 1<<b.dualplane;
|
|
//s2 = (N)<<b.dualplane;
|
|
//s3 = (N+1)<<b.dualplane;
|
|
w11 = (fs*ft+8) >> 4;
|
|
w10 = ft - w11;
|
|
w01 = fs - w11;
|
|
w00 = 16 - fs - ft + w11;
|
|
v0 = ((s>>4)+(t>>4)*N) << b.dualplane;
|
|
}
|
|
w = ( w00*b.weights[v0] +
|
|
w01*b.weights[v0+s1] +
|
|
w10*b.weights[v0+s2] +
|
|
w11*b.weights[v0+s3] + 8) >> 4;
|
|
out[(x<<2)+0] = ((64-w)*p->ep[0][0] + w*p->ep[1][0])>>6;
|
|
out[(x<<2)+1] = ((64-w)*p->ep[0][1] + w*p->ep[1][1])>>6;
|
|
out[(x<<2)+2] = ((64-w)*p->ep[0][2] + w*p->ep[1][2])>>6;
|
|
out[(x<<2)+3] = ((64-w)*p->ep[0][3] + w*p->ep[1][3])>>6;
|
|
|
|
if (b.dualplane)
|
|
{ //dual planes has a second set of weights that override a single channel
|
|
v0++;
|
|
w = ( w00*b.weights[v0] +
|
|
w01*b.weights[v0+s1] +
|
|
w10*b.weights[v0+s2] +
|
|
w11*b.weights[v0+s3] + 8) >> 4;
|
|
out[(x<<2)+b.ccs] = ((64-w)*p->ep[0][b.ccs] + w*p->ep[1][b.ccs])>>6;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#undef N
|
|
#undef M
|
|
}
|
|
else
|
|
{ //error colour == magenta
|
|
#ifdef ASTC_WITH_3D
|
|
for (z = 0; z < bd; z++, out += layerstride)
|
|
#endif
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
for (x = 0; x < bw; x++)
|
|
{
|
|
out[(x<<2)+0] = 0xff;
|
|
out[(x<<2)+1] = 0;
|
|
out[(x<<2)+2] = 0xff;
|
|
out[(x<<2)+3] = 0xff;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#ifdef ASTC_WITH_HDR
|
|
static unsigned short ASTC_GenHalffloat(int hdr, int rawval)
|
|
{
|
|
if (hdr)
|
|
{
|
|
int fp16, m;
|
|
fp16 = (rawval&0xF800) >> 1;
|
|
m = rawval&0x7FF;
|
|
if (m < 512)
|
|
fp16 |= (3*m)>>3;
|
|
else if (m >= 1536)
|
|
fp16 |= (5*m - 2048)>>3;
|
|
else
|
|
fp16 |= (4*m - 512)>>3;
|
|
return fp16;
|
|
}
|
|
else
|
|
{
|
|
union
|
|
{
|
|
float f;
|
|
unsigned int u;
|
|
} u = {rawval/65535.0};
|
|
int e = 0;
|
|
int m;
|
|
|
|
e = ((u.u>>23)&0xff) - 127;
|
|
if (e < -15)
|
|
return 0; //too small exponent, treat it as a 0 denormal
|
|
if (e > 15)
|
|
m = 0; //infinity instead of a nan
|
|
else
|
|
m = (u.u&((1<<23)-1))>>13;
|
|
return ((e+15)<<10) | m;
|
|
}
|
|
}
|
|
|
|
//Spits out half-float RGBA data for a single block.
|
|
ASTC_PUBLIC void ASTC_Decode_HDR(unsigned char *in, unsigned short *out, int pixstride, int layerstride, int bw, int bh, int bd)
|
|
{
|
|
int x, y;
|
|
int stride = pixstride*4;
|
|
struct astc_block_info b;
|
|
#ifdef ASTC_WITH_3D
|
|
int z;
|
|
layerstride = layerstride*4-(stride*bh);
|
|
#else
|
|
if (bd != 1)
|
|
return; //error!
|
|
#endif
|
|
b.in = in;
|
|
b.blocksize[0] = bw;
|
|
b.blocksize[1] = bh;
|
|
b.blocksize[2] = bd;
|
|
|
|
ASTC_ReadBlockMode(&b);
|
|
|
|
if (b.status == ASTC_VOID_HDR)
|
|
{ //void extent
|
|
//Note: we don't validate the extents.
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
for (x = 0; x < bw; x++)
|
|
{ //hdr void extents already use fp16
|
|
out[(x<<2)+0] = in[8] | (in[9]<<8);
|
|
out[(x<<2)+1] = in[10] | (in[11]<<8);
|
|
out[(x<<2)+2] = in[12] | (in[13]<<8);
|
|
out[(x<<2)+3] = in[14] | (in[15]<<8);
|
|
}
|
|
return;
|
|
}
|
|
if (b.status == ASTC_VOID_LDR)
|
|
{ //void extent
|
|
//Note: we don't validate the extents.
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
for (x = 0; x < bw; x++)
|
|
{
|
|
out[(x<<2)+0] = ASTC_GenHalffloat(0, in[8] | (in[9]<<8));
|
|
out[(x<<2)+1] = ASTC_GenHalffloat(0, in[10] | (in[11]<<8));
|
|
out[(x<<2)+2] = ASTC_GenHalffloat(0, in[12] | (in[13]<<8));
|
|
out[(x<<2)+3] = ASTC_GenHalffloat(0, in[14] | (in[15]<<8));
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (b.status == ASTC_OKAY)
|
|
ASTC_ReadPartitions(&b);
|
|
if (b.status == ASTC_OKAY)
|
|
ASTC_ReadEndpoints(&b);
|
|
|
|
if (b.status == ASTC_OKAY)
|
|
{
|
|
#define N b.wcount[0]
|
|
#define M b.wcount[1]
|
|
int s1=1<<b.dualplane,s2=N<<b.dualplane; //values for 2d blocks (3d blocks will override)
|
|
int s3=((bd!=1?N*M:0)+N+1)<<b.dualplane; //small variation for 3d blocks.
|
|
|
|
int smallblock = (b.blocksize[0]*b.blocksize[1]*b.blocksize[2])<31;
|
|
int fs, s, ds = (1024+b.blocksize[0]/2)/(b.blocksize[0]-1);
|
|
int ft, t, dt = (1024+b.blocksize[1]/2)/(b.blocksize[1]-1);
|
|
#ifdef ASTC_WITH_3D
|
|
int fr, r, dr = (1024+b.blocksize[2]/2)/(b.blocksize[2]-1);
|
|
#endif
|
|
int v0, w, w00,w01,w10,w11;
|
|
struct astc_part *p;
|
|
|
|
for (x = 0; x < b.partitions; x++)
|
|
{ //we need to do a little extra processing here
|
|
for (y = 0; y < 4; y++)
|
|
{
|
|
if (b.part[x].hdr&(1<<y))
|
|
{ //the 12bit endpoint values are shifted up to 16bit...
|
|
b.part[x].ep[0][y] <<= 4;
|
|
b.part[x].ep[1][y] <<= 4;
|
|
}
|
|
else
|
|
{ //convert to unorm16.
|
|
b.part[x].ep[0][y] |= b.part[x].ep[0][y] << 8;
|
|
b.part[x].ep[1][y] |= b.part[x].ep[1][y] << 8;
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef ASTC_WITH_3D
|
|
for (z = 0; z < bd; z++, out += layerstride)
|
|
#endif
|
|
{
|
|
#ifdef ASTC_WITH_3D
|
|
r = ((dr*z)*(b.wcount[2]-1)+32)>>6;
|
|
fr=r&0xf;
|
|
#endif
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
{
|
|
t = ((dt*y)*(b.wcount[1]-1)+32)>>6;
|
|
ft=t&0xf;
|
|
for (x = 0; x < bw; x++)
|
|
{
|
|
p = &b.part[ASTC_ChoosePartition(b.partindex, x,y,0, b.partitions, smallblock)];
|
|
s = ((ds*x)*(b.wcount[0]-1)+32)>>6;
|
|
fs=s&0xf;
|
|
#ifdef ASTC_WITH_3D
|
|
if (bd != 1)
|
|
{ //3d blocks use simplex interpolation instead of 8-way interpolation. its easier for hardware but more cycles for us.
|
|
if (fs>fr)
|
|
{ //figure out which weights/factors to use.
|
|
if (ft>fr)
|
|
{
|
|
if (fs>ft)
|
|
s1=1, s2=N, w00=16-fs, w01=fs-ft, w10=ft-fr, w11=fr;
|
|
else
|
|
s1=N, s2=1, w00=16-ft, w01=ft-fs, w10=fs-fr, w11=fr;
|
|
}
|
|
else
|
|
s1=1, s2=N*M, w00=16-fs, w01=fs-fr, w10=fr-ft, w11=ft;
|
|
}
|
|
else
|
|
{
|
|
if (fs>ft)
|
|
s1=N*M, s2=1, w00=16-fr, w01=fr-fs, w10=fs-ft, w11=ft;
|
|
else
|
|
{
|
|
if (ft>fr)
|
|
s1=N, s2=N*M, w00=16-ft, w01=ft-fr, w10=fr-fs, w11=fs;
|
|
else
|
|
s1=N*M, s2=N, w00=16-fr, w01=fr-ft, w10=ft-fs, w11=fs;
|
|
}
|
|
}
|
|
|
|
s1 <<= b.dualplane;
|
|
s2 <<= b.dualplane;
|
|
s2+=s1;
|
|
//s3 = (N*M+N+1)<<b.dualplane;
|
|
v0 = (((s>>4))+((t>>4)*N)+(r>>4)*N*M) << b.dualplane;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
//s1 = 1<<b.dualplane;
|
|
//s2 = (N)<<b.dualplane;
|
|
//s3 = (N+1)<<b.dualplane;
|
|
w11 = (fs*ft+8) >> 4;
|
|
w10 = ft - w11;
|
|
w01 = fs - w11;
|
|
w00 = 16 - fs - ft + w11;
|
|
|
|
v0 = (((s>>4))+(t>>4)*N) << b.dualplane;
|
|
}
|
|
w = ( w00*b.weights[v0] +
|
|
w01*b.weights[v0+s1] +
|
|
w10*b.weights[v0+s2] +
|
|
w11*b.weights[v0+s3] + 8) >> 4;
|
|
out[(x<<2)+0] = ASTC_GenHalffloat(p->hdr&1, ((64-w)*p->ep[0][0] + w*p->ep[1][0])>>6);
|
|
out[(x<<2)+1] = ASTC_GenHalffloat(p->hdr&1, ((64-w)*p->ep[0][1] + w*p->ep[1][1])>>6);
|
|
out[(x<<2)+2] = ASTC_GenHalffloat(p->hdr&1, ((64-w)*p->ep[0][2] + w*p->ep[1][2])>>6);
|
|
out[(x<<2)+3] = ASTC_GenHalffloat(p->hdr&8, ((64-w)*p->ep[0][3] + w*p->ep[1][3])>>6);
|
|
|
|
if (b.dualplane)
|
|
{ //dual planes has a second set of weights that override a single channel
|
|
v0++;
|
|
w = ( w00*b.weights[v0] +
|
|
w01*b.weights[v0+s1] +
|
|
w10*b.weights[v0+s2] +
|
|
w11*b.weights[v0+s3] + 8) >> 4;
|
|
out[(x<<2)+b.ccs] = ASTC_GenHalffloat(p->hdr&(1<<b.ccs), ((64-w)*p->ep[0][b.ccs] + w*p->ep[1][b.ccs])>>6);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#undef N
|
|
#undef M
|
|
}
|
|
else
|
|
{ //error colour == magenta
|
|
#ifdef ASTC_WITH_3D
|
|
for (z = 0; z < bd; z++, out += layerstride)
|
|
#endif
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
for (x = 0; x < bw; x++)
|
|
{
|
|
out[(x<<2)+0] = 0xf<<10;
|
|
out[(x<<2)+1] = 0;
|
|
out[(x<<2)+2] = 0xf<<10;
|
|
out[(x<<2)+3] = 0xf<<10;
|
|
}
|
|
}
|
|
}
|
|
#endif
|