fteqw/engine/client/image_astc.h
Spoike 41b0d993f2 smoother console scrolling (at least with the mouse)
support RLE+luminance+alpha tga files.
support half-float tga files.
recognise hdr astc images.
added appropriate fallbacks for astc support.
load mip-less .astc files (mostly just for debugging stuff).
allow packages to warn about required engine/gpu features.
catch when stdin flags get changed to blocking by external libraries, to avoid fatal stalls.
basic support for .mdx files (kingpin models)
sort packages loaded via wildcards, by datetime then name, to avoid random ordering from certain filesystems.


git-svn-id: https://svn.code.sf.net/p/fteqw/code/trunk@5531 fc73d0e0-1445-4013-8a0c-d673dee63da5
2019-09-04 08:32:22 +00:00

1567 lines
46 KiB
C

//Note: this code does not claim to be bit-correct.
//It doesn't support volume textures.
//It doesn't validate block extents (and is generally unaware of more than one block anyway)
//It doesn't implement all validation checks, either.
//Do NOT use this code to validate any encoders...
//Build-time configuration, public prototypes and small helper macros.
//ASTC_PUBLIC may be predefined by the includer to control linkage of the public entry points.
#ifndef ASTC_PUBLIC
#define ASTC_PUBLIC
#endif
#define ASTC_WITH_LDR //comment out this line to disable pure-LDR decoding (the hdr code can still be used).
#define ASTC_WITH_HDR //comment out this line to disable HDR decoding.
#define ASTC_WITH_HDRTEST //comment out this line to disable the ASTC_BlocksAreHDR profile test.
//headers come before the prototypes so that size_t is declared by the time ASTC_BlocksAreHDR needs it.
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#ifdef ASTC_WITH_LDR
ASTC_PUBLIC void ASTC_Decode_LDR8(unsigned char *in, unsigned char *out, int pixstride, int bw,int bh); //generates RGBA8 data (gives error colour for hdr blocks!)
#endif
#ifdef ASTC_WITH_HDR
ASTC_PUBLIC void ASTC_Decode_HDR(unsigned char *in, unsigned short *out, int pixstride, int bw,int bh); //generates RGBA16F data.
#endif
#ifdef ASTC_WITH_HDRTEST
ASTC_PUBLIC int ASTC_BlocksAreHDR(unsigned char *in, size_t datasize, int bw, int bh, int bd); //returns non-zero if the given blocks appear to require the HDR profile.
#endif
//fallbacks for helpers that the host project normally provides. each is only defined if the includer has not already done so.
#ifndef Vector4Set
#define Vector4Set(r,x,y,z,w) {(r)[0] = x; (r)[1] = y;(r)[2] = z;(r)[3]=w;}
#endif
#ifndef countof
#define countof(array) (sizeof(array)/sizeof(array[0]))
#endif
#ifndef bound
#define bound(min,num,max) ((num) >= (min) ? ((num) < (max) ? (num) : (max)) : (min)) //clamps num to the [min,max] range
#endif
#if defined(ASTC_WITH_LDR) || defined(ASTC_WITH_HDR)
#define ASTC_WITH_DECODE
#endif
//Per-block decode status, as stored in astc_block_info.status by ASTC_ReadBlockMode.
//Only ASTC_OKAY blocks carry partitions/weights; the two VOID values mark single-colour blocks.
enum
{
ASTC_OKAY, //regular block, decoding can proceed.
ASTC_ERROR, //validation errors
ASTC_UNSUPPORTED_FULL, //volume textures... Note: non-hdr profile errors are per-partition, so not an actual block error.
ASTC_RESERVED, //reserved bits. basically an error but might not be in the future.
ASTC_VOID_LDR, //not an error - the block is a single LDR colour, with an RGBA16 colour in the last 8 bytes.
ASTC_VOID_HDR //not an error - the block is a single HDR colour, with an RGBA16F colour in the last 8 bytes.
};
//Working state for decoding a single 16-byte ASTC block.
//The caller fills in 'in' and 'blocksize'; ASTC_ReadBlockMode and ASTC_ReadPartitions fill in the rest.
struct astc_block_info
{
unsigned char *in; //the 16 bytes of the block
char blocksize[3]; //block footprint in texels: width, height, depth (depth must be 1, 3d blocks are rejected)
char status; //one of the ASTC_* status values above (ASTC_OKAY for a regular block)
unsigned char dualplane; //two sets of weights instead of one.
unsigned char ccs; //second set applies to this component
unsigned char precision; //defines the precision of the weights (index into astc_weightmode)
int wcount[4]; //x,y,z,total weight counts
int weight_bits; //size of weights section.
int config_bits; //size of header before the endpoint bits
int ep_bits; //size available to endpoints
unsigned char weights[64]; //official limit to the number of weights stored
unsigned char partitions; //number of active partitions to select from (and number of endpoints to read)
unsigned short partindex; //used for deciding which partition each pixel belongs in
struct astc_part
{
char mode; //endpoint modes (0-15, see the switch in ASTC_DecodeEndpoints)
#ifdef ASTC_WITH_HDR
//endpoint colour mode flags.
//NOTE(review): originally documented as "&1=rgb, &2=alpha", but the endpoint decoders store 0xf (all HDR), 0x7 (HDR rgb + LDR alpha) or 0 (LDR),
//which looks like one bit per channel - confirm against the code that consumes it.
char hdr;
#endif
int ep[2][4]; //the two endpoint colours, each as r,g,b,a
} part[4];
};
//Extracts 'count' bits starting 'offset' bits into 'in' (least significant bit first).
//At most two bytes are touched, and the result is narrowed to unsigned char, so only the low 8 bits of the field survive.
static unsigned char ASTC_readbits(unsigned char *in, unsigned int offset, unsigned int count)
{
	const unsigned char *src = in + (offset >> 3);	//byte holding the first requested bit
	const unsigned int shift = offset & 7;		//position of that bit within the byte
	unsigned int window = src[0];

	//only touch the second byte when the field actually spills over into it.
	if (shift + count > 8)
		window |= (unsigned int)src[1] << 8;

	window >>= shift;
	return (unsigned char)(window & ((1u << count) - 1));
}
//Reads a field of up to 32 bits by stitching together 8-bit reads from ASTC_readbits.
//in: block data. offset: bit position of the field's least significant bit. count: field width in bits.
//Returns the field value, with the bit at 'offset' as bit 0.
static unsigned int ASTC_readmanybits(unsigned char *in, unsigned int offset, unsigned int count)
{
	unsigned int r = 0;
	//peel whole bytes off the top of the field until no more than 8 bits remain.
	while (count > 8)
	{
		count -= 8;
		//widen before shifting: the byte would otherwise be promoted to (signed) int, and shifting a set top bit into bit 31 is undefined.
		r |= (unsigned int)ASTC_readbits(in, offset+count, 8) << count;
	}
	r |= ASTC_readbits(in, offset, count);
	return r;
}
//Weight dequantisation tables: raw encoded value -> weight in the range 0-64 inclusive.
//values above 32 have 1 added (otherwise the range would only be 0-63)
//naming: _Nb = N plain bits, _Nt = one trit plus N bits, _Nq = one quint plus N bits (see the extra/bits pairs in astc_weightmode).
//the trit/quint tables are indexed by (trit_or_quint<<N)|bits, which is how ASTC_Decode assembles each value.
static unsigned char dequant_weight_1b[1<<1] = {0x00,0x40};
static unsigned char dequant_weight_2b[1<<2] = {0x00,0x15,0x2b,0x40};
static unsigned char dequant_weight_3b[1<<3] = {0x00,0x09,0x12,0x1b,0x25,0x2e,0x37,0x40};
static unsigned char dequant_weight_4b[1<<4] = {0x00,0x04,0x08,0x0c,0x11,0x15,0x19,0x1d,0x23,0x27,0x2b,0x2f,0x34,0x38,0x3c,0x40};
static unsigned char dequant_weight_5b[1<<5] = {0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,0x40};
static unsigned char dequant_weight_0t[3] = {0,32,64};
static unsigned char dequant_weight_1t[6] = {0x00,0x40,0x0c,0x34,0x19,0x27};
static unsigned char dequant_weight_2t[12] = {0x00,0x40,0x11,0x2f,0x06,0x3a,0x17,0x29,0x0c,0x34,0x1d,0x23};
static unsigned char dequant_weight_3t[24] = {0x00,0x40,0x08,0x38,0x10,0x30,0x18,0x28,0x02,0x3e,0x0b,0x35,0x13,0x2d,0x1b,0x25,0x05,0x3b,0x0d,0x33,0x16,0x2a,0x1e,0x22};
static unsigned char dequant_weight_0q[5] = {0,16,32,48,64};
static unsigned char dequant_weight_1q[10] = {0x00,0x40,0x05,0x3b,0x0b,0x35,0x11,0x2f,0x17,0x29};
static unsigned char dequant_weight_2q[20] = {0x00,0x40,0x10,0x30,0x03,0x3d,0x13,0x2d,0x06,0x3a,0x17,0x29,0x09,0x37,0x1a,0x26,0x0d,0x33,0x1d,0x23};
//Weight encodings, indexed by astc_block_info.precision (the 4-bit value assembled in ASTC_ReadBlockMode).
//extra: 0=plain bits only, 1=a trit per value, 2=a quint per value. bits: plain bits per value. dequant: lookup table for the assembled value.
//the trailing comment on each row is the number of distinct weight levels. rows with a NULL table are not valid encodings.
static const struct
{
unsigned char extra, bits, *dequant;
} astc_weightmode[] =
{
{0,0, NULL}, //invalid
{0,0, NULL}, //invalid
{0,1, dequant_weight_1b}, //2
{1,0, dequant_weight_0t}, //3
{0,2, dequant_weight_2b}, //4
{2,0, dequant_weight_0q}, //5
{1,1, dequant_weight_1t}, //6
{0,3, dequant_weight_3b}, //8
{0,0, NULL}, //invalid
{0,0, NULL}, //invalid
{2,1, dequant_weight_1q}, //10
{1,2, dequant_weight_2t}, //12
{0,4, dequant_weight_4b}, //16
{2,2, dequant_weight_2q}, //20
{1,3, dequant_weight_3t}, //24
{0,5, dequant_weight_5b}, //32
};
//Computes how many bits a sequence of 'count' encoded values occupies.
//bits: plain bits per value. extra: 1 if each value also carries a trit (8 bits shared per 5 values), 2 for a quint (7 bits shared per 3 values), anything else for none.
static unsigned int ASTC_DecodeSize(unsigned int count, unsigned int bits, unsigned char extra)
{
	unsigned int total = count * bits;

	switch (extra)
	{
	case 1:	//trits: ceil(count*8/5)
		total += (count*8 + 4) / 5;
		break;
	case 2:	//quints: ceil(count*7/3)
		total += (count*7 + 2) / 3;
		break;
	default:
		break;
	}
	return total;
}
//Parses the 13-bit block-mode header of b->in.
//Inputs: b->in (16 bytes of block data) and b->blocksize (block footprint).
//Outputs: status, dualplane, precision, wcount[], partitions, weight_bits and config_bits (always 13 on return).
//Void-extent blocks return early with status ASTC_VOID_LDR/ASTC_VOID_HDR and zeroed counts.
//Any status other than ASTC_OKAY means the remaining fields should not be trusted for decoding.
static void ASTC_ReadBlockMode(struct astc_block_info *b)
{
	unsigned char *in = b->in;
	unsigned short s = ASTC_readmanybits(in, 0, 13);//in[0] | (in[1]<<8);
	b->config_bits = 13;

	//start from known values, so the unsupported/reserved paths below never leave stale or uninitialised counts behind.
	b->wcount[0] = b->wcount[1] = b->wcount[2] = b->wcount[3] = 0;
	b->weight_bits = 0;

	if ((s&0x1ff)==0x1fc)
	{	//void extent
		if (s&0x200)
			b->status = ASTC_VOID_HDR;
		else
			b->status = ASTC_VOID_LDR;
		b->dualplane = b->precision = b->partitions = 0;
		return;
	}

	b->status = ASTC_OKAY;
	b->dualplane = (s>>10)&1;	//Dp
	b->precision = (s>>(9-3))&(1<<3);//P
	b->precision |= (s>>4)&1;	//p0

	if (b->blocksize[2] != 1)
	{	//3d blocks have a different layout
		b->status = ASTC_UNSUPPORTED_FULL;
	}
	else
	{
		b->wcount[2] = 1;
		if (s&3)
		{	//one of the first 5 layouts...
			b->precision|=(s&3)<<1;	//p2, p1
			if (!(s&8))
			{	//first two layouts...
				if (!(s&4))
				{	//layout0
					b->wcount[0] = ((s>>7)&3)+4;
					b->wcount[1] = ((s>>5)&3)+2;
				}
				else
				{	//layout1
					b->wcount[0] = ((s>>7)&3)+8;
					b->wcount[1] = ((s>>5)&3)+2;
				}
			}
			else if (!(s&4))
			{	//layout2
				b->wcount[0] = ((s>>5)&3)+2;
				b->wcount[1] = ((s>>7)&3)+8;
			}
			else if (!(s&256))
			{	//layout3
				b->wcount[0] = ((s>>5)&3)+2;
				b->wcount[1] = ((s>>7)&1)+6;
			}
			else
			{	//layout4
				b->wcount[0] = ((s>>7)&1)+2;
				b->wcount[1] = ((s>>5)&3)+2;
			}
		}
		else
		{	//one of the later layouts
			b->precision|=(s&0xc)>>1;	//p2, p1
			if (!(s&384))
			{
				b->wcount[0] = 12;
				b->wcount[1] = ((s>>5)&3)+2;
			}
			else if ((s&384)==128)
			{
				b->wcount[0] = ((s>>5)&3)+2;
				b->wcount[1] = 12;
			}
			else if ((s&480)==384)
			{
				b->wcount[0] = 6;
				b->wcount[1] = 10;
			}
			else if ((s&480)==416)
			{
				b->wcount[0] = 10;
				b->wcount[1] = 6;
			}
			else if ((s&384)==256)
			{
				b->wcount[0] = ((s>>5)&3)+6;
				b->wcount[1] = ((s>>9)&3)+6;
				b->dualplane = 0;	//forget the Dp bit, its reused in this layout
				b->precision &= 7;	//forget the P bit, too
			}
			else
				b->status = ASTC_RESERVED;	//reserved
		}
	}

	//precision values without a dequantisation table are not real encodings (eg: all of the low four mode bits clear).
	//flag them here instead of letting a later stage index through a NULL table.
	if (b->status == ASTC_OKAY && !astc_weightmode[b->precision].dequant)
		b->status = ASTC_RESERVED;

	b->partitions = ((s>>11)&3)+1;
	if (b->partitions > 3 && b->dualplane)
		b->status = ASTC_ERROR;	//apparently.

	if (b->wcount[0] > b->blocksize[0] || b->wcount[1] > b->blocksize[1] || b->wcount[2] > b->blocksize[2])
		b->status = ASTC_ERROR;	//invalid weight counts.
	b->wcount[3] = b->wcount[0] * b->wcount[1] * b->wcount[2];
	b->wcount[3]<<=b->dualplane;	//dual-plane has twice the weights - interleaved.
	if (b->wcount[3] > (int)countof(b->weights))
		b->status = ASTC_ERROR;	//more than 64 weights are banned, for some reason

	b->weight_bits = ASTC_DecodeSize(b->wcount[3], astc_weightmode[b->precision].bits, astc_weightmode[b->precision].extra);
}
//Parses the partition index and per-partition endpoint modes that follow the block-mode header.
//Expects ASTC_ReadBlockMode to have filled in partitions, dualplane, weight_bits and config_bits.
//Fills in partindex, part[].mode, ccs and ep_bits, and advances config_bits past the fields it consumed.
static void ASTC_ReadPartitions(struct astc_block_info *b)
{
	unsigned char *data = b->in;
	int topbits = b->weight_bits;	//bits claimed at the top of the block: the weights, plus anything stored just beneath them
	int p;

	if (b->partitions != 1)
	{	//multi-partition: 10 bits of partition index followed by a 6bit endpoint-mode selector
		int cem;
		b->partindex = ASTC_readmanybits(data, b->config_bits, 10);
		b->config_bits += 10;
		cem = ASTC_readbits(data, b->config_bits, 6);
		b->config_bits += 6;

		if ((cem&3) != 0)
		{	//each partition gets its own mode. the bits that don't fit in the selector live directly below the weights.
			int extrabits = b->partitions*3 - 4;
			int classbit = 2;			//first of the per-partition class bits
			int modebit = 2 + b->partitions;	//first of the per-partition 2bit mode fields
			topbits += extrabits;
			cem |= ASTC_readbits(data, 128-topbits, extrabits)<<6;

			for (p = 0; p < b->partitions; p++)
			{
				int m = ((cem&3)-1)<<2;			//class groups
				m += ((cem>>(classbit+p))&1)<<2;	//class
				m += (cem>>(modebit + p*2))&3;		//specific mode info
				b->part[p].mode = m;
			}
		}
		else
		{	//every partition shares one mode
			int shared = (cem>>2)&0xf;
			for (p = 0; p < b->partitions; p++)
				b->part[p].mode = shared;
		}
	}
	else
	{	//single partition: a plain 4bit endpoint mode
		b->partindex = 0;
		b->part[0].mode = ASTC_readbits(data, b->config_bits, 4);
		b->config_bits += 4;
	}

	//the dual-plane component selector sits beneath whatever has been claimed so far.
	if (b->dualplane)
	{
		topbits += 2;
		b->ccs = ASTC_readbits(data, 128-topbits, 2);
	}
	else
		b->ccs = 0;

	//whatever lies between the header and the top-of-block data is endpoint data:
	//weights are at 128-weight_bits to 128
	//epdata is at config_bits to config_bits+ep_bits
	b->ep_bits = 128 - topbits - b->config_bits;
}
#ifdef ASTC_WITH_HDRTEST
//Scans consecutive 16-byte blocks to guess whether the data needs the HDR profile.
//in: block data. datasize: size of that data in bytes (any trailing partial block is ignored). bw,bh,bd: block footprint.
//Returns 1 as soon as an HDR void-extent block or an HDR endpoint mode is seen.
//Returns 0 on the first LDR void-extent block (a heuristic), or when no HDR usage is found.
//Blocks that fail to parse are skipped.
ASTC_PUBLIC int ASTC_BlocksAreHDR(unsigned char *in, size_t datasize, int bw, int bh, int bd)
{
	struct astc_block_info b;
	int i;
	size_t blocks = datasize/16;
	b.blocksize[0] = bw;
	b.blocksize[1] = bh;
	b.blocksize[2] = bd;

	//the pointer advance lives in the loop header so that skipped (unparsable) blocks still move on to the next block,
	//instead of re-reading the same bad block until the counter runs out.
	for (b.in = in; blocks --> 0; b.in += 16)
	{
		ASTC_ReadBlockMode(&b);
		if (b.status == ASTC_VOID_HDR)
			return 1;	//if we're getting hdr blocks then we can decode properly only with hdr
		if (b.status == ASTC_VOID_LDR)
			return 0;	//if we're getting ldr blocks, then its unlikely that there's any hdr blocks in there.
		if (b.status != ASTC_OKAY)
			continue;

		ASTC_ReadPartitions(&b);
		for (i = 0; i < b.partitions; i++)
		{
			switch(b.part[i].mode)
			{
			case 2:
			case 3:
			case 7:
			case 11:
			case 14:
			case 15:
				return 1;	//one of the hdr endpoint modes
			default:
				break;
			}
		}
	}
	return 0;
}
#endif
#ifdef ASTC_WITH_DECODE
//Cursor-style wrapper around ASTC_readbits: reads 'count' bits at *offset, then advances *offset past them.
//The same width limits as ASTC_readbits apply.
static unsigned char ASTC_readbits2(unsigned char *in, unsigned int *offset, unsigned int count)
{
	const unsigned int at = *offset;
	*offset = at + count;
	return ASTC_readbits(in, at, count);
}
//Unpacks 'count' encoded values starting at bit 'offset' of 'in' and writes their dequantised forms to 'out'.
//bits: plain bits per value. extra: 0=bits only, 1=a trit per value (packed 5 values at a time), 2=a quint per value (packed 3 at a time).
//dequant: lookup table indexed by (trit_or_quint<<bits)|plainbits.
//A final partial group is handled by treating the unread packed bits as zero; its unused slots are zeroed so nothing indeterminate is ever read.
static void ASTC_Decode(unsigned char *in, unsigned char *out, int count, unsigned int offset, int bits, int extra, unsigned char *dequant)
{
	unsigned char block[5];
	int j;

	//unfortunately these trits depend upon the values of the later bits in each block.
	//if only it were a nice simple modulo...
	if (extra==1)
	{
		//read it 5 samples at a time
		while(count > 0)
		{
			unsigned int t, c;
			//slots beyond 'count' are never read from the stream, so clear them before they get ORed into below.
			block[1] = block[2] = block[3] = block[4] = 0;
			block[0] = ASTC_readbits2(in, &offset, bits);
			t = ASTC_readbits2(in, &offset, 2);
			if (count > 1)
			{
				block[1] = ASTC_readbits2(in, &offset, bits);
				t |= ASTC_readbits2(in, &offset, 2)<<2;
			}
			if (count > 2)
			{
				block[2] = ASTC_readbits2(in, &offset, bits);
				t |= ASTC_readbits2(in, &offset, 1)<<4;
			}
			if (count > 3)
			{
				block[3] = ASTC_readbits2(in, &offset, bits);
				t |= ASTC_readbits2(in, &offset, 2)<<5;
			}
			if (count > 4)
			{
				block[4] = ASTC_readbits2(in, &offset, bits);
				t |= ASTC_readbits2(in, &offset, 1)<<7;
			}

			//okay, we read the block, now figure out the trits and pack them into the high part of the result
			if ((t&0x1c) == 0x1c)
			{
				c = ((t>>3)&0x1c) | (t&3);
				block[4] |= 2<<bits;
				block[3] |= 2<<bits;
			}
			else
			{
				c = t&0x1f;
				if ((t&0x60) == 0x60)
				{
					block[4] |= 2<<bits;
					block[3] |= (t>>7)<<bits;
				}
				else
				{
					block[4] |= (t>>7)<<bits;
					block[3] |= ((t>>5)&3)<<bits;
				}
			}
			if ((c&3)==3)
			{
				block[2] |= 2<<bits;
				block[1] |= ((c>>4)&1)<<bits;
				block[0] |= (((c>>2)&2) | ((c>>2)&~(c>>3)&1))<<bits;
			}
			else if ((c&0xc)==0xc)
			{
				block[2] |= 2<<bits;
				block[1] |= 2<<bits;
				block[0] |= (c&3)<<bits;
			}
			else
			{
				block[2] |= ((c>>4)&1)<<bits;
				block[1] |= ((c>>2)&3)<<bits;
				block[0] |= ((c&2)|(c&1&~(c>>1)))<<bits;
			}

			//spit out the result
			for (j = 0; j < 5 && j < count; j++)
				*out++ = dequant[block[j]];
			count -= 5;
		}
	}
	else if (extra == 2)
	{
		//read it 3 samples at a time
		while(count > 0)
		{
			unsigned int t, c;
			//as above: clear the slots that a partial group will not fill.
			block[1] = block[2] = 0;
			block[0] = ASTC_readbits2(in, &offset, bits);
			t = ASTC_readbits2(in, &offset, 3);
			if (count > 1)
			{
				block[1] = ASTC_readbits2(in, &offset, bits);
				t |= ASTC_readbits2(in, &offset, 2)<<3;
			}
			if (count > 2)
			{
				block[2] = ASTC_readbits2(in, &offset, bits);
				t |= ASTC_readbits2(in, &offset, 2)<<5;
			}

			//okay, we read the block, now figure out the quints and pack them into the high part of the result
			if ((t&6)==6 && !(t&0x60))
			{
				block[2] |= (((t&1)<<2) | (((t>>4)&~t&1)<<1) | ((t>>3)&~t&1))<<bits;
				block[1] |= 4<<bits;
				block[0] |= 4<<bits;
			}
			else
			{
				if ((t&6) == 6)
				{
					block[2] |= 4<<bits;
					c = ((t>>3)&3)<<3;
					c |= (~(t>>5)&3)<<1;
					c |= t&1;
				}
				else
				{
					block[2] |= ((t>>5)&3)<<bits;
					c = t&0x1f;
				}
				if ((c&7) == 5)
				{
					block[1] |= 4<<bits;
					block[0] |= ((c>>3)&3)<<bits;
				}
				else
				{
					block[1] |= ((c>>3)&3)<<bits;
					block[0] |= (c&7)<<bits;
				}
			}

			//spit out the result
			for (j = 0; j < 3 && j < count; j++)
				*out++ = dequant[block[j]];
			count -= 3;
		}
	}
	else while(count --> 0)	//pure bits, nice and simple
	{
		unsigned char val = ASTC_readbits2(in, &offset, bits);
		*out++ = dequant[val];
	}
}
//Endpoint dequantisation tables: raw encoded value -> endpoint value in the range 0-255.
//endpoints have a logical value between 0 and 255.
//bit replication is used to fill in missing precision
//naming: _Nb = N plain bits, _Nt = one trit plus N bits, _Nq = one quint plus N bits (see the extra/bits pairs in astc_epvmode).
//the trit/quint tables are indexed by (trit_or_quint<<N)|bits, which is how ASTC_Decode assembles each value.
static unsigned char dequant_ep_1b[1<<1] = {0,255};
static unsigned char dequant_ep_2b[1<<2] = {0x00,0x55,0xaa,0xff};
static unsigned char dequant_ep_3b[1<<3] = {0x00,0x24,0x49,0x6d,0x92,0xb6,0xdb,0xff};
static unsigned char dequant_ep_4b[1<<4] = {
0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff};
static unsigned char dequant_ep_5b[1<<5] = {
0x00,0x08,0x10,0x18,0x21,0x29,0x31,0x39,0x42,0x4a,0x52,0x5a,0x63,0x6b,0x73,0x7b,
0x84,0x8c,0x94,0x9c,0xa5,0xad,0xb5,0xbd,0xc6,0xce,0xd6,0xde,0xe7,0xef,0xf7,0xff};
static unsigned char dequant_ep_6b[1<<6] = {
0x00,0x04,0x08,0x0c,0x10,0x14,0x18,0x1c,0x20,0x24,0x28,0x2c,0x30,0x34,0x38,0x3c,
0x41,0x45,0x49,0x4d,0x51,0x55,0x59,0x5d,0x61,0x65,0x69,0x6d,0x71,0x75,0x79,0x7d,
0x82,0x86,0x8a,0x8e,0x92,0x96,0x9a,0x9e,0xa2,0xa6,0xaa,0xae,0xb2,0xb6,0xba,0xbe,
0xc3,0xc7,0xcb,0xcf,0xd3,0xd7,0xdb,0xdf,0xe3,0xe7,0xeb,0xef,0xf3,0xf7,0xfb,0xff};
static unsigned char dequant_ep_7b[1<<7] = {
0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,
0x20,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,
0x40,0x42,0x44,0x46,0x48,0x4a,0x4c,0x4e,0x50,0x52,0x54,0x56,0x58,0x5a,0x5c,0x5e,
0x60,0x62,0x64,0x66,0x68,0x6a,0x6c,0x6e,0x70,0x72,0x74,0x76,0x78,0x7a,0x7c,0x7e,
0x81,0x83,0x85,0x87,0x89,0x8b,0x8d,0x8f,0x91,0x93,0x95,0x97,0x99,0x9b,0x9d,0x9f,
0xa1,0xa3,0xa5,0xa7,0xa9,0xab,0xad,0xaf,0xb1,0xb3,0xb5,0xb7,0xb9,0xbb,0xbd,0xbf,
0xc1,0xc3,0xc5,0xc7,0xc9,0xcb,0xcd,0xcf,0xd1,0xd3,0xd5,0xd7,0xd9,0xdb,0xdd,0xdf,
0xe1,0xe3,0xe5,0xe7,0xe9,0xeb,0xed,0xef,0xf1,0xf3,0xf5,0xf7,0xf9,0xfb,0xfd,0xff};
static unsigned char dequant_ep_8b[1<<8] = {
0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff};
static unsigned char dequant_ep_0t[3] = {0,128,255};
static unsigned char dequant_ep_1t[6] = {0x00,0xff,0x33,0xcc,0x66,0x99};
static unsigned char dequant_ep_2t[12] = {0x00,0xff,0x45,0xba,0x17,0xe8,0x5c,0xa3,0x2e,0xd1,0x74,0x8b};
static unsigned char dequant_ep_3t[24] = {0x00,0xff,0x21,0xde,0x42,0xbd,0x63,0x9c,0x0b,0xf4,0x2c,0xd3,0x4d,0xb2,0x6e,0x91,0x16,0xe9,0x37,0xc8,0x58,0xa7,0x79,0x86};
static unsigned char dequant_ep_4t[48] = {0x00,0xff,0x10,0xef,0x20,0xdf,0x30,0xcf,0x41,0xbe,0x51,0xae,0x61,0x9e,0x71,0x8e,0x05,0xfa,0x15,0xea,0x26,0xd9,0x36,0xc9,0x46,0xb9,0x56,0xa9,0x67,0x98,0x77,0x88,0x0b,0xf4,0x1b,0xe4,0x2b,0xd4,0x3b,0xc4,0x4c,0xb3,0x5c,0xa3,0x6c,0x93,0x7c,0x83};
static unsigned char dequant_ep_5t[96] = {0x00,0xff,0x08,0xf7,0x10,0xef,0x18,0xe7,0x20,0xdf,0x28,0xd7,0x30,0xcf,0x38,0xc7,0x40,0xbf,0x48,0xb7,0x50,0xaf,0x58,0xa7,0x60,0x9f,0x68,0x97,0x70,0x8f,0x78,0x87,0x02,0xfd,0x0a,0xf5,0x12,0xed,0x1a,0xe5,0x23,0xdc,0x2b,0xd4,0x33,0xcc,0x3b,0xc4,0x43,0xbc,0x4b,0xb4,0x53,0xac,0x5b,0xa4,0x63,0x9c,0x6b,0x94,0x73,0x8c,0x7b,0x84,0x05,0xfa,0x0d,0xf2,0x15,0xea,0x1d,0xe2,0x25,0xda,0x2d,0xd2,0x35,0xca,0x3d,0xc2,0x46,0xb9,0x4e,0xb1,0x56,0xa9,0x5e,0xa1,0x66,0x99,0x6e,0x91,0x76,0x89,0x7e,0x81};
static unsigned char dequant_ep_6t[192]= {0x00,0xff,0x04,0xfb,0x08,0xf7,0x0c,0xf3,0x10,0xef,0x14,0xeb,0x18,0xe7,0x1c,0xe3,0x20,0xdf,0x24,0xdb,0x28,0xd7,0x2c,0xd3,0x30,0xcf,0x34,0xcb,0x38,0xc7,0x3c,0xc3,0x40,0xbf,0x44,0xbb,0x48,0xb7,0x4c,0xb3,0x50,0xaf,0x54,0xab,0x58,0xa7,0x5c,0xa3,0x60,0x9f,0x64,0x9b,0x68,0x97,0x6c,0x93,0x70,0x8f,0x74,0x8b,0x78,0x87,0x7c,0x83,0x01,0xfe,0x05,0xfa,0x09,0xf6,0x0d,0xf2,0x11,0xee,0x15,0xea,0x19,0xe6,0x1d,0xe2,0x21,0xde,0x25,0xda,0x29,0xd6,0x2d,0xd2,0x31,0xce,0x35,0xca,0x39,0xc6,0x3d,0xc2,0x41,0xbe,0x45,0xba,0x49,0xb6,0x4d,0xb2,0x51,0xae,0x55,0xaa,0x59,0xa6,0x5d,0xa2,0x61,0x9e,0x65,0x9a,0x69,0x96,0x6d,0x92,0x71,0x8e,0x75,0x8a,0x79,0x86,0x7d,0x82,0x02,0xfd,0x06,0xf9,0x0a,0xf5,0x0e,0xf1,0x12,0xed,0x16,0xe9,0x1a,0xe5,0x1e,0xe1,0x22,0xdd,0x26,0xd9,0x2a,0xd5,0x2e,0xd1,0x32,0xcd,0x36,0xc9,0x3a,0xc5,0x3e,0xc1,0x42,0xbd,0x46,0xb9,0x4a,0xb5,0x4e,0xb1,0x52,0xad,0x56,0xa9,0x5a,0xa5,0x5e,0xa1,0x62,0x9d,0x66,0x99,0x6a,0x95,0x6e,0x91,0x72,0x8d,0x76,0x89,0x7a,0x85,0x7e,0x81};
static unsigned char dequant_ep_0q[5] = {0,64,128,192,255};
static unsigned char dequant_ep_1q[10] = {0x00,0xff,0x1c,0xe3,0x38,0xc7,0x54,0xab,0x71,0x8e};
static unsigned char dequant_ep_2q[20] = {0x00,0xff,0x43,0xbc,0x0d,0xf2,0x50,0xaf,0x1b,0xe4,0x5e,0xa1,0x28,0xd7,0x6b,0x94,0x36,0xc9,0x79,0x86};
static unsigned char dequant_ep_3q[40] = {0x00,0xff,0x20,0xdf,0x41,0xbe,0x61,0x9e,0x06,0xf9,0x27,0xd8,0x47,0xb8,0x68,0x97,0x0d,0xf2,0x2d,0xd2,0x4e,0xb1,0x6e,0x91,0x13,0xec,0x34,0xcb,0x54,0xab,0x75,0x8a,0x1a,0xe5,0x3a,0xc5,0x5b,0xa4,0x7b,0x84};
static unsigned char dequant_ep_4q[80] = {0x00,0xff,0x10,0xef,0x20,0xdf,0x30,0xcf,0x40,0xbf,0x50,0xaf,0x60,0x9f,0x70,0x8f,0x03,0xfc,0x13,0xec,0x23,0xdc,0x33,0xcc,0x43,0xbc,0x53,0xac,0x64,0x9b,0x74,0x8b,0x06,0xf9,0x16,0xe9,0x26,0xd9,0x36,0xc9,0x47,0xb8,0x57,0xa8,0x67,0x98,0x77,0x88,0x09,0xf6,0x19,0xe6,0x2a,0xd5,0x3a,0xc5,0x4a,0xb5,0x5a,0xa5,0x6a,0x95,0x7a,0x85,0x0d,0xf2,0x1d,0xe2,0x2d,0xd2,0x3d,0xc2,0x4d,0xb2,0x5d,0xa2,0x6d,0x92,0x7d,0x82};
static unsigned char dequant_ep_5q[160]= {0x00,0xff,0x08,0xf7,0x10,0xef,0x18,0xe7,0x20,0xdf,0x28,0xd7,0x30,0xcf,0x38,0xc7,0x40,0xbf,0x48,0xb7,0x50,0xaf,0x58,0xa7,0x60,0x9f,0x68,0x97,0x70,0x8f,0x78,0x87,0x01,0xfe,0x09,0xf6,0x11,0xee,0x19,0xe6,0x21,0xde,0x29,0xd6,0x31,0xce,0x39,0xc6,0x41,0xbe,0x49,0xb6,0x51,0xae,0x59,0xa6,0x61,0x9e,0x69,0x96,0x71,0x8e,0x79,0x86,0x03,0xfc,0x0b,0xf4,0x13,0xec,0x1b,0xe4,0x23,0xdc,0x2b,0xd4,0x33,0xcc,0x3b,0xc4,0x43,0xbc,0x4b,0xb4,0x53,0xac,0x5b,0xa4,0x63,0x9c,0x6b,0x94,0x73,0x8c,0x7b,0x84,0x04,0xfb,0x0c,0xf3,0x14,0xeb,0x1c,0xe3,0x24,0xdb,0x2c,0xd3,0x34,0xcb,0x3c,0xc3,0x44,0xbb,0x4c,0xb3,0x54,0xab,0x5c,0xa3,0x64,0x9b,0x6c,0x93,0x74,0x8b,0x7c,0x83,0x06,0xf9,0x0e,0xf1,0x16,0xe9,0x1e,0xe1,0x26,0xd9,0x2e,0xd1,0x36,0xc9,0x3e,0xc1,0x46,0xb9,0x4e,0xb1,0x56,0xa9,0x5e,0xa1,0x66,0x99,0x6e,0x91,0x76,0x89,0x7e,0x81};
//Endpoint value encodings, ordered by increasing number of distinct levels (the trailing comment on each row).
//extra: 0=plain bits only, 1=a trit per value, 2=a quint per value. bits: plain bits per value. dequant: lookup table for the assembled value.
//NOTE(review): the code that picks a row is outside this part of the file - presumably the largest encoding that fits in ep_bits, confirm there.
static const struct
{
unsigned char extra, bits, *dequant;
} astc_epvmode[] =
{
{0,1, dequant_ep_1b}, //2
{1,0, dequant_ep_0t}, //3
{0,2, dequant_ep_2b}, //4
{2,0, dequant_ep_0q}, //5
{1,1, dequant_ep_1t}, //6
{0,3, dequant_ep_3b}, //8
{2,1, dequant_ep_1q}, //10
{1,2, dequant_ep_2t}, //12
{0,4, dequant_ep_4b}, //16
{2,2, dequant_ep_2q}, //20
{1,3, dequant_ep_3t}, //24
{0,5, dequant_ep_5b}, //32
{2,3, dequant_ep_3q}, //40
{1,4, dequant_ep_4t}, //48
{0,6, dequant_ep_6b}, //64
{2,4, dequant_ep_4q}, //80
{1,5, dequant_ep_5t}, //96
{0,7, dequant_ep_7b}, //128
{2,5, dequant_ep_5q}, //160
{1,6, dequant_ep_6t}, //192
{0,8, dequant_ep_8b}, //256
//other modes don't make any sense
};
/*static void ASTC_CalcDequant(void)
{
int i;
int extra = 0;
int bits = 1;
int isweight = 1;
int targbits = isweight?6:8;
int v;
static qboolean nospam;
if (nospam)
return;
nospam = true;
//binary:
if (!extra)
{
for (bits = 1; bits <= (isweight?5:8); bits++)
{
Con_Printf("table: %s_%ib", isweight?"weight":"ep", bits);
for (i = 0; i < (1<<bits); i++)
{
v = i;
v<<=(targbits-bits);
v|=v>>bits;
v|=v>>bits;
v|=v>>bits;
v|=v>>bits;
v|=v>>bits;
v|=v>>bits;
v|=v>>bits;
v|=v>>bits;
if (isweight && v > 32)
v++; //0-64 instead of 0-63
Con_Printf("0x%02x,", v);
}
Con_Printf("\n");
}
}
else if (extra == 1)
{
int A,B,C,D;
for (bits = 1; bits <= (isweight?3:6); bits++)
{
Con_Printf("table: %s_%it:\n", isweight?"weight":"ep", bits);
for (i = 0; i < ((2<<bits)|(1<<bits)); i++)
{
switch(bits)
{
case 1:
A = (i&1)*(isweight?0x7f:0x1ff);
B = 0;
C = isweight?50:204;
D = i>>bits;
break;
case 2:
A = (i&1)*(isweight?0x7f:0x1ff);
B = ((i>>1)&1) * (isweight?0b1000101:0b100010110);
C = isweight?25:93;
D = i>>bits;
break;
case 3:
A = (i&1)*(isweight?0x7f:0x1ff);
B = ((i>>1)&1) * (isweight?0b0100001:0b010000101); //b
B|= ((i>>2)&1) * (isweight?0b1000010:0b100001010); //c
C = isweight?11:44;
D = i>>bits;
break;
case 4:
A = (i&1)*0x1ff;
B = ((i>>1)&1) * 0b001000001; //b
B|= ((i>>2)&1) * 0b010000010; //c
B|= ((i>>3)&1) * 0b100000100; //d
C = 22;
D = i>>bits;
break;
case 5:
A = (i&1)*0x1ff;
B = ((i>>1)&1) * 0b000100000; //b
B|= ((i>>2)&1) * 0b001000000; //c
B|= ((i>>3)&1) * 0b010000001; //d
B|= ((i>>4)&1) * 0b100000010; //e
C = 11;
D = i>>bits;
break;
case 6:
A = (i&1)*0x1ff;
B = ((i>>1)&1) * 0b000010000; //b
B|= ((i>>2)&1) * 0b000100000; //c
B|= ((i>>3)&1) * 0b001000000; //d
B|= ((i>>4)&1) * 0b010000000; //e
B|= ((i>>5)&1) * 0b100000001; //f
C = 5;
D = i>>bits;
break;
}
v = D * C + B;
v = v ^ A;
v = (A & (isweight?0x20:0x80)) | (v >> 2);
if (isweight && v > 32)
v++; //0-64 instead of 0-63
Con_Printf("0x%02x,", v);
}
Con_Printf("\n");
}
}
else if (extra == 2)
{
int A,B,C,D;
for (bits = 1; bits <= (isweight?2:5); bits++)
{
Con_Printf("table: %s_%iq:\n", isweight?"weight":"ep", bits);
for (i = 0; i < ((4<<bits)|(1<<bits)); i++)
{
switch(bits)
{
case 1:
A = (i&1)*(isweight?0x7f:0x1ff);
B = 0;
C = isweight?23:113;
D = i>>bits;
break;
case 2:
A = (i&1)*(isweight?0x7f:0x1ff);
B = ((i>>1)&1) * (isweight?0b1000010:0b100001100);
C = isweight?13:54;
D = i>>bits;
break;
case 3:
A = (i&1)*0x1ff;
B = ((i>>1)&1) * 0b010000010; //b
B|= ((i>>2)&1) * 0b100000101; //c
C = 26;
D = i>>bits;
break;
case 4:
A = (i&1)*0x1ff;
B = ((i>>1)&1) * 0b001000000; //b
B|= ((i>>2)&1) * 0b010000001; //c
B|= ((i>>3)&1) * 0b100000010; //d
C = 13;
D = i>>bits;
break;
case 5:
A = (i&1)*0x1ff;
B = ((i>>1)&1) * 0b000100000; //b
B|= ((i>>2)&1) * 0b001000000; //c
B|= ((i>>3)&1) * 0b010000000; //d
B|= ((i>>4)&1) * 0b100000001; //e
C = 6;
D = i>>bits;
break;
}
v = D * C + B;
v = v ^ A;
v = (A & (isweight?0x20:0x80)) | (v >> 2);
if (isweight && v > 32)
v++; //0-64 instead of 0-63
Con_Printf("0x%02x,", v);
}
Con_Printf("\n");
}
}
}*/
//Writes the 'blue contracted' form of a colour to out[0..3]: red and green are each averaged with blue, blue and alpha pass through untouched.
static void ASTC_blue_contract(int *out, int r, int g, int b, int a)
{
	const int red = (r + b) >> 1;
	const int green = (g + b) >> 1;

	out[3] = a;
	out[2] = b;
	out[1] = green;
	out[0] = red;
}
//Moves the top bit of 'a' into the top bit of *b (after shifting *b down by one), and returns what is left of 'a'
//as a signed 6-bit offset in the range -32..31.
static int ASTC_bit_transfer_signed(int a, unsigned char *b) //returns new value for a.
{
	int delta = (a >> 1) & 0x3F;

	*b = (*b >> 1) | (a & 0x80);

	//bit 5 is the sign of the 6bit offset.
	return (delta & 0x20) ? delta - 0x40 : delta;
}
//Clamps each of the four channels of c[] to the 0..255 range, in place.
static void ASTC_clamp_unorm8(int *c)
{
	int ch;
	for (ch = 0; ch < 4; ch++)
		c[ch] = bound(0, c[ch], 255);
}
#ifdef ASTC_WITH_HDR
//Endpoint mode 2 (HDR luminance, large range): builds two grey endpoints from v[0] and v[1].
//The relative order of the two values picks between two slightly offset ranges. Alpha is fixed at 0x780 and every channel is flagged as hdr.
static void ASTC_HDR_Mode_2(struct astc_part *p, unsigned char *v)
{
	int lo, hi;

	if (v[1] < v[0])
	{	//swapped order: offset both ends by half a step
		lo = (v[1] << 4) + 8;
		hi = (v[0] << 4) - 8;
	}
	else
	{
		lo = (v[0] << 4);
		hi = (v[1] << 4);
	}

	Vector4Set(p->ep[0], lo, lo, lo, 0x780);
	Vector4Set(p->ep[1], hi, hi, hi, 0x780);
	p->hdr = 0xf;
}
//Endpoint mode 3 (HDR luminance, small range): a base luminance plus a small non-negative delta.
//The top bit of v[0] chooses how the bits of v[1] are split between base and delta. The second endpoint is capped at 0xFFF.
static void ASTC_HDR_Mode_3(struct astc_part *p, unsigned char *v)
{
	const int v0 = v[0], v1 = v[1];
	int base, span, top;

	if (v0 & 0x80)
	{	//3 high bits of base, 5 bits of delta
		base = ((v1 & 0xE0) << 4) | ((v0 & 0x7F) << 2);
		span = (v1 & 0x1F) << 2;
	}
	else
	{	//4 high bits of base, 4 bits of delta
		base = ((v1 & 0xF0) << 4) | ((v0 & 0x7F) << 1);
		span = (v1 & 0x0F) << 1;
	}

	top = base + span;
	if (top > 0xFFF)
		top = 0xFFF;

	Vector4Set(p->ep[0], base, base, base, 0x780);
	Vector4Set(p->ep[1], top, top, top, 0x780);
	p->hdr = 0xf;
}
//Endpoint mode 7 (HDR RGB, base+scale): decodes v[0..3] into a colour (stored in ep[1]) and that colour minus a scale (stored in ep[0]).
//Alpha is fixed at 0x780 for both endpoints and every channel is flagged as hdr.
static void ASTC_HDR_Mode_7(struct astc_part *p, unsigned char *v)
{
//the sub-mode selector is scattered over the top bits of v[0], v[1] and v[2].
int modeval = ((v[0]&0xC0)>>6) | ((v[1]&0x80)>>5) | ((v[2]&0x80)>>4);
int majcomp;
int mode;
static const int shamts[6] = { 1,1,2,3,4,5 };
int shamt,t;
int red, green, blue, scale;
int x0,x1,x2,x3,x4,x5,x6,ohm;
//split the selector into the major component (which channel ends up holding 'red') and the bit layout (mode 0-5).
if( (modeval & 0xC ) != 0xC )
{
majcomp = modeval >> 2;
mode = modeval & 3;
}
else if( modeval != 0xF )
{
majcomp = modeval & 3;
mode = 4;
}
else
{
majcomp = 0; mode = 5;
}
//the low bits of each field are always in the same place.
red = v[0] & 0x3f;
green = v[1] & 0x1f;
blue = v[2] & 0x1f;
scale = v[3] & 0x1f;
//the remaining bits go to different fields depending on the mode.
x0 = (v[1] >> 6) & 1; x1 = (v[1] >> 5) & 1;
x2 = (v[2] >> 6) & 1; x3 = (v[2] >> 5) & 1;
x4 = (v[3] >> 7) & 1; x5 = (v[3] >> 6) & 1;
x6 = (v[3] >> 5) & 1;
//one-hot form of the mode, so each mask below lists the modes in which that bit lands in that position.
ohm = 1 << mode;
if( ohm & 0x30 ) green |= x0 << 6;
if( ohm & 0x3A ) green |= x1 << 5;
if( ohm & 0x30 ) blue |= x2 << 6;
if( ohm & 0x3A ) blue |= x3 << 5;
if( ohm & 0x3D ) scale |= x6 << 5;
if( ohm & 0x2D ) scale |= x5 << 6;
if( ohm & 0x04 ) scale |= x4 << 7;
if( ohm & 0x3B ) red |= x4 << 6;
if( ohm & 0x04 ) red |= x3 << 6;
if( ohm & 0x10 ) red |= x5 << 7;
if( ohm & 0x0F ) red |= x2 << 7;
if( ohm & 0x05 ) red |= x1 << 8;
if( ohm & 0x0A ) red |= x0 << 8;
if( ohm & 0x05 ) red |= x0 << 9;
if( ohm & 0x02 ) red |= x6 << 9;
if( ohm & 0x01 ) red |= x3 << 10;
if( ohm & 0x02 ) red |= x5 << 10;
//scale everything up by the per-mode shift.
shamt = shamts[mode];
red <<= shamt; green <<= shamt; blue <<= shamt; scale <<= shamt;
//in every mode but 5, green and blue are stored as differences from red.
if( mode != 5 ) { green = red - green; blue = red - blue; }
//move the major component into its real channel.
if( majcomp == 1 )
{
t = red;
red = green;
green = t;
}
if( majcomp == 2 )
{
t = red;
red = blue;
blue = t;
}
//ep[1] is the colour itself, ep[0] is the colour with the scale subtracted. both clamped to 12 bits.
p->ep[1][0] = bound( 0, red, 0xFFF );
p->ep[1][1] = bound( 0, green, 0xFFF );
p->ep[1][2] = bound( 0, blue, 0xFFF );
p->ep[0][0] = bound( 0, red - scale, 0xFFF );
p->ep[0][1] = bound( 0, green - scale, 0xFFF );
p->ep[0][2] = bound( 0, blue - scale, 0xFFF );
p->ep[1][3] = p->ep[0][3] = 0x780;
p->hdr = 0xf;
}
//Endpoint mode 11 (HDR RGB): decodes v[0..5] into two 12-bit RGB endpoints.
//Alpha is fixed at 0x780 for both endpoints and every channel is flagged as hdr (modes 14 and 15 call this and then overwrite alpha).
//When the major-component selector is 3 the endpoints are stored directly. Otherwise the channels are built from
//a base value (va), two differences (vb0,vb1), an endpoint offset (vc) and two signed corrections (vd0,vd1).
static void ASTC_HDR_Mode_11(struct astc_part *p, unsigned char *v)
{
	static const int dbitstab[8] = {7,6,7,6,5,6,5,6};	//width of the signed vd0/vd1 fields, per mode
	int shamt, dbits;
	int majcomp = ((v[4] & 0x80) >> 7) | ((v[5] & 0x80) >> 6);
	int mode,va,vb0,vb1,vc,vd0,vd1;
	int x0,x1,x2,x3,x4,x5,ohm;
	int t;

	if( majcomp == 3 )
	{	//direct storage, no prediction
		Vector4Set(p->ep[0], v[0] << 4, v[2] << 4, (v[4] & 0x7f) << 5, 0x780);
		Vector4Set(p->ep[1], v[1] << 4, v[3] << 4, (v[5] & 0x7f) << 5, 0x780);
		p->hdr = 0xf;
		return;
	}

	mode = ((v[1]&0x80)>>7) | ((v[2]&0x80)>>6) | ((v[3]&0x80)>>5);
	va = v[0] | ((v[1] & 0x40) << 2);
	vb0 = v[2] & 0x3f;
	vb1 = v[3] & 0x3f;
	vc = v[1] & 0x3f;
	vd0 = v[4] & 0x7f;
	vd1 = v[5] & 0x7f;

	//sign-extend vd0/vd1 from their per-mode width.
	//the bits above that width belong to other fields (they are picked up as x2-x5 below), so they must be dropped here rather than left in the value.
	dbits = dbitstab[mode];
	vd0 &= (1<<dbits)-1;
	if (vd0 & (1<<(dbits-1)))
		vd0 -= 1<<dbits;
	vd1 &= (1<<dbits)-1;
	if (vd1 & (1<<(dbits-1)))
		vd1 -= 1<<dbits;

	//spare bits, routed to different fields depending on the mode.
	x0 = (v[2] >> 6) & 1;
	x1 = (v[3] >> 6) & 1;
	x2 = (v[4] >> 6) & 1;
	x3 = (v[5] >> 6) & 1;
	x4 = (v[4] >> 5) & 1;
	x5 = (v[5] >> 5) & 1;

	//one-hot form of the mode, so each mask below lists the modes in which that bit lands in that position.
	ohm = 1 << mode;
	if( ohm & 0xA4 ) va |= x0 << 9;
	if( ohm & 0x08 ) va |= x2 << 9;
	if( ohm & 0x50 ) va |= x4 << 9;
	if( ohm & 0x50 ) va |= x5 << 10;
	if( ohm & 0xA0 ) va |= x1 << 10;
	if( ohm & 0xC0 ) va |= x2 << 11;
	if( ohm & 0x04 ) vc |= x1 << 6;
	if( ohm & 0xE8 ) vc |= x3 << 6;
	if( ohm & 0x20 ) vc |= x2 << 7;
	if( ohm & 0x5B ) vb0 |= x0 << 6;
	if( ohm & 0x5B ) vb1 |= x1 << 6;
	if( ohm & 0x12 ) vb0 |= x2 << 7;
	if( ohm & 0x12 ) vb1 |= x3 << 7;

	// Now shift up so that major component is at top of 12-bit value
	shamt = (mode >> 1) ^ 3;
	va <<= shamt; vb0 <<= shamt; vb1 <<= shamt;
	vc <<= shamt;
	//vd0/vd1 may be negative, so scale by multiplication rather than left-shifting a negative value.
	vd0 *= 1<<shamt; vd1 *= 1<<shamt;

	p->ep[1][0] = bound( 0, va, 0xFFF );
	p->ep[1][1] = bound( 0, va - vb0, 0xFFF );
	p->ep[1][2] = bound( 0, va - vb1, 0xFFF );
	p->ep[0][0] = bound( 0, va - vc, 0xFFF );
	p->ep[0][1] = bound( 0, va - vb0 - vc - vd0, 0xFFF );
	p->ep[0][2] = bound( 0, va - vb1 - vc - vd1, 0xFFF );

	//move the major component into its real channel (swap red with green or blue, on both endpoints).
	if( majcomp == 1 )
	{
		t = p->ep[0][0]; p->ep[0][0] = p->ep[0][1]; p->ep[0][1] = t;
		t = p->ep[1][0]; p->ep[1][0] = p->ep[1][1]; p->ep[1][1] = t;
	}
	else if( majcomp == 2 )
	{
		t = p->ep[0][0]; p->ep[0][0] = p->ep[0][2]; p->ep[0][2] = t;
		t = p->ep[1][0]; p->ep[1][0] = p->ep[1][2]; p->ep[1][2] = t;
	}

	p->ep[0][3] = p->ep[1][3] = 0x780;
	p->hdr = 0xf;
}
//Endpoint mode 14 (HDR RGB + LDR alpha): rgb is decoded exactly as mode 11, alpha is taken directly from v[6] and v[7].
//The top hdr flag bit is cleared afterwards, leaving 0x7.
static void ASTC_HDR_Mode_14(struct astc_part *p, unsigned char *v)
{
	const int alpha0 = v[6];
	const int alpha1 = v[7];

	ASTC_HDR_Mode_11(p, v);

	p->ep[1][3] = alpha1;
	p->ep[0][3] = alpha0;
	p->hdr &= 0x7;
}
//Endpoint mode 15 (HDR RGB + HDR alpha): rgb is decoded exactly as mode 11, alpha is decoded from v[6] and v[7].
//The top bits of v[6]/v[7] select a sub-mode: 3 stores both alphas directly, 0-2 store a base plus a signed offset of decreasing width.
//The hdr flags are left as mode 11 set them (0xf).
static void ASTC_HDR_Mode_15(struct astc_part *p, unsigned char *v)
{
	int a0, a1;
	int mode;

	ASTC_HDR_Mode_11(p,v);

	mode = ((v[6] >> 7) & 1) | ((v[7] >> 6) & 2);
	a0 = v[6] & 0x7F;
	a1 = v[7] & 0x7F;

	if (mode != 3)
	{
		const int signbit = 0x20 >> mode;	//sign bit of the offset field for this sub-mode
		a0 |= (a1 << (mode+1)) & 0x780;		//the high bits of the base are borrowed from the second byte
		a1 &= (0x3F >> mode);
		a1 = (a1 ^ signbit) - signbit;		//sign-extend the offset
		a0 <<= (4-mode);
		a1 <<= (4-mode);
		a1 += a0;
		a1 = bound(0, a1, 0xFFF);
	}
	else
	{	//direct storage
		a0 <<= 5;
		a1 <<= 5;
	}

	p->ep[0][3] = a0;
	p->ep[1][3] = a1;
}
#endif
static void ASTC_DecodeEndpoints(struct astc_block_info *b, unsigned char *v)
{
int i, t0, t1, t3, t5, t7;
for (i = 0; i < b->partitions; i++)
{
#ifdef ASTC_WITH_HDR
b->part[i].hdr = 0;
#endif
switch (b->part[i].mode & 15)
{
#ifdef ASTC_WITH_HDR
case 2: //HDR Luminance, large range
ASTC_HDR_Mode_2(&b->part[i], v);
break;
case 3: //HDR Luminance, small range
ASTC_HDR_Mode_3(&b->part[i], v);
break;
case 7: //HDR RGB, base+scale
ASTC_HDR_Mode_7(&b->part[i], v);
break;
case 11: //HDR RGB
ASTC_HDR_Mode_11(&b->part[i], v);
break;
case 14: //HDR RGB + LDR Alpha
ASTC_HDR_Mode_14(&b->part[i], v);
break;
case 15: //HDR RGB + HDR Alpha
ASTC_HDR_Mode_15(&b->part[i], v);
break;
#endif
default: //the error colour - for unsupported hdr endpoints. unreachable when hdr is enabled. just fill it with the error colour.
Vector4Set(b->part[i].ep[0], 0xff, 0, 0xff, 0xff);
Vector4Set(b->part[i].ep[1], 0xff, 0, 0xff, 0xff);
break;
case 0: //LDR Luminance, direct
Vector4Set(b->part[i].ep[0], v[0], v[0], v[0], 0xff);
Vector4Set(b->part[i].ep[1], v[1], v[1], v[1], 0xff);
break;
case 1: //LDR Luminance, base+offset
t0 = (v[0]>>2)|(v[1]&0xc0);
t1 = t0+(v[1]&0x3f);
if (t1>0xff)
t1=0xff;
Vector4Set(b->part[i].ep[0], t0, t0, t0, 0xff);
Vector4Set(b->part[i].ep[1], t1, t1, t1, 0xff);
break;
case 4: //LDR Luminance+Alpha,direct
Vector4Set(b->part[i].ep[0], v[0], v[0], v[0], v[2]);
Vector4Set(b->part[i].ep[1], v[1], v[1], v[1], v[3]);
break;
case 5: //LDR Luminance+Alpha, base+offset
t1 = ASTC_bit_transfer_signed(v[1],&v[0]);
t3 = ASTC_bit_transfer_signed(v[3],&v[2]);
Vector4Set(b->part[i].ep[0],v[0],v[0],v[0],v[2]);
Vector4Set(b->part[i].ep[1],v[0]+t1,v[0]+t1,v[0]+t1,v[2]+t3);
ASTC_clamp_unorm8(b->part[i].ep[0]);
ASTC_clamp_unorm8(b->part[i].ep[1]);
break;
case 6: //LDR RGB, base+scale
Vector4Set(b->part[i].ep[0], ((int)v[0]*(int)v[3])>>8, ((int)v[1]*(int)v[3])>>8, ((int)v[2]*(int)v[3])>>8, 0xff);
Vector4Set(b->part[i].ep[1], v[0], v[1], v[2], 0xff);
break;
case 8: //LDR RGB, Direct
t0 = (int)v[0]+(int)v[2]+(int)v[4];
t1 = (int)v[1]+(int)v[3]+(int)v[5];
if (t1>=t0)
{
Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],0xff);
Vector4Set(b->part[i].ep[1], v[1],v[3],v[5],0xff);
}
else
{
ASTC_blue_contract(b->part[i].ep[0], v[1],v[3],v[5], 0xff);
ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4], 0xff);
}
break;
case 9: //LDR RGB, base+offset
t1 = ASTC_bit_transfer_signed(v[1],&v[0]);
t3 = ASTC_bit_transfer_signed(v[3],&v[2]);
t5 = ASTC_bit_transfer_signed(v[5],&v[4]);
if(t1+t3+t5 >= 0)
{
Vector4Set(b->part[i].ep[0],v[0],v[2],v[4],0xff);
Vector4Set(b->part[i].ep[1],v[0]+t1,v[2]+t3,v[4]+t5,0xff);
}
else
{
ASTC_blue_contract(b->part[i].ep[0], v[0]+t1,v[2]+t3,v[4]+t5, 0xff);
ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4], 0xff);
}
ASTC_clamp_unorm8(b->part[i].ep[0]);
ASTC_clamp_unorm8(b->part[i].ep[1]);
break;
case 10: //LDR RGB, base+scale plus two A
Vector4Set(b->part[i].ep[0], ((int)v[0]*v[3])>>8, ((int)v[1]*v[3])>>8, ((int)v[2]*v[3])>>8, v[4]);
Vector4Set(b->part[i].ep[1], v[0], v[1], v[2], v[5]);
break;
case 12: //LDR RGBA, direct
if (v[1]+(int)v[3]+v[5]>=v[0]+(int)v[2]+v[4])
{
Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],v[6]);
Vector4Set(b->part[i].ep[1], v[1],v[3],v[5],v[7]);
}
else
{
ASTC_blue_contract(b->part[i].ep[0], v[1],v[3],v[5],v[7]);
ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4],v[6]);
}
break;
case 13: //LDR RGBA, base+offset
t1 = ASTC_bit_transfer_signed(v[1],&v[0]);
t3 = ASTC_bit_transfer_signed(v[3],&v[2]);
t5 = ASTC_bit_transfer_signed(v[5],&v[4]);
t7 = ASTC_bit_transfer_signed(v[7],&v[6]);
if(t1+t3+t5>=0)
{
Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],v[6]);
Vector4Set(b->part[i].ep[1], v[0]+t1,v[2]+t3,v[4]+t5,v[6]+t7);
}
else
{
ASTC_blue_contract(b->part[i].ep[0], v[0]+t1,v[2]+t3,v[4]+t5,v[6]+t7);
ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4],v[6]);
}
ASTC_clamp_unorm8(b->part[i].ep[0]);
ASTC_clamp_unorm8(b->part[i].ep[1]);
break;
}
v += ((b->part[i].mode>>2)+1)<<1;
}
}
//Reads the block's endpoint values and weights.
//First counts the raw endpoint values needed by all partitions, then picks the largest endpoint encoding that fits in b->ep_bits,
//decodes the values and hands them to ASTC_DecodeEndpoints, and finally decodes the weights from a bit-reversed copy of the block.
//Sets b->status to ASTC_ERROR if too many values are required or no encoding fits.
static void ASTC_ReadEndpoints(struct astc_block_info *b)
{
	unsigned char raw[18]; //maximum raw endpoint values,
	unsigned char flipped[16], octet;
	char needed;
	int part, enc, n;
	int encbits;

	//each partition contributes ((mode>>2)+1)*2 raw values
	needed = 0;
	for (part = 0; part < b->partitions; part++)
		needed += ((b->part[part].mode>>2)+1)<<1;
	if (needed > countof(raw))
	{
		b->status = ASTC_ERROR;
		return;
	}

	//walk the encodings from the last table entry downwards, stopping at the first one whose total size fits.
	for (enc = countof(astc_epvmode)-1; enc >= 0; enc--)
	{
		encbits = ASTC_DecodeSize(needed, astc_epvmode[enc].bits, astc_epvmode[enc].extra);
		if (encbits <= b->ep_bits)
			break;
	}
	if (enc < 0)
	{	//nothing fits
		b->status = ASTC_ERROR;
		return;
	}

	//pull out the raw endpoint values (they follow the config bits), then convert them into colours.
	ASTC_Decode(b->in, raw, needed, b->config_bits, astc_epvmode[enc].bits, astc_epvmode[enc].extra, astc_epvmode[enc].dequant);
	ASTC_DecodeEndpoints(b, raw);

	//the weights are stored backwards from the end of the block.
	//build a copy with both the byte order and the bits within each byte reversed, so they can be decoded forwards from bit 0.
	for (n = 0; n < countof(flipped); n++)
	{
		octet = b->in[n];
		octet = (octet>>4)|(octet<<4);					//swap nibbles
		octet = ((octet&0xcc)>>2)|((octet&0x33)<<2);	//swap bit pairs
		octet = ((octet&0xaa)>>1)|((octet&0x55)<<1);	//swap adjacent bits
		flipped[15-n] = octet;
	}
	ASTC_Decode(flipped, b->weights, b->wcount[3], 0, astc_weightmode[b->precision].bits, astc_weightmode[b->precision].extra, astc_weightmode[b->precision].dequant);
}
//Integer mixing function used to scramble the partition seed.
//All arithmetic is unsigned, so overflow simply wraps.
static unsigned int hash52(unsigned int p)
{
	unsigned int h = p;

	h = h ^ (h >> 15);
	h = h - (h << 17);
	h = h + (h << 7);
	h = h + (h << 4);
	h = h ^ (h >> 5);
	h = h + (h << 16);
	h = h ^ (h >> 7);
	h = h ^ (h >> 3);
	h = h ^ (h << 6);
	h = h ^ (h >> 17);
	return h;
}
//Returns the partition index (0-3) that the texel at x,y,z belongs to, for the given partition seed and partition count.
//Single-partition blocks always give 0. For small blocks the coordinates are doubled before use.
static int ASTC_ChoosePartition(int seed, int x, int y, int z, int partitions, int smallblock)
{
	unsigned char sd[12];	//sd[n] is the n+1'th seed value
	unsigned int rnum;
	int shseed, shcount;
	int sh1, sh2, sh3;
	int w0, w1, w2, w3;
	int k;

	if (partitions==1)
		return 0;

	if (smallblock)
	{
		x <<= 1;
		y <<= 1;
		z <<= 1;
	}

	seed += (partitions-1) * 1024;
	rnum = hash52(seed);

	//the first eight seeds are consecutive nibbles of the hash, the last four overlap them at a 2-bit offset
	for (k = 0; k < 8; k++)
		sd[k] = (rnum >> (4*k)) & 0xF;
	sd[8] = (rnum >> 18) & 0xF;
	sd[9] = (rnum >> 22) & 0xF;
	sd[10] = (rnum >> 26) & 0xF;
	sd[11] = ((rnum >> 30) | (rnum << 2)) & 0xF;

	//square each seed
	for (k = 0; k < 12; k++)
		sd[k] *= sd[k];

	//two candidate shift amounts; the low seed bit decides which goes with the x terms and which with the y terms
	shseed = ((seed&2) ? 4:5);
	shcount = ((partitions==3) ? 6:5);
	if (seed & 1)
	{
		sh1 = shseed;
		sh2 = shcount;
	}
	else
	{
		sh1 = shcount;
		sh2 = shseed;
	}
	sh3 = (seed & 0x10) ? sh1 : sh2;

	for (k = 0; k < 8; k += 2)
	{
		sd[k] >>= sh1;
		sd[k+1] >>= sh2;
	}
	for (k = 8; k < 12; k++)
		sd[k] >>= sh3;

	//one 6-bit score per partition
	w0 = sd[0]*x + sd[1]*y + sd[10]*z + (rnum >> 14);
	w1 = sd[2]*x + sd[3]*y + sd[11]*z + (rnum >> 10);
	w2 = sd[4]*x + sd[5]*y + sd[8] *z + (rnum >> 6);
	w3 = sd[6]*x + sd[7]*y + sd[9] *z + (rnum >> 2);
	w0 &= 0x3F;
	w1 &= 0x3F;
	w2 &= 0x3F;
	w3 &= 0x3F;

	//partitions that don't exist can't win
	if (partitions < 4)
		w3 = 0;
	if (partitions < 3)
		w2 = 0;

	//highest score wins, ties go to the lowest index
	if (w0 >= w1 && w0 >= w2 && w0 >= w3)
		return 0;
	if (w1 >= w2 && w1 >= w3)
		return 1;
	return (w2 >= w3) ? 2 : 3;
}
#endif
#ifdef ASTC_WITH_LDR
//Decodes a single 2d block into 8-bit RGBA texels.
//in: the encoded block. out: where the block's first texel goes. pixstride: destination row pitch, in texels.
//bw,bh: the block's footprint in texels.
//Partitions with HDR endpoints, and blocks that could not be decoded at all, are written as the error colour (ff,00,ff,ff).
//No sRGB conversion is done here, the caller can apply that if needed.
ASTC_PUBLIC void ASTC_Decode_LDR8(unsigned char *in, unsigned char *out, int pixstride, int bw, int bh)
{
	struct astc_block_info b;
	struct astc_part *part;
	unsigned char *row, *px;
	int rowbytes = pixstride*4;
	int x, y, c;
	int smallblock, ds, dt, planes, wstride;
	int s, t, fs, ft, wi, w, w00,w01,w10,w11;

	b.in = in;
	b.blocksize[0] = bw;
	b.blocksize[1] = bh;
	b.blocksize[2] = 1;
	ASTC_ReadBlockMode(&b);

	if (b.status == ASTC_VOID_LDR)
	{	//void extent: one constant colour for the whole block, using bytes 9,11,13,15 of the block (the odd byte of each of the last four byte pairs)
		//Note: we don't validate the extents.
		for (y = 0, row = out; y < bh; y++, row += rowbytes)
		{
			for (x = 0; x < bw; x++)
			{
				px = row + (x<<2);
				px[0] = in[9];
				px[1] = in[11];
				px[2] = in[13];
				px[3] = in[15];
			}
		}
		return;
	}

	if (b.status == ASTC_OKAY)
		ASTC_ReadPartitions(&b);
	if (b.status == ASTC_OKAY)
		ASTC_ReadEndpoints(&b);

	if (b.status != ASTC_OKAY)
	{	//something went wrong (or its an hdr void extent), fill the block with the error colour
		for (y = 0, row = out; y < bh; y++, row += rowbytes)
		{
			for (x = 0; x < bw; x++)
			{
				px = row + (x<<2);
				px[0] = 0xff;
				px[1] = 0;
				px[2] = 0xff;
				px[3] = 0xff;
			}
		}
		return;
	}

	smallblock = (b.blocksize[0]*b.blocksize[1]*b.blocksize[2])<31;
	//scale factors that map texel coords onto the weight grid
	ds = (1024+b.blocksize[0]/2)/(b.blocksize[0]-1);
	dt = (1024+b.blocksize[1]/2)/(b.blocksize[1]-1);
	//with dual planes the two weight sets are interleaved, so each grid step covers two entries
	planes = 1<<b.dualplane;
	wstride = b.wcount[0]*planes;
	//(the third dimension is not handled here, only a single 2d layer is decoded)

#ifdef ASTC_WITH_HDR
	//the LDR profile treats HDR endpoints as the error colour. this is per-partition rather than per-block.
	for (x = 0; x < b.partitions; x++)
	{
		if (!b.part[x].hdr)
			continue;	//FIXME: when spitting out 8bit, we're meant to have an extra 9th bit which is always set, in order to avoid round-to-zero biasing the result of the final 8 bits.
		Vector4Set(b.part[x].ep[0], 0xff, 0, 0xff, 0xff);
		Vector4Set(b.part[x].ep[1], 0xff, 0, 0xff, 0xff);
	}
#endif

	for (y = 0, row = out; y < bh; y++, row += rowbytes)
	{
		//weight grid position for this row: upper bits are the grid cell, low 4 bits the fraction
		t = ((dt*y)*(b.wcount[1]-1)+32)>>6;
		ft = t&0xf;
		for (x = 0; x < bw; x++)
		{
			part = &b.part[ASTC_ChoosePartition(b.partindex, x,y,0, b.partitions, smallblock)];
			s = ((ds*x)*(b.wcount[0]-1)+32)>>6;
			fs = s&0xf;

			//bilinear factors for the four surrounding grid weights (they sum to 16)
			w11 = (fs*ft+8) >> 4;
			w10 = ft - w11;
			w01 = fs - w11;
			w00 = 16 - fs - ft + w11;

			wi = ((s>>4)<<b.dualplane)+((t>>4)*wstride);
			w = (	w00*b.weights[wi] +
					w01*b.weights[wi+planes] +
					w10*b.weights[wi+wstride] +
					w11*b.weights[wi+planes+wstride] + 8) >> 4;

			//blend the two endpoints by the weight (out of 64)
			px = row + (x<<2);
			for (c = 0; c < 4; c++)
				px[c] = ((64-w)*part->ep[0][c] + w*part->ep[1][c])>>6;

			if (b.dualplane)
			{	//dual planes has a second set of weights that override a single channel
				wi++;
				w = (	w00*b.weights[wi] +
						w01*b.weights[wi+planes] +
						w10*b.weights[wi+wstride] +
						w11*b.weights[wi+planes+wstride] + 8) >> 4;
				px[b.ccs] = ((64-w)*part->ep[0][b.ccs] + w*part->ep[1][b.ccs])>>6;
			}
		}
	}
}
#endif
#ifdef ASTC_WITH_HDR
//Converts a 16-bit intermediate colour value into half-float bits.
//hdr: non-zero if rawval is in the ASTC HDR pseudo-log encoding (5 bit exponent, 11 bit mantissa), zero if it is a plain unorm16.
//rawval: the value to convert, expected to be within 0-65535.
//returns the 16 half-float bits. the result is never negative, and hdr values never come out as infinity or nan.
static unsigned short ASTC_GenHalffloat(int hdr, int rawval)
{
	if (hdr)
	{
		int fp16, m;
		//the top 5 bits are the exponent, and drop straight into the half-float's exponent field
		fp16 = (rawval&0xF800) >> 1;
		//the low 11 bits get remapped to a 10-bit mantissa using a piecewise-linear curve
		m = rawval&0x7FF;
		if (m < 512)
			fp16 |= (3*m)>>3;
		else if (m >= 1536)
			fp16 |= (5*m - 2048)>>3;
		else
			fp16 |= (4*m - 512)>>3;
		//an all-ones exponent would be inf/nan. the spec wants those replaced with the largest finite value.
		if (fp16 >= 0x7C00)
			fp16 = 0x7BFF;
		return fp16;
	}
	else
	{
		//normalise to 0-1 as a float, then repack its exponent+mantissa by hand (truncating the mantissa).
		union
		{
			float f;
			unsigned int u;
		} u = {rawval/65535.0};
		int e = ((u.u>>23)&0xff) - 127;
		int m = (u.u&((1<<23)-1))>>13;
		if (e > 15)
			return 0x7C00;	//too large for a half: infinity instead of a nan
		if (e < -24)
			return 0;	//too small even for a denormal (this includes 0 itself)
		if (e < -14)
		{	//denormal range: the implicit leading 1 becomes explicit, and the mantissa is shifted down to a fixed exponent of -14
			return (m | 0x400) >> (-14 - e);
		}
		return ((e+15)<<10) | m;
	}
}
//Decodes a single 2d block into half-float RGBA texels (four unsigned shorts per texel).
//in: the encoded block. out: where the block's first texel goes. pixstride: destination row pitch, in texels.
//bw,bh: the block's footprint in texels.
//Both LDR and HDR blocks are accepted. Blocks that could not be decoded are written as 0,0,0 with an alpha of 1.0.
ASTC_PUBLIC void ASTC_Decode_HDR(unsigned char *in, unsigned short *out, int pixstride, int bw, int bh)
{
	int x, y;
	int stride = pixstride*4;
	struct astc_block_info b;

	b.in = in;
	b.blocksize[0] = bw;
	b.blocksize[1] = bh;
	b.blocksize[2] = 1;
	ASTC_ReadBlockMode(&b);

	if (b.status == ASTC_VOID_HDR)
	{	//void extent
		//Note: we don't validate the extents.
		for (y = 0; y < bh; y++, out += stride)
			for (x = 0; x < bw; x++)
			{	//hdr void extents already use fp16, so the four little-endian 16bit values are copied as-is
				out[(x<<2)+0] = in[8] | (in[9]<<8);
				out[(x<<2)+1] = in[10] | (in[11]<<8);
				out[(x<<2)+2] = in[12] | (in[13]<<8);
				out[(x<<2)+3] = in[14] | (in[15]<<8);
			}
		return;
	}
	if (b.status == ASTC_VOID_LDR)
	{	//void extent
		//Note: we don't validate the extents.
		for (y = 0; y < bh; y++, out += stride)
			for (x = 0; x < bw; x++)
			{	//ldr void extents hold unorm16 values, which need converting
				out[(x<<2)+0] = ASTC_GenHalffloat(0, in[8] | (in[9]<<8));
				out[(x<<2)+1] = ASTC_GenHalffloat(0, in[10] | (in[11]<<8));
				out[(x<<2)+2] = ASTC_GenHalffloat(0, in[12] | (in[13]<<8));
				out[(x<<2)+3] = ASTC_GenHalffloat(0, in[14] | (in[15]<<8));
			}
		return;
	}

	if (b.status == ASTC_OKAY)
		ASTC_ReadPartitions(&b);
	if (b.status == ASTC_OKAY)
		ASTC_ReadEndpoints(&b);

	if (b.status == ASTC_OKAY)
	{
		int smallblock = (b.blocksize[0]*b.blocksize[1]*b.blocksize[2])<31;
		//scale factors that map texel coords onto the weight grid
		int ds = (1024+b.blocksize[0]/2)/(b.blocksize[0]-1);
		int dt = (1024+b.blocksize[1]/2)/(b.blocksize[1]-1);
		//with dual planes the two weight sets are interleaved, so each grid step covers two entries
		int planes = 1<<b.dualplane, wstride = b.wcount[0]*planes;
		int s, t, v0, w, w00,w01,w10,w11;
		int c;
		unsigned short *px;
		struct astc_part *p;
		//(the third dimension is not handled here, only a single 2d layer is decoded)

		for (x = 0; x < b.partitions; x++)
		{	//bring every endpoint channel up to 16 bits before interpolating
			for (y = 0; y < 4; y++)
			{
				if (b.part[x].hdr&(1<<y))
				{	//the 12bit endpoint values are shifted up to 16bit...
					b.part[x].ep[0][y] <<= 4;
					b.part[x].ep[1][y] <<= 4;
				}
				else
				{	//convert to unorm16.
					b.part[x].ep[0][y] |= b.part[x].ep[0][y] << 8;
					b.part[x].ep[1][y] |= b.part[x].ep[1][y] << 8;
				}
			}
		}

		for (y = 0; y < bh; y++, out += stride)
		{
			//weight grid position for this row: upper bits are the grid cell, low 4 bits the fraction
			t = ((dt*y)*(b.wcount[1]-1)+32)>>6;
			for (x = 0; x < bw; x++)
			{
				p = &b.part[ASTC_ChoosePartition(b.partindex, x,y,0, b.partitions, smallblock)];
				s = ((ds*x)*(b.wcount[0]-1)+32)>>6;

				//bilinear factors for the four surrounding grid weights (they sum to 16)
				w11 = ((s&0xf)*(t&0xf)+8) >> 4;
				w10 = (t&0xf) - w11;
				w01 = (s&0xf) - w11;
				w00 = 16 - (s&0xf) - (t&0xf) + w11;

				v0 = (((s>>4))<<b.dualplane)+(((t>>4))*wstride);
				w = (	w00*b.weights[v0] +
						w01*b.weights[v0+planes] +
						w10*b.weights[v0+wstride] +
						w11*b.weights[v0+planes+wstride] + 8) >> 4;

				//blend the 16bit endpoints by the weight (out of 64), rounding to nearest (the +32) as the spec's formula does.
				//each channel is converted according to its own hdr bit, matching how its endpoints were scaled above.
				px = out + (x<<2);
				for (c = 0; c < 4; c++)
					px[c] = ASTC_GenHalffloat(p->hdr&(1<<c), ((64-w)*p->ep[0][c] + w*p->ep[1][c] + 32)>>6);

				if (b.dualplane)
				{	//dual planes has a second set of weights that override a single channel
					v0++;
					w = (	w00*b.weights[v0] +
							w01*b.weights[v0+planes] +
							w10*b.weights[v0+wstride] +
							w11*b.weights[v0+planes+wstride] + 8) >> 4;
					px[b.ccs] = ASTC_GenHalffloat(p->hdr&(1<<b.ccs), ((64-w)*p->ep[0][b.ccs] + w*p->ep[1][b.ccs] + 32)>>6);
				}
			}
		}
	}
	else
	{	//decode failure: opaque black (0xf<<10 is 1.0 as a half-float)
		for (y = 0; y < bh; y++, out += stride)
			for (x = 0; x < bw; x++)
			{
				out[(x<<2)+0] = 0;
				out[(x<<2)+1] = 0;
				out[(x<<2)+2] = 0;
				out[(x<<2)+3] = 0xf<<10;
			}
	}
}
#endif