/*
4c2066601a
Fixed up the -netquake / -spasm / -fitz args slightly, should actually be usable now. sv_mintic 0 is now treated as 0.013 when using nqplayerphysics, to try to make it smoother for nq clients. Preparing for astc's volume formats. Mostly for completeness, I was bored. Disabled for now because nothing supports them anyway. Fix broken mousewheel in SDL2 builds. Fix configs not getting loaded following initial downloads in the web port/etc. Make the near-cloud layer of q1 scrolling sky fully opaque by default (like vanilla). Sky fog now ignores depth, treating it as an infinite distance. Fix turbs not responding to fog. r_fullbright no longer needs vid_reload to take effect (and more efficient now). Tweaked the audio code to use an format enum instead of byte width, just with the same values still, primarily to clean up loaders that deal with S32 vs F32, or U8 vs S8. Added a cvar to control whether to use threads for the qcgc. Still disabled by default but no longer requires engine recompiles to enable! git-svn-id: https://svn.code.sf.net/p/fteqw/code/trunk@5683 fc73d0e0-1445-4013-8a0c-d673dee63da5
1745 lines
51 KiB
C
1745 lines
51 KiB
C
*/
//Note: this code does not claim to be bit-correct.
//It doesn't support volume textures.
//It doesn't validate block extents (and is generally unaware of more than one block anyway)
//It doesn't implement all validation checks, either.
//Do NOT use this code to validate any encoders...

//Based upon documentation here: https://www.khronos.org/registry/OpenGL/extensions/OES/OES_texture_compression_astc.txt
|
|
|
|
//linkage/visibility prefix for the entry points below. the includer may predefine it (eg: to 'static').
#ifndef ASTC_PUBLIC
#define ASTC_PUBLIC
#endif

#include <stddef.h>	//size_t, which the ASTC_BlocksAreHDR prototype needs before the other headers get pulled in.

#define ASTC_WITH_LDR		//comment out this line to disable pure-LDR decoding (the hdr code can still be used).
#define ASTC_WITH_HDR		//comment out this line to disable HDR decoding.
#define ASTC_WITH_HDRTEST	//comment out this line to disable checking for which profile is needed.
//#define ASTC_WITH_3D

#ifdef ASTC_WITH_LDR
ASTC_PUBLIC void ASTC_Decode_LDR8(unsigned char *in, unsigned char *out, int pixstride/*outwidth*/, int layerstride/*outwidth*outheight*/, int bw,int bh,int bd);	//generates RGBA8 data (gives error colour for hdr blocks!)
#endif
#ifdef ASTC_WITH_HDR
ASTC_PUBLIC void ASTC_Decode_HDR(unsigned char *in, unsigned short *out, int pixstride/*outwidth*/, int layerstride/*outwidth*outheight*/, int bw,int bh,int bd);	//generates RGBA16F data.
#endif
#ifdef ASTC_WITH_HDRTEST
ASTC_PUBLIC int ASTC_BlocksAreHDR(unsigned char *in, size_t datasize, int bw, int bh, int bd);	//returns true if n consecutive blocks require the HDR profile (ie: detects when you need to soft-decode for drivers with partial support, as opposed to just always decompressing).
#endif
|
|
|
|
|
|
|
|
#include <math.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
//fallbacks for helper macros that the including project may already provide.

//assigns the four components x,y,z,w to r[0..3]. expands to a braced block, so use it as a statement.
#ifndef Vector4Set
#define Vector4Set(r,x,y,z,w) {(r)[0] = x; (r)[1] = y;(r)[2] = z;(r)[3]=w;}
#endif

//number of elements in a true array (not valid on pointers).
#ifndef countof
#define countof(array) (sizeof(array)/sizeof((array)[0]))
#endif

//the shared decode helpers are needed if either output format is enabled.
#if defined(ASTC_WITH_LDR) || defined(ASTC_WITH_HDR)
#define ASTC_WITH_DECODE
#endif
|
|
//classification of a single 16-byte block, as filled in by ASTC_ReadBlockMode.
enum astc_status_e
{
	//valid blocks
	ASTC_OKAY,			//we can decode at least part of this normally (hdr endpoints may still result in per-endpoint errors).
	ASTC_VOID_LDR,		//not an error - the block is a single LDR colour, with an RGBA16 colour in the last 8 bytes.
	ASTC_VOID_HDR,		//not an error - the block is a single HDR colour, with an RGBA16F colour in the last 8 bytes.

	//invalid blocks
	ASTC_ERROR,			//validation errors
	ASTC_UNSUPPORTED,	//basically just volume textures
	ASTC_RESERVED,		//reserved bits. basically an error but might not be in the future.
};
|
|
struct astc_block_info
|
|
{
|
|
unsigned char *in; //the 16 bytes of the block
|
|
unsigned char blocksize[3]; //block width, height, depth(1 for 2d).
|
|
|
|
enum astc_status_e status; //block status/type.
|
|
unsigned char dualplane; //two sets of weights instead of one.
|
|
unsigned char ccs; //second set applies to this component
|
|
|
|
unsigned char precision; //defines the precision of the weights
|
|
|
|
int wcount[4]; //x,y,z,total weight counts
|
|
int weight_bits; //size of weights section.
|
|
int config_bits; //size of header before the endpoint bits
|
|
int ep_bits; //size available to endpoints
|
|
unsigned char weights[64]; //official limit to the number of weights stored
|
|
|
|
unsigned char partitions; //number of active partitions to select from (and number of endpoints to read)
|
|
unsigned short partindex; //used for deciding which partition each pixel belongs in
|
|
struct astc_part
|
|
{
|
|
unsigned char mode; //endpoint modes
|
|
#ifdef ASTC_WITH_HDR
|
|
unsigned char hdr; //endpoint colour mode - &1=rgb, &2=alpha
|
|
#endif
|
|
int ep[2][4];
|
|
} part[4];
|
|
};
|
|
|
|
//reads 'count' bits starting at bit 'offset' (lsb-first) from 'in'.
//at most two bytes are fetched, and the second only when the field actually spills into it,
//so a read ending exactly on the final bit of a buffer never touches the byte after it.
//the result is an unsigned char, so only counts of 0-8 are meaningful; use ASTC_readmanybits for wider fields.
static unsigned char ASTC_readbits(unsigned char *in, unsigned int offset, unsigned int count)
{
	unsigned short s;
	in += offset>>3;	//skip whole bytes
	offset &= 7;		//bit position within the first byte
	s = in[0];
	if (offset+count>8)
		s |= (in[1]<<8);
	s>>=offset;
	return s & ((1u<<count)-1);
}
|
|
//reads a field of up to 32 bits starting at bit 'offset', built from 8-bit (or smaller) ASTC_readbits calls.
//the top of the field is consumed first; whatever remains (1-8 bits, or 0 if count was 0) is read last from 'offset' itself.
static unsigned int ASTC_readmanybits(unsigned char *in, unsigned int offset, unsigned int count)
{
	unsigned int r = 0;
	while(count > 8)
	{
		count -= 8;
		//widen before shifting: the byte would otherwise promote to a signed int, and <<24 of a value >=0x80 overflows it.
		r |= (unsigned int)ASTC_readbits(in, offset+count, 8)<<count;
	}
	r |= ASTC_readbits(in, offset, count);
	return r;
}
|
|
|
|
//weights cover a range of 0-64 inclusive
//>32 is +1 (otherwise it would be 0-63)
//high bits are folded over
//the trit(t)/quint(q) tables are indexed by (trit_or_quint<<bits)|lowbits, matching what ASTC_Decode produces.
static unsigned char dequant_weight_1b[1<<1] = {0x00,0x40};
static unsigned char dequant_weight_2b[1<<2] = {0x00,0x15,0x2b,0x40};
static unsigned char dequant_weight_3b[1<<3] = {0x00,0x09,0x12,0x1b,0x25,0x2e,0x37,0x40};
static unsigned char dequant_weight_4b[1<<4] = {0x00,0x04,0x08,0x0c,0x11,0x15,0x19,0x1d,0x23,0x27,0x2b,0x2f,0x34,0x38,0x3c,0x40};
static unsigned char dequant_weight_5b[1<<5] = {0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,0x40};
static unsigned char dequant_weight_0t[3] = {0,32,64};
static unsigned char dequant_weight_1t[6] = {0x00,0x40,0x0c,0x34,0x19,0x27};
static unsigned char dequant_weight_2t[12] = {0x00,0x40,0x11,0x2f,0x06,0x3a,0x17,0x29,0x0c,0x34,0x1d,0x23};
static unsigned char dequant_weight_3t[24] = {0x00,0x40,0x08,0x38,0x10,0x30,0x18,0x28,0x02,0x3e,0x0b,0x35,0x13,0x2d,0x1b,0x25,0x05,0x3b,0x0d,0x33,0x16,0x2a,0x1e,0x22};
static unsigned char dequant_weight_0q[5] = {0,16,32,48,64};
static unsigned char dequant_weight_1q[10] = {0x00,0x40,0x05,0x3b,0x0b,0x35,0x11,0x2f,0x17,0x29};
static unsigned char dequant_weight_2q[20] = {0x00,0x40,0x10,0x30,0x03,0x3d,0x13,0x2d,0x06,0x3a,0x17,0x29,0x09,0x37,0x1a,0x26,0x0d,0x33,0x1d,0x23};

//weight encodings, indexed by the 4-bit 'precision' value assembled in ASTC_ReadBlockMode.
//extra: 0=plain bits, 1=trits, 2=quints. bits: plain bits per weight. dequant: lookup to the 0-64 range.
//the trailing comment on each row is the number of distinct weight levels.
static const struct
{
	unsigned char extra, bits, *dequant;
} astc_weightmode[] =
{
	{0,0, NULL},				//invalid
	{0,0, NULL},				//invalid
	{0,1, dequant_weight_1b},	//2
	{1,0, dequant_weight_0t},	//3
	{0,2, dequant_weight_2b},	//4
	{2,0, dequant_weight_0q},	//5
	{1,1, dequant_weight_1t},	//6
	{0,3, dequant_weight_3b},	//8
	{0,0, NULL},				//invalid
	{0,0, NULL},				//invalid
	{2,1, dequant_weight_1q},	//10
	{1,2, dequant_weight_2t},	//12
	{0,4, dequant_weight_4b},	//16
	{2,2, dequant_weight_2q},	//20
	{1,3, dequant_weight_3t},	//24
	{0,5, dequant_weight_5b},	//32
};
|
|
//returns the number of bits occupied by 'count' packed values that each carry 'bits' plain bits,
//plus one trit (extra==1: 8 bits per 5 values, rounded up) or one quint (extra==2: 7 bits per 3 values, rounded up).
static unsigned int ASTC_DecodeSize(unsigned int count, unsigned int bits, unsigned char extra)
{
	return	((extra==1)?((count*8)+4)/5:0) +
			((extra==2)?((count*7)+2)/3:0) +
			count*bits;
}
|
|
|
|
|
|
static void ASTC_ReadBlockMode(struct astc_block_info *b)
|
|
{
|
|
unsigned char *in = b->in;
|
|
unsigned short s = ASTC_readmanybits(in, 0, 13);//in[0] | (in[1]<<8);
|
|
b->config_bits = 13;
|
|
|
|
if ((s&0x1ff)==0x1fc)
|
|
{ //void extent
|
|
if (s&0x200)
|
|
b->status = ASTC_VOID_HDR;
|
|
else
|
|
b->status = ASTC_VOID_LDR;
|
|
b->dualplane = b->precision = b->wcount[0] = b->wcount[1] = b->wcount[2] = b->partitions = 0;
|
|
return;
|
|
}
|
|
b->status = ASTC_OKAY;
|
|
b->dualplane = (s>>10)&1; //Dp
|
|
b->precision = (s>>(9-3))&(1<<3);//P
|
|
b->precision |= (s>>4)&1; //p0
|
|
if (b->blocksize[2] != 1)
|
|
{ //3d blocks have a different header layout
|
|
#ifdef ASTC_WITH_3D
|
|
if (s&3)
|
|
{
|
|
b->precision|=(s&3)<<1; //p2, p1
|
|
b->wcount[0] = ((s>>5)&3)+2, b->wcount[1] = ((s>>7)&3)+2, b->wcount[2] = ((s>>2)&3)+2;
|
|
}
|
|
else
|
|
{
|
|
b->precision|=(s&0xc)>>1; //p2, p1
|
|
if ((s&0x180)!=0x180)
|
|
{
|
|
b->dualplane = 0; //always single plane.
|
|
b->precision &= 7; //clear the high precision bit (reused for 'b')
|
|
if (!(s&0x180))
|
|
b->wcount[0] = 6, b->wcount[1] = ((s>>9)&3)+2, b->wcount[2] = ((s>>5)&3)+2;
|
|
else if (!(s&0x80))
|
|
b->wcount[0] = ((s>>5)&3)+2, b->wcount[1] = 6, b->wcount[2] = ((s>>9)&3)+2;
|
|
else
|
|
b->wcount[0] = ((s>>5)&3)+2, b->wcount[1] = ((s>>9)&3)+2, b->wcount[2] = 6;
|
|
}
|
|
else if ((s&0x60)!=0x60)
|
|
{
|
|
if (!(s&0x60))
|
|
b->wcount[0] = 6, b->wcount[1] = 2, b->wcount[2] = 2;
|
|
else if (!(s&0x20))
|
|
b->wcount[0] = 2, b->wcount[1] = 6, b->wcount[2] = 2;
|
|
else //40
|
|
b->wcount[0] = 2, b->wcount[1] = 2, b->wcount[2] = 6;
|
|
}
|
|
else
|
|
b->status = ASTC_RESERVED; //reserved (or void extent, but those were handled above)
|
|
}
|
|
#else
|
|
b->status = ASTC_UNSUPPORTED;
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
b->wcount[2] = 1;
|
|
if (s&3)
|
|
{ //one of the first 5 layouts...
|
|
b->precision|=(s&3)<<1; //p2, p1
|
|
if (!(s&8))
|
|
{ //first two layouts...
|
|
if (!(s&4))
|
|
{ //layout0
|
|
b->wcount[0] = ((s>>7)&3)+4;
|
|
b->wcount[1] = ((s>>5)&3)+2;
|
|
}
|
|
else
|
|
{ //layout1
|
|
b->wcount[0] = ((s>>7)&3)+8;
|
|
b->wcount[1] = ((s>>5)&3)+2;
|
|
}
|
|
}
|
|
else if (!(s&4))
|
|
{ //layout2
|
|
b->wcount[0] = ((s>>5)&3)+2;
|
|
b->wcount[1] = ((s>>7)&3)+8;
|
|
}
|
|
else if (!(s&256))
|
|
{ //layout3
|
|
b->wcount[0] = ((s>>5)&3)+2;
|
|
b->wcount[1] = ((s>>7)&1)+6;
|
|
}
|
|
else
|
|
{ //layout4
|
|
b->wcount[0] = ((s>>7)&1)+2;
|
|
b->wcount[1] = ((s>>5)&3)+2;
|
|
}
|
|
}
|
|
else
|
|
{ //one of the later layouts
|
|
b->precision|=(s&0xc)>>1; //p2, p1
|
|
if (!(s&384))
|
|
{
|
|
b->wcount[0] = 12;
|
|
b->wcount[1] = ((s>>5)&3)+2;
|
|
}
|
|
else if ((s&384)==128)
|
|
{
|
|
b->wcount[0] = ((s>>5)&3)+2;
|
|
b->wcount[1] = 12;
|
|
}
|
|
else if ((s&480)==384)
|
|
{
|
|
b->wcount[0] = 6;
|
|
b->wcount[1] = 10;
|
|
}
|
|
else if ((s&480)==416)
|
|
{
|
|
b->wcount[0] = 10;
|
|
b->wcount[1] = 6;
|
|
}
|
|
else if ((s&384)==256)
|
|
{
|
|
b->wcount[0] = ((s>>5)&3)+6;
|
|
b->wcount[1] = ((s>>9)&3)+6;
|
|
b->dualplane = 0; //forget the Dp bit, its reused in this layout
|
|
b->precision &= 7; //forget the P bit, too
|
|
}
|
|
else
|
|
b->status = ASTC_RESERVED; //reserved
|
|
}
|
|
}
|
|
b->partitions = ((s>>11)&3)+1;
|
|
|
|
if (b->partitions > 3 && b->dualplane)
|
|
b->status = ASTC_ERROR; //apparently.
|
|
|
|
if (b->wcount[0] > b->blocksize[0] || b->wcount[1] > b->blocksize[1] || b->wcount[2] > b->blocksize[2])
|
|
b->status = ASTC_ERROR; //invalid weight counts.
|
|
|
|
b->wcount[3] = b->wcount[0] * b->wcount[1] * b->wcount[2];
|
|
b->wcount[3]<<=b->dualplane; //dual-plane has twice the weights - interleaved.
|
|
if (b->wcount[3] > countof(b->weights))
|
|
b->status = ASTC_ERROR; //more than 64 weights are banned, for some reason
|
|
b->weight_bits = ASTC_DecodeSize(b->wcount[3], astc_weightmode[b->precision].bits, astc_weightmode[b->precision].extra);
|
|
}
|
|
|
|
static void ASTC_ReadPartitions(struct astc_block_info *b)
|
|
{
|
|
int sel;
|
|
int i;
|
|
unsigned char *in = b->in;
|
|
int weight_bits = b->weight_bits;
|
|
|
|
if (b->partitions == 1)
|
|
{ //single-partition mode, simple CEM
|
|
b->partindex = 0;
|
|
b->part[0].mode = ASTC_readbits(in, b->config_bits, 4);
|
|
b->config_bits += 4;
|
|
}
|
|
else
|
|
{ //multi
|
|
b->partindex = ASTC_readmanybits(in, b->config_bits, 10);
|
|
b->config_bits += 10;
|
|
sel = ASTC_readbits(in, b->config_bits, 6);
|
|
b->config_bits += 6;
|
|
if (!(sel&3))
|
|
{
|
|
sel = (sel>>2)&0xf;
|
|
for (i = 0; i < b->partitions; i++)
|
|
b->part[i].mode = sel; //all the same
|
|
}
|
|
else
|
|
{
|
|
int shift = 2;
|
|
int highbits = b->partitions*3 - 4;
|
|
|
|
weight_bits += highbits;
|
|
sel |= ASTC_readbits(in, 128-weight_bits, highbits)<<6; //I don't know why this is separate. it seems like an unnecessary complication to me.
|
|
|
|
for (i = 0; i < b->partitions; i++, shift++)
|
|
{
|
|
b->part[i].mode = ((sel&3)-1)<<2; //class groups
|
|
b->part[i].mode += ((sel>>shift)&1)<<2;//class
|
|
}
|
|
for (i = 0; i < b->partitions; i++, shift+=2)
|
|
b->part[i].mode += (sel>>shift)&3; //specific mode info
|
|
}
|
|
}
|
|
if (b->dualplane)
|
|
{
|
|
weight_bits += 2;
|
|
b->ccs = ASTC_readbits(in, 128-weight_bits, 2);
|
|
}
|
|
else
|
|
b->ccs = 0;
|
|
|
|
b->ep_bits = 128 - weight_bits - b->config_bits;
|
|
//weights are at 128-weight_bits to 128
|
|
//epdata is at config_bits to config_bits+ep_bits
|
|
}
|
|
|
|
#ifdef ASTC_WITH_HDRTEST
|
|
ASTC_PUBLIC int ASTC_BlocksAreHDR(unsigned char *in, size_t datasize, int bw, int bh, int bd)
|
|
{
|
|
struct astc_block_info b;
|
|
int i;
|
|
size_t blocks = datasize/16;
|
|
b.in = in;
|
|
b.blocksize[0] = bw;
|
|
b.blocksize[1] = bh;
|
|
b.blocksize[2] = bd;
|
|
while(blocks --> 0)
|
|
{
|
|
ASTC_ReadBlockMode(&b);
|
|
if (b.status == ASTC_VOID_HDR)
|
|
return 1; //if we're getting hdr blocks then we can decode properly only with hdr
|
|
if (b.status == ASTC_VOID_LDR)
|
|
return 0; //if we're getting ldr blocks, then its unlikely that there's any hdr blocks in there.
|
|
if (b.status != ASTC_OKAY)
|
|
continue;
|
|
ASTC_ReadPartitions(&b);
|
|
for (i = 0; i < b.partitions; i++)
|
|
{
|
|
switch(b.part[i].mode)
|
|
{
|
|
case 2:
|
|
case 3:
|
|
case 7:
|
|
case 11:
|
|
case 14:
|
|
case 15:
|
|
return 1;
|
|
}
|
|
}
|
|
b.in += 16;
|
|
}
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
#ifdef ASTC_WITH_DECODE
|
|
//streaming form of ASTC_readbits: reads 'count' bits at *offset, then advances *offset past them.
//limited to 8 bits per call, as with ASTC_readbits.
static unsigned char ASTC_readbits2(unsigned char *in, unsigned int *offset, unsigned int count)
{
	unsigned char r = ASTC_readbits(in, *offset, count);
	*offset += count;
	return r;
}
|
|
//unpacks 'count' values from 'in', starting at bit 'offset', and writes one dequantised byte per value to 'out'.
//bits:    number of plain low bits per value.
//extra:   1 = each value also carries a trit (packed 5 values per group, 8 shared bits),
//         2 = each value also carries a quint (packed 3 values per group, 7 shared bits),
//         anything else = plain bits only.
//dequant: lookup table indexed by (trit_or_quint<<bits)|lowbits.
//a final short group only reads the fields of the values that are present; missing values are treated as 0.
static void ASTC_Decode(unsigned char *in, unsigned char *out, int count, unsigned int offset, int bits, int extra, unsigned char *dequant)
{
	unsigned char block[5];
	int j;

	//unfortunately these trits depend upon the values of the later bits in each block.
	//if only it were a nice simple modulo...
	if (extra==1)
	{
		//read it 5 samples at a time
		while(count > 0)
		{
			unsigned int t, c;

			//the 8 shared trit bits are interleaved between the values as 2,2,1,2,1.
			block[0] = ASTC_readbits2(in, &offset, bits);
			t = ASTC_readbits2(in, &offset, 2);
			if (count > 1)
			{
				block[1] = ASTC_readbits2(in, &offset, bits);
				t |= ASTC_readbits2(in, &offset, 2)<<2;
			}
			else
				block[1] = 0;
			if (count > 2)
			{
				block[2] = ASTC_readbits2(in, &offset, bits);
				t |= ASTC_readbits2(in, &offset, 1)<<4;
			}
			else
				block[2] = 0;
			if (count > 3)
			{
				block[3] = ASTC_readbits2(in, &offset, bits);
				t |= ASTC_readbits2(in, &offset, 2)<<5;
			}
			else
				block[3] = 0;
			if (count > 4)
			{
				block[4] = ASTC_readbits2(in, &offset, bits);
				t |= ASTC_readbits2(in, &offset, 1)<<7;
			}
			else
				block[4] = 0;

			//okay, we read the block, now figure out the trits and pack them into the high part of the result
			if ((t&0x1c) == 0x1c)
			{
				c = ((t>>3)&0x1c) | (t&3);
				block[4] |= 2<<bits;
				block[3] |= 2<<bits;
			}
			else
			{
				c = t&0x1f;
				if ((t&0x60) == 0x60)
				{
					block[4] |= 2<<bits;
					block[3] |= (t>>7)<<bits;
				}
				else
				{
					block[4] |= (t>>7)<<bits;
					block[3] |= ((t>>5)&3)<<bits;
				}
			}
			if ((c&3)==3)
			{
				block[2] |= 2<<bits;
				block[1] |= ((c>>4)&1)<<bits;
				block[0] |= (((c>>2)&2) | ((c>>2)&~(c>>3)&1))<<bits;
			}
			else if ((c&0xc)==0xc)
			{
				block[2] |= 2<<bits;
				block[1] |= 2<<bits;
				block[0] |= (c&3)<<bits;
			}
			else
			{
				block[2] |= ((c>>4)&1)<<bits;
				block[1] |= ((c>>2)&3)<<bits;
				block[0] |= ((c&2)|(c&1&~(c>>1)))<<bits;
			}

			//spit out the result
			for (j = 0; j < 5 && j < count; j++)
				*out++ = dequant[block[j]];
			count -= 5;
		}
	}
	else if (extra == 2)
	{
		//read it 3 samples at a time
		while(count > 0)
		{
			unsigned int t, c;

			//the 7 shared quint bits are interleaved between the values as 3,2,2.
			block[0] = ASTC_readbits2(in, &offset, bits);
			t = ASTC_readbits2(in, &offset, 3);
			if (count > 1)
			{
				block[1] = ASTC_readbits2(in, &offset, bits);
				t |= ASTC_readbits2(in, &offset, 2)<<3;
			}
			else
				block[1] = 0;
			if (count > 2)
			{
				block[2] = ASTC_readbits2(in, &offset, bits);
				t |= ASTC_readbits2(in, &offset, 2)<<5;
			}
			else
				block[2] = 0;

			//okay, we read the block, now figure out the quints and pack them into the high part of the result
			if ((t&6)==6 && !(t&0x60))
			{
				block[2] |= (((t&1)<<2) | (((t>>4)&~t&1)<<1) | ((t>>3)&~t&1))<<bits;
				block[1] |= 4<<bits;
				block[0] |= 4<<bits;
			}
			else
			{
				if ((t&6) == 6)
				{
					block[2] |= 4<<bits;
					c = ((t>>3)&3)<<3;
					c |= (~(t>>5)&3)<<1;
					c |= t&1;
				}
				else
				{
					block[2] |= ((t>>5)&3)<<bits;
					c = t&0x1f;
				}

				if ((c&7) == 5)
				{
					block[1] |= 4<<bits;
					block[0] |= ((c>>3)&3)<<bits;
				}
				else
				{
					block[1] |= ((c>>3)&3)<<bits;
					block[0] |= (c&7)<<bits;
				}
			}

			//spit out the result
			for (j = 0; j < 3 && j < count; j++)
				*out++ = dequant[block[j]];
			count -= 3;
		}
	}
	else while(count --> 0)	//pure bits, nice and simple
	{
		unsigned char val = ASTC_readbits2(in, &offset, bits);

		*out++ = dequant[val];
	}
}
|
|
|
|
//endpoints have a logical value between 0 and 255.
//bit replication is used to fill in missing precision
//the trit(t)/quint(q) tables are indexed by (trit_or_quint<<bits)|lowbits, matching what ASTC_Decode produces.
static unsigned char dequant_ep_1b[1<<1] = {0,255};
static unsigned char dequant_ep_2b[1<<2] = {0x00,0x55,0xaa,0xff};
static unsigned char dequant_ep_3b[1<<3] = {0x00,0x24,0x49,0x6d,0x92,0xb6,0xdb,0xff};
static unsigned char dequant_ep_4b[1<<4] = {
	0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff};
static unsigned char dequant_ep_5b[1<<5] = {
	0x00,0x08,0x10,0x18,0x21,0x29,0x31,0x39,0x42,0x4a,0x52,0x5a,0x63,0x6b,0x73,0x7b,
	0x84,0x8c,0x94,0x9c,0xa5,0xad,0xb5,0xbd,0xc6,0xce,0xd6,0xde,0xe7,0xef,0xf7,0xff};
static unsigned char dequant_ep_6b[1<<6] = {
	0x00,0x04,0x08,0x0c,0x10,0x14,0x18,0x1c,0x20,0x24,0x28,0x2c,0x30,0x34,0x38,0x3c,
	0x41,0x45,0x49,0x4d,0x51,0x55,0x59,0x5d,0x61,0x65,0x69,0x6d,0x71,0x75,0x79,0x7d,
	0x82,0x86,0x8a,0x8e,0x92,0x96,0x9a,0x9e,0xa2,0xa6,0xaa,0xae,0xb2,0xb6,0xba,0xbe,
	0xc3,0xc7,0xcb,0xcf,0xd3,0xd7,0xdb,0xdf,0xe3,0xe7,0xeb,0xef,0xf3,0xf7,0xfb,0xff};
static unsigned char dequant_ep_7b[1<<7] = {
	0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,
	0x20,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,
	0x40,0x42,0x44,0x46,0x48,0x4a,0x4c,0x4e,0x50,0x52,0x54,0x56,0x58,0x5a,0x5c,0x5e,
	0x60,0x62,0x64,0x66,0x68,0x6a,0x6c,0x6e,0x70,0x72,0x74,0x76,0x78,0x7a,0x7c,0x7e,
	0x81,0x83,0x85,0x87,0x89,0x8b,0x8d,0x8f,0x91,0x93,0x95,0x97,0x99,0x9b,0x9d,0x9f,
	0xa1,0xa3,0xa5,0xa7,0xa9,0xab,0xad,0xaf,0xb1,0xb3,0xb5,0xb7,0xb9,0xbb,0xbd,0xbf,
	0xc1,0xc3,0xc5,0xc7,0xc9,0xcb,0xcd,0xcf,0xd1,0xd3,0xd5,0xd7,0xd9,0xdb,0xdd,0xdf,
	0xe1,0xe3,0xe5,0xe7,0xe9,0xeb,0xed,0xef,0xf1,0xf3,0xf5,0xf7,0xf9,0xfb,0xfd,0xff};
static unsigned char dequant_ep_8b[1<<8] = {
	0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,
	0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,
	0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,
	0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x3b,0x3c,0x3d,0x3e,0x3f,
	0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,
	0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,
	0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x6b,0x6c,0x6d,0x6e,0x6f,
	0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x7b,0x7c,0x7d,0x7e,0x7f,
	0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,0x88,0x89,0x8a,0x8b,0x8c,0x8d,0x8e,0x8f,
	0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0x9b,0x9c,0x9d,0x9e,0x9f,
	0xa0,0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xab,0xac,0xad,0xae,0xaf,
	0xb0,0xb1,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,0xbc,0xbd,0xbe,0xbf,
	0xc0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
	0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
	0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xeb,0xec,0xed,0xee,0xef,
	0xf0,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa,0xfb,0xfc,0xfd,0xfe,0xff};
static unsigned char dequant_ep_0t[3] = {0,128,255};
static unsigned char dequant_ep_1t[6] = {0x00,0xff,0x33,0xcc,0x66,0x99};
static unsigned char dequant_ep_2t[12] = {0x00,0xff,0x45,0xba,0x17,0xe8,0x5c,0xa3,0x2e,0xd1,0x74,0x8b};
static unsigned char dequant_ep_3t[24] = {0x00,0xff,0x21,0xde,0x42,0xbd,0x63,0x9c,0x0b,0xf4,0x2c,0xd3,0x4d,0xb2,0x6e,0x91,0x16,0xe9,0x37,0xc8,0x58,0xa7,0x79,0x86};
static unsigned char dequant_ep_4t[48] = {0x00,0xff,0x10,0xef,0x20,0xdf,0x30,0xcf,0x41,0xbe,0x51,0xae,0x61,0x9e,0x71,0x8e,0x05,0xfa,0x15,0xea,0x26,0xd9,0x36,0xc9,0x46,0xb9,0x56,0xa9,0x67,0x98,0x77,0x88,0x0b,0xf4,0x1b,0xe4,0x2b,0xd4,0x3b,0xc4,0x4c,0xb3,0x5c,0xa3,0x6c,0x93,0x7c,0x83};
static unsigned char dequant_ep_5t[96] = {0x00,0xff,0x08,0xf7,0x10,0xef,0x18,0xe7,0x20,0xdf,0x28,0xd7,0x30,0xcf,0x38,0xc7,0x40,0xbf,0x48,0xb7,0x50,0xaf,0x58,0xa7,0x60,0x9f,0x68,0x97,0x70,0x8f,0x78,0x87,0x02,0xfd,0x0a,0xf5,0x12,0xed,0x1a,0xe5,0x23,0xdc,0x2b,0xd4,0x33,0xcc,0x3b,0xc4,0x43,0xbc,0x4b,0xb4,0x53,0xac,0x5b,0xa4,0x63,0x9c,0x6b,0x94,0x73,0x8c,0x7b,0x84,0x05,0xfa,0x0d,0xf2,0x15,0xea,0x1d,0xe2,0x25,0xda,0x2d,0xd2,0x35,0xca,0x3d,0xc2,0x46,0xb9,0x4e,0xb1,0x56,0xa9,0x5e,0xa1,0x66,0x99,0x6e,0x91,0x76,0x89,0x7e,0x81};
static unsigned char dequant_ep_6t[192]= {0x00,0xff,0x04,0xfb,0x08,0xf7,0x0c,0xf3,0x10,0xef,0x14,0xeb,0x18,0xe7,0x1c,0xe3,0x20,0xdf,0x24,0xdb,0x28,0xd7,0x2c,0xd3,0x30,0xcf,0x34,0xcb,0x38,0xc7,0x3c,0xc3,0x40,0xbf,0x44,0xbb,0x48,0xb7,0x4c,0xb3,0x50,0xaf,0x54,0xab,0x58,0xa7,0x5c,0xa3,0x60,0x9f,0x64,0x9b,0x68,0x97,0x6c,0x93,0x70,0x8f,0x74,0x8b,0x78,0x87,0x7c,0x83,0x01,0xfe,0x05,0xfa,0x09,0xf6,0x0d,0xf2,0x11,0xee,0x15,0xea,0x19,0xe6,0x1d,0xe2,0x21,0xde,0x25,0xda,0x29,0xd6,0x2d,0xd2,0x31,0xce,0x35,0xca,0x39,0xc6,0x3d,0xc2,0x41,0xbe,0x45,0xba,0x49,0xb6,0x4d,0xb2,0x51,0xae,0x55,0xaa,0x59,0xa6,0x5d,0xa2,0x61,0x9e,0x65,0x9a,0x69,0x96,0x6d,0x92,0x71,0x8e,0x75,0x8a,0x79,0x86,0x7d,0x82,0x02,0xfd,0x06,0xf9,0x0a,0xf5,0x0e,0xf1,0x12,0xed,0x16,0xe9,0x1a,0xe5,0x1e,0xe1,0x22,0xdd,0x26,0xd9,0x2a,0xd5,0x2e,0xd1,0x32,0xcd,0x36,0xc9,0x3a,0xc5,0x3e,0xc1,0x42,0xbd,0x46,0xb9,0x4a,0xb5,0x4e,0xb1,0x52,0xad,0x56,0xa9,0x5a,0xa5,0x5e,0xa1,0x62,0x9d,0x66,0x99,0x6a,0x95,0x6e,0x91,0x72,0x8d,0x76,0x89,0x7a,0x85,0x7e,0x81};
static unsigned char dequant_ep_0q[5] = {0,64,128,192,255};
static unsigned char dequant_ep_1q[10] = {0x00,0xff,0x1c,0xe3,0x38,0xc7,0x54,0xab,0x71,0x8e};
static unsigned char dequant_ep_2q[20] = {0x00,0xff,0x43,0xbc,0x0d,0xf2,0x50,0xaf,0x1b,0xe4,0x5e,0xa1,0x28,0xd7,0x6b,0x94,0x36,0xc9,0x79,0x86};
static unsigned char dequant_ep_3q[40] = {0x00,0xff,0x20,0xdf,0x41,0xbe,0x61,0x9e,0x06,0xf9,0x27,0xd8,0x47,0xb8,0x68,0x97,0x0d,0xf2,0x2d,0xd2,0x4e,0xb1,0x6e,0x91,0x13,0xec,0x34,0xcb,0x54,0xab,0x75,0x8a,0x1a,0xe5,0x3a,0xc5,0x5b,0xa4,0x7b,0x84};
static unsigned char dequant_ep_4q[80] = {0x00,0xff,0x10,0xef,0x20,0xdf,0x30,0xcf,0x40,0xbf,0x50,0xaf,0x60,0x9f,0x70,0x8f,0x03,0xfc,0x13,0xec,0x23,0xdc,0x33,0xcc,0x43,0xbc,0x53,0xac,0x64,0x9b,0x74,0x8b,0x06,0xf9,0x16,0xe9,0x26,0xd9,0x36,0xc9,0x47,0xb8,0x57,0xa8,0x67,0x98,0x77,0x88,0x09,0xf6,0x19,0xe6,0x2a,0xd5,0x3a,0xc5,0x4a,0xb5,0x5a,0xa5,0x6a,0x95,0x7a,0x85,0x0d,0xf2,0x1d,0xe2,0x2d,0xd2,0x3d,0xc2,0x4d,0xb2,0x5d,0xa2,0x6d,0x92,0x7d,0x82};
static unsigned char dequant_ep_5q[160]= {0x00,0xff,0x08,0xf7,0x10,0xef,0x18,0xe7,0x20,0xdf,0x28,0xd7,0x30,0xcf,0x38,0xc7,0x40,0xbf,0x48,0xb7,0x50,0xaf,0x58,0xa7,0x60,0x9f,0x68,0x97,0x70,0x8f,0x78,0x87,0x01,0xfe,0x09,0xf6,0x11,0xee,0x19,0xe6,0x21,0xde,0x29,0xd6,0x31,0xce,0x39,0xc6,0x41,0xbe,0x49,0xb6,0x51,0xae,0x59,0xa6,0x61,0x9e,0x69,0x96,0x71,0x8e,0x79,0x86,0x03,0xfc,0x0b,0xf4,0x13,0xec,0x1b,0xe4,0x23,0xdc,0x2b,0xd4,0x33,0xcc,0x3b,0xc4,0x43,0xbc,0x4b,0xb4,0x53,0xac,0x5b,0xa4,0x63,0x9c,0x6b,0x94,0x73,0x8c,0x7b,0x84,0x04,0xfb,0x0c,0xf3,0x14,0xeb,0x1c,0xe3,0x24,0xdb,0x2c,0xd3,0x34,0xcb,0x3c,0xc3,0x44,0xbb,0x4c,0xb3,0x54,0xab,0x5c,0xa3,0x64,0x9b,0x6c,0x93,0x74,0x8b,0x7c,0x83,0x06,0xf9,0x0e,0xf1,0x16,0xe9,0x1e,0xe1,0x26,0xd9,0x2e,0xd1,0x36,0xc9,0x3e,0xc1,0x46,0xb9,0x4e,0xb1,0x56,0xa9,0x5e,0xa1,0x66,0x99,0x6e,0x91,0x76,0x89,0x7e,0x81};

//endpoint value encodings, ordered by ascending number of levels (the trailing comment on each row).
//extra: 0=plain bits, 1=trits, 2=quints. bits: plain bits per value. dequant: lookup to the 0-255 range.
static const struct
{
	unsigned char extra, bits, *dequant;
} astc_epvmode[] =
{
	{0,1, dequant_ep_1b},	//2
	{1,0, dequant_ep_0t},	//3
	{0,2, dequant_ep_2b},	//4
	{2,0, dequant_ep_0q},	//5
	{1,1, dequant_ep_1t},	//6
	{0,3, dequant_ep_3b},	//8
	{2,1, dequant_ep_1q},	//10
	{1,2, dequant_ep_2t},	//12
	{0,4, dequant_ep_4b},	//16
	{2,2, dequant_ep_2q},	//20
	{1,3, dequant_ep_3t},	//24
	{0,5, dequant_ep_5b},	//32
	{2,3, dequant_ep_3q},	//40
	{1,4, dequant_ep_4t},	//48
	{0,6, dequant_ep_6b},	//64
	{2,4, dequant_ep_4q},	//80
	{1,5, dequant_ep_5t},	//96
	{0,7, dequant_ep_7b},	//128
	{2,5, dequant_ep_5q},	//160
	{1,6, dequant_ep_6t},	//192
	{0,8, dequant_ep_8b},	//256
	//other modes don't make any sense
};
|
|
/*static void ASTC_CalcDequant(void)
|
|
{
|
|
int i;
|
|
|
|
int extra = 0;
|
|
int bits = 1;
|
|
int isweight = 1;
|
|
int targbits = isweight?6:8;
|
|
int v;
|
|
|
|
static qboolean nospam;
|
|
if (nospam)
|
|
return;
|
|
nospam = true;
|
|
|
|
//binary:
|
|
if (!extra)
|
|
{
|
|
for (bits = 1; bits <= (isweight?5:8); bits++)
|
|
{
|
|
Con_Printf("table: %s_%ib", isweight?"weight":"ep", bits);
|
|
for (i = 0; i < (1<<bits); i++)
|
|
{
|
|
v = i;
|
|
v<<=(targbits-bits);
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
v|=v>>bits;
|
|
|
|
if (isweight && v > 32)
|
|
v++; //0-64 instead of 0-63
|
|
|
|
Con_Printf("0x%02x,", v);
|
|
}
|
|
Con_Printf("\n");
|
|
}
|
|
}
|
|
else if (extra == 1)
|
|
{
|
|
int A,B,C,D;
|
|
|
|
for (bits = 1; bits <= (isweight?3:6); bits++)
|
|
{
|
|
Con_Printf("table: %s_%it:\n", isweight?"weight":"ep", bits);
|
|
for (i = 0; i < ((2<<bits)|(1<<bits)); i++)
|
|
{
|
|
switch(bits)
|
|
{
|
|
case 1:
|
|
A = (i&1)*(isweight?0x7f:0x1ff);
|
|
B = 0;
|
|
C = isweight?50:204;
|
|
D = i>>bits;
|
|
break;
|
|
case 2:
|
|
A = (i&1)*(isweight?0x7f:0x1ff);
|
|
B = ((i>>1)&1) * (isweight?0b1000101:0b100010110);
|
|
C = isweight?25:93;
|
|
D = i>>bits;
|
|
break;
|
|
case 3:
|
|
A = (i&1)*(isweight?0x7f:0x1ff);
|
|
B = ((i>>1)&1) * (isweight?0b0100001:0b010000101); //b
|
|
B|= ((i>>2)&1) * (isweight?0b1000010:0b100001010); //c
|
|
C = isweight?11:44;
|
|
D = i>>bits;
|
|
break;
|
|
case 4:
|
|
A = (i&1)*0x1ff;
|
|
B = ((i>>1)&1) * 0b001000001; //b
|
|
B|= ((i>>2)&1) * 0b010000010; //c
|
|
B|= ((i>>3)&1) * 0b100000100; //d
|
|
C = 22;
|
|
D = i>>bits;
|
|
break;
|
|
case 5:
|
|
A = (i&1)*0x1ff;
|
|
B = ((i>>1)&1) * 0b000100000; //b
|
|
B|= ((i>>2)&1) * 0b001000000; //c
|
|
B|= ((i>>3)&1) * 0b010000001; //d
|
|
B|= ((i>>4)&1) * 0b100000010; //e
|
|
C = 11;
|
|
D = i>>bits;
|
|
break;
|
|
case 6:
|
|
A = (i&1)*0x1ff;
|
|
B = ((i>>1)&1) * 0b000010000; //b
|
|
B|= ((i>>2)&1) * 0b000100000; //c
|
|
B|= ((i>>3)&1) * 0b001000000; //d
|
|
B|= ((i>>4)&1) * 0b010000000; //e
|
|
B|= ((i>>5)&1) * 0b100000001; //f
|
|
C = 5;
|
|
D = i>>bits;
|
|
break;
|
|
}
|
|
v = D * C + B;
|
|
v = v ^ A;
|
|
v = (A & (isweight?0x20:0x80)) | (v >> 2);
|
|
|
|
if (isweight && v > 32)
|
|
v++; //0-64 instead of 0-63
|
|
|
|
Con_Printf("0x%02x,", v);
|
|
}
|
|
Con_Printf("\n");
|
|
}
|
|
}
|
|
else if (extra == 2)
|
|
{
|
|
int A,B,C,D;
|
|
|
|
for (bits = 1; bits <= (isweight?2:5); bits++)
|
|
{
|
|
Con_Printf("table: %s_%iq:\n", isweight?"weight":"ep", bits);
|
|
for (i = 0; i < ((4<<bits)|(1<<bits)); i++)
|
|
{
|
|
switch(bits)
|
|
{
|
|
case 1:
|
|
A = (i&1)*(isweight?0x7f:0x1ff);
|
|
B = 0;
|
|
C = isweight?23:113;
|
|
D = i>>bits;
|
|
break;
|
|
case 2:
|
|
A = (i&1)*(isweight?0x7f:0x1ff);
|
|
B = ((i>>1)&1) * (isweight?0b1000010:0b100001100);
|
|
C = isweight?13:54;
|
|
D = i>>bits;
|
|
break;
|
|
case 3:
|
|
A = (i&1)*0x1ff;
|
|
B = ((i>>1)&1) * 0b010000010; //b
|
|
B|= ((i>>2)&1) * 0b100000101; //c
|
|
C = 26;
|
|
D = i>>bits;
|
|
break;
|
|
case 4:
|
|
A = (i&1)*0x1ff;
|
|
B = ((i>>1)&1) * 0b001000000; //b
|
|
B|= ((i>>2)&1) * 0b010000001; //c
|
|
B|= ((i>>3)&1) * 0b100000010; //d
|
|
C = 13;
|
|
D = i>>bits;
|
|
break;
|
|
case 5:
|
|
A = (i&1)*0x1ff;
|
|
B = ((i>>1)&1) * 0b000100000; //b
|
|
B|= ((i>>2)&1) * 0b001000000; //c
|
|
B|= ((i>>3)&1) * 0b010000000; //d
|
|
B|= ((i>>4)&1) * 0b100000001; //e
|
|
C = 6;
|
|
D = i>>bits;
|
|
break;
|
|
}
|
|
v = D * C + B;
|
|
v = v ^ A;
|
|
v = (A & (isweight?0x20:0x80)) | (v >> 2);
|
|
|
|
if (isweight && v > 32)
|
|
v++; //0-64 instead of 0-63
|
|
|
|
Con_Printf("0x%02x,", v);
|
|
}
|
|
Con_Printf("\n");
|
|
}
|
|
}
|
|
}*/
|
|
|
|
//writes the 'blue contracted' form of r,g,b,a to out[0..3]:
//red and green are each averaged with blue (rounding down via >>1), blue and alpha pass through untouched.
static void ASTC_blue_contract(int *out, int r, int g, int b, int a)
{
	out[0] = (r+b) >> 1;
	out[1] = (g+b) >> 1;
	out[2] = b;
	out[3] = a;
}
|
|
//moves the top bit (0x80) of a into the top bit of *b, shifting *b right by one to make room.
//the remaining bits of a are shifted right by one and treated as a 6-bit signed value.
//returns the new value for a, in the range -32..31.
static int ASTC_bit_transfer_signed(int a, unsigned char *b)
{
	*b >>= 1;
	*b |= a & 0x80;
	a >>= 1;
	a &= 0x3F;
	if((a&0x20)!=0)
		a=a-0x40;	//sign-extend from 6 bits
	return a;
}
|
|
//clamps each of the four components of c to the 0-255 range, in place.
//NOTE(review): 'bound' is not defined in this file; presumably the including project's bound(min,value,max) clamp macro - confirm.
static void ASTC_clamp_unorm8(int *c)
{
	c[0] = bound(0, c[0], 255);
	c[1] = bound(0, c[1], 255);
	c[2] = bound(0, c[2], 255);
	c[3] = bound(0, c[3], 255);
}
|
|
|
|
#ifdef ASTC_WITH_HDR
|
|
static void ASTC_HDR_Mode_2(struct astc_part *p, unsigned char *v)
|
|
{
|
|
int y0,y1;
|
|
if(v[1] >= v[0])
|
|
{
|
|
y0 = (v[0] << 4);
|
|
y1 = (v[1] << 4);
|
|
}
|
|
else
|
|
{
|
|
y0 = (v[1] << 4) + 8;
|
|
y1 = (v[0] << 4) - 8;
|
|
}
|
|
Vector4Set(p->ep[0], y0, y0, y0, 0x780);
|
|
Vector4Set(p->ep[1], y1, y1, y1, 0x780);
|
|
p->hdr = 0xf;
|
|
}
|
|
static void ASTC_HDR_Mode_3(struct astc_part *p, unsigned char *v)
|
|
{
|
|
int y0, y1, d;
|
|
if((v[0]&0x80) != 0)
|
|
{
|
|
y0 = ((v[1] & 0xE0) << 4) | ((v[0] & 0x7F) << 2);
|
|
d = (v[1] & 0x1F) << 2;
|
|
}
|
|
else
|
|
{
|
|
y0 = ((v[1] & 0xF0) << 4) | ((v[0] & 0x7F) << 1);
|
|
d = (v[1] & 0x0F) << 1;
|
|
}
|
|
|
|
y1 = y0 + d;
|
|
if(y1 > 0xFFF)
|
|
y1 = 0xFFF;
|
|
|
|
Vector4Set(p->ep[0], y0, y0, y0, 0x780);
|
|
Vector4Set(p->ep[1], y1, y1, y1, 0x780);
|
|
p->hdr = 0xf;
|
|
}
|
|
static void ASTC_HDR_Mode_7(struct astc_part *p, unsigned char *v)
|
|
{
|
|
int modeval = ((v[0]&0xC0)>>6) | ((v[1]&0x80)>>5) | ((v[2]&0x80)>>4);
|
|
int majcomp;
|
|
int mode;
|
|
static const int shamts[6] = { 1,1,2,3,4,5 };
|
|
int shamt,t;
|
|
|
|
int red, green, blue, scale;
|
|
int x0,x1,x2,x3,x4,x5,x6,ohm;
|
|
|
|
if( (modeval & 0xC ) != 0xC )
|
|
{
|
|
majcomp = modeval >> 2;
|
|
mode = modeval & 3;
|
|
}
|
|
else if( modeval != 0xF )
|
|
{
|
|
majcomp = modeval & 3;
|
|
mode = 4;
|
|
}
|
|
else
|
|
{
|
|
majcomp = 0; mode = 5;
|
|
}
|
|
|
|
red = v[0] & 0x3f;
|
|
green = v[1] & 0x1f;
|
|
blue = v[2] & 0x1f;
|
|
scale = v[3] & 0x1f;
|
|
|
|
x0 = (v[1] >> 6) & 1; x1 = (v[1] >> 5) & 1;
|
|
x2 = (v[2] >> 6) & 1; x3 = (v[2] >> 5) & 1;
|
|
x4 = (v[3] >> 7) & 1; x5 = (v[3] >> 6) & 1;
|
|
x6 = (v[3] >> 5) & 1;
|
|
|
|
ohm = 1 << mode;
|
|
if( ohm & 0x30 ) green |= x0 << 6;
|
|
if( ohm & 0x3A ) green |= x1 << 5;
|
|
if( ohm & 0x30 ) blue |= x2 << 6;
|
|
if( ohm & 0x3A ) blue |= x3 << 5;
|
|
if( ohm & 0x3D ) scale |= x6 << 5;
|
|
if( ohm & 0x2D ) scale |= x5 << 6;
|
|
if( ohm & 0x04 ) scale |= x4 << 7;
|
|
if( ohm & 0x3B ) red |= x4 << 6;
|
|
if( ohm & 0x04 ) red |= x3 << 6;
|
|
if( ohm & 0x10 ) red |= x5 << 7;
|
|
if( ohm & 0x0F ) red |= x2 << 7;
|
|
if( ohm & 0x05 ) red |= x1 << 8;
|
|
if( ohm & 0x0A ) red |= x0 << 8;
|
|
if( ohm & 0x05 ) red |= x0 << 9;
|
|
if( ohm & 0x02 ) red |= x6 << 9;
|
|
if( ohm & 0x01 ) red |= x3 << 10;
|
|
if( ohm & 0x02 ) red |= x5 << 10;
|
|
|
|
shamt = shamts[mode];
|
|
red <<= shamt; green <<= shamt; blue <<= shamt; scale <<= shamt;
|
|
|
|
if( mode != 5 ) { green = red - green; blue = red - blue; }
|
|
|
|
if( majcomp == 1 )
|
|
{
|
|
t = red;
|
|
red = green;
|
|
green = t;
|
|
}
|
|
if( majcomp == 2 )
|
|
{
|
|
t = red;
|
|
red = blue;
|
|
blue = t;
|
|
}
|
|
|
|
p->ep[1][0] = bound( 0, red, 0xFFF );
|
|
p->ep[1][1] = bound( 0, green, 0xFFF );
|
|
p->ep[1][2] = bound( 0, blue, 0xFFF );
|
|
|
|
p->ep[0][0] = bound( 0, red - scale, 0xFFF );
|
|
p->ep[0][1] = bound( 0, green - scale, 0xFFF );
|
|
p->ep[0][2] = bound( 0, blue - scale, 0xFFF );
|
|
|
|
p->ep[1][3] = p->ep[0][3] = 0x780;
|
|
|
|
p->hdr = 0xf;
|
|
}
|
|
static void ASTC_HDR_Mode_11(struct astc_part *p, unsigned char *v)
|
|
{
|
|
static const int dbitstab[8] = {7,6,7,6,5,6,5,6};
|
|
int shamt;
|
|
int majcomp = ((v[4] & 0x80) >> 7) | ((v[5] & 0x80) >> 6);
|
|
int mode,va,vb0,vb1,vc,vd0,vd1;
|
|
int x0,x1,x2,x3,x4,x5,ohm;
|
|
|
|
if( majcomp == 3 )
|
|
{
|
|
Vector4Set(p->ep[0], v[0] << 4, v[2] << 4, (v[4] & 0x7f) << 5, 0x780);
|
|
Vector4Set(p->ep[1], v[1] << 4, v[3] << 4, (v[5] & 0x7f) << 5, 0x780);
|
|
p->hdr = 0xf;
|
|
return;
|
|
}
|
|
|
|
mode = ((v[1]&0x80)>>7) | ((v[2]&0x80)>>6) | ((v[3]&0x80)>>5);
|
|
va = v[0] | ((v[1] & 0x40) << 2);
|
|
vb0 = v[2] & 0x3f;
|
|
vb1 = v[3] & 0x3f;
|
|
vc = v[1] & 0x3f;
|
|
vd0 = v[4] & 0x7f;
|
|
vd1 = v[5] & 0x7f;
|
|
|
|
if (vd0 & (1<<(dbitstab[mode]-1)))
|
|
vd0 |= -1 & ~((1u<<dbitstab[mode])-1);
|
|
if (vd1 & (1<<(dbitstab[mode]-1)))
|
|
vd1 |= -1 & ~((1u<<dbitstab[mode])-1);
|
|
|
|
x0 = (v[2] >> 6) & 1;
|
|
x1 = (v[3] >> 6) & 1;
|
|
x2 = (v[4] >> 6) & 1;
|
|
x3 = (v[5] >> 6) & 1;
|
|
x4 = (v[4] >> 5) & 1;
|
|
x5 = (v[5] >> 5) & 1;
|
|
|
|
ohm = 1 << mode;
|
|
if( ohm & 0xA4 ) va |= x0 << 9;
|
|
if( ohm & 0x08 ) va |= x2 << 9;
|
|
if( ohm & 0x50 ) va |= x4 << 9;
|
|
if( ohm & 0x50 ) va |= x5 << 10;
|
|
if( ohm & 0xA0 ) va |= x1 << 10;
|
|
if( ohm & 0xC0 ) va |= x2 << 11;
|
|
if( ohm & 0x04 ) vc |= x1 << 6;
|
|
if( ohm & 0xE8 ) vc |= x3 << 6;
|
|
if( ohm & 0x20 ) vc |= x2 << 7;
|
|
if( ohm & 0x5B ) vb0 |= x0 << 6;
|
|
if( ohm & 0x5B ) vb1 |= x1 << 6;
|
|
if( ohm & 0x12 ) vb0 |= x2 << 7;
|
|
if( ohm & 0x12 ) vb1 |= x3 << 7;
|
|
|
|
// Now shift up so that major component is at top of 12-bit value
|
|
shamt = (mode >> 1) ^ 3;
|
|
va <<= shamt; vb0 <<= shamt; vb1 <<= shamt;
|
|
vc <<= shamt; vd0 <<= shamt; vd1 <<= shamt;
|
|
|
|
p->ep[1][0] = bound( 0, va, 0xFFF );
|
|
p->ep[1][1] = bound( 0, va - vb0, 0xFFF );
|
|
p->ep[1][2] = bound( 0, va - vb1, 0xFFF );
|
|
|
|
p->ep[0][0] = bound( 0, va - vc, 0xFFF );
|
|
p->ep[0][1] = bound( 0, va - vb0 - vc - vd0, 0xFFF );
|
|
p->ep[0][2] = bound( 0, va - vb1 - vc - vd1, 0xFFF );
|
|
|
|
if( majcomp == 1 )
|
|
{
|
|
p->ep[0][3] = p->ep[0][0];
|
|
p->ep[0][0] = p->ep[0][1];
|
|
p->ep[0][1] = p->ep[0][3];
|
|
p->ep[1][3] = p->ep[1][0];
|
|
p->ep[1][0] = p->ep[1][1];
|
|
p->ep[1][1] = p->ep[1][3];
|
|
}
|
|
else if( majcomp == 2 )
|
|
{
|
|
p->ep[0][3] = p->ep[0][0];
|
|
p->ep[0][0] = p->ep[0][2];
|
|
p->ep[0][2] = p->ep[0][3];
|
|
p->ep[1][3] = p->ep[1][0];
|
|
p->ep[1][0] = p->ep[1][2];
|
|
p->ep[1][2] = p->ep[1][3];
|
|
}
|
|
|
|
p->ep[0][3] = p->ep[1][3] = 0x780;
|
|
|
|
p->hdr = 0xf;
|
|
}
|
|
static void ASTC_HDR_Mode_14(struct astc_part *p, unsigned char *v)
|
|
{
|
|
ASTC_HDR_Mode_11(p, v);
|
|
|
|
p->ep[0][3] = v[6];
|
|
p->ep[1][3] = v[7];
|
|
p->hdr &= 0x7;
|
|
}
|
|
static void ASTC_HDR_Mode_15(struct astc_part *p, unsigned char *v)
|
|
{
|
|
int v6=v[6], v7=v[7];
|
|
int mode;
|
|
ASTC_HDR_Mode_11(p,v);
|
|
|
|
mode = ((v6 >> 7) & 1) | ((v7 >> 6) & 2);
|
|
v6 &= 0x7F;
|
|
v7 &= 0x7F;
|
|
|
|
if(mode==3)
|
|
{
|
|
p->ep[0][3] = v6 << 5;
|
|
p->ep[1][3] = v7 << 5;
|
|
}
|
|
else
|
|
{
|
|
v6 |= (v7 << (mode+1)) & 0x780;
|
|
v7 &= (0x3F >> mode);
|
|
v7 ^= 0x20 >> mode;
|
|
v7 -= 0x20 >> mode;
|
|
v6 <<= (4-mode);
|
|
v7 <<= (4-mode);
|
|
|
|
v7 += v6;
|
|
v7 = bound(0, v7, 0xFFF);
|
|
p->ep[0][3] = v6;
|
|
p->ep[1][3] = v7;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
static void ASTC_DecodeEndpoints(struct astc_block_info *b, unsigned char *v)
|
|
{
|
|
int i, t0, t1, t3, t5, t7;
|
|
|
|
for (i = 0; i < b->partitions; i++)
|
|
{
|
|
#ifdef ASTC_WITH_HDR
|
|
b->part[i].hdr = 0;
|
|
#endif
|
|
switch (b->part[i].mode & 15)
|
|
{
|
|
#ifdef ASTC_WITH_HDR
|
|
case 2: //HDR Luminance, large range
|
|
ASTC_HDR_Mode_2(&b->part[i], v);
|
|
break;
|
|
case 3: //HDR Luminance, small range
|
|
ASTC_HDR_Mode_3(&b->part[i], v);
|
|
break;
|
|
case 7: //HDR RGB, base+scale
|
|
ASTC_HDR_Mode_7(&b->part[i], v);
|
|
break;
|
|
case 11: //HDR RGB
|
|
ASTC_HDR_Mode_11(&b->part[i], v);
|
|
break;
|
|
case 14: //HDR RGB + LDR Alpha
|
|
ASTC_HDR_Mode_14(&b->part[i], v);
|
|
break;
|
|
case 15: //HDR RGB + HDR Alpha
|
|
ASTC_HDR_Mode_15(&b->part[i], v);
|
|
break;
|
|
#endif
|
|
default: //the error colour - for unsupported hdr endpoints. unreachable when hdr is enabled. just fill it with the error colour.
|
|
Vector4Set(b->part[i].ep[0], 0xff, 0, 0xff, 0xff);
|
|
Vector4Set(b->part[i].ep[1], 0xff, 0, 0xff, 0xff);
|
|
break;
|
|
|
|
case 0: //LDR Luminance, direct
|
|
Vector4Set(b->part[i].ep[0], v[0], v[0], v[0], 0xff);
|
|
Vector4Set(b->part[i].ep[1], v[1], v[1], v[1], 0xff);
|
|
break;
|
|
case 1: //LDR Luminance, base+offset
|
|
t0 = (v[0]>>2)|(v[1]&0xc0);
|
|
t1 = t0+(v[1]&0x3f);
|
|
if (t1>0xff)
|
|
t1=0xff;
|
|
Vector4Set(b->part[i].ep[0], t0, t0, t0, 0xff);
|
|
Vector4Set(b->part[i].ep[1], t1, t1, t1, 0xff);
|
|
break;
|
|
case 4: //LDR Luminance+Alpha,direct
|
|
Vector4Set(b->part[i].ep[0], v[0], v[0], v[0], v[2]);
|
|
Vector4Set(b->part[i].ep[1], v[1], v[1], v[1], v[3]);
|
|
break;
|
|
case 5: //LDR Luminance+Alpha, base+offset
|
|
t1 = ASTC_bit_transfer_signed(v[1],&v[0]);
|
|
t3 = ASTC_bit_transfer_signed(v[3],&v[2]);
|
|
Vector4Set(b->part[i].ep[0],v[0],v[0],v[0],v[2]);
|
|
Vector4Set(b->part[i].ep[1],v[0]+t1,v[0]+t1,v[0]+t1,v[2]+t3);
|
|
ASTC_clamp_unorm8(b->part[i].ep[0]);
|
|
ASTC_clamp_unorm8(b->part[i].ep[1]);
|
|
break;
|
|
case 6: //LDR RGB, base+scale
|
|
Vector4Set(b->part[i].ep[0], ((int)v[0]*(int)v[3])>>8, ((int)v[1]*(int)v[3])>>8, ((int)v[2]*(int)v[3])>>8, 0xff);
|
|
Vector4Set(b->part[i].ep[1], v[0], v[1], v[2], 0xff);
|
|
break;
|
|
case 8: //LDR RGB, Direct
|
|
t0 = (int)v[0]+(int)v[2]+(int)v[4];
|
|
t1 = (int)v[1]+(int)v[3]+(int)v[5];
|
|
if (t1>=t0)
|
|
{
|
|
Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],0xff);
|
|
Vector4Set(b->part[i].ep[1], v[1],v[3],v[5],0xff);
|
|
}
|
|
else
|
|
{
|
|
ASTC_blue_contract(b->part[i].ep[0], v[1],v[3],v[5], 0xff);
|
|
ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4], 0xff);
|
|
}
|
|
break;
|
|
case 9: //LDR RGB, base+offset
|
|
t1 = ASTC_bit_transfer_signed(v[1],&v[0]);
|
|
t3 = ASTC_bit_transfer_signed(v[3],&v[2]);
|
|
t5 = ASTC_bit_transfer_signed(v[5],&v[4]);
|
|
if(t1+t3+t5 >= 0)
|
|
{
|
|
Vector4Set(b->part[i].ep[0],v[0],v[2],v[4],0xff);
|
|
Vector4Set(b->part[i].ep[1],v[0]+t1,v[2]+t3,v[4]+t5,0xff);
|
|
}
|
|
else
|
|
{
|
|
ASTC_blue_contract(b->part[i].ep[0], v[0]+t1,v[2]+t3,v[4]+t5, 0xff);
|
|
ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4], 0xff);
|
|
}
|
|
ASTC_clamp_unorm8(b->part[i].ep[0]);
|
|
ASTC_clamp_unorm8(b->part[i].ep[1]);
|
|
break;
|
|
case 10: //LDR RGB, base+scale plus two A
|
|
Vector4Set(b->part[i].ep[0], ((int)v[0]*v[3])>>8, ((int)v[1]*v[3])>>8, ((int)v[2]*v[3])>>8, v[4]);
|
|
Vector4Set(b->part[i].ep[1], v[0], v[1], v[2], v[5]);
|
|
break;
|
|
case 12: //LDR RGBA, direct
|
|
if (v[1]+(int)v[3]+v[5]>=v[0]+(int)v[2]+v[4])
|
|
{
|
|
Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],v[6]);
|
|
Vector4Set(b->part[i].ep[1], v[1],v[3],v[5],v[7]);
|
|
}
|
|
else
|
|
{
|
|
ASTC_blue_contract(b->part[i].ep[0], v[1],v[3],v[5],v[7]);
|
|
ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4],v[6]);
|
|
}
|
|
break;
|
|
case 13: //LDR RGBA, base+offset
|
|
t1 = ASTC_bit_transfer_signed(v[1],&v[0]);
|
|
t3 = ASTC_bit_transfer_signed(v[3],&v[2]);
|
|
t5 = ASTC_bit_transfer_signed(v[5],&v[4]);
|
|
t7 = ASTC_bit_transfer_signed(v[7],&v[6]);
|
|
if(t1+t3+t5>=0)
|
|
{
|
|
Vector4Set(b->part[i].ep[0], v[0],v[2],v[4],v[6]);
|
|
Vector4Set(b->part[i].ep[1], v[0]+t1,v[2]+t3,v[4]+t5,v[6]+t7);
|
|
}
|
|
else
|
|
{
|
|
ASTC_blue_contract(b->part[i].ep[0], v[0]+t1,v[2]+t3,v[4]+t5,v[6]+t7);
|
|
ASTC_blue_contract(b->part[i].ep[1], v[0],v[2],v[4],v[6]);
|
|
}
|
|
ASTC_clamp_unorm8(b->part[i].ep[0]);
|
|
ASTC_clamp_unorm8(b->part[i].ep[1]);
|
|
break;
|
|
}
|
|
v += ((b->part[i].mode>>2)+1)<<1;
|
|
}
|
|
}
|
|
static void ASTC_ReadEndpoints(struct astc_block_info *b)
|
|
{
|
|
int i;
|
|
int cembits;
|
|
|
|
unsigned char epv[18]; //maximum raw endpoint values,
|
|
char epvalues;
|
|
unsigned char gahffs[16], t;
|
|
|
|
//figure out how many raw values we need
|
|
epvalues = 0;
|
|
for (i = 0; i < b->partitions; i++)
|
|
epvalues += ((b->part[i].mode>>2)+1)<<1;
|
|
if (epvalues > countof(epv))
|
|
{
|
|
b->status = ASTC_ERROR;
|
|
return;
|
|
}
|
|
|
|
//the endpoint bits are encoded using the largest size available that'll still fit, yielding raw values between 0-255.
|
|
for(i = countof(astc_epvmode)-1; i >= 0; i--)
|
|
{
|
|
cembits = ASTC_DecodeSize(epvalues, astc_epvmode[i].bits, astc_epvmode[i].extra);
|
|
if(cembits <= b->ep_bits)
|
|
{
|
|
//read the values.
|
|
ASTC_Decode(b->in, epv, epvalues, b->config_bits, astc_epvmode[i].bits, astc_epvmode[i].extra, astc_epvmode[i].dequant);
|
|
//and decode them.
|
|
ASTC_DecodeEndpoints(b, epv);
|
|
|
|
//weight bits are backwards (gah! ffs!)
|
|
//so swap them around so our decode function doesn't need to care
|
|
for (i = 0; i < countof(gahffs); i++)
|
|
{
|
|
t = b->in[i];
|
|
t = (t>>4)|(t<<4);
|
|
t = ((t&0xcc)>>2)|((t&0x33)<<2);
|
|
t = ((t&0xaa)>>1)|((t&0x55)<<1);
|
|
gahffs[15-i] = t;
|
|
}
|
|
//weights are aligned at the end... now the start. gah! ffs!
|
|
ASTC_Decode(gahffs, b->weights, b->wcount[3], 0, astc_weightmode[b->precision].bits, astc_weightmode[b->precision].extra, astc_weightmode[b->precision].dequant);
|
|
return;
|
|
}
|
|
}
|
|
b->status = ASTC_ERROR;
|
|
}
|
|
|
|
//Integer mixing function: scrambles 'p' through a fixed sequence of shift/add/xor steps.
//Used to derive the pseudo-random bits that drive partition selection.
static unsigned int hash52(unsigned int p)
{
	unsigned int h = p;

	h ^= h >> 15;
	h -= h << 17;
	h += h << 7;
	h += h << 4;
	h ^= h >> 5;
	h += h << 16;
	h ^= h >> 7;
	h ^= h >> 3;
	h ^= h << 6;
	h ^= h >> 17;
	return h;
}
|
|
//Decides which partition (0..partitions-1) the texel at x,y,z of a block belongs to.
//The seed is hashed, twelve 4-bit fields of the hash are squared and scaled down, and these are used as
//per-axis coefficients of four linear functions of the texel position. The partition whose function
//yields the largest value (lowest index on ties) wins. Small blocks have their coordinates doubled first.
static int ASTC_ChoosePartition(int seed, int x, int y, int z, int partitions, int smallblock)
{
	//bit offsets within the hash of the first eleven fields (the twelfth wraps around the top and is built separately)
	static const unsigned char fieldpos[11] = {0,4,8,12, 16,20,24,28, 18,22,26};
	//per candidate partition: which field scales z, and how far the hash is shifted down for the constant term
	static const unsigned char zfield[4] = {10, 11, 8, 9};
	static const unsigned char biasshift[4] = {14, 10, 6, 2};
	unsigned char field[12];
	unsigned int rnum, score[4];
	int sh[3];
	int k, best;

	if (partitions == 1)
		return 0;

	if (smallblock)
	{
		x <<= 1;
		y <<= 1;
		z <<= 1;
	}

	seed += (partitions-1) * 1024;
	rnum = hash52(seed);

	for (k = 0; k < 11; k++)
		field[k] = (rnum >> fieldpos[k]) & 0xF;
	field[11] = ((rnum >> 30) | (rnum << 2)) & 0xF;

	//sh[0] applies to the even fields, sh[1] to the odd ones; the low seed bit decides which gets which shift.
	sh[(seed & 1) ? 0 : 1] = (seed & 2) ? 4 : 5;
	sh[(seed & 1) ? 1 : 0] = (partitions == 3) ? 6 : 5;
	//sh[2] applies to the last four fields (the z coefficients).
	sh[2] = (seed & 0x10) ? sh[0] : sh[1];

	for (k = 0; k < 12; k++)
	{
		field[k] *= field[k];
		field[k] >>= (k >= 8) ? sh[2] : sh[k & 1];
	}

	for (k = 0; k < 4; k++)
		score[k] = (field[2*k]*x + field[2*k+1]*y + field[zfield[k]]*z + (rnum >> biasshift[k])) & 0x3F;

	//partitions that don't exist can't win.
	if (partitions < 4)
		score[3] = 0;
	if (partitions < 3)
		score[2] = 0;

	//first index holding the maximum.
	best = 0;
	for (k = 1; k < 4; k++)
	{
		if (score[k] > score[best])
			best = k;
	}
	return best;
}
|
|
#endif
|
|
|
|
#ifdef ASTC_WITH_LDR
|
|
//Spits out 8-bit RGBA data for a single block. Any HDR blocks will result in the error colour.
|
|
//sRGB can be applied by the caller, if needed.
|
|
ASTC_PUBLIC void ASTC_Decode_LDR8(unsigned char *in, unsigned char *out, int pixstride, int layerstride, int bw, int bh, int bd)
|
|
{
|
|
struct astc_block_info b;
|
|
int x, y;
|
|
int stride = pixstride*4;
|
|
#ifdef ASTC_WITH_3D
|
|
int z;
|
|
layerstride = layerstride*4-(stride*bh);
|
|
#else
|
|
if (bd != 1)
|
|
return; //error!
|
|
#endif
|
|
b.in = in;
|
|
b.blocksize[0] = bw;
|
|
b.blocksize[1] = bh;
|
|
b.blocksize[2] = bd;
|
|
ASTC_ReadBlockMode(&b);
|
|
|
|
if (b.status == ASTC_VOID_LDR)
|
|
{ //void extent
|
|
//Note: we don't validate the extents.
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
for (x = 0; x < bw; x++)
|
|
{
|
|
out[(x<<2)+0] = in[9];
|
|
out[(x<<2)+1] = in[11];
|
|
out[(x<<2)+2] = in[13];
|
|
out[(x<<2)+3] = in[15];
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (b.status == ASTC_OKAY)
|
|
ASTC_ReadPartitions(&b);
|
|
if (b.status == ASTC_OKAY)
|
|
ASTC_ReadEndpoints(&b);
|
|
|
|
if (b.status == ASTC_OKAY)
|
|
{
|
|
#define N b.wcount[0]
|
|
#define M b.wcount[1]
|
|
int s1=1<<b.dualplane,s2=N<<b.dualplane; //values for 2d blocks (3d blocks will override)
|
|
int s3=((bd!=1?N*M:0)+N+1)<<b.dualplane; //small variation for 3d blocks.
|
|
|
|
int smallblock = (b.blocksize[0]*b.blocksize[1]*b.blocksize[2])<31;
|
|
int fs, s, ds = (1024+b.blocksize[0]/2)/(b.blocksize[0]-1);
|
|
int ft, t, dt = (1024+b.blocksize[1]/2)/(b.blocksize[1]-1);
|
|
#ifdef ASTC_WITH_3D
|
|
int fr, r, dr = (1024+b.blocksize[2]/2)/(b.blocksize[2]-1);
|
|
#endif
|
|
int v0, w, w00,w01,w10,w11;
|
|
struct astc_part *p;
|
|
|
|
#ifdef ASTC_WITH_HDR
|
|
for (x = 0; x < b.partitions; x++)
|
|
{ //the LDR profile treats HDR endpoints as the error colour. this is per-partition rather than per-block.
|
|
if (b.part[x].hdr)
|
|
{
|
|
Vector4Set(b.part[x].ep[0], 0xff, 0, 0xff, 0xff);
|
|
Vector4Set(b.part[x].ep[1], 0xff, 0, 0xff, 0xff);
|
|
}
|
|
//else FIXME: when spitting out 8bit, we're meant to have an extra 9th bit which is always set, in order to avoid round-to-zero biasing the result of the final 8 bits.
|
|
}
|
|
#endif
|
|
|
|
#ifdef ASTC_WITH_3D
|
|
for (z = 0; z < bd; z++, out += layerstride-stride*bh)
|
|
#endif
|
|
{
|
|
#ifdef ASTC_WITH_3D
|
|
r = ((dr*z)*(b.wcount[2]-1)+32)>>6;
|
|
fr=r&0xf;
|
|
#endif
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
{
|
|
t = ((dt*y)*(b.wcount[1]-1)+32)>>6;
|
|
ft=t&0xf;
|
|
for (x = 0; x < bw; x++)
|
|
{
|
|
p = &b.part[ASTC_ChoosePartition(b.partindex, x,y,0, b.partitions, smallblock)];
|
|
s = ((ds*x)*(b.wcount[0]-1)+32)>>6;
|
|
fs=s&0xf;
|
|
#ifdef ASTC_WITH_3D
|
|
if (bd != 1)
|
|
{ //3d blocks use simplex interpolation instead of 8-way interpolation. its easier for hardware but more cycles for us.
|
|
if (fs>fr)
|
|
{ //figure out which weights/factors to use.
|
|
if (ft>fr)
|
|
{
|
|
if (fs>ft)
|
|
s1=1, s2=N, w00=16-fs, w01=fs-ft, w10=ft-fr, w11=fr;
|
|
else
|
|
s1=N, s2=1, w00=16-ft, w01=ft-fs, w10=fs-fr, w11=fr;
|
|
}
|
|
else
|
|
s1=1, s2=N*M, w00=16-fs, w01=fs-fr, w10=fr-ft, w11=ft;
|
|
}
|
|
else
|
|
{
|
|
if (fs>ft)
|
|
s1=N*M, s2=1, w00=16-fr, w01=fr-fs, w10=fs-ft, w11=ft;
|
|
else
|
|
{
|
|
if (ft>fr)
|
|
s1=N, s2=N*M, w00=16-ft, w01=ft-fr, w10=fr-fs, w11=fs;
|
|
else
|
|
s1=N*M, s2=N, w00=16-fr, w01=fr-ft, w10=ft-fs, w11=fs;
|
|
}
|
|
}
|
|
|
|
s1 <<= b.dualplane;
|
|
s2 <<= b.dualplane;
|
|
s2+=s1;
|
|
//s3 = (N*M+N+1)<<b.dualplane;
|
|
v0 = ((s>>4)+(t>>4)*N+(r>>4)*N*M) << b.dualplane;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
//s1 = 1<<b.dualplane;
|
|
//s2 = (N)<<b.dualplane;
|
|
//s3 = (N+1)<<b.dualplane;
|
|
w11 = (fs*ft+8) >> 4;
|
|
w10 = ft - w11;
|
|
w01 = fs - w11;
|
|
w00 = 16 - fs - ft + w11;
|
|
v0 = ((s>>4)+(t>>4)*N) << b.dualplane;
|
|
}
|
|
w = ( w00*b.weights[v0] +
|
|
w01*b.weights[v0+s1] +
|
|
w10*b.weights[v0+s2] +
|
|
w11*b.weights[v0+s3] + 8) >> 4;
|
|
out[(x<<2)+0] = ((64-w)*p->ep[0][0] + w*p->ep[1][0])>>6;
|
|
out[(x<<2)+1] = ((64-w)*p->ep[0][1] + w*p->ep[1][1])>>6;
|
|
out[(x<<2)+2] = ((64-w)*p->ep[0][2] + w*p->ep[1][2])>>6;
|
|
out[(x<<2)+3] = ((64-w)*p->ep[0][3] + w*p->ep[1][3])>>6;
|
|
|
|
if (b.dualplane)
|
|
{ //dual planes has a second set of weights that override a single channel
|
|
v0++;
|
|
w = ( w00*b.weights[v0] +
|
|
w01*b.weights[v0+s1] +
|
|
w10*b.weights[v0+s2] +
|
|
w11*b.weights[v0+s3] + 8) >> 4;
|
|
out[(x<<2)+b.ccs] = ((64-w)*p->ep[0][b.ccs] + w*p->ep[1][b.ccs])>>6;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{ //error colour == magenta
|
|
#ifdef ASTC_WITH_3D
|
|
for (z = 0; z < bd; z++, out += layerstride)
|
|
#endif
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
for (x = 0; x < bw; x++)
|
|
{
|
|
out[(x<<2)+0] = 0xff;
|
|
out[(x<<2)+1] = 0;
|
|
out[(x<<2)+2] = 0xff;
|
|
out[(x<<2)+3] = 0xff;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#ifdef ASTC_WITH_HDR
|
|
//Converts an interpolated 16-bit channel value into half-float bits.
//hdr!=0: 'rawval' is in the HDR pseudo-logarithmic form (5 exponent bits above 11 mantissa bits), which is
//        remapped onto a half-float. Results that would be Inf/NaN become 0x7BFF, the largest finite half.
//hdr==0: 'rawval' is a unorm16 (0..65535), converted to the half-float nearest below rawval/65535.
static unsigned short ASTC_GenHalffloat(int hdr, int rawval)
{
	if (hdr)
	{
		int fp16, m;
		fp16 = (rawval&0xF800) >> 1;	//the 5 exponent bits drop straight into place.
		m = rawval&0x7FF;
		//piecewise-linear remap of the 11-bit mantissa to 10 bits.
		if (m < 512)
			fp16 |= (3*m)>>3;
		else if (m >= 1536)
			fp16 |= (5*m - 2048)>>3;
		else
			fp16 |= (4*m - 512)>>3;
		if ((fp16 & 0x7C00) == 0x7C00)
			fp16 = 0x7BFF;	//infinities and nans are not allowed to escape.
		return fp16;
	}
	else
	{
		union
		{
			float f;
			unsigned int u;
		} u = {rawval/65535.0};
		int e, m, shift;

		e = ((u.u>>23)&0xff) - 127;
		m = (u.u&((1<<23)-1))>>13;	//top 10 bits of the mantissa (truncated)

		if (e > 15)
			return 0x7C00;	//infinity instead of a nan
		if (e >= -14)
			return ((e+15)<<10) | m;

		//too small for a normalised half: make the implicit leading 1 explicit and shift it down into a denormal.
		shift = -14 - e;
		if (shift > 10)
			return 0;	//nothing would survive
		return (0x400|m) >> shift;
	}
}
|
|
|
|
//Spits out half-float RGBA data for a single block.
|
|
ASTC_PUBLIC void ASTC_Decode_HDR(unsigned char *in, unsigned short *out, int pixstride, int layerstride, int bw, int bh, int bd)
|
|
{
|
|
int x, y;
|
|
int stride = pixstride*4;
|
|
struct astc_block_info b;
|
|
#ifdef ASTC_WITH_3D
|
|
int z;
|
|
layerstride = layerstride*4-(stride*bh);
|
|
#else
|
|
if (bd != 1)
|
|
return; //error!
|
|
#endif
|
|
b.in = in;
|
|
b.blocksize[0] = bw;
|
|
b.blocksize[1] = bh;
|
|
b.blocksize[2] = bd;
|
|
|
|
ASTC_ReadBlockMode(&b);
|
|
|
|
if (b.status == ASTC_VOID_HDR)
|
|
{ //void extent
|
|
//Note: we don't validate the extents.
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
for (x = 0; x < bw; x++)
|
|
{ //hdr void extents already use fp16
|
|
out[(x<<2)+0] = in[8] | (in[9]<<8);
|
|
out[(x<<2)+1] = in[10] | (in[11]<<8);
|
|
out[(x<<2)+2] = in[12] | (in[13]<<8);
|
|
out[(x<<2)+3] = in[14] | (in[15]<<8);
|
|
}
|
|
return;
|
|
}
|
|
if (b.status == ASTC_VOID_LDR)
|
|
{ //void extent
|
|
//Note: we don't validate the extents.
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
for (x = 0; x < bw; x++)
|
|
{
|
|
out[(x<<2)+0] = ASTC_GenHalffloat(0, in[8] | (in[9]<<8));
|
|
out[(x<<2)+1] = ASTC_GenHalffloat(0, in[10] | (in[11]<<8));
|
|
out[(x<<2)+2] = ASTC_GenHalffloat(0, in[12] | (in[13]<<8));
|
|
out[(x<<2)+3] = ASTC_GenHalffloat(0, in[14] | (in[15]<<8));
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (b.status == ASTC_OKAY)
|
|
ASTC_ReadPartitions(&b);
|
|
if (b.status == ASTC_OKAY)
|
|
ASTC_ReadEndpoints(&b);
|
|
|
|
if (b.status == ASTC_OKAY)
|
|
{
|
|
#define N b.wcount[0]
|
|
#define M b.wcount[1]
|
|
int s1=1<<b.dualplane,s2=N<<b.dualplane; //values for 2d blocks (3d blocks will override)
|
|
int s3=((bd!=1?N*M:0)+N+1)<<b.dualplane; //small variation for 3d blocks.
|
|
|
|
int smallblock = (b.blocksize[0]*b.blocksize[1]*b.blocksize[2])<31;
|
|
int fs, s, ds = (1024+b.blocksize[0]/2)/(b.blocksize[0]-1);
|
|
int ft, t, dt = (1024+b.blocksize[1]/2)/(b.blocksize[1]-1);
|
|
#ifdef ASTC_WITH_3D
|
|
int fr, r, dr = (1024+b.blocksize[2]/2)/(b.blocksize[2]-1);
|
|
#endif
|
|
int v0, w, w00,w01,w10,w11;
|
|
struct astc_part *p;
|
|
|
|
for (x = 0; x < b.partitions; x++)
|
|
{ //we need to do a little extra processing here
|
|
for (y = 0; y < 4; y++)
|
|
{
|
|
if (b.part[x].hdr&(1<<y))
|
|
{ //the 12bit endpoint values are shifted up to 16bit...
|
|
b.part[x].ep[0][y] <<= 4;
|
|
b.part[x].ep[1][y] <<= 4;
|
|
}
|
|
else
|
|
{ //convert to unorm16.
|
|
b.part[x].ep[0][y] |= b.part[x].ep[0][y] << 8;
|
|
b.part[x].ep[1][y] |= b.part[x].ep[1][y] << 8;
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef ASTC_WITH_3D
|
|
for (z = 0; z < bd; z++, out += layerstride)
|
|
#endif
|
|
{
|
|
#ifdef ASTC_WITH_3D
|
|
r = ((dr*z)*(b.wcount[2]-1)+32)>>6;
|
|
fr=s&0xf;
|
|
#endif
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
{
|
|
t = ((dt*y)*(b.wcount[1]-1)+32)>>6;
|
|
ft=s&0xf;
|
|
for (x = 0; x < bw; x++)
|
|
{
|
|
p = &b.part[ASTC_ChoosePartition(b.partindex, x,y,0, b.partitions, smallblock)];
|
|
s = ((ds*x)*(b.wcount[0]-1)+32)>>6;
|
|
fs=s&0xf;
|
|
#ifdef ASTC_WITH_3D
|
|
if (bd != 1)
|
|
{ //3d blocks use simplex interpolation instead of 8-way interpolation. its easier for hardware but more cycles for us.
|
|
if (fs>fr)
|
|
{ //figure out which weights/factors to use.
|
|
if (ft>fr)
|
|
{
|
|
if (fs>ft)
|
|
s1=1, s2=N, w00=16-fs, w01=fs-ft, w10=ft-fr, w11=fr;
|
|
else
|
|
s1=N, s2=1, w00=16-ft, w01=ft-fs, w10=fs-fr, w11=fr;
|
|
}
|
|
else
|
|
s1=1, s2=N*M, w00=16-fs, w01=fs-fr, w10=fr-ft, w11=ft;
|
|
}
|
|
else
|
|
{
|
|
if (fs>ft)
|
|
s1=N*M, s2=1, w00=16-fr, w01=fr-fs, w10=fs-ft, w11=ft;
|
|
else
|
|
{
|
|
if (ft>fr)
|
|
s1=N, s2=N*M, w00=16-ft, w01=ft-fr, w10=fr-fs, w11=fs;
|
|
else
|
|
s1=N*M, s2=N, w00=16-fr, w01=fr-ft, w10=ft-fs, w11=fs;
|
|
}
|
|
}
|
|
|
|
s1 <<= b.dualplane;
|
|
s2 <<= b.dualplane;
|
|
s2+=s1;
|
|
//s3 = (N*M+N+1)<<b.dualplane;
|
|
v0 = (((s>>4))+((t>>4)*N)+(r>>4)*N*M) << b.dualplane;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
//s1 = 1<<b.dualplane;
|
|
//s2 = (N)<<b.dualplane;
|
|
//s3 = (N+1)<<b.dualplane;
|
|
w11 = (fs*ft+8) >> 4;
|
|
w10 = ft - w11;
|
|
w01 = fs - w11;
|
|
w00 = 16 - fs - ft + w11;
|
|
|
|
v0 = (((s>>4))+(t>>4)*N) << b.dualplane;
|
|
}
|
|
w = ( w00*b.weights[v0] +
|
|
w01*b.weights[v0+s1] +
|
|
w10*b.weights[v0+s2] +
|
|
w11*b.weights[v0+s3] + 8) >> 4;
|
|
out[(x<<2)+0] = ASTC_GenHalffloat(p->hdr&1, ((64-w)*p->ep[0][0] + w*p->ep[1][0])>>6);
|
|
out[(x<<2)+1] = ASTC_GenHalffloat(p->hdr&1, ((64-w)*p->ep[0][1] + w*p->ep[1][1])>>6);
|
|
out[(x<<2)+2] = ASTC_GenHalffloat(p->hdr&1, ((64-w)*p->ep[0][2] + w*p->ep[1][2])>>6);
|
|
out[(x<<2)+3] = ASTC_GenHalffloat(p->hdr&8, ((64-w)*p->ep[0][3] + w*p->ep[1][3])>>6);
|
|
|
|
if (b.dualplane)
|
|
{ //dual planes has a second set of weights that override a single channel
|
|
v0++;
|
|
w = ( w00*b.weights[v0] +
|
|
w01*b.weights[v0+s1] +
|
|
w10*b.weights[v0+s2] +
|
|
w11*b.weights[v0+s3] + 8) >> 4;
|
|
out[(x<<2)+b.ccs] = ASTC_GenHalffloat(p->hdr&(1<<b.ccs), ((64-w)*p->ep[0][b.ccs] + w*p->ep[1][b.ccs])>>6);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{ //error colour == magenta
|
|
#ifdef ASTC_WITH_3D
|
|
for (z = 0; z < bd; z++, out += layerstride)
|
|
#endif
|
|
for (y = 0; y < bh; y++, out += stride)
|
|
for (x = 0; x < bw; x++)
|
|
{
|
|
out[(x<<2)+0] = 0xf<<10;
|
|
out[(x<<2)+1] = 0;
|
|
out[(x<<2)+2] = 0xf<<10;
|
|
out[(x<<2)+3] = 0xf<<10;
|
|
}
|
|
}
|
|
}
|
|
#endif
|