fteqw/engine/vk/vk_backend.c
Spoike b3050121cf Some Android tweaks.
clean up args to Net_PextMask.
some build config fixes.
attempt to support meag's 'status 32' qtv address lists.
fix bug with vulkan+sdl renderer.




git-svn-id: https://svn.code.sf.net/p/fteqw/code/trunk@5460 fc73d0e0-1445-4013-8a0c-d673dee63da5
2019-05-10 09:31:21 +00:00

6426 lines
200 KiB
C

#include "quakedef.h"
#ifdef VKQUAKE
#include "vkrenderer.h"
#include "glquake.h"
#include "gl_draw.h"
#include "shader.h"
#include "renderque.h"
//FIXME: instead of switching rendertargets and back, we should be using an alternative queue.
#define PERMUTATION_BEM_DEPTHONLY (1u<<14)
#define PERMUTATION_BEM_WIREFRAME (1u<<15)
#undef BE_Init
#undef BE_SelectMode
#undef BE_GenBrushModelVBO
#undef BE_ClearVBO
#undef BE_UploadAllLightmaps
#undef BE_LightCullModel
#undef BE_SelectEntity
#undef BE_SelectDLight
#undef BE_GetTempBatch
#undef BE_SubmitBatch
#undef BE_DrawMesh_List
#undef BE_DrawMesh_Single
#undef BE_SubmitMeshes
#undef BE_DrawWorld
#undef BE_VBO_Begin
#undef BE_VBO_Data
#undef BE_VBO_Finish
#undef BE_VBO_Destroy
#undef BE_Scissor
#undef BE_RenderToTextureUpdate2d
extern cvar_t r_shadow_realtime_world_lightmaps;
extern cvar_t gl_overbright;
extern cvar_t r_portalrecursion;
extern cvar_t r_polygonoffset_shadowmap_offset, r_polygonoffset_shadowmap_factor;
extern cvar_t r_wireframe;
extern cvar_t vk_stagingbuffers;
unsigned int vk_usedynamicstaging;
#ifdef RTLIGHTS
static void VK_TerminateShadowMap(void);
#endif
void VKBE_BeginShadowmapFace(void);
static void R_DrawPortal(batch_t *batch, batch_t **blist, batch_t *depthmasklist[2], int portaltype);
#define MAX_TMUS 32
extern texid_t r_whiteimage, missing_texture_gloss, missing_texture_normal;
texid_t r_blackimage;
static void BE_RotateForEntity (const entity_t *e, const model_t *mod);
void VKBE_SetupLightCBuffer(dlight_t *l, vec3_t colour);
/*========================================== tables for deforms =====================================*/
#define frand() (rand()*(1.0/RAND_MAX))
#define FTABLE_SIZE 1024
#define FTABLE_CLAMP(x) (((int)((x)*FTABLE_SIZE) & (FTABLE_SIZE-1)))
#define FTABLE_EVALUATE(table,x) (table ? table[FTABLE_CLAMP(x)] : frand()*((x)-floor(x)))
#define R_FastSin(x) r_sintable[FTABLE_CLAMP(x)]
static float r_sintable[FTABLE_SIZE];
static float r_triangletable[FTABLE_SIZE];
static float r_squaretable[FTABLE_SIZE];
static float r_sawtoothtable[FTABLE_SIZE];
static float r_inversesawtoothtable[FTABLE_SIZE];
static float *FTableForFunc ( unsigned int func )
{
switch (func)
{
case SHADER_FUNC_SIN:
return r_sintable;
case SHADER_FUNC_TRIANGLE:
return r_triangletable;
case SHADER_FUNC_SQUARE:
return r_squaretable;
case SHADER_FUNC_SAWTOOTH:
return r_sawtoothtable;
case SHADER_FUNC_INVERSESAWTOOTH:
return r_inversesawtoothtable;
}
//bad values allow us to crash (so I can debug em)
return NULL;
}
static void FTable_Init(void)
{
unsigned int i;
double t;
for (i = 0; i < FTABLE_SIZE; i++)
{
t = (double)i / (double)FTABLE_SIZE;
r_sintable[i] = sin(t * 2*M_PI);
if (t < 0.25)
r_triangletable[i] = t * 4.0;
else if (t < 0.75)
r_triangletable[i] = 2 - 4.0 * t;
else
r_triangletable[i] = (t - 0.75) * 4.0 - 1.0;
if (t < 0.5)
r_squaretable[i] = 1.0f;
else
r_squaretable[i] = -1.0f;
r_sawtoothtable[i] = t;
r_inversesawtoothtable[i] = 1.0 - t;
}
}
typedef vec3_t mat3_t[3];
static mat3_t axisDefault={{1, 0, 0},
{0, 1, 0},
{0, 0, 1}};
static void Matrix3_Transpose (mat3_t in, mat3_t out)
{
out[0][0] = in[0][0];
out[1][1] = in[1][1];
out[2][2] = in[2][2];
out[0][1] = in[1][0];
out[0][2] = in[2][0];
out[1][0] = in[0][1];
out[1][2] = in[2][1];
out[2][0] = in[0][2];
out[2][1] = in[1][2];
}
static void Matrix3_Multiply_Vec3 (const mat3_t a, const vec3_t b, vec3_t product)
{
product[0] = a[0][0]*b[0] + a[0][1]*b[1] + a[0][2]*b[2];
product[1] = a[1][0]*b[0] + a[1][1]*b[1] + a[1][2]*b[2];
product[2] = a[2][0]*b[0] + a[2][1]*b[1] + a[2][2]*b[2];
}
static int Matrix3_Compare(const mat3_t in, const mat3_t out)
{
return !memcmp(in, out, sizeof(mat3_t));
}
/*================================================*/
//dlight-specific constant-buffer
typedef struct
{
float l_cubematrix[16];
vec3_t l_lightposition; float padl1;
vec3_t l_colour; float pad2;
vec3_t l_lightcolourscale; float l_lightradius;
vec4_t l_shadowmapproj;
vec2_t l_shadowmapscale; vec2_t pad3;
} vkcbuf_light_t;
//entity-specific constant-buffer
typedef struct
{
float m_modelviewproj[16];
float m_model[16];
float m_modelinv[16];
vec3_t e_eyepos;
float e_time;
vec3_t e_light_ambient; float pad1;
vec3_t e_light_dir; float pad2;
vec3_t e_light_mul; float pad3;
vec4_t e_lmscale[4];
vec3_t e_uppercolour; float pad4;
vec3_t e_lowercolour; float pad5;
vec3_t e_glowmod; float pad6;
vec4_t e_colourident;
vec4_t w_fogcolours;
float w_fogdensity; float w_fogdepthbias; vec2_t pad7;
} vkcbuf_entity_t;
enum
{
VK_BUFF_POS,
VK_BUFF_TC,
VK_BUFF_COL,
VK_BUFF_LMTC,
VK_BUFF_NORM,
VK_BUFF_SDIR,
VK_BUFF_TDIR,
VK_BUFF_MAX
};
typedef struct
{ //there should be only one copy of this struct for each thread that renders anything in vulkan.
//descriptor sets are: 0) entity+light 1) batch textures + pass textures
VkDescriptorSet descriptorsets[1];
//commandbuffer state, to avoid redundant state changes.
VkPipeline activepipeline;
float depthrange;
} vkrendercontext_t;
typedef struct
{
unsigned int inited;
backendmode_t mode;
unsigned int modepermutation;
unsigned int flags;
unsigned int forcebeflags;
float identitylighting;
float identitylightmap;
float curtime;
const entity_t *curentity;
const dlight_t *curdlight;
shader_t *curshader;
shader_t *depthonly;
texnums_t *curtexnums;
vbo_t *batchvbo;
batch_t *curbatch;
batch_t dummybatch;
vec4_t lightshadowmapproj;
vec2_t lightshadowmapscale;
unsigned int curlmode;
shader_t *shader_rtlight[LSHADER_MODES];
program_t *programfixedemu[2];
mesh_t **meshlist;
unsigned int nummeshes;
unsigned int wbatch;
unsigned int maxwbatches;
batch_t *wbatches;
VkDescriptorBufferInfo ubo_entity;
VkDescriptorBufferInfo ubo_light;
vec4_t lightinfo; //org+radius
VkBuffer staticbuf; //holds fallback vertex info so we don't crash from it
vk_poolmem_t staticbufmem;
texid_t tex_currentrender;
struct vk_rendertarg rt_reflection;
struct vk_rendertarg rt_refraction;
texid_t tex_refraction; //separate from rt_reflection, because $reasons
texid_t tex_ripplemap;
vkrendercontext_t rc;
struct shadowmaps_s
{
uint32_t width;
uint32_t height;
VkImage image; //array. multiple allows for things to happen out of order, which should help to avoid barrier stalls.
VkDeviceMemory memory;
uint32_t seq;
struct
{
VkFramebuffer framebuffer;
image_t qimage; //this is silly, but whatever.
vk_image_t vimage;
} buf[8];
} shadow[2]; //omni, spot
texid_t currentshadowmap;
VkDescriptorSetLayout textureLayout;
} vkbackend_t;
#define VERTEXSTREAMSIZE (1024*1024*2) //2mb = 1 PAE jumbo page
#define DYNVBUFFSIZE 65536
#define DYNIBUFFSIZE 65536
static vecV_t tmpbuf[65536]; //max verts per mesh
static vkbackend_t shaderstate;
extern int be_maxpasses;
struct blobheader
{
unsigned char blobmagic[4];
unsigned int blobversion;
unsigned int defaulttextures; //s_diffuse etc flags
unsigned int numtextures; //s_t0 count
unsigned int permutations; //
unsigned int cvarsoffset;
unsigned int cvarslength;
unsigned int vertoffset;
unsigned int vertlength;
unsigned int fragoffset;
unsigned int fraglength;
};
static float VK_ShaderReadArgument(const char *arglist, const char *arg, char type, qbyte size, void *out)
{
qbyte i;
const char *var;
int arglen = strlen(arg);
//grab an argument instead, otherwise 0
var = arglist;
while((var = strchr(var, '#')))
{
if (!Q_strncasecmp(var+1, arg, arglen))
{
if (var[1+arglen] == '=')
{
var = var+arglen+2;
for (i = 0; i < size; i++)
{
while (*var == ' ' || *var == '\t' || *var == ',')
var++;
if (type == 'F')
((float*)out)[i] = BigFloat(strtod(var, (char**)&var));
else
((int*)out)[i] = BigLong(strtol(var, (char**)&var, 0));
if (!var)
break;
}
return 1;
}
if (var[1+arglen] == '#' || !var[1+arglen])
{
for (i = 0; i < size; i++)
{
if (type == 'F')
((float*)out)[i] = BigFloat(1);
else
((int*)out)[i] = BigLong(1);
}
return 1; //present, but no value
}
}
var++;
}
return 0; //not present.
}
#if 0
//this should use shader pass flags, but those are specific to the shader, not the program, which makes this awkward.
static VkSampler VK_GetSampler(unsigned int flags)
{
static VkSampler ret;
qboolean clamptoedge = flags & IF_CLAMP;
VkSamplerCreateInfo lmsampinfo = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
if (ret)
return ret;
if (flags & IF_LINEAR)
{
lmsampinfo.minFilter = lmsampinfo.magFilter = VK_FILTER_LINEAR;
lmsampinfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
}
else if (flags & IF_NEAREST)
{
lmsampinfo.minFilter = lmsampinfo.magFilter = VK_FILTER_NEAREST;
lmsampinfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
}
else
{
int *filter = (flags & IF_UIPIC)?vk.filterpic:vk.filtermip;
if (filter[0])
lmsampinfo.minFilter = VK_FILTER_LINEAR;
else
lmsampinfo.minFilter = VK_FILTER_NEAREST;
if (filter[1])
lmsampinfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
else
lmsampinfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST;
if (filter[2])
lmsampinfo.magFilter = VK_FILTER_LINEAR;
else
lmsampinfo.magFilter = VK_FILTER_NEAREST;
}
lmsampinfo.addressModeU = clamptoedge?VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:VK_SAMPLER_ADDRESS_MODE_REPEAT;
lmsampinfo.addressModeV = clamptoedge?VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:VK_SAMPLER_ADDRESS_MODE_REPEAT;
lmsampinfo.addressModeW = clamptoedge?VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:VK_SAMPLER_ADDRESS_MODE_REPEAT;
lmsampinfo.mipLodBias = 0.0;
lmsampinfo.anisotropyEnable = (flags & IF_NEAREST)?false:(vk.max_anistophy > 1);
lmsampinfo.maxAnisotropy = vk.max_anistophy;
lmsampinfo.compareEnable = VK_FALSE;
lmsampinfo.compareOp = VK_COMPARE_OP_NEVER;
lmsampinfo.minLod = vk.mipcap[0]; //this isn't quite right
lmsampinfo.maxLod = vk.mipcap[1];
lmsampinfo.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
lmsampinfo.unnormalizedCoordinates = VK_FALSE;
VkAssert(vkCreateSampler(vk.device, &lmsampinfo, NULL, &ret));
return ret;
}
#endif
//creates the layout stuff for the prog.
static void VK_FinishProg(program_t *prog, const char *name)
{
{
VkDescriptorSetLayout desclayout;
VkDescriptorSetLayoutCreateInfo descSetLayoutCreateInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
VkDescriptorSetLayoutBinding dbs[2+MAX_TMUS], *db = dbs;
uint32_t i;
//VkSampler samp = VK_GetSampler(0);
db->binding = db-dbs;
db->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
db->descriptorCount = 1;
db->stageFlags = VK_SHADER_STAGE_VERTEX_BIT|VK_SHADER_STAGE_FRAGMENT_BIT;
db->pImmutableSamplers = NULL;
db++;
db->binding = db-dbs;
db->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
db->descriptorCount = 1;
db->stageFlags = VK_SHADER_STAGE_VERTEX_BIT|VK_SHADER_STAGE_FRAGMENT_BIT;
db->pImmutableSamplers = NULL;
db++;
for (i = 0; i < 32; i++)
{
if (!(prog->defaulttextures & (1u<<i)))
continue;
db->binding = db-dbs;
db->descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
db->descriptorCount = 1;
db->stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
db->pImmutableSamplers = NULL;//&samp;
db++;
}
for (i = 0; i < prog->numsamplers; i++)
{
db->binding = db-dbs;
db->descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
db->descriptorCount = 1;
db->stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
db->pImmutableSamplers = NULL;//&samp;
db++;
}
descSetLayoutCreateInfo.bindingCount = db-dbs;
descSetLayoutCreateInfo.pBindings = dbs;
if (vk.khr_push_descriptor)
descSetLayoutCreateInfo.flags |= VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
VkAssert(vkCreateDescriptorSetLayout(vk.device, &descSetLayoutCreateInfo, NULL, &desclayout));
prog->desclayout = desclayout;
}
{
VkDescriptorSetLayout sets[1] = {prog->desclayout};
VkPipelineLayout layout;
VkPushConstantRange push[1];
VkPipelineLayoutCreateInfo pipeLayoutCreateInfo = {VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
push[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
push[0].offset = 0;
push[0].size = sizeof(vec4_t);
pipeLayoutCreateInfo.flags = 0;
pipeLayoutCreateInfo.setLayoutCount = countof(sets);
pipeLayoutCreateInfo.pSetLayouts = sets;
pipeLayoutCreateInfo.pushConstantRangeCount = !strncmp(name, "fixedemu", 8);
pipeLayoutCreateInfo.pPushConstantRanges = push;
VkAssert(vkCreatePipelineLayout(vk.device, &pipeLayoutCreateInfo, vkallocationcb, &layout));
prog->layout = layout;
}
}
static const char *vulkan_glsl_hdrs[] =
{
"sys/defs.h",
"#define DEFS_DEFINED\n"
"#undef texture2D\n" //nvidia is fucking us over
"#undef textureCube\n" //nvidia is fucking us over
"#define texture2D texture\n"
"#define textureCube texture\n"
"#define e_lmscale e_lmscales[0]\n"
,
"sys/skeletal.h",
"#ifdef SKELETAL\n"
"vec4 skeletaltransform()"
"{"
"mat3x4 wmat;\n"
"wmat = m_bones[int(v_bone.x)] * v_weight.x;\n"
"wmat += m_bones[int(v_bone.y)] * v_weight.y;\n"
"wmat += m_bones[int(v_bone.z)] * v_weight.z;\n"
"wmat += m_bones[int(v_bone.w)] * v_weight.w;\n"
"return m_modelviewprojection * vec4(vec4(v_position.xyz, 1.0) * wmat, 1.0);"
"}\n"
"vec4 skeletaltransform_nst(out vec3 n, out vec3 t, out vec3 b)"
"{"
"mat3x4 wmat;\n"
"wmat = m_bones[int(v_bone.x)] * v_weight.x;"
"wmat += m_bones[int(v_bone.y)] * v_weight.y;"
"wmat += m_bones[int(v_bone.z)] * v_weight.z;"
"wmat += m_bones[int(v_bone.w)] * v_weight.w;"
"n = vec4(v_normal.xyz, 0.0) * wmat;"
"t = vec4(v_svector.xyz, 0.0) * wmat;"
"b = vec4(v_tvector.xyz, 0.0) * wmat;"
"return m_modelviewprojection * vec4(vec4(v_position.xyz, 1.0) * wmat, 1.0);"
"}\n"
"vec4 skeletaltransform_wnst(out vec3 w, out vec3 n, out vec3 t, out vec3 b)"
"{"
"mat3x4 wmat;\n"
"wmat = m_bones[int(v_bone.x)] * v_weight.x;"
"wmat += m_bones[int(v_bone.y)] * v_weight.y;"
"wmat += m_bones[int(v_bone.z)] * v_weight.z;"
"wmat += m_bones[int(v_bone.w)] * v_weight.w;"
"n = vec4(v_normal.xyz, 0.0) * wmat;"
"t = vec4(v_svector.xyz, 0.0) * wmat;"
"b = vec4(v_tvector.xyz, 0.0) * wmat;"
"w = vec4(v_position.xyz, 1.0) * wmat;"
"return m_modelviewprojection * vec4(w, 1.0);"
"}\n"
"vec4 skeletaltransform_n(out vec3 n)"
"{"
"mat3x4 wmat;\n"
"wmat = m_bones[int(v_bone.x)] * v_weight.x;"
"wmat += m_bones[int(v_bone.y)] * v_weight.y;"
"wmat += m_bones[int(v_bone.z)] * v_weight.z;"
"wmat += m_bones[int(v_bone.w)] * v_weight.w;"
"n = vec4(v_normal.xyz, 0.0) * wmat;"
"return m_modelviewprojection * vec4(vec4(v_position.xyz, 1.0) * wmat, 1.0);"
"}\n"
"#else\n"
"#define skeletaltransform ftetransform\n"
"vec4 skeletaltransform_wnst(out vec3 w, out vec3 n, out vec3 t, out vec3 b)"
"{"
"n = v_normal;"
"t = v_svector;"
"b = v_tvector;"
"w = v_position.xyz;"
"return ftetransform();"
"}\n"
"vec4 skeletaltransform_nst(out vec3 n, out vec3 t, out vec3 b)"
"{"
"n = v_normal;"
"t = v_svector;"
"b = v_tvector;"
"return ftetransform();"
"}\n"
"vec4 skeletaltransform_n(out vec3 n)"
"{"
"n = v_normal;"
"return ftetransform();"
"}\n"
"#endif\n"
,
"sys/fog.h",
"#ifdef FRAGMENT_SHADER\n"
"#ifdef FOG\n"
"vec3 fog3(in vec3 regularcolour)"
"{"
"float z = w_fogdensity * gl_FragCoord.z / gl_FragCoord.w;\n"
"z = max(0.0,z-w_fogdepthbias);\n"
"#if #include \"cvar/r_fog_exp2\"\n"
"z *= z;\n"
"#endif\n"
"float fac = exp2(-(z * 1.442695));\n"
"fac = (1.0-w_fogalpha) + (clamp(fac, 0.0, 1.0)*w_fogalpha);\n"
"return mix(w_fogcolour, regularcolour, fac);\n"
"}\n"
"vec3 fog3additive(in vec3 regularcolour)"
"{"
"float z = w_fogdensity * gl_FragCoord.z / gl_FragCoord.w;\n"
"z = max(0.0,z-w_fogdepthbias);\n"
"#if #include \"cvar/r_fog_exp2\"\n"
"z *= z;\n"
"#endif\n"
"float fac = exp2(-(z * 1.442695));\n"
"fac = (1.0-w_fogalpha) + (clamp(fac, 0.0, 1.0)*w_fogalpha);\n"
"return regularcolour * fac;\n"
"}\n"
"vec4 fog4(in vec4 regularcolour)"
"{"
"return vec4(fog3(regularcolour.rgb), 1.0) * regularcolour.a;\n"
"}\n"
"vec4 fog4additive(in vec4 regularcolour)"
"{"
"float z = w_fogdensity * gl_FragCoord.z / gl_FragCoord.w;\n"
"z = max(0.0,z-w_fogdepthbias);\n"
"#if #include \"cvar/r_fog_exp2\"\n"
"z *= z;\n"
"#endif\n"
"float fac = exp2(-(z * 1.442695));\n"
"fac = (1.0-w_fogalpha) + (clamp(fac, 0.0, 1.0)*w_fogalpha);\n"
"return regularcolour * vec4(fac, fac, fac, 1.0);\n"
"}\n"
"vec4 fog4blend(in vec4 regularcolour)"
"{"
"float z = w_fogdensity * gl_FragCoord.z / gl_FragCoord.w;\n"
"z = max(0.0,z-w_fogdepthbias);\n"
"#if #include \"cvar/r_fog_exp2\"\n"
"z *= z;\n"
"#endif\n"
"float fac = exp2(-(z * 1.442695));\n"
"fac = (1.0-w_fogalpha) + (clamp(fac, 0.0, 1.0)*w_fogalpha);\n"
"return regularcolour * vec4(1.0, 1.0, 1.0, fac);\n"
"}\n"
"#else\n"
/*don't use macros for this - mesa bugs out*/
"vec3 fog3(in vec3 regularcolour) { return regularcolour; }\n"
"vec3 fog3additive(in vec3 regularcolour) { return regularcolour; }\n"
"vec4 fog4(in vec4 regularcolour) { return regularcolour; }\n"
"vec4 fog4additive(in vec4 regularcolour) { return regularcolour; }\n"
"vec4 fog4blend(in vec4 regularcolour) { return regularcolour; }\n"
"#endif\n"
"#endif\n"
,
"sys/offsetmapping.h",
"uniform float cvar_r_glsl_offsetmapping_scale;\n"
"vec2 offsetmap(sampler2D normtex, vec2 base, vec3 eyevector)\n"
"{\n"
"#if !defined(OFFSETMAPPING_SCALE)\n"
"#define OFFSETMAPPING_SCALE 1.0\n"
"#endif\n"
"#if defined(RELIEFMAPPING) && !defined(GL_ES)\n"
"float i, f;\n"
"vec3 OffsetVector = vec3(normalize(eyevector.xyz).xy * cvar_r_glsl_offsetmapping_scale * OFFSETMAPPING_SCALE * vec2(-1.0, 1.0), -1.0);\n"
"vec3 RT = vec3(vec2(base.xy"/* - OffsetVector.xy*OffsetMapping_Bias*/"), 1.0);\n"
"OffsetVector /= 10.0;\n"
"for(i = 1.0; i < 10.0; ++i)\n"
"RT += OffsetVector * step(texture2D(normtex, RT.xy).a, RT.z);\n"
"for(i = 0.0, f = 1.0; i < 5.0; ++i, f *= 0.5)\n"
"RT += OffsetVector * (step(texture2D(normtex, RT.xy).a, RT.z) * f - 0.5 * f);\n"
"return RT.xy;\n"
"#elif defined(OFFSETMAPPING)\n"
"vec2 OffsetVector = normalize(eyevector).xy * cvar_r_glsl_offsetmapping_scale * OFFSETMAPPING_SCALE * vec2(-1.0, 1.0);\n"
"vec2 tc = base;\n"
"tc += OffsetVector;\n"
"OffsetVector *= 0.333;\n"
"tc -= OffsetVector * texture2D(normtex, tc).w;\n"
"tc -= OffsetVector * texture2D(normtex, tc).w;\n"
"tc -= OffsetVector * texture2D(normtex, tc).w;\n"
"return tc;\n"
"#else\n"
"return base;\n"
"#endif\n"
"}\n"
,
"sys/pcf.h",
"#ifndef r_glsl_pcf\n"
"#define r_glsl_pcf 9\n"
"#endif\n"
"#if r_glsl_pcf < 1\n"
"#undef r_glsl_pcf\n"
"#define r_glsl_pcf 9\n"
"#endif\n"
"vec3 ShadowmapCoord(void)\n"
"{\n"
"#ifdef SPOT\n"
//bias it. don't bother figuring out which side or anything, its not needed
//l_projmatrix contains the light's projection matrix so no other magic needed
"return ((vtexprojcoord.xyz-vec3(0.0,0.0,0.015))/vtexprojcoord.w + vec3(1.0, 1.0, 1.0)) * vec3(0.5, 0.5, 0.5);\n"
//"#elif defined(CUBESHADOW)\n"
// vec3 shadowcoord = vshadowcoord.xyz / vshadowcoord.w;
// #define dosamp(x,y) shadowCube(s_t4, shadowcoord + vec2(x,y)*texscale.xy).r
"#else\n"
//figure out which axis to use
//texture is arranged thusly:
//forward left up
//back right down
"vec3 dir = abs(vtexprojcoord.xyz);\n"
//assume z is the major axis (ie: forward from the light)
"vec3 t = vtexprojcoord.xyz;\n"
"float ma = dir.z;\n"
"vec3 axis = vec3(0.5/3.0, 0.5/2.0, 0.5);\n"
"if (dir.x > ma)\n"
"{\n"
"ma = dir.x;\n"
"t = vtexprojcoord.zyx;\n"
"axis.x = 0.5;\n"
"}\n"
"if (dir.y > ma)\n"
"{\n"
"ma = dir.y;\n"
"t = vtexprojcoord.xzy;\n"
"axis.x = 2.5/3.0;\n"
"}\n"
//if the axis is negative, flip it.
"if (t.z > 0.0)\n"
"{\n"
"axis.y = 1.5/2.0;\n"
"t.z = -t.z;\n"
"}\n"
//we also need to pass the result through the light's projection matrix too
//the 'matrix' we need only contains 5 actual values. and one of them is a -1. So we might as well just use a vec4.
//note: the projection matrix also includes scalers to pinch the image inwards to avoid sampling over borders, as well as to cope with non-square source image
//the resulting z is prescaled to result in a value between -0.5 and 0.5.
//also make sure we're in the right quadrant type thing
"return axis + ((l_shadowmapproj.xyz*t.xyz + vec3(0.0, 0.0, l_shadowmapproj.w)) / -t.z);\n"
"#endif\n"
"}\n"
"float ShadowmapFilter(sampler2DShadow smap)\n"
"{\n"
"vec3 shadowcoord = ShadowmapCoord();\n"
"#if 0\n"//def GL_ARB_texture_gather
"vec2 ipart, fpart;\n"
"#define dosamp(x,y) textureGatherOffset(smap, ipart.xy, vec2(x,y)))\n"
"vec4 tl = step(shadowcoord.z, dosamp(-1.0, -1.0));\n"
"vec4 bl = step(shadowcoord.z, dosamp(-1.0, 1.0));\n"
"vec4 tr = step(shadowcoord.z, dosamp(1.0, -1.0));\n"
"vec4 br = step(shadowcoord.z, dosamp(1.0, 1.0));\n"
//we now have 4*4 results, woo
//we can just average them for 1/16th precision, but that's still limited graduations
//the middle four pixels are 'full strength', but we interpolate the sides to effectively give 3*3
"vec4 col = vec4(tl.ba, tr.ba) + vec4(bl.rg, br.rg) + " //middle two rows are full strength
"mix(vec4(tl.rg, tr.rg), vec4(bl.ba, br.ba), fpart.y);\n" //top+bottom rows
"return dot(mix(col.rgb, col.agb, fpart.x), vec3(1.0/9.0));\n" //blend r+a, gb are mixed because its pretty much free and gives a nicer dot instruction instead of lots of adds.
"#else\n"
"#define dosamp(x,y) shadow2D(smap, shadowcoord.xyz + (vec3(x,y,0.0)*l_shadowmapscale.xyx)).r\n"
"float s = 0.0;\n"
"#if r_glsl_pcf >= 1 && r_glsl_pcf < 5\n"
"s += dosamp(0.0, 0.0);\n"
"return s;\n"
"#elif r_glsl_pcf >= 5 && r_glsl_pcf < 9\n"
"s += dosamp(-1.0, 0.0);\n"
"s += dosamp(0.0, -1.0);\n"
"s += dosamp(0.0, 0.0);\n"
"s += dosamp(0.0, 1.0);\n"
"s += dosamp(1.0, 0.0);\n"
"return s/5.0;\n"
"#else\n"
"s += dosamp(-1.0, -1.0);\n"
"s += dosamp(-1.0, 0.0);\n"
"s += dosamp(-1.0, 1.0);\n"
"s += dosamp(0.0, -1.0);\n"
"s += dosamp(0.0, 0.0);\n"
"s += dosamp(0.0, 1.0);\n"
"s += dosamp(1.0, -1.0);\n"
"s += dosamp(1.0, 0.0);\n"
"s += dosamp(1.0, 1.0);\n"
"return s/9.0;\n"
"#endif\n"
"#endif\n"
"}\n"
,
NULL
};
//glsl doesn't officially support #include, this might be vulkan, but don't push things.
qboolean Vulkan_GenerateIncludes(int maxstrings, int *strings, const char *prstrings[], int length[], const char *shadersource)
{
int i;
char *incline, *inc;
char incname[256];
while((incline=strstr(shadersource, "#include")))
{
if (*strings == maxstrings)
return false;
/*emit up to the include*/
if (incline - shadersource)
{
prstrings[*strings] = shadersource;
length[*strings] = incline - shadersource;
*strings += 1;
}
incline += 8;
incline = COM_ParseOut (incline, incname, sizeof(incname));
if (!strncmp(incname, "cvar/", 5))
{
cvar_t *var = Cvar_Get(incname+5, "0", 0, "shader cvars");
if (var)
{
var->flags |= CVAR_SHADERSYSTEM;
if (!Vulkan_GenerateIncludes(maxstrings, strings, prstrings, length, var->string))
return false;
}
else
{
/*dump something if the cvar doesn't exist*/
if (*strings == maxstrings)
return false;
prstrings[*strings] = "0";
length[*strings] = strlen("0");
*strings += 1;
}
}
else
{
for (i = 0; vulkan_glsl_hdrs[i]; i += 2)
{
if (!strcmp(incname, vulkan_glsl_hdrs[i]))
{
if (!Vulkan_GenerateIncludes(maxstrings, strings, prstrings, length, vulkan_glsl_hdrs[i+1]))
return false;
break;
}
}
if (!vulkan_glsl_hdrs[i])
{
if (FS_LoadFile(incname, (void**)&inc) != (qofs_t)-1)
{
if (!Vulkan_GenerateIncludes(maxstrings, strings, prstrings, length, inc))
{
FS_FreeFile(inc);
return false;
}
FS_FreeFile(inc);
}
}
}
/*move the pointer past the include*/
shadersource = incline;
}
if (*shadersource)
{
if (*strings == maxstrings)
return false;
/*dump the remaining shader string*/
prstrings[*strings] = shadersource;
length[*strings] = strlen(prstrings[*strings]);
*strings += 1;
}
return true;
}
//assumes VK_NV_glsl_shader for raw glsl
VkShaderModule VK_CreateGLSLModule(program_t *prog, const char *name, int ver, const char **precompilerconstants, const char *body, int isfrag)
{
VkShaderModuleCreateInfo info = {VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
VkShaderModule mod;
const char *strings[256];
int lengths[256];
unsigned int numstrings = 0;
char *blob;
size_t blobsize;
unsigned int i;
strings[numstrings++] = "#version 450 core\n";
strings[numstrings++] = "#define ENGINE_"DISTRIBUTION"\n";
strings[numstrings++] =
"layout(std140, binding=0) uniform entityblock"
"{\n"
"mat4 m_modelviewproj;"
"mat4 m_model;"
"mat4 m_modelinv;"
"vec3 e_eyepos;"
"float e_time;"
"vec3 e_light_ambient; float epad1;"
"vec3 e_light_dir; float epad2;"
"vec3 e_light_mul; float epad3;"
"vec4 e_lmscales[4];"
"vec3 e_uppercolour; float epad4;"
"vec3 e_lowercolour; float epad5;"
"vec4 e_colourident;"
"vec4 w_fogcolours;"
"float w_fogdensity; float w_fogdepthbias; vec2 epad6;"
"};\n"
"layout(std140, binding=1) uniform lightblock"
"{\n"
"mat4 l_cubematrix;"
"vec3 l_lightposition; float lpad1;"
"vec3 l_lightcolour; float lpad2;"
"vec3 l_lightcolourscale; float l_lightradius;"
"vec4 l_shadowmapproj;"
"vec2 l_shadowmapscale; vec2 lpad3;"
"};\n"
;
if (isfrag)
{
int bindloc = 0;
const char *bindlocations[] =
{
"layout(set=0, binding=2) ",
"layout(set=0, binding=3) ",
"layout(set=0, binding=4) ",
"layout(set=0, binding=5) ",
"layout(set=0, binding=6) ",
"layout(set=0, binding=7) ",
"layout(set=0, binding=8) ",
"layout(set=0, binding=9) ",
"layout(set=0, binding=10) ",
"layout(set=0, binding=11) ",
"layout(set=0, binding=12) ",
"layout(set=0, binding=13) ",
"layout(set=0, binding=14) ",
"layout(set=0, binding=15) ",
"layout(set=0, binding=16) ",
"layout(set=0, binding=17) ",
"layout(set=0, binding=18) ",
"layout(set=0, binding=19) ",
"layout(set=0, binding=20) ",
"layout(set=0, binding=21) ",
"layout(set=0, binding=22) ",
"layout(set=0, binding=23) ",
"layout(set=0, binding=24) ",
"layout(set=0, binding=25) ",
};
const char *numberedsamplernames[] =
{
"uniform sampler2D s_t0;\n",
"uniform sampler2D s_t1;\n",
"uniform sampler2D s_t2;\n",
"uniform sampler2D s_t3;\n",
"uniform sampler2D s_t4;\n",
"uniform sampler2D s_t5;\n",
"uniform sampler2D s_t6;\n",
"uniform sampler2D s_t7;\n",
};
const char *defaultsamplernames[] =
{
"uniform sampler2D s_shadowmap;\n",
"uniform samplerCube s_projectionmap;\n",
"uniform sampler2D s_diffuse;\n",
"uniform sampler2D s_normalmap;\n",
"uniform sampler2D s_specular;\n",
"uniform sampler2D s_upper;\n",
"uniform sampler2D s_lower;\n",
"uniform sampler2D s_fullbright;\n",
"uniform sampler2D s_paletted;\n",
"uniform samplerCube s_reflectcube;\n",
"uniform sampler2D s_reflectmask;\n",
"uniform sampler2D s_lightmap;\n#define s_lightmap0 s_lightmap\n",
"uniform sampler2D s_deluxmap;\n#define s_deluxmap0 s_deluxmap\n",
"uniform sampler2D s_lightmap1;\n",
"uniform sampler2D s_lightmap2;\n",
"uniform sampler2D s_lightmap3;\n",
"uniform sampler2D s_deluxmap1;\n",
"uniform sampler2D s_deluxmap2;\n",
"uniform sampler2D s_deluxmap3;\n",
};
strings[numstrings++] = "#define FRAGMENT_SHADER\n"
"#define varying in\n"
"layout(location=0) out vec4 outcolour;\n"
"#define gl_FragColor outcolour\n"
;
for (i = 0; i < countof(defaultsamplernames); i++)
{
if (prog->defaulttextures & (1u<<i))
{
strings[numstrings++] = bindlocations[bindloc++];
strings[numstrings++] = defaultsamplernames[i];
}
}
for (i = 0; i < prog->numsamplers && i < countof(numberedsamplernames); i++)
{
strings[numstrings++] = bindlocations[bindloc++];
strings[numstrings++] = numberedsamplernames[i];
}
}
else
{
strings[numstrings++] = "#define VERTEX_SHADER\n"
"#define attribute in\n"
"#define varying out\n"
"out gl_PerVertex"
"{"
"vec4 gl_Position;"
"};"
"layout(location=0) attribute vec3 v_position;"
"layout(location=1) attribute vec2 v_texcoord;"
"layout(location=2) attribute vec4 v_colour;"
"layout(location=3) attribute vec2 v_lmcoord;"
"layout(location=4) attribute vec3 v_normal;"
"layout(location=5) attribute vec3 v_svector;"
"layout(location=6) attribute vec3 v_tvector;"
//"layout(location=7) attribute vec4 v_boneweights;"
//"layout(location=8) attribute ivec4 v_bonenums;"
"\n"
"vec4 ftetransform()"
"{"
"vec4 proj = (m_modelviewproj*vec4(v_position,1.0));"
"proj.y *= -1;"
"proj.z = (proj.z + proj.w) / 2.0;"
"return proj;"
"}\n"
;
}
while (*precompilerconstants)
strings[numstrings++] = *precompilerconstants++;
for (i = 0, blobsize = 0; i < numstrings; i++)
lengths[i] = strlen(strings[i]);
Vulkan_GenerateIncludes(countof(strings), &numstrings, strings, lengths, body);
//now glue it all together into a single blob
for (i = 0, blobsize = 0; i < numstrings; i++)
blobsize += lengths[i];
blobsize++;
blob = malloc(blobsize);
for (i = 0, blobsize = 0; i < numstrings; i++)
{
memcpy(blob+blobsize, strings[i], lengths[i]);
blobsize += lengths[i];
}
blob[blobsize] = 0;
//and submit it.
info.flags = 0;
info.codeSize = blobsize;
info.pCode = (void*)blob;
VkAssert(vkCreateShaderModule(vk.device, &info, vkallocationcb, &mod));
return mod;
}
qboolean VK_LoadGLSL(program_t *prog, struct programpermu_s *permu, int ver, const char **precompilerconstants, const char *vert, const char *tcs, const char *tes, const char *geom, const char *frag, qboolean noerrors, vfsfile_t *blobfile)
{
if (permu->permutation) //FIXME...
return false;
prog->nofixedcompat = false;
// prog->supportedpermutations = 0;
prog->cvardata = NULL;
prog->cvardatasize = 0;
prog->pipelines = NULL;
prog->vert = VK_CreateGLSLModule(prog, prog->name, ver, precompilerconstants, vert, false);
prog->frag = VK_CreateGLSLModule(prog, prog->name, ver, precompilerconstants, frag, true);
VK_FinishProg(prog, prog->name);
return true;
}
qboolean VK_LoadBlob(program_t *prog, void *blobdata, const char *name)
{
//fixme: should validate that the offset+lengths are within the blobdata.
struct blobheader *blob = blobdata;
VkShaderModuleCreateInfo info = {VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
VkShaderModule vert, frag;
unsigned char *cvardata;
if (blob->blobmagic[0] != 0xff || blob->blobmagic[1] != 'S' || blob->blobmagic[2] != 'P' || blob->blobmagic[3] != 'V')
return false; //assume its glsl. this is going to be 'fun'.
if (blob->blobversion != 1)
{
Con_Printf("Blob %s is outdated\n", name);
return false;
}
info.flags = 0;
info.codeSize = blob->vertlength;
info.pCode = (uint32_t*)((char*)blob+blob->vertoffset);
VkAssert(vkCreateShaderModule(vk.device, &info, vkallocationcb, &vert));
info.flags = 0;
info.codeSize = blob->fraglength;
info.pCode = (uint32_t*)((char*)blob+blob->fragoffset);
VkAssert(vkCreateShaderModule(vk.device, &info, vkallocationcb, &frag));
prog->vert = vert;
prog->frag = frag;
prog->nofixedcompat = true;
prog->numsamplers = blob->numtextures;
prog->defaulttextures = blob->defaulttextures;
prog->supportedpermutations = blob->permutations;
if (blob->cvarslength)
{
prog->cvardata = BZ_Malloc(blob->cvarslength);
prog->cvardatasize = blob->cvarslength;
memcpy(prog->cvardata, (char*)blob+blob->cvarsoffset, blob->cvarslength);
}
else
{
prog->cvardata = NULL;
prog->cvardatasize = 0;
}
//go through the cvars and a) validate them. b) create them with the right defaults.
//FIXME: validate
for (cvardata = prog->cvardata; cvardata < prog->cvardata + prog->cvardatasize; )
{
unsigned char type = cvardata[2], size = cvardata[3]-'0';
const char *cvarname;
cvar_t *var;
cvardata += 4;
cvarname = cvardata;
cvardata += strlen(cvarname)+1;
if (type >= 'A' && type <= 'Z')
{ //args will be handled by the blob loader.
//the blob contains default values, overwrite them with the user's preferences...
VK_ShaderReadArgument(name, cvarname, type, size, cvardata);
}
else
{
var = Cvar_FindVar(cvarname);
if (var)
var->flags |= CVAR_SHADERSYSTEM; //just in case
else
{
union
{
int i;
float f;
} u;
char value[128];
uint32_t i;
*value = 0;
for (i = 0; i < size; i++)
{
u.i = (cvardata[i*4+0]<<24)|(cvardata[i*4+1]<<16)|(cvardata[i*4+2]<<8)|(cvardata[i*4+3]<<0);
if (i)
Q_strncatz(value, " ", sizeof(value));
if (type == 'i' || type == 'b')
Q_strncatz(value, va("%i", u.i), sizeof(value));
else
Q_strncatz(value, va("%f", u.f), sizeof(value));
}
Cvar_Get(cvarname, value, CVAR_SHADERSYSTEM, "GLSL Settings");
}
}
cvardata += 4*size;
}
VK_FinishProg(prog, name);
prog->pipelines = NULL; //generated as needed, depending on blend states etc.
return true;
}
static void VKBE_ReallyDeleteProg(void *vprog)
{ //nothing else is refering to this data any more, its safe to obliterate it.
program_t *prog = vprog;
struct pipeline_s *pipe;
while(prog->pipelines)
{
pipe = prog->pipelines;
prog->pipelines = pipe->next;
if (pipe->pipeline)
vkDestroyPipeline(vk.device, pipe->pipeline, vkallocationcb);
Z_Free(pipe);
}
if (prog->layout)
vkDestroyPipelineLayout(vk.device, prog->layout, vkallocationcb);
if (prog->desclayout)
vkDestroyDescriptorSetLayout(vk.device, prog->desclayout, vkallocationcb);
if (prog->vert)
vkDestroyShaderModule(vk.device, prog->vert, vkallocationcb);
if (prog->frag)
vkDestroyShaderModule(vk.device, prog->frag, vkallocationcb);
}
void VKBE_DeleteProg(program_t *prog)
{
//schedule the deletes when its safe to do so.
VK_AtFrameEnd(VKBE_ReallyDeleteProg, prog, sizeof(*prog));
//clear stuff out so that the caller doesn't get confused.
Z_Free(prog->cvardata);
prog->cvardata = NULL;
prog->pipelines = NULL;
prog->layout = VK_NULL_HANDLE;
prog->desclayout = VK_NULL_HANDLE;
prog->vert = VK_NULL_HANDLE;
prog->frag = VK_NULL_HANDLE;
}
static unsigned int VKBE_ApplyShaderBits(unsigned int bits)
{
if (shaderstate.flags & (BEF_FORCEADDITIVE|BEF_FORCETRANSPARENT|BEF_FORCENODEPTH|BEF_FORCEDEPTHTEST|BEF_FORCEDEPTHWRITE|BEF_LINES))
{
if (shaderstate.flags & BEF_FORCEADDITIVE)
bits = (bits & ~(SBITS_MISC_DEPTHWRITE|SBITS_BLEND_BITS|SBITS_ATEST_BITS))
| (SBITS_SRCBLEND_SRC_ALPHA | SBITS_DSTBLEND_ONE);
else if (shaderstate.flags & BEF_FORCETRANSPARENT)
{
if ((bits & SBITS_BLEND_BITS) == (SBITS_SRCBLEND_ONE|SBITS_DSTBLEND_ZERO) || !(bits & SBITS_BLEND_BITS) || (bits&SBITS_ATEST_GE128)) /*if transparency is forced, clear alpha test bits*/
bits = (bits & ~(SBITS_MISC_DEPTHWRITE|SBITS_BLEND_BITS|SBITS_ATEST_BITS))
| (SBITS_SRCBLEND_SRC_ALPHA | SBITS_DSTBLEND_ONE_MINUS_SRC_ALPHA);
}
if (shaderstate.flags & BEF_FORCENODEPTH) /*EF_NODEPTHTEST dp extension*/
bits |= SBITS_MISC_NODEPTHTEST;
else
{
if (shaderstate.flags & BEF_FORCEDEPTHTEST)
bits &= ~SBITS_MISC_NODEPTHTEST;
if (shaderstate.flags & BEF_FORCEDEPTHWRITE)
bits |= SBITS_MISC_DEPTHWRITE;
}
if (shaderstate.flags & BEF_LINES)
bits |= SBITS_LINES;
}
return bits;
}
static const char LIGHTPASS_SHADER[] = "\
{\n\
program rtlight\n\
{\n\
blendfunc add\n\
}\n\
}";
void VKBE_Init(void)
{
int i;
char *c;
sh_config.pDeleteProg = VKBE_DeleteProg;
be_maxpasses = 1;
memset(&shaderstate, 0, sizeof(shaderstate));
shaderstate.inited = true;
for (i = 0; i < MAXRLIGHTMAPS; i++)
shaderstate.dummybatch.lightmap[i] = -1;
shaderstate.identitylighting = 1;
shaderstate.identitylightmap = 1;
//make sure the world draws correctly
r_worldentity.shaderRGBAf[0] = 1;
r_worldentity.shaderRGBAf[1] = 1;
r_worldentity.shaderRGBAf[2] = 1;
r_worldentity.shaderRGBAf[3] = 1;
r_worldentity.axis[0][0] = 1;
r_worldentity.axis[1][1] = 1;
r_worldentity.axis[2][2] = 1;
r_worldentity.light_avg[0] = 1;
r_worldentity.light_avg[1] = 1;
r_worldentity.light_avg[2] = 1;
FTable_Init();
{
unsigned char bibuf[4*4*4] = {0};
if (!qrenderer)
r_blackimage = r_nulltex;
else
r_blackimage = R_LoadTexture("$blackimage", 4, 4, TF_RGBA32, bibuf, IF_NOMIPMAP|IF_NOPICMIP|IF_NEAREST|IF_NOGAMMA);
}
shaderstate.depthonly = R_RegisterShader("depthonly", SUF_NONE,
"{\n"
"program depthonly\n"
"{\n"
"depthwrite\n"
"maskcolor\n"
"}\n"
"}\n");
shaderstate.programfixedemu[0] = Shader_FindGeneric("fixedemu", QR_VULKAN);
shaderstate.programfixedemu[1] = Shader_FindGeneric("fixedemu#CONSTCOLOUR", QR_VULKAN);
R_InitFlashblends();
/*
{
VkDescriptorPoolCreateInfo dpi = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
VkDescriptorPoolSize dpisz[2];
dpi.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
dpi.maxSets = 512;
dpi.poolSizeCount = countof(dpisz);
dpi.pPoolSizes = dpisz;
dpisz[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
dpisz[0].descriptorCount = 2;
dpisz[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
dpisz[1].descriptorCount = MAX_TMUS;
VkAssert(vkCreateDescriptorPool(vk.device, &dpi, NULL, &shaderstate.texturedescpool));
}
*/
{
struct stagingbuf lazybuf;
void *buffer = VKBE_CreateStagingBuffer(&lazybuf, sizeof(vec4_t)*65536+sizeof(vec3_t)*3*65536, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
vec4_t *col = buffer;
vec3_t *norm = (vec3_t*)(col+65536);
vec3_t *sdir = norm+65536;
vec3_t *tdir = sdir+65536;
for (i = 0; i < 65536; i++)
{
Vector4Set(col[i], 1, 1, 1, 1);
VectorSet(norm[i], 1, 0, 0);
VectorSet(sdir[i], 0, 1, 0);
VectorSet(tdir[i], 0, 0, 1);
}
shaderstate.staticbuf = VKBE_FinishStaging(&lazybuf, &shaderstate.staticbufmem);
}
c = vk_stagingbuffers.string;
if (*c)
{
vk_usedynamicstaging = 0;
while (*c)
{
if (*c == 'u')
vk_usedynamicstaging |= 1u<<DB_UBO;
else if (*c == 'e' || *c == 'i')
vk_usedynamicstaging |= 1u<<DB_EBO;
else if (*c == 'v')
vk_usedynamicstaging |= 1u<<DB_VBO;
else if (*c == '0')
vk_usedynamicstaging |= 0; //for explicly none.
else
Con_Printf("%s: unknown char %c\n", vk_stagingbuffers.string, *c);
c++;
}
}
else
vk_usedynamicstaging = 0u;
}
static struct descpool *VKBE_CreateDescriptorPool(void)
{
struct descpool *np = Z_Malloc(sizeof(*np));
VkDescriptorPoolCreateInfo dpi = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
VkDescriptorPoolSize dpisz[2];
dpi.flags = 0;
dpi.maxSets = np->totalsets = 512;
dpi.poolSizeCount = countof(dpisz);
dpi.pPoolSizes = dpisz;
dpisz[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
dpisz[0].descriptorCount = 2*dpi.maxSets;
dpisz[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
dpisz[1].descriptorCount = MAX_TMUS*dpi.maxSets;
VkAssert(vkCreateDescriptorPool(vk.device, &dpi, NULL, &np->pool));
return np;
}
static VkDescriptorSet VKBE_TempDescriptorSet(VkDescriptorSetLayout layout)
{
VkDescriptorSet ret;
VkDescriptorSetAllocateInfo setinfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
if (vk.descpool->availsets == 0)
{
if (vk.descpool->next)
vk.descpool = vk.descpool->next;
else
vk.descpool = vk.descpool->next = VKBE_CreateDescriptorPool();
vkResetDescriptorPool(vk.device, vk.descpool->pool, 0);
vk.descpool->availsets = vk.descpool->totalsets;
}
vk.descpool->availsets--;
setinfo.descriptorPool = vk.descpool->pool;
setinfo.descriptorSetCount = 1;
setinfo.pSetLayouts = &layout;
vkAllocateDescriptorSets(vk.device, &setinfo, &ret);
return ret;
}
//creates a new dynamic buffer for us to use while streaming. because spoons.
static struct dynbuffer *VKBE_AllocNewBuffer(struct dynbuffer **link, enum dynbuf_e type, VkDeviceSize minsize)
{
VkBufferUsageFlags ufl[] = {VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT};
VkBufferCreateInfo bufinf = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
VkMemoryRequirements mem_reqs;
VkMemoryAllocateInfo memAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
struct dynbuffer *n = Z_Malloc(sizeof(*n));
qboolean usestaging = (vk_usedynamicstaging & (1u<<type))!=0;
while(1)
{
bufinf.flags = 0;
bufinf.size = n->size = (1u<<20);
bufinf.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
bufinf.queueFamilyIndexCount = 0;
bufinf.pQueueFamilyIndices = NULL;
while (bufinf.size < minsize)
bufinf.size *= 2;
n->size = bufinf.size;
if (type != DB_STAGING && usestaging)
{
//create two buffers, one staging/host buffer and one device buffer
bufinf.usage = ufl[type]|VK_BUFFER_USAGE_TRANSFER_DST_BIT;
vkCreateBuffer(vk.device, &bufinf, vkallocationcb, &n->devicebuf);
bufinf.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
vkCreateBuffer(vk.device, &bufinf, vkallocationcb, &n->stagingbuf);
vkGetBufferMemoryRequirements(vk.device, n->devicebuf, &mem_reqs);
n->align = mem_reqs.alignment-1;
memAllocInfo.allocationSize = mem_reqs.size;
memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &n->devicememory));
VkAssert(vkBindBufferMemory(vk.device, n->devicebuf, n->devicememory, 0));
n->renderbuf = n->devicebuf;
}
else
{ //single buffer. we'll write directly to the buffer.
bufinf.usage = ufl[type];
vkCreateBuffer(vk.device, &bufinf, vkallocationcb, &n->stagingbuf);
n->renderbuf = n->stagingbuf;
}
//now allocate some host-visible memory for the buffer that we're going to map.
vkGetBufferMemoryRequirements(vk.device, n->stagingbuf, &mem_reqs);
n->align = mem_reqs.alignment-1;
memAllocInfo.allocationSize = mem_reqs.size;
memAllocInfo.memoryTypeIndex = ~0;
// if (memAllocInfo.memoryTypeIndex == ~0)
// memAllocInfo.memoryTypeIndex = vk_find_memory_try(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
if (memAllocInfo.memoryTypeIndex == ~0 && n->renderbuf == n->stagingbuf) //probably won't get anything, but whatever.
memAllocInfo.memoryTypeIndex = vk_find_memory_try(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
if (memAllocInfo.memoryTypeIndex == ~0)
memAllocInfo.memoryTypeIndex = vk_find_memory_try(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
if (memAllocInfo.memoryTypeIndex == ~0)
{ //if we can't find any usable memory, force staging instead.
vkDestroyBuffer(vk.device, n->stagingbuf, vkallocationcb);
if (usestaging)
Sys_Error("Unable to allocate buffer memory");
usestaging = true;
continue;
}
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &n->stagingmemory));
VkAssert(vkBindBufferMemory(vk.device, n->stagingbuf, n->stagingmemory, 0));
VkAssert(vkMapMemory(vk.device, n->stagingmemory, 0, n->size, 0, &n->ptr)); //persistent-mapped.
n->stagingcoherent = !!(vk.memory_properties.memoryTypes[memAllocInfo.memoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
n->next = *link;
*link = n;
return n;
}
}
static void *fte_restrict VKBE_AllocateBufferSpace(enum dynbuf_e type, size_t datasize, VkBuffer *buf, VkDeviceSize *offset)
{ //FIXME: ubos need alignment
struct dynbuffer *b = vk.dynbuf[type];
void *ret;
if (b->offset + datasize > b->size)
{
//flush the old one, just in case.
if (!b->stagingcoherent)
{
VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE};
range.offset = b->flushed;
range.size = b->offset-b->flushed;
range.memory = b->stagingmemory;
vkFlushMappedMemoryRanges(vk.device, 1, &range);
}
if (b->devicebuf != VK_NULL_HANDLE)
{
struct vk_fencework *fence = VK_FencedBegin(NULL, 0);
VkBufferCopy bcr = {0};
bcr.srcOffset = b->flushed;
bcr.dstOffset = b->flushed;
bcr.size = b->offset-b->flushed;
vkCmdCopyBuffer(fence->cbuf, b->stagingbuf, b->devicebuf, 1, &bcr);
VK_FencedSubmit(fence);
}
if (!b->next)
VKBE_AllocNewBuffer(&b->next, type, datasize);
b = vk.dynbuf[type] = b->next;
b->offset = 0;
b->flushed = 0;
}
*buf = b->renderbuf;
*offset = b->offset;
ret = (qbyte*)b->ptr + b->offset;
b->offset += datasize; //FIXME: alignment
return ret;
}
//called when a new swapchain has been created.
//makes sure there's no nulls or anything.
void VKBE_InitFramePools(struct vkframe *frame)
{
uint32_t i;
for (i = 0; i < DB_MAX; i++)
{
frame->dynbufs[i] = NULL;
VKBE_AllocNewBuffer(&frame->dynbufs[i], i, 0);
}
frame->descpools = vk.khr_push_descriptor?NULL:VKBE_CreateDescriptorPool();
frame->numcbufs = 0;
frame->maxcbufs = 0;
frame->cbufs = NULL;
/*{
VkCommandBufferAllocateInfo cbai = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO};
cbai.commandPool = vk.cmdpool;
cbai.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
cbai.commandBufferCount = frame->maxcbufs;
VkAssert(vkAllocateCommandBuffers(vk.device, &cbai, frame->cbufs));
}*/
{
VkFenceCreateInfo fci = {VK_STRUCTURE_TYPE_FENCE_CREATE_INFO};
fci.flags = VK_FENCE_CREATE_SIGNALED_BIT;
VkAssert(vkCreateFence(vk.device,&fci,vkallocationcb,&frame->finishedfence));
}
}
//called just before submits
//makes sure that our persistent-mapped memory writes can actually be seen by the hardware.
void VKBE_FlushDynamicBuffers(void)
{
struct vk_fencework *fence = NULL;
uint32_t i;
struct dynbuffer *d;
VkMappedMemoryRange range = {VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE};
for (i = 0; i < DB_MAX; i++)
{
d = vk.dynbuf[i];
if (d->flushed == d->offset)
continue;
if (!d->stagingcoherent)
{
range.offset = d->flushed;
range.size = d->offset - d->flushed;
range.memory = d->stagingmemory;
vkFlushMappedMemoryRanges(vk.device, 1, &range);
}
if (d->devicebuf != VK_NULL_HANDLE)
{
VkBufferCopy bcr = {0};
bcr.srcOffset = d->flushed;
bcr.dstOffset = d->flushed;
bcr.size = d->offset - d->flushed;
if (!fence)
fence = VK_FencedBegin(NULL, 0);
vkCmdCopyBuffer(fence->cbuf, d->stagingbuf, d->devicebuf, 1, &bcr);
}
d->flushed = d->offset;
}
if (fence)
VK_FencedSubmit(fence);
}
void VKBE_Set2D(qboolean twodee)
{
if (twodee)
shaderstate.forcebeflags = BEF_FORCENODEPTH;
else
shaderstate.forcebeflags = 0;
shaderstate.curtime = realtime;
}
//called at the start of each frame
//resets the working dynamic buffers to this frame's storage, to avoid stepping on frames owned by the gpu
void VKBE_RestartFrame(void)
{
uint32_t i;
for (i = 0; i < DB_MAX; i++)
{
vk.dynbuf[i] = vk.frame->dynbufs[i];
vk.dynbuf[i]->offset = vk.dynbuf[i]->flushed = 0;
}
shaderstate.rc.activepipeline = VK_NULL_HANDLE;
vk.descpool = vk.frame->descpools;
if (vk.descpool)
{
vkResetDescriptorPool(vk.device, vk.descpool->pool, 0);
vk.descpool->availsets = vk.descpool->totalsets;
}
}
void VKBE_ShutdownFramePools(struct vkframe *frame)
{
struct dynbuffer *db;
struct descpool *dp;
uint32_t i;
for (i = 0; i < DB_MAX; i++)
{
while(frame->dynbufs[i])
{
db = frame->dynbufs[i];
vkDestroyBuffer(vk.device, db->stagingbuf, vkallocationcb);
vkFreeMemory(vk.device, db->stagingmemory, vkallocationcb);
if (db->devicebuf != VK_NULL_HANDLE)
{
vkDestroyBuffer(vk.device, db->devicebuf, vkallocationcb);
vkFreeMemory(vk.device, db->devicememory, vkallocationcb);
}
frame->dynbufs[i] = db->next;
Z_Free(db);
}
}
while(frame->descpools)
{
dp = frame->descpools;
vkDestroyDescriptorPool(vk.device, dp->pool, vkallocationcb);
frame->descpools = dp->next;
Z_Free(dp);
}
}
void VKBE_Shutdown(void)
{
if (!shaderstate.inited)
return;
#ifdef RTLIGHTS
Sh_Shutdown();
#endif
Shader_ReleaseGeneric(shaderstate.programfixedemu[0]);
Shader_ReleaseGeneric(shaderstate.programfixedemu[1]);
shaderstate.inited = false;
#ifdef RTLIGHTS
VK_TerminateShadowMap();
#endif
Z_Free(shaderstate.wbatches);
shaderstate.wbatches = NULL;
vkDestroyBuffer(vk.device, shaderstate.staticbuf, vkallocationcb);
VK_ReleasePoolMemory(&shaderstate.staticbufmem);
}
static texid_t SelectPassTexture(const shaderpass_t *pass)
{
switch(pass->texgen)
{
default:
case T_GEN_DIFFUSE:
return shaderstate.curtexnums->base;
case T_GEN_NORMALMAP:
if (TEXLOADED(shaderstate.curtexnums->bump))
return shaderstate.curtexnums->bump;
else
return missing_texture_normal;
case T_GEN_SPECULAR:
if (TEXLOADED(shaderstate.curtexnums->specular))
return shaderstate.curtexnums->specular;
else
return missing_texture_gloss;
case T_GEN_UPPEROVERLAY:
return shaderstate.curtexnums->upperoverlay;
case T_GEN_LOWEROVERLAY:
return shaderstate.curtexnums->loweroverlay;
case T_GEN_FULLBRIGHT:
return shaderstate.curtexnums->fullbright;
case T_GEN_ANIMMAP:
return pass->anim_frames[(int)(pass->anim_fps * shaderstate.curtime) % pass->anim_numframes];
case T_GEN_3DMAP:
case T_GEN_CUBEMAP:
case T_GEN_SINGLEMAP:
return pass->anim_frames[0];
case T_GEN_DELUXMAP:
{
int lmi = shaderstate.curbatch->lightmap[0];
if (lmi < 0 || !lightmap[lmi]->hasdeluxe)
return r_nulltex;
else
{
lmi+=1;
return lightmap[lmi]->lightmap_texture;
}
}
case T_GEN_LIGHTMAP:
{
int lmi = shaderstate.curbatch->lightmap[0];
if (lmi < 0)
return r_whiteimage;
else
return lightmap[lmi]->lightmap_texture;
}
case T_GEN_CURRENTRENDER:
return shaderstate.tex_currentrender;
#ifdef HAVE_MEDIA_DECODER
case T_GEN_VIDEOMAP:
if (pass->cin)
return Media_UpdateForShader(pass->cin);
return r_nulltex;
#endif
case T_GEN_LIGHTCUBEMAP: //light's projected cubemap
if (shaderstate.curdlight)
return shaderstate.curdlight->cubetexture;
else
return r_nulltex;
case T_GEN_SHADOWMAP: //light's depth values.
return shaderstate.currentshadowmap;
case T_GEN_REFLECTION: //reflection image (mirror-as-fbo)
return &shaderstate.rt_reflection.q_colour;
case T_GEN_REFRACTION: //refraction image (portal-as-fbo)
return shaderstate.tex_refraction;
case T_GEN_REFRACTIONDEPTH: //refraction image (portal-as-fbo)
return &shaderstate.rt_refraction.q_depth;
case T_GEN_RIPPLEMAP: //ripplemap image (water surface distortions-as-fbo)
return shaderstate.tex_ripplemap;
case T_GEN_SOURCECOLOUR: //used for render-to-texture targets
return vk.sourcecolour;
case T_GEN_SOURCEDEPTH: //used for render-to-texture targets
return vk.sourcedepth;
case T_GEN_SOURCECUBE: //used for render-to-texture targets
return r_nulltex;
}
}
static void T_Gen_CurrentRender(void)
{
vk_image_t *img;
/*gah... I pitty the gl drivers*/
if (!shaderstate.tex_currentrender)
{
shaderstate.tex_currentrender = Image_CreateTexture("***$currentrender***", NULL, 0);
shaderstate.tex_currentrender->vkimage = Z_Malloc(sizeof(*shaderstate.tex_currentrender->vkimage));
}
img = shaderstate.tex_currentrender->vkimage;
if (img->width != vid.fbpwidth || img->height != vid.fbpheight)
{
//FIXME: free the old image when its safe to do so.
*img = VK_CreateTexture2DArray(vid.fbpwidth, vid.fbpheight, 1, 1, -vk.backbufformat, PTI_2D, true, shaderstate.tex_currentrender->ident);
if (!img->sampler)
VK_CreateSampler(shaderstate.tex_currentrender->flags, img);
}
vkCmdEndRenderPass(vk.rendertarg->cbuf);
//submit now?
//copy the backbuffer to our image
{
VkImageCopy region;
region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
region.srcSubresource.mipLevel = 0;
region.srcSubresource.baseArrayLayer = 0;
region.srcSubresource.layerCount = 1;
region.srcOffset.x = 0;
region.srcOffset.y = 0;
region.srcOffset.z = 0;
region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
region.dstSubresource.mipLevel = 0;
region.dstSubresource.baseArrayLayer = 0;
region.dstSubresource.layerCount = 1;
region.dstOffset.x = 0;
region.dstOffset.y = 0;
region.dstOffset.z = 0;
region.extent.width = vid.fbpwidth;
region.extent.height = vid.fbpheight;
region.extent.depth = 1;
set_image_layout(vk.rendertarg->cbuf, vk.frame->backbuf->colour.image, VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
set_image_layout(vk.rendertarg->cbuf, img->image, VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_UNDEFINED, 0, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
vkCmdCopyImage(vk.rendertarg->cbuf, vk.frame->backbuf->colour.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, img->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
set_image_layout(vk.rendertarg->cbuf, img->image, VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_ACCESS_TRANSFER_WRITE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_READ_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
set_image_layout(vk.rendertarg->cbuf, vk.frame->backbuf->colour.image, VK_IMAGE_ASPECT_COLOR_BIT,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
}
//submit now?
//barrier?
vkCmdBeginRenderPass(vk.rendertarg->cbuf, &vk.rendertarg->restartinfo, VK_SUBPASS_CONTENTS_INLINE);
//fixme: viewport+scissor?
}
static void R_FetchPlayerColour(unsigned int cv, vec3_t rgb)
{
int i;
if (cv >= 16)
{
rgb[0] = (((cv&0xff0000)>>16)**((unsigned char*)&d_8to24rgbtable[15]+0)) / (256.0*256);
rgb[1] = (((cv&0x00ff00)>>8)**((unsigned char*)&d_8to24rgbtable[15]+1)) / (256.0*256);
rgb[2] = (((cv&0x0000ff)>>0)**((unsigned char*)&d_8to24rgbtable[15]+2)) / (256.0*256);
return;
}
i = cv;
if (i >= 8)
{
i<<=4;
}
else
{
i<<=4;
i+=15;
}
i*=3;
rgb[0] = host_basepal[i+0] / 255.0;
rgb[1] = host_basepal[i+1] / 255.0;
rgb[2] = host_basepal[i+2] / 255.0;
/* if (!gammaworks)
{
*retred = gammatable[*retred];
*retgreen = gammatable[*retgreen];
*retblue = gammatable[*retblue];
}*/
}
//source is always packed
//dest is packed too
static void colourgen(const shaderpass_t *pass, int cnt, byte_vec4_t *srcb, avec4_t *srcf, vec4_t *dst, const mesh_t *mesh)
{
switch (pass->rgbgen)
{
case RGB_GEN_ENTITY:
while((cnt)--)
{
dst[cnt][0] = shaderstate.curentity->shaderRGBAf[0];
dst[cnt][1] = shaderstate.curentity->shaderRGBAf[1];
dst[cnt][2] = shaderstate.curentity->shaderRGBAf[2];
}
break;
case RGB_GEN_ONE_MINUS_ENTITY:
while((cnt)--)
{
dst[cnt][0] = 1-shaderstate.curentity->shaderRGBAf[0];
dst[cnt][1] = 1-shaderstate.curentity->shaderRGBAf[1];
dst[cnt][2] = 1-shaderstate.curentity->shaderRGBAf[2];
}
break;
case RGB_GEN_VERTEX_LIGHTING:
#if MAXRLIGHTMAPS > 1
if (mesh->colors4f_array[1])
{
float lm[MAXRLIGHTMAPS];
lm[0] = d_lightstylevalue[shaderstate.curbatch->vtlightstyle[0]]/256.0f*shaderstate.identitylighting;
lm[1] = d_lightstylevalue[shaderstate.curbatch->vtlightstyle[1]]/256.0f*shaderstate.identitylighting;
lm[2] = d_lightstylevalue[shaderstate.curbatch->vtlightstyle[2]]/256.0f*shaderstate.identitylighting;
lm[3] = d_lightstylevalue[shaderstate.curbatch->vtlightstyle[3]]/256.0f*shaderstate.identitylighting;
while((cnt)--)
{
VectorScale( mesh->colors4f_array[0][cnt], lm[0], dst[cnt]);
VectorMA(dst[cnt], lm[1], mesh->colors4f_array[1][cnt], dst[cnt]);
VectorMA(dst[cnt], lm[2], mesh->colors4f_array[2][cnt], dst[cnt]);
VectorMA(dst[cnt], lm[3], mesh->colors4f_array[3][cnt], dst[cnt]);
}
break;
}
#endif
if (shaderstate.identitylighting != 1)
{
if (srcf)
{
while((cnt)--)
{
dst[cnt][0] = srcf[cnt][0]*shaderstate.identitylighting;
dst[cnt][1] = srcf[cnt][1]*shaderstate.identitylighting;
dst[cnt][2] = srcf[cnt][2]*shaderstate.identitylighting;
}
}
else if (srcb)
{
float t = shaderstate.identitylighting * (1/255.0);
while((cnt)--)
{
dst[cnt][0] = srcb[cnt][0]*t;
dst[cnt][1] = srcb[cnt][1]*t;
dst[cnt][2] = srcb[cnt][2]*t;
}
}
else
{
while((cnt)--)
{
dst[cnt][0] = shaderstate.identitylighting;
dst[cnt][1] = shaderstate.identitylighting;
dst[cnt][2] = shaderstate.identitylighting;
}
}
break;
}
case RGB_GEN_VERTEX_EXACT:
if (srcf)
{
while((cnt)--)
{
dst[cnt][0] = srcf[cnt][0];
dst[cnt][1] = srcf[cnt][1];
dst[cnt][2] = srcf[cnt][2];
}
}
else if (srcb)
{
float t = 1/255.0;
while((cnt)--)
{
dst[cnt][0] = srcb[cnt][0]*t;
dst[cnt][1] = srcb[cnt][1]*t;
dst[cnt][2] = srcb[cnt][2]*t;
}
}
else
{
while((cnt)--)
{
dst[cnt][0] = 1;
dst[cnt][1] = 1;
dst[cnt][2] = 1;
}
break;
}
break;
case RGB_GEN_ONE_MINUS_VERTEX:
if (srcf)
{
while((cnt)--)
{
dst[cnt][0] = 1-srcf[cnt][0];
dst[cnt][1] = 1-srcf[cnt][1];
dst[cnt][2] = 1-srcf[cnt][2];
}
}
break;
case RGB_GEN_IDENTITY_LIGHTING:
if (shaderstate.curbatch->vtlightstyle[0] != 255 && d_lightstylevalue[shaderstate.curbatch->vtlightstyle[0]] != 256)
{
vec_t val = shaderstate.identitylighting * d_lightstylevalue[shaderstate.curbatch->vtlightstyle[0]]/256.0f;
while((cnt)--)
{
dst[cnt][0] = val;
dst[cnt][1] = val;
dst[cnt][2] = val;
}
}
else
{
//compensate for overbrights
while((cnt)--)
{
dst[cnt][0] = shaderstate.identitylighting;
dst[cnt][1] = shaderstate.identitylighting;
dst[cnt][2] = shaderstate.identitylighting;
}
}
break;
case RGB_GEN_IDENTITY_OVERBRIGHT:
while((cnt)--)
{
dst[cnt][0] = shaderstate.identitylightmap;
dst[cnt][1] = shaderstate.identitylightmap;
dst[cnt][2] = shaderstate.identitylightmap;
}
break;
default:
case RGB_GEN_IDENTITY:
while((cnt)--)
{
dst[cnt][0] = shaderstate.identitylighting;
dst[cnt][1] = shaderstate.identitylighting;
dst[cnt][2] = shaderstate.identitylighting;
}
break;
case RGB_GEN_CONST:
while((cnt)--)
{
dst[cnt][0] = pass->rgbgen_func.args[0];
dst[cnt][1] = pass->rgbgen_func.args[1];
dst[cnt][2] = pass->rgbgen_func.args[2];
}
break;
case RGB_GEN_ENTITY_LIGHTING_DIFFUSE:
R_LightArrays(shaderstate.curentity, mesh->xyz_array, dst, cnt, mesh->normals_array, shaderstate.identitylighting, true);
break;
case RGB_GEN_LIGHTING_DIFFUSE:
R_LightArrays(shaderstate.curentity, mesh->xyz_array, dst, cnt, mesh->normals_array, shaderstate.identitylighting, false);
break;
case RGB_GEN_WAVE:
{
float *table;
float c;
table = FTableForFunc(pass->rgbgen_func.type);
c = pass->rgbgen_func.args[2] + shaderstate.curtime * pass->rgbgen_func.args[3];
c = FTABLE_EVALUATE(table, c) * pass->rgbgen_func.args[1] + pass->rgbgen_func.args[0];
c = bound(0.0f, c, 1.0f);
while((cnt)--)
{
dst[cnt][0] = c;
dst[cnt][1] = c;
dst[cnt][2] = c;
}
}
break;
case RGB_GEN_TOPCOLOR:
if (cnt)
{
vec3_t rgb;
R_FetchPlayerColour(shaderstate.curentity->topcolour, rgb);
while((cnt)--)
{
dst[cnt][0] = rgb[0];
dst[cnt][1] = rgb[1];
dst[cnt][2] = rgb[2];
}
}
break;
case RGB_GEN_BOTTOMCOLOR:
if (cnt)
{
vec3_t rgb;
R_FetchPlayerColour(shaderstate.curentity->bottomcolour, rgb);
while((cnt)--)
{
dst[cnt][0] = rgb[0];
dst[cnt][1] = rgb[1];
dst[cnt][2] = rgb[2];
}
}
break;
}
}
static void alphagen(const shaderpass_t *pass, int cnt, byte_vec4_t *srcb, avec4_t *srcf, avec4_t *dst, const mesh_t *mesh)
{
float *table;
float t;
float f;
vec3_t v1, v2;
int i;
switch (pass->alphagen)
{
default:
case ALPHA_GEN_IDENTITY:
if (shaderstate.flags & BEF_FORCETRANSPARENT)
{
while(cnt--)
dst[cnt][3] = shaderstate.curentity->shaderRGBAf[3];
}
else
{
while(cnt--)
dst[cnt][3] = 1;
}
break;
case ALPHA_GEN_CONST:
t = pass->alphagen_func.args[0];
while(cnt--)
dst[cnt][3] = t;
break;
case ALPHA_GEN_WAVE:
table = FTableForFunc(pass->alphagen_func.type);
f = pass->alphagen_func.args[2] + shaderstate.curtime * pass->alphagen_func.args[3];
f = FTABLE_EVALUATE(table, f) * pass->alphagen_func.args[1] + pass->alphagen_func.args[0];
t = bound(0.0f, f, 1.0f);
while(cnt--)
dst[cnt][3] = t;
break;
case ALPHA_GEN_PORTAL:
//FIXME: should this be per-vert?
if (r_refdef.recurse)
f = 1;
else
{
VectorAdd(mesh->xyz_array[0], shaderstate.curentity->origin, v1);
VectorSubtract(r_origin, v1, v2);
f = VectorLength(v2) * (1.0 / shaderstate.curshader->portaldist);
f = bound(0.0f, f, 1.0f);
}
while(cnt--)
dst[cnt][3] = f;
break;
case ALPHA_GEN_VERTEX:
if (srcf)
{
while(cnt--)
{
dst[cnt][3] = srcf[cnt][3];
}
}
else if (srcb)
{
float t = 1/255.0;
while(cnt--)
{
dst[cnt][3] = srcb[cnt][3]*t;
}
}
else
{
while(cnt--)
{
dst[cnt][3] = 1;
}
break;
}
break;
case ALPHA_GEN_ENTITY:
f = bound(0, shaderstate.curentity->shaderRGBAf[3], 1);
while(cnt--)
{
dst[cnt][3] = f;
}
break;
case ALPHA_GEN_SPECULAR:
{
VectorSubtract(r_origin, shaderstate.curentity->origin, v1);
if (!Matrix3_Compare((const vec3_t*)shaderstate.curentity->axis, (const vec3_t*)axisDefault))
{
Matrix3_Multiply_Vec3(shaderstate.curentity->axis, v1, v2);
}
else
{
VectorCopy(v1, v2);
}
for (i = 0; i < cnt; i++)
{
VectorSubtract(v2, mesh->xyz_array[i], v1);
f = DotProduct(v1, mesh->normals_array[i] ) * Q_rsqrt(DotProduct(v1,v1));
f = f * f * f * f * f;
dst[i][3] = bound (0.0f, f, 1.0f);
}
}
break;
}
}
//true if we used an array (flag to use uniforms for it instead if false)
static void BE_GenerateColourMods(unsigned int vertcount, const shaderpass_t *pass, VkBuffer *buffer, VkDeviceSize *offset)
{
const mesh_t *m = shaderstate.meshlist[0];
// if (pass->flags & SHADER_PASS_NOCOLORARRAY)
// error
if ( ((pass->rgbgen == RGB_GEN_VERTEX_LIGHTING) ||
(pass->rgbgen == RGB_GEN_VERTEX_EXACT) ||
(pass->rgbgen == RGB_GEN_ONE_MINUS_VERTEX)) &&
(pass->alphagen == ALPHA_GEN_VERTEX))
{
if (shaderstate.batchvbo)
{ //just use the colour vbo provided
*buffer = shaderstate.batchvbo->colours[0].vk.buff;
*offset = shaderstate.batchvbo->colours[0].vk.offs;
}
else
{ //we can't use the vbo due to gaps that we don't want to have to deal with
//we can at least ensure that the data is written in one go to aid cpu cache.
vec4_t *map;
unsigned int mno;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec4_t), buffer, offset);
if (m->colors4f_array[0])
{
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
memcpy(map, m->colors4f_array[0], m->numvertexes * sizeof(vec4_t));
map += m->numvertexes;
}
}
else if (m->colors4b_array)
{
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
uint32_t v;
m = shaderstate.meshlist[mno];
for (v = 0; v < m->numvertexes; v++)
Vector4Scale(m->colors4b_array[v], 1.0/255, map[v]);
map += m->numvertexes;
}
}
else
{
for (mno = 0; mno < vertcount; mno++)
Vector4Set(map[mno], 1, 1, 1, 1);
}
}
}
else
{
vec4_t *map;
unsigned int mno;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec4_t), buffer, offset);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
colourgen(pass, m->numvertexes, m->colors4b_array, m->colors4f_array[0], map, m);
alphagen(pass, m->numvertexes, m->colors4b_array, m->colors4f_array[0], map, m);
map += m->numvertexes;
}
}
}
/*********************************************************************************************************/
/*========================================== texture coord generation =====================================*/
static void tcgen_environment(float *st, unsigned int numverts, float *xyz, float *normal)
{
int i;
vec3_t viewer, reflected;
float d;
vec3_t rorg;
RotateLightVector(shaderstate.curentity->axis, shaderstate.curentity->origin, r_origin, rorg);
for (i = 0 ; i < numverts ; i++, xyz += sizeof(vecV_t)/sizeof(vec_t), normal += 3, st += 2 )
{
VectorSubtract (rorg, xyz, viewer);
VectorNormalizeFast (viewer);
d = DotProduct (normal, viewer);
reflected[0] = normal[0]*2*d - viewer[0];
reflected[1] = normal[1]*2*d - viewer[1];
reflected[2] = normal[2]*2*d - viewer[2];
st[0] = 0.5 + reflected[1] * 0.5;
st[1] = 0.5 - reflected[2] * 0.5;
}
}
static float *tcgen(const shaderpass_t *pass, int cnt, float *dst, const mesh_t *mesh)
{
int i;
vecV_t *src;
switch (pass->tcgen)
{
default:
case TC_GEN_BASE:
return (float*)mesh->st_array;
case TC_GEN_LIGHTMAP:
return (float*)mesh->lmst_array[0];
case TC_GEN_NORMAL:
return (float*)mesh->normals_array;
case TC_GEN_SVECTOR:
return (float*)mesh->snormals_array;
case TC_GEN_TVECTOR:
return (float*)mesh->tnormals_array;
case TC_GEN_ENVIRONMENT:
if (!mesh->normals_array)
return (float*)mesh->st_array;
tcgen_environment(dst, cnt, (float*)mesh->xyz_array, (float*)mesh->normals_array);
return dst;
case TC_GEN_DOTPRODUCT:
return dst;//mesh->st_array[0];
case TC_GEN_VECTOR:
src = mesh->xyz_array;
for (i = 0; i < cnt; i++, dst += 2)
{
dst[0] = DotProduct(pass->tcgenvec[0], src[i]);
dst[1] = DotProduct(pass->tcgenvec[1], src[i]);
}
return dst;
}
}
/*src and dst can be the same address when tcmods are chained*/
static void tcmod(const tcmod_t *tcmod, int cnt, const float *src, float *dst, const mesh_t *mesh)
{
float *table;
float t1, t2;
float cost, sint;
int j;
switch (tcmod->type)
{
case SHADER_TCMOD_ROTATE:
cost = tcmod->args[0] * shaderstate.curtime;
sint = R_FastSin(cost);
cost = R_FastSin(cost + 0.25);
for (j = 0; j < cnt; j++, dst+=2,src+=2)
{
t1 = cost * (src[0] - 0.5f) - sint * (src[1] - 0.5f) + 0.5f;
t2 = cost * (src[1] - 0.5f) + sint * (src[0] - 0.5f) + 0.5f;
dst[0] = t1;
dst[1] = t2;
}
break;
case SHADER_TCMOD_SCALE:
t1 = tcmod->args[0];
t2 = tcmod->args[1];
for (j = 0; j < cnt; j++, dst+=2,src+=2)
{
dst[0] = src[0] * t1;
dst[1] = src[1] * t2;
}
break;
case SHADER_TCMOD_TURB:
t1 = tcmod->args[2] + shaderstate.curtime * tcmod->args[3];
t2 = tcmod->args[1];
for (j = 0; j < cnt; j++, dst+=2,src+=2)
{
dst[0] = src[0] + R_FastSin (src[0]*t2+t1) * t2;
dst[1] = src[1] + R_FastSin (src[1]*t2+t1) * t2;
}
break;
case SHADER_TCMOD_STRETCH:
table = FTableForFunc(tcmod->args[0]);
t2 = tcmod->args[3] + shaderstate.curtime * tcmod->args[4];
t1 = FTABLE_EVALUATE(table, t2) * tcmod->args[2] + tcmod->args[1];
t1 = t1 ? 1.0f / t1 : 1.0f;
t2 = 0.5f - 0.5f * t1;
for (j = 0; j < cnt; j++, dst+=2,src+=2)
{
dst[0] = src[0] * t1 + t2;
dst[1] = src[1] * t1 + t2;
}
break;
case SHADER_TCMOD_SCROLL:
t1 = tcmod->args[0] * shaderstate.curtime;
t2 = tcmod->args[1] * shaderstate.curtime;
for (j = 0; j < cnt; j++, dst += 2, src+=2)
{
dst[0] = src[0] + t1;
dst[1] = src[1] + t2;
}
break;
case SHADER_TCMOD_TRANSFORM:
for (j = 0; j < cnt; j++, dst+=2, src+=2)
{
t1 = src[0];
t2 = src[1];
dst[0] = t1 * tcmod->args[0] + t2 * tcmod->args[2] + tcmod->args[4];
dst[1] = t1 * tcmod->args[1] + t1 * tcmod->args[3] + tcmod->args[5];
}
break;
case SHADER_TCMOD_PAGE:
default:
for (j = 0; j < cnt; j++, dst += 2, src+=2)
{
dst[0] = src[0];
dst[1] = src[1];
}
break;
}
}
static void BE_GenerateTCMods(const shaderpass_t *pass, float *dest)
{
mesh_t *mesh;
unsigned int mno;
int i;
float *src;
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
mesh = shaderstate.meshlist[mno];
src = tcgen(pass, mesh->numvertexes, dest, mesh);
//tcgen might return unmodified info
if (pass->numtcmods)
{
tcmod(&pass->tcmods[0], mesh->numvertexes, src, dest, mesh);
for (i = 1; i < pass->numtcmods; i++)
{
tcmod(&pass->tcmods[i], mesh->numvertexes, dest, dest, mesh);
}
}
else if (src != dest)
{
memcpy(dest, src, sizeof(vec2_t)*mesh->numvertexes);
}
dest += mesh->numvertexes*2;
}
}
//end texture coords
/*******************************************************************************************************************/
static void deformgen(const deformv_t *deformv, int cnt, vecV_t *src, vecV_t *dst, const mesh_t *mesh)
{
float *table;
int j, k;
float args[4];
float deflect;
switch (deformv->type)
{
default:
case DEFORMV_NONE:
if (src != dst)
memcpy(dst, src, sizeof(*src)*cnt);
break;
case DEFORMV_WAVE:
if (!mesh->normals_array)
{
if (src != dst)
memcpy(dst, src, sizeof(*src)*cnt);
return;
}
args[0] = deformv->func.args[0];
args[1] = deformv->func.args[1];
args[3] = deformv->func.args[2] + deformv->func.args[3] * shaderstate.curtime;
table = FTableForFunc(deformv->func.type);
for ( j = 0; j < cnt; j++ )
{
deflect = deformv->args[0] * (src[j][0]+src[j][1]+src[j][2]) + args[3];
deflect = FTABLE_EVALUATE(table, deflect) * args[1] + args[0];
// Deflect vertex along its normal by wave amount
VectorMA(src[j], deflect, mesh->normals_array[j], dst[j]);
}
break;
case DEFORMV_NORMAL:
//normal does not actually move the verts, but it does change the normals array
//we don't currently support that.
if (src != dst)
memcpy(dst, src, sizeof(*src)*cnt);
/*
args[0] = deformv->args[1] * shaderstate.curtime;
for ( j = 0; j < cnt; j++ )
{
args[1] = normalsArray[j][2] * args[0];
deflect = deformv->args[0] * R_FastSin(args[1]);
normalsArray[j][0] *= deflect;
deflect = deformv->args[0] * R_FastSin(args[1] + 0.25);
normalsArray[j][1] *= deflect;
VectorNormalizeFast(normalsArray[j]);
}
*/ break;
case DEFORMV_MOVE:
table = FTableForFunc(deformv->func.type);
deflect = deformv->func.args[2] + shaderstate.curtime * deformv->func.args[3];
deflect = FTABLE_EVALUATE(table, deflect) * deformv->func.args[1] + deformv->func.args[0];
for ( j = 0; j < cnt; j++ )
VectorMA(src[j], deflect, deformv->args, dst[j]);
break;
case DEFORMV_BULGE:
args[0] = deformv->args[0]/(2*M_PI);
args[1] = deformv->args[1];
args[2] = shaderstate.curtime * deformv->args[2]/(2*M_PI);
for (j = 0; j < cnt; j++)
{
deflect = R_FastSin(mesh->st_array[j][0]*args[0] + args[2])*args[1];
dst[j][0] = src[j][0]+deflect*mesh->normals_array[j][0];
dst[j][1] = src[j][1]+deflect*mesh->normals_array[j][1];
dst[j][2] = src[j][2]+deflect*mesh->normals_array[j][2];
}
break;
case DEFORMV_AUTOSPRITE:
if (mesh->numindexes < 6)
break;
for (j = 0; j < cnt-3; j+=4, src+=4, dst+=4)
{
vec3_t mid, d;
float radius;
mid[0] = 0.25*(src[0][0] + src[1][0] + src[2][0] + src[3][0]);
mid[1] = 0.25*(src[0][1] + src[1][1] + src[2][1] + src[3][1]);
mid[2] = 0.25*(src[0][2] + src[1][2] + src[2][2] + src[3][2]);
VectorSubtract(src[0], mid, d);
radius = 2*VectorLength(d);
for (k = 0; k < 4; k++)
{
dst[k][0] = mid[0] + radius*((mesh->st_array[j+k][0]-0.5)*r_refdef.m_view[0+0]-(mesh->st_array[j+k][1]-0.5)*r_refdef.m_view[0+1]);
dst[k][1] = mid[1] + radius*((mesh->st_array[j+k][0]-0.5)*r_refdef.m_view[4+0]-(mesh->st_array[j+k][1]-0.5)*r_refdef.m_view[4+1]);
dst[k][2] = mid[2] + radius*((mesh->st_array[j+k][0]-0.5)*r_refdef.m_view[8+0]-(mesh->st_array[j+k][1]-0.5)*r_refdef.m_view[8+1]);
}
}
break;
case DEFORMV_AUTOSPRITE2:
if (mesh->numindexes < 6)
break;
for (k = 0; k < mesh->numindexes; k += 6)
{
int long_axis, short_axis;
vec3_t axis;
float len[3];
mat3_t m0, m1, m2, result;
float *quad[4];
vec3_t rot_centre, tv, tv2;
quad[0] = (float *)(src + mesh->indexes[k+0]);
quad[1] = (float *)(src + mesh->indexes[k+1]);
quad[2] = (float *)(src + mesh->indexes[k+2]);
for (j = 2; j >= 0; j--)
{
quad[3] = (float *)(src + mesh->indexes[k+3+j]);
if (!VectorEquals (quad[3], quad[0]) &&
!VectorEquals (quad[3], quad[1]) &&
!VectorEquals (quad[3], quad[2]))
{
break;
}
}
// build a matrix were the longest axis of the billboard is the Y-Axis
VectorSubtract(quad[1], quad[0], m0[0]);
VectorSubtract(quad[2], quad[0], m0[1]);
VectorSubtract(quad[2], quad[1], m0[2]);
len[0] = DotProduct(m0[0], m0[0]);
len[1] = DotProduct(m0[1], m0[1]);
len[2] = DotProduct(m0[2], m0[2]);
if ((len[2] > len[1]) && (len[2] > len[0]))
{
if (len[1] > len[0])
{
long_axis = 1;
short_axis = 0;
}
else
{
long_axis = 0;
short_axis = 1;
}
}
else if ((len[1] > len[2]) && (len[1] > len[0]))
{
if (len[2] > len[0])
{
long_axis = 2;
short_axis = 0;
}
else
{
long_axis = 0;
short_axis = 2;
}
}
else //if ( (len[0] > len[1]) && (len[0] > len[2]) )
{
if (len[2] > len[1])
{
long_axis = 2;
short_axis = 1;
}
else
{
long_axis = 1;
short_axis = 2;
}
}
if (DotProduct(m0[long_axis], m0[short_axis]))
{
VectorNormalize2(m0[long_axis], axis);
VectorCopy(axis, m0[1]);
if (axis[0] || axis[1])
{
VectorVectors(m0[1], m0[2], m0[0]);
}
else
{
VectorVectors(m0[1], m0[0], m0[2]);
}
}
else
{
VectorNormalize2(m0[long_axis], axis);
VectorNormalize2(m0[short_axis], m0[0]);
VectorCopy(axis, m0[1]);
CrossProduct(m0[0], m0[1], m0[2]);
}
for (j = 0; j < 3; j++)
rot_centre[j] = (quad[0][j] + quad[1][j] + quad[2][j] + quad[3][j]) * 0.25;
if (shaderstate.curentity)
{
VectorAdd(shaderstate.curentity->origin, rot_centre, tv);
}
else
{
VectorCopy(rot_centre, tv);
}
VectorSubtract(r_origin, tv, tv);
// filter any longest-axis-parts off the camera-direction
deflect = -DotProduct(tv, axis);
VectorMA(tv, deflect, axis, m1[2]);
VectorNormalizeFast(m1[2]);
VectorCopy(axis, m1[1]);
CrossProduct(m1[1], m1[2], m1[0]);
Matrix3_Transpose(m1, m2);
Matrix3_Multiply(m2, m0, result);
for (j = 0; j < 4; j++)
{
int v = ((vecV_t*)quad[j]-src);
VectorSubtract(quad[j], rot_centre, tv);
Matrix3_Multiply_Vec3((void *)result, tv, tv2);
VectorAdd(rot_centre, tv2, dst[v]);
}
}
break;
// case DEFORMV_PROJECTION_SHADOW:
// break;
}
}
static void BE_CreatePipeline(program_t *p, unsigned int shaderflags, unsigned int blendflags, unsigned int permu)
{
struct pipeline_s *pipe;
VkDynamicState dynamicStateEnables[VK_DYNAMIC_STATE_RANGE_SIZE]={0};
VkPipelineDynamicStateCreateInfo dyn = {VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO};
VkVertexInputBindingDescription vbinds[VK_BUFF_MAX] = {{0}};
VkVertexInputAttributeDescription vattrs[VK_BUFF_MAX] = {{0}};
VkPipelineVertexInputStateCreateInfo vi = {VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO};
VkPipelineInputAssemblyStateCreateInfo ia = {VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO};
VkPipelineViewportStateCreateInfo vp = {VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO};
VkPipelineRasterizationStateCreateInfo rs = {VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO};
VkPipelineMultisampleStateCreateInfo ms = {VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO};
VkPipelineDepthStencilStateCreateInfo ds = {VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO};
VkPipelineColorBlendStateCreateInfo cb = {VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO};
VkPipelineColorBlendAttachmentState att_state[1];
VkGraphicsPipelineCreateInfo pipeCreateInfo = {VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO};
VkPipelineShaderStageCreateInfo shaderStages[2] = {{0}};
VkPipelineRasterizationStateRasterizationOrderAMD ro = {VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD}; //long enough names for you?
struct specdata_s
{
int alphamode;
int permu[16];
union
{
float f;
int i;
} cvars[64];
} specdata;
VkSpecializationMapEntry specentries[256] = {{0}};
VkSpecializationInfo specInfo = {0}, *bugsbeware;
VkResult err;
uint32_t i, s;
unsigned char *cvardata;
if (!p->vert || !p->frag)
Sys_Error("program missing required shader\n"); //PANIC
pipe = Z_Malloc(sizeof(*pipe));
if (!p->pipelines)
p->pipelines = pipe;
else
{ //insert at end. if it took us a while to realise that we needed it, chances are its not that common.
//so don't cause the other pipelines to waste cycles for it.
struct pipeline_s *prev;
for (prev = p->pipelines; ; prev = prev->next)
if (!prev->next)
break;
prev->next = pipe;
}
pipe->flags = shaderflags;
pipe->blendbits = blendflags;
pipe->permu = permu;
if (permu&PERMUTATION_BEM_WIREFRAME)
{
blendflags |= SBITS_MISC_NODEPTHTEST;
blendflags &= ~SBITS_MISC_DEPTHWRITE;
blendflags &= ~(SHADER_CULL_FRONT|SHADER_CULL_BACK);
}
dyn.flags = 0;
dyn.dynamicStateCount = 0;
dyn.pDynamicStates = dynamicStateEnables;
//it wasn't supposed to be like this!
//this stuff gets messy with tcmods and rgbgen/alphagen stuff
vbinds[VK_BUFF_POS].binding = VK_BUFF_POS;
vbinds[VK_BUFF_POS].stride = sizeof(vecV_t);
vbinds[VK_BUFF_POS].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
vattrs[VK_BUFF_POS].binding = vbinds[VK_BUFF_POS].binding;
vattrs[VK_BUFF_POS].location = VK_BUFF_POS;
vattrs[VK_BUFF_POS].format = VK_FORMAT_R32G32B32_SFLOAT;
vattrs[VK_BUFF_POS].offset = 0;
vbinds[VK_BUFF_TC].binding = VK_BUFF_TC;
vbinds[VK_BUFF_TC].stride = sizeof(vec2_t);
vbinds[VK_BUFF_TC].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
vattrs[VK_BUFF_TC].binding = vbinds[VK_BUFF_TC].binding;
vattrs[VK_BUFF_TC].location = VK_BUFF_TC;
vattrs[VK_BUFF_TC].format = VK_FORMAT_R32G32_SFLOAT;
vattrs[VK_BUFF_TC].offset = 0;
vbinds[VK_BUFF_COL].binding = VK_BUFF_COL;
vbinds[VK_BUFF_COL].stride = sizeof(vec4_t);
vbinds[VK_BUFF_COL].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
vattrs[VK_BUFF_COL].binding = vbinds[VK_BUFF_COL].binding;
vattrs[VK_BUFF_COL].location = VK_BUFF_COL;
vattrs[VK_BUFF_COL].format = VK_FORMAT_R32G32B32A32_SFLOAT;
vattrs[VK_BUFF_COL].offset = 0;
vbinds[VK_BUFF_LMTC].binding = VK_BUFF_LMTC;
vbinds[VK_BUFF_LMTC].stride = sizeof(vec2_t);
vbinds[VK_BUFF_LMTC].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
vattrs[VK_BUFF_LMTC].binding = vbinds[VK_BUFF_LMTC].binding;
vattrs[VK_BUFF_LMTC].location = VK_BUFF_LMTC;
vattrs[VK_BUFF_LMTC].format = VK_FORMAT_R32G32_SFLOAT;
vattrs[VK_BUFF_LMTC].offset = 0;
//fixme: in all seriousness, why is this not a single buffer?
vbinds[VK_BUFF_NORM].binding = VK_BUFF_NORM;
vbinds[VK_BUFF_NORM].stride = sizeof(vec3_t);
vbinds[VK_BUFF_NORM].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
vattrs[VK_BUFF_NORM].binding = vbinds[VK_BUFF_NORM].binding;
vattrs[VK_BUFF_NORM].location = VK_BUFF_NORM;
vattrs[VK_BUFF_NORM].format = VK_FORMAT_R32G32B32_SFLOAT;
vattrs[VK_BUFF_NORM].offset = 0;
vbinds[VK_BUFF_SDIR].binding = VK_BUFF_SDIR;
vbinds[VK_BUFF_SDIR].stride = sizeof(vec3_t);
vbinds[VK_BUFF_SDIR].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
vattrs[VK_BUFF_SDIR].binding = vbinds[VK_BUFF_SDIR].binding;
vattrs[VK_BUFF_SDIR].location = VK_BUFF_SDIR;
vattrs[VK_BUFF_SDIR].format = VK_FORMAT_R32G32B32_SFLOAT;
vattrs[VK_BUFF_SDIR].offset = 0;
vbinds[VK_BUFF_TDIR].binding = VK_BUFF_TDIR;
vbinds[VK_BUFF_TDIR].stride = sizeof(vec3_t);
vbinds[VK_BUFF_TDIR].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
vattrs[VK_BUFF_TDIR].binding = vbinds[VK_BUFF_TDIR].binding;
vattrs[VK_BUFF_TDIR].location = VK_BUFF_TDIR;
vattrs[VK_BUFF_TDIR].format = VK_FORMAT_R32G32B32_SFLOAT;
vattrs[VK_BUFF_TDIR].offset = 0;
vi.vertexBindingDescriptionCount = countof(vbinds);
vi.pVertexBindingDescriptions = vbinds;
vi.vertexAttributeDescriptionCount = countof(vattrs);
vi.pVertexAttributeDescriptions = vattrs;
ia.topology = (blendflags&SBITS_LINES)?VK_PRIMITIVE_TOPOLOGY_LINE_LIST:VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
vp.viewportCount = 1;
dynamicStateEnables[dyn.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT;
vp.scissorCount = 1;
dynamicStateEnables[dyn.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR;
//FIXME: fillModeNonSolid might mean mode_line is not supported.
rs.polygonMode = (permu&PERMUTATION_BEM_WIREFRAME)?VK_POLYGON_MODE_LINE:VK_POLYGON_MODE_FILL;
rs.lineWidth = 1;
rs.cullMode = ((shaderflags&SHADER_CULL_FRONT)?VK_CULL_MODE_FRONT_BIT:0) | ((shaderflags&SHADER_CULL_BACK)?VK_CULL_MODE_BACK_BIT:0);
rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
rs.depthClampEnable = VK_FALSE;
rs.rasterizerDiscardEnable = VK_FALSE;
if (shaderflags & SHADER_POLYGONOFFSET)
{
rs.depthBiasEnable = VK_TRUE;
rs.depthBiasConstantFactor = -25;//shader->polyoffset.unit;
rs.depthBiasClamp = 0;
rs.depthBiasSlopeFactor = -0.05;//shader->polyoffset.factor;
}
else
rs.depthBiasEnable = VK_FALSE;
if (vk.amd_rasterization_order)
{
unsigned int b = blendflags & SBITS_BLEND_BITS;
//we potentially allow a little z-fighting if they're equal. a single batch shouldn't really have such primitives.
//must be no blending, or additive blending.
switch(blendflags & SBITS_DEPTHFUNC_BITS)
{
case SBITS_DEPTHFUNC_EQUAL:
break;
default:
if ((blendflags&(SBITS_MISC_NODEPTHTEST|SBITS_MISC_DEPTHWRITE)) == SBITS_MISC_DEPTHWRITE &&
(!b || b == (SBITS_SRCBLEND_ONE|SBITS_DSTBLEND_ZERO) || b == SBITS_DSTBLEND_ONE))
{
rs.pNext = &ro;
ro.rasterizationOrder = VK_RASTERIZATION_ORDER_RELAXED_AMD;
}
}
}
ms.pSampleMask = NULL;
ms.rasterizationSamples = vk.multisamplebits;
// ms.sampleShadingEnable = VK_TRUE; //call the fragment shader multiple times, instead of just once per final pixel
// ms.minSampleShading = 0.25;
ds.depthTestEnable = (blendflags&SBITS_MISC_NODEPTHTEST)?VK_FALSE:VK_TRUE;
ds.depthWriteEnable = (blendflags&SBITS_MISC_DEPTHWRITE)?VK_TRUE:VK_FALSE;
switch(blendflags & SBITS_DEPTHFUNC_BITS)
{
default:
case SBITS_DEPTHFUNC_CLOSEREQUAL: ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL; break;
case SBITS_DEPTHFUNC_EQUAL: ds.depthCompareOp = VK_COMPARE_OP_EQUAL; break;
case SBITS_DEPTHFUNC_CLOSER: ds.depthCompareOp = VK_COMPARE_OP_LESS; break;
case SBITS_DEPTHFUNC_FURTHER: ds.depthCompareOp = VK_COMPARE_OP_GREATER; break;
}
ds.depthBoundsTestEnable = VK_FALSE;
ds.back.failOp = VK_STENCIL_OP_KEEP;
ds.back.passOp = VK_STENCIL_OP_KEEP;
ds.back.compareOp = VK_COMPARE_OP_NEVER;//VK_COMPARE_OP_ALWAYS;
ds.stencilTestEnable = VK_FALSE;
ds.front = ds.back;
memset(att_state, 0, sizeof(att_state));
att_state[0].colorWriteMask =
((blendflags&SBITS_MASK_RED)?0:VK_COLOR_COMPONENT_R_BIT) |
((blendflags&SBITS_MASK_GREEN)?0:VK_COLOR_COMPONENT_G_BIT) |
((blendflags&SBITS_MASK_BLUE)?0:VK_COLOR_COMPONENT_B_BIT) |
((blendflags&SBITS_MASK_ALPHA)?0:VK_COLOR_COMPONENT_A_BIT);
if ((blendflags & SBITS_BLEND_BITS) && (blendflags & SBITS_BLEND_BITS)!=(SBITS_SRCBLEND_ONE|SBITS_DSTBLEND_ZERO))
{
switch(blendflags & SBITS_SRCBLEND_BITS)
{
case SBITS_SRCBLEND_ZERO: att_state[0].srcColorBlendFactor = VK_BLEND_FACTOR_ZERO; att_state[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; break;
case SBITS_SRCBLEND_ONE: att_state[0].srcColorBlendFactor = VK_BLEND_FACTOR_ONE; att_state[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE; break;
case SBITS_SRCBLEND_DST_COLOR: att_state[0].srcColorBlendFactor = VK_BLEND_FACTOR_DST_COLOR; att_state[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_DST_ALPHA; break;
case SBITS_SRCBLEND_ONE_MINUS_DST_COLOR: att_state[0].srcColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR; att_state[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; break;
case SBITS_SRCBLEND_SRC_ALPHA: att_state[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; att_state[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; break;
case SBITS_SRCBLEND_ONE_MINUS_SRC_ALPHA: att_state[0].srcColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; att_state[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; break;
case SBITS_SRCBLEND_DST_ALPHA: att_state[0].srcColorBlendFactor = VK_BLEND_FACTOR_DST_ALPHA; att_state[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_DST_ALPHA; break;
case SBITS_SRCBLEND_ONE_MINUS_DST_ALPHA: att_state[0].srcColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; att_state[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; break;
case SBITS_SRCBLEND_ALPHA_SATURATE: att_state[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA_SATURATE; att_state[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA_SATURATE; break;
default: Sys_Error("Bad shader blend src\n"); return;
}
switch(blendflags & SBITS_DSTBLEND_BITS)
{
case SBITS_DSTBLEND_ZERO: att_state[0].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO; att_state[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO; break;
case SBITS_DSTBLEND_ONE: att_state[0].dstColorBlendFactor = VK_BLEND_FACTOR_ONE; att_state[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE; break;
case SBITS_DSTBLEND_SRC_ALPHA: att_state[0].dstColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; att_state[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; break;
case SBITS_DSTBLEND_ONE_MINUS_SRC_ALPHA: att_state[0].dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; att_state[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; break;
case SBITS_DSTBLEND_DST_ALPHA: att_state[0].dstColorBlendFactor = VK_BLEND_FACTOR_DST_ALPHA; att_state[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_DST_ALPHA; break;
case SBITS_DSTBLEND_ONE_MINUS_DST_ALPHA: att_state[0].dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; att_state[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA; break;
case SBITS_DSTBLEND_SRC_COLOR: att_state[0].dstColorBlendFactor = VK_BLEND_FACTOR_SRC_COLOR; att_state[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; break;
case SBITS_DSTBLEND_ONE_MINUS_SRC_COLOR: att_state[0].dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR; att_state[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; break;
default: Sys_Error("Bad shader blend dst\n"); return;
}
att_state[0].colorBlendOp = VK_BLEND_OP_ADD;
att_state[0].alphaBlendOp = VK_BLEND_OP_ADD;
att_state[0].blendEnable = VK_TRUE;
}
else
{
att_state[0].blendEnable = VK_FALSE;
}
if (permu&PERMUTATION_BEM_DEPTHONLY)
cb.attachmentCount = 0;
else
cb.attachmentCount = 1;
cb.pAttachments = att_state;
s = 0;
specentries[s].constantID = 0;
specentries[s].offset = offsetof(struct specdata_s, alphamode);
specentries[s].size = sizeof(specdata.alphamode);
s++;
if (blendflags & SBITS_ATEST_GE128)
specdata.alphamode = 3;
else if (blendflags & SBITS_ATEST_GT0)
specdata.alphamode = 2;
else if (blendflags & SBITS_ATEST_LT128)
specdata.alphamode = 1;
else //if (blendflags & SBITS_ATEST_NONE)
specdata.alphamode = 0;
for (i = 0; i < countof(specdata.permu); i++)
{
specentries[s].constantID = 16+i;
specentries[s].offset = offsetof(struct specdata_s, permu[i]);
specentries[s].size = sizeof(specdata.permu[i]);
s++;
specdata.permu[i] = !!(permu & (1u<<i));
}
//cvars
for (cvardata = p->cvardata, i = 0; cvardata < p->cvardata + p->cvardatasize; )
{
unsigned short id = (cvardata[0]<<8)|cvardata[1];
unsigned char type = cvardata[2], size = cvardata[3]-'0';
char *name;
cvar_t *var;
unsigned int u;
cvardata += 4;
name = cvardata;
cvardata += strlen(name)+1;
if (i + size > countof(specdata.cvars))
break; //error
if (type >= 'A' && type <= 'Z')
{ //args will be handled by the blob loader.
for (u = 0; u < size && u < 4; u++)
{
specentries[s].constantID = id;
specentries[s].offset = offsetof(struct specdata_s, cvars[i]);
specentries[s].size = sizeof(specdata.cvars[i]);
specdata.cvars[i].i = (cvardata[u*4+0]<<24)|(cvardata[u*4+1]<<16)|(cvardata[u*4+2]<<8)|(cvardata[u*4+3]<<0);
s++;
i++;
id++;
}
}
else
{
var = Cvar_FindVar(name);
if (var)
{
for (u = 0; u < size && u < 4; u++)
{
specentries[s].constantID = id;
specentries[s].offset = offsetof(struct specdata_s, cvars[i]);
specentries[s].size = sizeof(specdata.cvars[i]);
if (type == 'i')
specdata.cvars[i].i = var->ival;
else
specdata.cvars[i].f = var->vec4[u];
s++;
i++;
id++;
}
}
}
cvardata += 4*size;
}
specInfo.mapEntryCount = s;
specInfo.pMapEntries = specentries;
specInfo.dataSize = sizeof(specdata);
specInfo.pData = &specdata;
#if 0//def _DEBUG
//vk_layer_lunarg_drawstate fucks up and pokes invalid bits of stack.
bugsbeware = Z_Malloc(sizeof(*bugsbeware) + sizeof(*specentries)*s + sizeof(specdata));
*bugsbeware = specInfo;
bugsbeware->pData = bugsbeware+1;
bugsbeware->pMapEntries = (VkSpecializationMapEntry*)((char*)bugsbeware->pData + specInfo.dataSize);
memcpy((void*)bugsbeware->pData, specInfo.pData, specInfo.dataSize);
memcpy((void*)bugsbeware->pMapEntries, specInfo.pMapEntries, sizeof(*specInfo.pMapEntries)*specInfo.mapEntryCount);
#else
bugsbeware = &specInfo;
#endif
//fixme: add more specialisations for custom cvars (yes, this'll flush+reload pipelines if they're changed)
//fixme: add specialisations for permutations I guess
//fixme: add geometry+tesselation support. because we can.
shaderStages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
shaderStages[0].module = p->vert;
shaderStages[0].pName = "main";
shaderStages[0].pSpecializationInfo = bugsbeware;
shaderStages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
shaderStages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
shaderStages[1].module = p->frag;
shaderStages[1].pName = "main";
shaderStages[1].pSpecializationInfo = bugsbeware;
pipeCreateInfo.flags = 0;
pipeCreateInfo.stageCount = countof(shaderStages);
pipeCreateInfo.pStages = shaderStages;
pipeCreateInfo.pVertexInputState = &vi;
pipeCreateInfo.pInputAssemblyState = &ia;
pipeCreateInfo.pTessellationState = NULL; //null is okay!
pipeCreateInfo.pViewportState = &vp;
pipeCreateInfo.pRasterizationState = &rs;
pipeCreateInfo.pMultisampleState = &ms;
pipeCreateInfo.pDepthStencilState = &ds;
pipeCreateInfo.pColorBlendState = &cb;
pipeCreateInfo.pDynamicState = &dyn;
pipeCreateInfo.layout = p->layout;
pipeCreateInfo.renderPass = (permu&PERMUTATION_BEM_DEPTHONLY)?vk.shadow_renderpass:vk.renderpass[0];
pipeCreateInfo.subpass = 0;
pipeCreateInfo.basePipelineHandle = VK_NULL_HANDLE;
pipeCreateInfo.basePipelineIndex = -1; //used to create derivatives for pipelines created in the same call.
// pipeCreateInfo.flags = VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT;
err = vkCreateGraphicsPipelines(vk.device, vk.pipelinecache, 1, &pipeCreateInfo, vkallocationcb, &pipe->pipeline);
if (err)
{ //valid err values are VK_ERROR_OUT_OF_HOST_MEMORY, VK_ERROR_OUT_OF_DEVICE_MEMORY, VK_ERROR_INVALID_SHADER_NV
//VK_INCOMPLETE is a Qualcom bug with certain spirv-opt optimisations.
shaderstate.rc.activepipeline = VK_NULL_HANDLE;
if (err != VK_ERROR_INVALID_SHADER_NV)
Sys_Error("%s creating pipeline %s for material %s. Check spir-v modules / drivers.\n", VK_VKErrorToString(err), p->name, shaderstate.curshader->name);
else
Con_Printf("Error creating pipeline %s for material %s. Check glsl / spir-v modules / drivers.\n", p->name, shaderstate.curshader->name);
return;
}
vkCmdBindPipeline(vk.rendertarg->cbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, shaderstate.rc.activepipeline=pipe->pipeline);
}
static void BE_BindPipeline(program_t *p, unsigned int shaderflags, unsigned int blendflags, unsigned int permu)
{
struct pipeline_s *pipe;
blendflags &= 0
| SBITS_SRCBLEND_BITS | SBITS_DSTBLEND_BITS | SBITS_MASK_BITS | SBITS_ATEST_BITS
| SBITS_MISC_DEPTHWRITE | SBITS_MISC_NODEPTHTEST | SBITS_DEPTHFUNC_BITS
| SBITS_LINES
;
shaderflags &= 0
| SHADER_CULL_FRONT | SHADER_CULL_BACK
| SHADER_POLYGONOFFSET
;
permu |= shaderstate.modepermutation;
if (shaderflags & (SHADER_CULL_FRONT | SHADER_CULL_BACK))
shaderflags ^= r_refdef.flipcull;
for (pipe = p->pipelines; pipe; pipe = pipe->next)
{
if (pipe->flags == shaderflags)
if (pipe->blendbits == blendflags)
if (pipe->permu == permu)
{
if (pipe->pipeline != shaderstate.rc.activepipeline)
{
shaderstate.rc.activepipeline = pipe->pipeline;
if (shaderstate.rc.activepipeline)
vkCmdBindPipeline(vk.rendertarg->cbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, shaderstate.rc.activepipeline);
}
return;
}
}
//oh look. we need to build an entirely new pipeline object. hurrah... not.
//split into a different function because of abusive stack combined with windows stack probes.
BE_CreatePipeline(p, shaderflags, blendflags, permu);
}
static void BE_SetupTextureDescriptor(texid_t tex, texid_t fallbacktex, VkDescriptorSet set, VkWriteDescriptorSet *firstdesc, VkWriteDescriptorSet *desc, VkDescriptorImageInfo *img)
{
if (!tex || !tex->vkimage)
tex = fallbacktex;
desc->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
desc->pNext = NULL;
desc->dstSet = set;
desc->dstBinding = desc-firstdesc;
desc->dstArrayElement = 0;
desc->descriptorCount = 1;
desc->descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
img->imageLayout = tex->vkimage->layout;
img->imageView = tex->vkimage->view;
img->sampler = tex->vkimage->sampler;
desc->pImageInfo = img;
desc->pBufferInfo = NULL;
desc->pTexelBufferView = NULL;
}
static void BE_SetupUBODescriptor(VkDescriptorSet set, VkWriteDescriptorSet *firstdesc, VkWriteDescriptorSet *desc, VkDescriptorBufferInfo *info)
{
desc->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
desc->pNext = NULL;
desc->dstSet = set;
desc->dstBinding = desc-firstdesc;
desc->dstArrayElement = 0;
desc->descriptorCount = 1;
desc->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
desc->pImageInfo = NULL;
desc->pBufferInfo = info;
desc->pTexelBufferView = NULL;
}
static qboolean BE_SetupMeshProgram(program_t *p, shaderpass_t *pass, unsigned int shaderbits, unsigned int idxcount)
{
int perm = 0;
if (!p)
return false;
if (TEXLOADED(shaderstate.curtexnums->bump))
perm |= PERMUTATION_BUMPMAP;
if (TEXLOADED(shaderstate.curtexnums->fullbright))
perm |= PERMUTATION_FULLBRIGHT;
if (TEXLOADED(shaderstate.curtexnums->upperoverlay) || TEXLOADED(shaderstate.curtexnums->loweroverlay))
perm |= PERMUTATION_UPPERLOWER;
if (TEXLOADED(shaderstate.curtexnums->reflectcube) || TEXLOADED(shaderstate.curtexnums->reflectmask))
perm |= PERMUTATION_REFLECTCUBEMASK;
if (r_refdef.globalfog.density)
perm |= PERMUTATION_FOG;
// if (r_glsl_offsetmapping.ival && TEXLOADED(shaderstate.curtexnums->bump))
// perm |= PERMUTATION_OFFSET;
perm &= p->supportedpermutations;
BE_BindPipeline(p, shaderbits, VKBE_ApplyShaderBits(pass->shaderbits), perm);
if (!shaderstate.rc.activepipeline)
return false; //err, something bad happened.
//most gpus will have a fairly low descriptor set limit of 4 (this is the minimum required)
//that isn't enough for all our textures, so we need to make stuff up as required.
{
VkDescriptorSet set = shaderstate.rc.descriptorsets[0] = vk.khr_push_descriptor?VK_NULL_HANDLE:VKBE_TempDescriptorSet(p->desclayout);
VkWriteDescriptorSet descs[MAX_TMUS], *desc = descs;
VkDescriptorImageInfo imgs[MAX_TMUS], *img = imgs;
unsigned int i;
//why do I keep wanting to write 'desk'? its quite annoying.
//light / scene
BE_SetupUBODescriptor(set, descs, desc++, &shaderstate.ubo_entity);
BE_SetupUBODescriptor(set, descs, desc++, &shaderstate.ubo_light);
if (p->defaulttextures & (1u<<0))
BE_SetupTextureDescriptor(shaderstate.currentshadowmap, r_whiteimage, set, descs, desc++, img++);
if (p->defaulttextures & (1u<<1))
BE_SetupTextureDescriptor(shaderstate.curdlight?shaderstate.curdlight->cubetexture:r_nulltex, r_whiteimage, set, descs, desc++, img++);
//material
if (p->defaulttextures & (1u<<2))
BE_SetupTextureDescriptor(shaderstate.curtexnums->base, r_blackimage, set, descs, desc++, img++);
if (p->defaulttextures & (1u<<3))
BE_SetupTextureDescriptor(shaderstate.curtexnums->bump, missing_texture_normal, set, descs, desc++, img++);
if (p->defaulttextures & (1u<<4))
BE_SetupTextureDescriptor(shaderstate.curtexnums->specular, missing_texture_gloss, set, descs, desc++, img++);
if (p->defaulttextures & (1u<<5))
BE_SetupTextureDescriptor(shaderstate.curtexnums->upperoverlay, r_blackimage, set, descs, desc++, img++);
if (p->defaulttextures & (1u<<6))
BE_SetupTextureDescriptor(shaderstate.curtexnums->loweroverlay, r_blackimage, set, descs, desc++, img++);
if (p->defaulttextures & (1u<<7))
BE_SetupTextureDescriptor(shaderstate.curtexnums->fullbright, r_blackimage, set, descs, desc++, img++);
if (p->defaulttextures & (1u<<8))
BE_SetupTextureDescriptor(shaderstate.curtexnums->paletted, r_blackimage, set, descs, desc++, img++);
if (p->defaulttextures & (1u<<9))
BE_SetupTextureDescriptor(shaderstate.curtexnums->reflectcube, r_blackimage, set, descs, desc++, img++);
if (p->defaulttextures & (1u<<10))
BE_SetupTextureDescriptor(shaderstate.curtexnums->reflectmask, r_whiteimage, set, descs, desc++, img++);
if (p->defaulttextures & (1u<<11))
BE_SetupTextureDescriptor(shaderstate.curtexnums->displacement, r_whiteimage, set, descs, desc++, img++);
//batch
if (p->defaulttextures & (1u<<12))
{
unsigned int lmi = shaderstate.curbatch->lightmap[0];
BE_SetupTextureDescriptor((lmi<numlightmaps)?lightmap[lmi]->lightmap_texture:NULL, r_whiteimage, set, descs, desc++, img++);
}
if (p->defaulttextures & (1u<<13))
{
texid_t delux = NULL;
unsigned int lmi = shaderstate.curbatch->lightmap[0];
if (lmi<numlightmaps && lightmap[lmi]->hasdeluxe)
delux = lightmap[lmi+1]->lightmap_texture;
BE_SetupTextureDescriptor(delux, r_whiteimage, set, descs, desc++, img++);
}
#if MAXRLIGHTMAPS > 1
if (p->defaulttextures & ((1u<<14)|(1u<<15)|(1u<<16)))
{
int lmi = shaderstate.curbatch->lightmap[1];
BE_SetupTextureDescriptor((lmi<numlightmaps)?lightmap[lmi]->lightmap_texture:NULL, r_whiteimage, set, descs, desc++, img++);
lmi = shaderstate.curbatch->lightmap[2];
BE_SetupTextureDescriptor((lmi<numlightmaps)?lightmap[lmi]->lightmap_texture:NULL, r_whiteimage, set, descs, desc++, img++);
lmi = shaderstate.curbatch->lightmap[3];
BE_SetupTextureDescriptor((lmi<numlightmaps)?lightmap[lmi]->lightmap_texture:NULL, r_whiteimage, set, descs, desc++, img++);
}
if (p->defaulttextures & ((1u<<17)|(1u<<18)|(1u<<19)))
{
int lmi = shaderstate.curbatch->lightmap[1];
if (lmi<numlightmaps && lightmap[lmi]->hasdeluxe)
{
BE_SetupTextureDescriptor((lmi+1<numlightmaps)?lightmap[lmi+1]->lightmap_texture:NULL, r_whiteimage, set, descs, desc++, img++);
lmi = shaderstate.curbatch->lightmap[2];
BE_SetupTextureDescriptor((lmi+1<numlightmaps)?lightmap[lmi+1]->lightmap_texture:NULL, r_whiteimage, set, descs, desc++, img++);
lmi = shaderstate.curbatch->lightmap[3];
BE_SetupTextureDescriptor((lmi+1<numlightmaps)?lightmap[lmi+1]->lightmap_texture:NULL, r_whiteimage, set, descs, desc++, img++);
}
else
{
BE_SetupTextureDescriptor(NULL, r_whiteimage, set, descs, desc++, img++);
BE_SetupTextureDescriptor(NULL, r_whiteimage, set, descs, desc++, img++);
BE_SetupTextureDescriptor(NULL, r_whiteimage, set, descs, desc++, img++);
}
}
#endif
//shader / pass
for (i = 0; i < p->numsamplers; i++)
BE_SetupTextureDescriptor(SelectPassTexture(pass+i), r_blackimage, set, descs, desc++, img++);
if (!set)
vkCmdPushDescriptorSetKHR(vk.rendertarg->cbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, p->layout, 0, desc-descs, descs);
else
vkUpdateDescriptorSets(vk.device, desc-descs, descs, 0, NULL);
}
if (!vk.khr_push_descriptor)
vkCmdBindDescriptorSets(vk.rendertarg->cbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, p->layout, 0, countof(shaderstate.rc.descriptorsets), shaderstate.rc.descriptorsets, 0, NULL);
RQuantAdd(RQUANT_PRIMITIVEINDICIES, idxcount);
RQuantAdd(RQUANT_DRAWS, 1);
return true;
}
static void BE_DrawMeshChain_Internal(void)
{
shader_t *altshader;
unsigned int vertcount, idxcount, idxfirst;
mesh_t *m;
qboolean vblends; //software
// void *map;
// int i;
unsigned int mno;
unsigned int passno;
//extern cvar_t r_polygonoffset_submodel_factor;
// float pushdepth;
// float pushfactor;
//I wasn't going to do this... but gah.
VkBuffer vertexbuffers[VK_BUFF_MAX];
VkDeviceSize vertexoffsets[VK_BUFF_MAX];
altshader = shaderstate.curshader;
switch (shaderstate.mode)
{
case BEM_LIGHT:
altshader = shaderstate.shader_rtlight[shaderstate.curlmode];
break;
case BEM_DEPTHONLY:
altshader = shaderstate.curshader->bemoverrides[bemoverride_depthonly];
if (!altshader)
altshader = shaderstate.depthonly;
break;
case BEM_WIREFRAME:
altshader = R_RegisterShader("wireframe", SUF_NONE,
"{\n"
"{\n"
"map $whiteimage\n"
"}\n"
"}\n"
);
break;
default:
case BEM_STANDARD:
altshader = shaderstate.curshader;
break;
}
if (!altshader)
return;
if (shaderstate.forcebeflags & BEF_FORCENODEPTH)
{
RQuantAdd(RQUANT_2DBATCHES, 1);
}
else if (shaderstate.curentity == &r_worldentity)
{
RQuantAdd(RQUANT_WORLDBATCHES, 1);
}
else
{
RQuantAdd(RQUANT_ENTBATCHES, 1);
}
if (altshader->flags & SHADER_HASCURRENTRENDER)
T_Gen_CurrentRender(); //requires lots of pass-related work...
//if this flag is set, then we have to generate our own arrays. to avoid processing extra verticies this may require that we re-pack the verts
if (shaderstate.meshlist[0]->xyz2_array)// && !altshader->prog)
{
vblends = true;
shaderstate.batchvbo = NULL;
}
else
{
vblends = false;
if (altshader->flags & SHADER_NEEDSARRAYS)
shaderstate.batchvbo = NULL;
else if (shaderstate.curshader->numdeforms)
shaderstate.batchvbo = NULL;
}
/*index buffers are common to all passes*/
if (shaderstate.batchvbo)
{
/*however, we still want to try to avoid discontinuities, because that would otherwise be more draw calls. we can have gaps in verts though*/
if (shaderstate.nummeshes == 1)
{
m = shaderstate.meshlist[0];
vkCmdBindIndexBuffer(vk.rendertarg->cbuf, shaderstate.batchvbo->indicies.vk.buff, shaderstate.batchvbo->indicies.vk.offs, VK_INDEX_TYPE);
idxfirst = m->vbofirstelement;
vertcount = m->vbofirstvert + m->numvertexes;
idxcount = m->numindexes;
}
else if (0)//shaderstate.nummeshes == shaderstate.curbatch->maxmeshes)
{
idxfirst = 0;
vertcount = shaderstate.batchvbo->vertcount;
idxcount = shaderstate.batchvbo->indexcount;
vkCmdBindIndexBuffer(vk.rendertarg->cbuf, shaderstate.batchvbo->indicies.vk.buff, shaderstate.batchvbo->indicies.vk.offs, VK_INDEX_TYPE);
}
else
{
index_t *map;
VkBuffer buf;
unsigned int i;
VkDeviceSize offset;
vertcount = shaderstate.batchvbo->vertcount;
for (mno = 0, idxcount = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
idxcount += m->numindexes;
}
map = VKBE_AllocateBufferSpace(DB_EBO, idxcount * sizeof(*map), &buf, &offset);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
for (i = 0; i < m->numindexes; i++)
map[i] = m->indexes[i]+m->vbofirstvert;
map += m->numindexes;
}
vkCmdBindIndexBuffer(vk.rendertarg->cbuf, buf, offset, VK_INDEX_TYPE);
idxfirst = 0;
}
}
else
{ /*we're going to be using dynamic array stuff here, so generate an index array list that has no vertex gaps*/
index_t *map;
VkBuffer buf;
unsigned int i;
VkDeviceSize offset;
for (mno = 0, vertcount = 0, idxcount = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
vertcount += m->numvertexes;
idxcount += m->numindexes;
}
map = VKBE_AllocateBufferSpace(DB_EBO, idxcount * sizeof(*map), &buf, &offset);
for (mno = 0, vertcount = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
if (!vertcount)
memcpy(map, m->indexes, sizeof(index_t)*m->numindexes);
else
{
for (i = 0; i < m->numindexes; i++)
map[i] = m->indexes[i]+vertcount;
}
map += m->numindexes;
vertcount += m->numvertexes;
}
vkCmdBindIndexBuffer(vk.rendertarg->cbuf, buf, offset, VK_INDEX_TYPE);
idxfirst = 0;
}
/*vertex buffers are common to all passes*/
if (shaderstate.batchvbo && !vblends)
{
vertexbuffers[VK_BUFF_POS] = shaderstate.batchvbo->coord.vk.buff;
vertexoffsets[VK_BUFF_POS] = shaderstate.batchvbo->coord.vk.offs;
}
else
{
vecV_t *map;
const mesh_t *m;
unsigned int mno;
unsigned int i;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vecV_t), &vertexbuffers[VK_BUFF_POS], &vertexoffsets[VK_BUFF_POS]);
if (vblends)
{
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
const mesh_t *m = shaderstate.meshlist[mno];
vecV_t *ov = shaderstate.curshader->numdeforms?tmpbuf:map;
vecV_t *iv1 = m->xyz_array;
vecV_t *iv2 = m->xyz2_array;
float w1 = m->xyz_blendw[0];
float w2 = m->xyz_blendw[1];
for (i = 0; i < m->numvertexes; i++)
{
ov[i][0] = iv1[i][0]*w1 + iv2[i][0]*w2;
ov[i][1] = iv1[i][1]*w1 + iv2[i][1]*w2;
ov[i][2] = iv1[i][2]*w1 + iv2[i][2]*w2;
}
if (shaderstate.curshader->numdeforms)
{
for (i = 0; i < shaderstate.curshader->numdeforms-1; i++)
deformgen(&shaderstate.curshader->deforms[i], m->numvertexes, tmpbuf, tmpbuf, m);
deformgen(&shaderstate.curshader->deforms[i], m->numvertexes, tmpbuf, map, m);
}
map += m->numvertexes;
}
}
else if (shaderstate.curshader->numdeforms > 1)
{ //horrible code, because multiple deforms would otherwise READ from the gpu memory
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
deformgen(&shaderstate.curshader->deforms[0], m->numvertexes, m->xyz_array, tmpbuf, m);
for (i = 1; i < shaderstate.curshader->numdeforms-1; i++)
deformgen(&shaderstate.curshader->deforms[i], m->numvertexes, tmpbuf, tmpbuf, m);
deformgen(&shaderstate.curshader->deforms[i], m->numvertexes, tmpbuf, map, m);
map += m->numvertexes;
}
}
else
{
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
deformgen(&shaderstate.curshader->deforms[0], m->numvertexes, m->xyz_array, map, m);
map += m->numvertexes;
}
}
}
if (altshader->prog)
{
if (shaderstate.batchvbo)
{
vertexbuffers[VK_BUFF_COL] = shaderstate.batchvbo->colours[0].vk.buff;
vertexoffsets[VK_BUFF_COL] = shaderstate.batchvbo->colours[0].vk.offs;
vertexbuffers[VK_BUFF_TC] = shaderstate.batchvbo->texcoord.vk.buff;
vertexoffsets[VK_BUFF_TC] = shaderstate.batchvbo->texcoord.vk.offs;
vertexbuffers[VK_BUFF_LMTC]= shaderstate.batchvbo->lmcoord[0].vk.buff;
vertexoffsets[VK_BUFF_LMTC]= shaderstate.batchvbo->lmcoord[0].vk.offs;
vertexbuffers[VK_BUFF_NORM]= shaderstate.batchvbo->normals.vk.buff;
vertexoffsets[VK_BUFF_NORM]= shaderstate.batchvbo->normals.vk.offs;
vertexbuffers[VK_BUFF_SDIR]= shaderstate.batchvbo->svector.vk.buff;
vertexoffsets[VK_BUFF_SDIR]= shaderstate.batchvbo->svector.vk.offs;
vertexbuffers[VK_BUFF_TDIR]= shaderstate.batchvbo->tvector.vk.buff;
vertexoffsets[VK_BUFF_TDIR]= shaderstate.batchvbo->tvector.vk.offs;
if (!vertexbuffers[VK_BUFF_COL])
{
vertexbuffers[VK_BUFF_COL] = shaderstate.staticbuf;
vertexoffsets[VK_BUFF_COL] = 0;
}
if (!vertexbuffers[VK_BUFF_LMTC])
{
vertexbuffers[VK_BUFF_LMTC] = vertexbuffers[VK_BUFF_TC];
vertexoffsets[VK_BUFF_LMTC] = vertexoffsets[VK_BUFF_TC];
}
}
else
{
vec2_t *map;
vec2_t *lmmap;
const mesh_t *m;
unsigned int mno;
unsigned int i;
if (shaderstate.meshlist[0]->normals_array[0])
{
vec4_t *map;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec3_t), &vertexbuffers[VK_BUFF_NORM], &vertexoffsets[VK_BUFF_NORM]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
memcpy(map, m->normals_array[0], sizeof(vec3_t)*m->numvertexes);
map += m->numvertexes;
}
}
else
{
vertexbuffers[VK_BUFF_NORM] = shaderstate.staticbuf;
vertexoffsets[VK_BUFF_NORM] = sizeof(vec4_t)*65536;
}
if (shaderstate.meshlist[0]->snormals_array[0])
{
vec4_t *map;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec3_t), &vertexbuffers[VK_BUFF_SDIR], &vertexoffsets[VK_BUFF_SDIR]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
memcpy(map, m->snormals_array[0], sizeof(vec3_t)*m->numvertexes);
map += m->numvertexes;
}
}
else
{
vertexbuffers[VK_BUFF_SDIR] = shaderstate.staticbuf;
vertexoffsets[VK_BUFF_SDIR] = sizeof(vec4_t)*65536 + sizeof(vec3_t)*65536;
}
if (shaderstate.meshlist[0]->tnormals_array[0])
{
vec4_t *map;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec3_t), &vertexbuffers[VK_BUFF_TDIR], &vertexoffsets[VK_BUFF_TDIR]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
memcpy(map, m->tnormals_array[0], sizeof(vec3_t)*m->numvertexes);
map += m->numvertexes;
}
}
else
{
vertexbuffers[VK_BUFF_TDIR] = shaderstate.staticbuf;
vertexoffsets[VK_BUFF_TDIR] = sizeof(vec4_t)*65536 + sizeof(vec3_t)*65536 + sizeof(vec3_t)*65536;
}
if (shaderstate.meshlist[0]->colors4f_array[0])
{
vec4_t *map;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec4_t), &vertexbuffers[VK_BUFF_COL], &vertexoffsets[VK_BUFF_COL]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
memcpy(map, m->colors4f_array[0], sizeof(vec4_t)*m->numvertexes);
map += m->numvertexes;
}
}
else if (shaderstate.meshlist[0]->colors4b_array)
{
vec4_t *map;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec4_t), &vertexbuffers[VK_BUFF_COL], &vertexoffsets[VK_BUFF_COL]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
for (i = 0; i < m->numvertexes; i++)
{
Vector4Scale(m->colors4b_array[i], (1/255.0), map[i]);
}
map += m->numvertexes;
}
}
else
{ //FIXME: use some predefined buffer
vec4_t *map;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec4_t), &vertexbuffers[VK_BUFF_COL], &vertexoffsets[VK_BUFF_COL]);
for (i = 0; i < vertcount; i++)
{
Vector4Set(map[i], 1, 1, 1, 1);
}
}
if (shaderstate.meshlist[0]->lmst_array[0])
{
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_TC], &vertexoffsets[VK_BUFF_TC]);
lmmap = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_LMTC], &vertexoffsets[VK_BUFF_LMTC]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
memcpy(map, m->st_array, sizeof(vec2_t)*m->numvertexes);
memcpy(lmmap, m->lmst_array[0], sizeof(vec2_t)*m->numvertexes);
map += m->numvertexes;
lmmap += m->numvertexes;
}
}
else
{
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_TC], &vertexoffsets[VK_BUFF_TC]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
memcpy(map, m->st_array, sizeof(*m->st_array)*m->numvertexes);
map += m->numvertexes;
}
vertexbuffers[VK_BUFF_LMTC] = vertexbuffers[VK_BUFF_TC];
vertexoffsets[VK_BUFF_LMTC] = vertexoffsets[VK_BUFF_TC];
}
}
vkCmdBindVertexBuffers(vk.rendertarg->cbuf, 0, VK_BUFF_MAX, vertexbuffers, vertexoffsets);
if (BE_SetupMeshProgram(altshader->prog, altshader->passes, altshader->flags, idxcount))
{
// vkCmdPushConstants(vk.rendertarg->cbuf, altshader->prog->layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(shaderstate.curtexnums->factors), shaderstate.curtexnums->factors);
vkCmdDrawIndexed(vk.rendertarg->cbuf, idxcount, 1, idxfirst, 0, 0);
}
}
else if (1)
{
shaderpass_t *p;
//Vulkan has no fixed function pipeline. we emulate it if we were given no spir-v to run.
for (passno = 0; passno < altshader->numpasses; passno += p->numMergedPasses)
{
p = &altshader->passes[passno];
if (p->texgen == T_GEN_UPPEROVERLAY && !TEXLOADED(shaderstate.curtexnums->upperoverlay))
continue;
if (p->texgen == T_GEN_LOWEROVERLAY && !TEXLOADED(shaderstate.curtexnums->loweroverlay))
continue;
if (p->texgen == T_GEN_FULLBRIGHT && !TEXLOADED(shaderstate.curtexnums->fullbright))
continue;
if (p->prog)
{
if (shaderstate.batchvbo)
{
vertexbuffers[VK_BUFF_TC] = shaderstate.batchvbo->texcoord.vk.buff;
vertexoffsets[VK_BUFF_TC] = shaderstate.batchvbo->texcoord.vk.offs;
vertexbuffers[VK_BUFF_LMTC] = shaderstate.batchvbo->lmcoord[0].vk.buff;
vertexoffsets[VK_BUFF_LMTC] = shaderstate.batchvbo->lmcoord[0].vk.offs;
}
else
{
float *map;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_TC], &vertexoffsets[VK_BUFF_TC]);
BE_GenerateTCMods(p, map);
vertexbuffers[VK_BUFF_LMTC] = vertexbuffers[VK_BUFF_TC];
vertexoffsets[VK_BUFF_LMTC] = vertexoffsets[VK_BUFF_TC];
}
BE_GenerateColourMods(vertcount, p, &vertexbuffers[VK_BUFF_COL], &vertexoffsets[VK_BUFF_COL]);
vertexbuffers[VK_BUFF_NORM] = shaderstate.staticbuf;
vertexoffsets[VK_BUFF_NORM] = sizeof(vec4_t)*65536;
vertexbuffers[VK_BUFF_SDIR] = shaderstate.staticbuf;
vertexoffsets[VK_BUFF_SDIR] = vertexoffsets[VK_BUFF_NORM] + sizeof(vec3_t)*65536;
vertexbuffers[VK_BUFF_TDIR] = shaderstate.staticbuf;
vertexoffsets[VK_BUFF_TDIR] = vertexoffsets[VK_BUFF_SDIR] + sizeof(vec3_t)*65536;
vkCmdBindVertexBuffers(vk.rendertarg->cbuf, 0, VK_BUFF_MAX, vertexbuffers, vertexoffsets);
if (BE_SetupMeshProgram(p->prog, p, altshader->flags, idxcount))
vkCmdDrawIndexed(vk.rendertarg->cbuf, idxcount, 1, idxfirst, 0, 0);
continue;
}
if (shaderstate.batchvbo)
{ //texcoords are all compatible with static arrays, supposedly
if (p->tcgen == TC_GEN_LIGHTMAP)
{
vertexbuffers[VK_BUFF_TC] = shaderstate.batchvbo->lmcoord[0].vk.buff;
vertexoffsets[VK_BUFF_TC] = shaderstate.batchvbo->lmcoord[0].vk.offs;
}
else if (p->tcgen == TC_GEN_BASE)
{
vertexbuffers[VK_BUFF_TC] = shaderstate.batchvbo->texcoord.vk.buff;
vertexoffsets[VK_BUFF_TC] = shaderstate.batchvbo->texcoord.vk.offs;
}
else
Sys_Error("tcgen %u not supported\n", p->tcgen);
}
else
{
float *map;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_TC], &vertexoffsets[VK_BUFF_TC]);
BE_GenerateTCMods(p, map);
}
vertexbuffers[VK_BUFF_LMTC] = vertexbuffers[VK_BUFF_TC];
vertexoffsets[VK_BUFF_LMTC] = vertexoffsets[VK_BUFF_TC];
vertexbuffers[VK_BUFF_NORM] = shaderstate.staticbuf;
vertexoffsets[VK_BUFF_NORM] = sizeof(vec4_t)*65536;
vertexbuffers[VK_BUFF_SDIR] = shaderstate.staticbuf;
vertexoffsets[VK_BUFF_SDIR] = vertexoffsets[VK_BUFF_NORM] + sizeof(vec3_t)*65536;
vertexbuffers[VK_BUFF_TDIR] = shaderstate.staticbuf;
vertexoffsets[VK_BUFF_TDIR] = vertexoffsets[VK_BUFF_SDIR] + sizeof(vec3_t)*65536;
if (p->flags & SHADER_PASS_NOCOLORARRAY)
{
avec4_t passcolour;
static avec4_t fakesource = {1,1,1,1};
m = shaderstate.meshlist[0];
colourgen(p, 1, NULL, &fakesource, &passcolour, m);
alphagen(p, 1, NULL, &fakesource, &passcolour, m);
//make sure nothing bugs out... this should be pure white.
vertexbuffers[VK_BUFF_COL] = shaderstate.staticbuf;
vertexoffsets[VK_BUFF_COL] = 0;
vkCmdBindVertexBuffers(vk.rendertarg->cbuf, 0, VK_BUFF_MAX, vertexbuffers, vertexoffsets);
if (BE_SetupMeshProgram(shaderstate.programfixedemu[1], p, altshader->flags, idxcount))
{
vkCmdPushConstants(vk.rendertarg->cbuf, shaderstate.programfixedemu[1]->layout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(passcolour), passcolour);
vkCmdDrawIndexed(vk.rendertarg->cbuf, idxcount, 1, idxfirst, 0, 0);
}
}
else
{
BE_GenerateColourMods(vertcount, p, &vertexbuffers[VK_BUFF_COL], &vertexoffsets[VK_BUFF_COL]);
vkCmdBindVertexBuffers(vk.rendertarg->cbuf, 0, VK_BUFF_MAX, vertexbuffers, vertexoffsets);
if (BE_SetupMeshProgram(shaderstate.programfixedemu[0], p, altshader->flags, idxcount))
vkCmdDrawIndexed(vk.rendertarg->cbuf, idxcount, 1, idxfirst, 0, 0);
}
}
}
}
void VKBE_SelectMode(backendmode_t mode)
{
shaderstate.mode = mode;
shaderstate.modepermutation = 0;
switch(mode)
{
default:
break;
case BEM_DEPTHONLY:
shaderstate.modepermutation |= PERMUTATION_BEM_DEPTHONLY;
break;
case BEM_WIREFRAME:
shaderstate.modepermutation |= PERMUTATION_BEM_WIREFRAME;
break;
case BEM_LIGHT:
//fixme: is this actually needed, or just a waste of time?
VKBE_SelectEntity(&r_worldentity);
break;
}
}
qboolean VKBE_GenerateRTLightShader(unsigned int lmode)
{
if (!shaderstate.shader_rtlight[lmode])
{
shaderstate.shader_rtlight[lmode] = R_RegisterShader(va("rtlight%s%s%s",
(lmode & LSHADER_SMAP)?"#PCF=1":"#PCF=0",
(lmode & LSHADER_SPOT)?"#SPOT=1":"#SPOT=0",
(lmode & LSHADER_CUBE)?"#CUBE=1":"#CUBE=0")
, SUF_NONE, LIGHTPASS_SHADER);
}
if (shaderstate.shader_rtlight[lmode]->flags & SHADER_NODRAW)
return false;
return true;
}
qboolean VKBE_SelectDLight(dlight_t *dl, vec3_t colour, vec3_t axis[3], unsigned int lmode)
{
if (dl && TEXLOADED(dl->cubetexture))
lmode |= LSHADER_CUBE;
if (!VKBE_GenerateRTLightShader(lmode))
{
lmode &= ~(LSHADER_SMAP|LSHADER_CUBE);
if (!VKBE_GenerateRTLightShader(lmode))
{
VKBE_SetupLightCBuffer(NULL, colour);
return false;
}
}
shaderstate.curdlight = dl;
shaderstate.curlmode = lmode;
VKBE_SetupLightCBuffer(dl, colour);
return true;
}
void VKBE_SelectEntity(entity_t *ent)
{
BE_RotateForEntity(ent, ent->model);
}
//fixme: create allocations within larger ring buffers, use separate staging.
void *VKBE_CreateStagingBuffer(struct stagingbuf *n, size_t size, VkBufferUsageFlags usage)
{
void *ptr;
VkBufferCreateInfo bufinf = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
VkMemoryRequirements mem_reqs;
VkMemoryAllocateInfo memAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
memset(&n->mem, 0, sizeof(n->mem));
n->retbuf = VK_NULL_HANDLE;
n->usage = usage | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
bufinf.flags = 0;
bufinf.size = n->size = size;
bufinf.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
bufinf.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
bufinf.queueFamilyIndexCount = 0;
bufinf.pQueueFamilyIndices = NULL;
vkCreateBuffer(vk.device, &bufinf, vkallocationcb, &n->buf);
vkGetBufferMemoryRequirements(vk.device, n->buf, &mem_reqs);
memAllocInfo.allocationSize = mem_reqs.size;
memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
if (memAllocInfo.memoryTypeIndex == ~0)
Sys_Error("Unable to allocate buffer memory");
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &n->mem.memory));
VkAssert(vkBindBufferMemory(vk.device, n->buf, n->mem.memory, n->mem.offset));
VkAssert(vkMapMemory(vk.device, n->mem.memory, 0, n->size, 0, &ptr));
return ptr;
}
struct fencedbufferwork
{
struct vk_fencework fw;
VkBuffer buf;
vk_poolmem_t mem;
};
static void VKBE_DoneBufferStaging(void *staging)
{
struct fencedbufferwork *n = staging;
vkDestroyBuffer(vk.device, n->buf, vkallocationcb);
VK_ReleasePoolMemory(&n->mem);
}
VkBuffer VKBE_FinishStaging(struct stagingbuf *n, vk_poolmem_t *mem)
{
struct fencedbufferwork *fence;
VkBuffer retbuf;
//caller filled the staging buffer, and now wants to copy stuff to the gpu.
vkUnmapMemory(vk.device, n->mem.memory);
//create the hardware buffer
if (n->retbuf)
retbuf = n->retbuf;
else
{
VkBufferCreateInfo bufinf = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bufinf.flags = 0;
bufinf.size = n->size;
bufinf.usage = n->usage;
bufinf.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
bufinf.queueFamilyIndexCount = 0;
bufinf.pQueueFamilyIndices = NULL;
vkCreateBuffer(vk.device, &bufinf, vkallocationcb, &retbuf);
}
//sort out its memory
{
VkMemoryRequirements mem_reqs;
vkGetBufferMemoryRequirements(vk.device, retbuf, &mem_reqs);
if (!VK_AllocatePoolMemory(vk_find_memory_require(mem_reqs.memoryTypeBits, 0), mem_reqs.size, mem_reqs.alignment, mem))
{
VkMemoryAllocateInfo memAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
VkMemoryDedicatedAllocateInfoKHR khr_mdai = {VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR};
//shouldn't really happen, but just in case...
mem_reqs.size = max(1,mem_reqs.size);
memAllocInfo.allocationSize = mem_reqs.size;
memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, 0);
if (vk.khr_dedicated_allocation)
{
khr_mdai.buffer = retbuf;
khr_mdai.pNext = memAllocInfo.pNext;
memAllocInfo.pNext = &khr_mdai;
}
mem->pool = NULL;
mem->offset = 0;
mem->size = mem_reqs.size;
mem->memory = VK_NULL_HANDLE;
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &mem->memory));
}
VkAssert(vkBindBufferMemory(vk.device, retbuf, mem->memory, mem->offset));
}
fence = VK_FencedBegin(VKBE_DoneBufferStaging, sizeof(*fence));
fence->buf = n->buf;
fence->mem = n->mem;
//FIXME: barrier?
//add the copy command
{
VkBufferCopy bcr = {0};
bcr.srcOffset = 0;
bcr.dstOffset = 0;
bcr.size = n->size;
vkCmdCopyBuffer(fence->fw.cbuf, n->buf, retbuf, 1, &bcr);
}
//FIXME: barrier?
VK_FencedSubmit(fence);
return retbuf;
}
void VKBE_GenBatchVBOs(vbo_t **vbochain, batch_t *firstbatch, batch_t *stopbatch)
{
int maxvboelements;
int maxvboverts;
int vert = 0, idx = 0;
batch_t *batch;
vbo_t *vbo;
int i, j;
mesh_t *m;
index_t *vboedata;
qbyte *vbovdatastart, *vbovdata;
struct stagingbuf vbuf, ebuf;
vk_poolmem_t *poolmem;
vbo = Z_Malloc(sizeof(*vbo));
maxvboverts = 0;
maxvboelements = 0;
for(batch = firstbatch; batch != stopbatch; batch = batch->next)
{
for (i=0 ; i<batch->maxmeshes ; i++)
{
m = batch->mesh[i];
maxvboelements += m->numindexes;
maxvboverts += m->numvertexes;
}
}
if (!maxvboverts || !maxvboelements)
return;
//determine array offsets.
vbovdatastart = vbovdata = NULL;
vbo->coord.vk.offs = vbovdata-vbovdatastart; vbovdata += sizeof(vecV_t)*maxvboverts;
vbo->texcoord.vk.offs = vbovdata-vbovdatastart; vbovdata += sizeof(vec2_t)*maxvboverts;
vbo->lmcoord[0].vk.offs = vbovdata-vbovdatastart; vbovdata += sizeof(vec2_t)*maxvboverts;
vbo->normals.vk.offs = vbovdata-vbovdatastart; vbovdata += sizeof(vec3_t)*maxvboverts;
vbo->svector.vk.offs = vbovdata-vbovdatastart; vbovdata += sizeof(vec3_t)*maxvboverts;
vbo->tvector.vk.offs = vbovdata-vbovdatastart; vbovdata += sizeof(vec3_t)*maxvboverts;
vbo->colours[0].vk.offs = vbovdata-vbovdatastart; vbovdata += sizeof(vec4_t)*maxvboverts;
vbovdatastart = vbovdata = VKBE_CreateStagingBuffer(&vbuf, vbovdata-vbovdatastart, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
vboedata = VKBE_CreateStagingBuffer(&ebuf, sizeof(*vboedata) * maxvboelements, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
vbo->indicies.vk.offs = 0;
for(batch = firstbatch; batch != stopbatch; batch = batch->next)
{
batch->vbo = vbo;
for (j=0 ; j<batch->maxmeshes ; j++)
{
m = batch->mesh[j];
m->vbofirstvert = vert;
if (m->xyz_array)
memcpy(vbovdata + vbo->coord.vk.offs + vert*sizeof(vecV_t), m->xyz_array, sizeof(vecV_t)*m->numvertexes);
if (m->st_array)
memcpy(vbovdata + vbo->texcoord.vk.offs + vert*sizeof(vec2_t), m->st_array, sizeof(vec2_t)*m->numvertexes);
if (m->lmst_array[0])
memcpy(vbovdata + vbo->lmcoord[0].vk.offs + vert*sizeof(vec2_t), m->lmst_array[0], sizeof(vec2_t)*m->numvertexes);
if (m->normals_array)
memcpy(vbovdata + vbo->normals.vk.offs + vert*sizeof(vec3_t), m->normals_array, sizeof(vec3_t)*m->numvertexes);
if (m->snormals_array)
memcpy(vbovdata + vbo->svector.vk.offs + vert*sizeof(vec3_t), m->snormals_array, sizeof(vec3_t)*m->numvertexes);
if (m->tnormals_array)
memcpy(vbovdata + vbo->tvector.vk.offs + vert*sizeof(vec3_t), m->tnormals_array, sizeof(vec3_t)*m->numvertexes);
if (m->colors4f_array[0])
memcpy(vbovdata + vbo->colours[0].vk.offs + vert*sizeof(vec4_t), m->colors4f_array[0],sizeof(vec4_t)*m->numvertexes);
m->vbofirstelement = idx;
for (i = 0; i < m->numindexes; i++)
{
*vboedata++ = vert + m->indexes[i];
}
idx += m->numindexes;
vert += m->numvertexes;
}
}
vbo->vbomem = poolmem = Z_Malloc(sizeof(*poolmem));
vbo->coord.vk.buff =
vbo->texcoord.vk.buff =
vbo->lmcoord[0].vk.buff =
vbo->normals.vk.buff =
vbo->svector.vk.buff =
vbo->tvector.vk.buff =
vbo->colours[0].vk.buff = VKBE_FinishStaging(&vbuf, poolmem);
vbo->ebomem = poolmem = Z_Malloc(sizeof(*poolmem));
vbo->indicies.vk.buff = VKBE_FinishStaging(&ebuf, poolmem);
vbo->indicies.vk.offs = 0;
vbo->indexcount = maxvboelements;
vbo->vertcount = maxvboverts;
vbo->next = *vbochain;
*vbochain = vbo;
}
void VKBE_GenBrushModelVBO(model_t *mod)
{
unsigned int vcount, cvcount;
batch_t *batch, *fbatch;
int sortid;
int i;
fbatch = NULL;
vcount = 0;
for (sortid = 0; sortid < SHADER_SORT_COUNT; sortid++)
{
if (!mod->batches[sortid])
continue;
for (fbatch = batch = mod->batches[sortid]; batch != NULL; batch = batch->next)
{
for (i = 0, cvcount = 0; i < batch->maxmeshes; i++)
cvcount += batch->mesh[i]->numvertexes;
if (vcount + cvcount > MAX_INDICIES)
{
VKBE_GenBatchVBOs(&mod->vbos, fbatch, batch);
fbatch = batch;
vcount = 0;
}
vcount += cvcount;
}
VKBE_GenBatchVBOs(&mod->vbos, fbatch, batch);
}
}
struct vkbe_clearvbo
{
struct vk_frameend fe;
vbo_t *vbo;
};
static void VKBE_SafeClearVBO(void *vboptr)
{
vbo_t *vbo = *(vbo_t**)vboptr;
if (vbo->indicies.vk.buff)
{
vkDestroyBuffer(vk.device, vbo->indicies.vk.buff, vkallocationcb);
VK_ReleasePoolMemory(vbo->ebomem);
BZ_Free(vbo->ebomem);
}
if (vbo->coord.vk.buff)
{
vkDestroyBuffer(vk.device, vbo->coord.vk.buff, vkallocationcb);
VK_ReleasePoolMemory(vbo->vbomem);
BZ_Free(vbo->vbomem);
}
BZ_Free(vbo);
}
/*Wipes a vbo*/
void VKBE_ClearVBO(vbo_t *vbo, qboolean dataonly)
{
if (dataonly)
{
//create one for the safe callback to clear.
vbo_t *nvbo = BZ_Malloc(sizeof(*vbo));
nvbo->indicies = vbo->indicies;
nvbo->coord = vbo->coord;
//scrub it now
memset(&vbo->indicies, 0, sizeof(vbo->indicies));
memset(&vbo->coord, 0, sizeof(vbo->coord));
vbo = nvbo;
}
VK_AtFrameEnd(VKBE_SafeClearVBO, &vbo, sizeof(vbo));
}
void VK_UploadLightmap(lightmapinfo_t *lm)
{
extern cvar_t r_lightmap_nearest;
struct pendingtextureinfo mips;
image_t *tex;
lm->modified = false;
if (!TEXVALID(lm->lightmap_texture))
{
lm->lightmap_texture = Image_CreateTexture("***lightmap***", NULL, (r_lightmap_nearest.ival?IF_NEAREST:IF_LINEAR));
if (!lm->lightmap_texture)
return;
}
tex = lm->lightmap_texture;
if (0)//vk.frame && tex->vkimage)
{ //the inline streaming path.
//the double-copy sucks but at least ensures that the dma copies stuff from THIS frame and not some of the next one too.
int *data;
VkBufferImageCopy bic;
VkBuffer buf;
//size_t x = 0, w = lm->width;
size_t x = lm->rectchange.l, w = lm->rectchange.r - lm->rectchange.l;
size_t y = lm->rectchange.t, h = lm->rectchange.b - lm->rectchange.t, i;
data = VKBE_AllocateBufferSpace(DB_STAGING, w * h * 4, &buf, &bic.bufferOffset);
bic.bufferRowLength = w;
bic.bufferImageHeight = h;
bic.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
bic.imageSubresource.mipLevel = 0;
bic.imageSubresource.baseArrayLayer = 0;
bic.imageSubresource.layerCount = 1;
bic.imageOffset.x = x;
bic.imageOffset.y = y;
bic.imageOffset.z = 0;
bic.imageExtent.width = w;
bic.imageExtent.height = h;
bic.imageExtent.depth = 1;
if (w == lm->width) //can just copy the lot in a single call.
memcpy(data, lm->lightmaps + 4*(y * lm->width), w*h*4);
else
{ //there's unused data on each row, oh well.
for (i = 0; i < h; i++)
memcpy(data + i * w, lm->lightmaps + 4*((y+i) * lm->width + x), w*4);
}
vkCmdCopyBufferToImage(vk.rendertarg->cbuf, buf, tex->vkimage->image, tex->vkimage->layout, 1, &bic);
}
else
{ //the slow out-of-frame generic path.
mips.extrafree = NULL;
mips.type = PTI_2D;
mips.mip[0].data = lm->lightmaps;
mips.mip[0].needfree = false;
mips.mip[0].width = lm->width;
mips.mip[0].height = lm->height;
mips.mip[0].depth = 1;
switch(lm->fmt)
{
default:
case PTI_A2BGR10:
case PTI_E5BGR9:
case PTI_RGBA16F:
case PTI_RGBA32F:
case PTI_L8:
mips.encoding = lm->fmt;
break;
case PTI_BGRA8:
mips.encoding = PTI_BGRX8;
break;
case TF_BGR24: //shouldn't happen
mips.encoding = PTI_R8;
break;
}
mips.mipcount = 1;
VK_LoadTextureMips(tex, &mips);
tex->status = TEX_LOADED;
tex->width = lm->width;
tex->height = lm->height;
}
//invert the size so we're not always updating the entire thing.
lm->rectchange.l = lm->width;
lm->rectchange.t = lm->height;
lm->rectchange.r = 0;
lm->rectchange.b = 0;
lm->modified = false;
}
/*upload all lightmaps at the start to reduce lags*/
static void BE_UploadLightmaps(qboolean force)
{
int i;
for (i = 0; i < numlightmaps; i++)
{
if (!lightmap[i])
continue;
if (force && !lightmap[i]->external)
{
lightmap[i]->rectchange.l = 0;
lightmap[i]->rectchange.t = 0;
lightmap[i]->rectchange.r = lightmap[i]->width;
lightmap[i]->rectchange.b = lightmap[i]->height;
lightmap[i]->modified = true;
}
if (lightmap[i]->modified)
{
VK_UploadLightmap(lightmap[i]);
}
}
}
void VKBE_UploadAllLightmaps(void)
{
BE_UploadLightmaps(true);
}
qboolean VKBE_LightCullModel(vec3_t org, model_t *model)
{
#ifdef RTLIGHTS
if ((shaderstate.mode == BEM_LIGHT || shaderstate.mode == BEM_STENCIL || shaderstate.mode == BEM_DEPTHONLY))
{
float dist;
vec3_t disp;
if (model->type == mod_alias)
{
VectorSubtract(org, shaderstate.lightinfo, disp);
dist = DotProduct(disp, disp);
if (dist > model->radius*model->radius + shaderstate.lightinfo[3]*shaderstate.lightinfo[3])
return true;
}
else
{
int i;
for (i = 0; i < 3; i++)
{
if (shaderstate.lightinfo[i]-shaderstate.lightinfo[3] > org[i] + model->maxs[i])
return true;
if (shaderstate.lightinfo[i]+shaderstate.lightinfo[3] < org[i] + model->mins[i])
return true;
}
}
}
#endif
return false;
}
batch_t *VKBE_GetTempBatch(void)
{
if (shaderstate.wbatch >= shaderstate.maxwbatches)
{
shaderstate.wbatch++;
return NULL;
}
return &shaderstate.wbatches[shaderstate.wbatch++];
}
void VKBE_SetupLightCBuffer(dlight_t *l, vec3_t colour)
{
#ifdef RTLIGHTS
extern cvar_t gl_specular;
#endif
vkcbuf_light_t *cbl = VKBE_AllocateBufferSpace(DB_UBO, (sizeof(*cbl) + 0x0ff) & ~0xff, &shaderstate.ubo_light.buffer, &shaderstate.ubo_light.offset);
shaderstate.ubo_light.range = sizeof(*cbl);
if (!l)
{
memset(cbl, 0, sizeof(*cbl));
Vector4Set(shaderstate.lightinfo, 0, 0, 0, 0);
return;
}
cbl->l_lightradius = l->radius;
#ifdef RTLIGHTS
if (shaderstate.curlmode & LSHADER_SPOT)
{
float view[16];
float proj[16];
extern cvar_t r_shadow_shadowmapping_nearclip;
Matrix4x4_CM_Projection_Far(proj, l->fov, l->fov, l->nearclip?l->nearclip:r_shadow_shadowmapping_nearclip.value, l->radius, false);
Matrix4x4_CM_ModelViewMatrixFromAxis(view, l->axis[0], l->axis[1], l->axis[2], l->origin);
Matrix4_Multiply(proj, view, cbl->l_cubematrix);
}
else
#endif
Matrix4x4_CM_LightMatrixFromAxis(cbl->l_cubematrix, l->axis[0], l->axis[1], l->axis[2], l->origin);
VectorCopy(l->origin, cbl->l_lightposition);
cbl->padl1 = 0;
VectorCopy(colour, cbl->l_colour);
#ifdef RTLIGHTS
VectorCopy(l->lightcolourscales, cbl->l_lightcolourscale);
cbl->l_lightcolourscale[0] = l->lightcolourscales[0];
cbl->l_lightcolourscale[1] = l->lightcolourscales[1];
cbl->l_lightcolourscale[2] = l->lightcolourscales[2] * gl_specular.value;
#endif
cbl->l_lightradius = l->radius;
Vector4Copy(shaderstate.lightshadowmapproj, cbl->l_shadowmapproj);
Vector2Copy(shaderstate.lightshadowmapscale, cbl->l_shadowmapscale);
VectorCopy(l->origin, shaderstate.lightinfo);
shaderstate.lightinfo[3] = l->radius;
}
//also updates the entity constant buffer
static void BE_RotateForEntity (const entity_t *e, const model_t *mod)
{
int i;
float modelmatrix[16];
float *m = modelmatrix;
float *proj;
vkcbuf_entity_t *cbe = VKBE_AllocateBufferSpace(DB_UBO, (sizeof(*cbe) + 0x0ff) & ~0xff, &shaderstate.ubo_entity.buffer, &shaderstate.ubo_entity.offset);
shaderstate.ubo_entity.range = sizeof(*cbe);
shaderstate.curentity = e;
if (e->flags & RF_DEPTHHACK)
proj = r_refdef.m_projection_view;
else
proj = r_refdef.m_projection_std;
if ((e->flags & RF_WEAPONMODEL) && r_refdef.playerview->viewentity > 0)
{
float em[16];
float vm[16];
if (e->flags & RF_WEAPONMODELNOBOB)
{
vm[0] = vpn[0];
vm[1] = vpn[1];
vm[2] = vpn[2];
vm[3] = 0;
vm[4] = -vright[0];
vm[5] = -vright[1];
vm[6] = -vright[2];
vm[7] = 0;
vm[8] = vup[0];
vm[9] = vup[1];
vm[10] = vup[2];
vm[11] = 0;
vm[12] = r_refdef.vieworg[0];
vm[13] = r_refdef.vieworg[1];
vm[14] = r_refdef.vieworg[2];
vm[15] = 1;
}
else
{
vm[0] = r_refdef.playerview->vw_axis[0][0];
vm[1] = r_refdef.playerview->vw_axis[0][1];
vm[2] = r_refdef.playerview->vw_axis[0][2];
vm[3] = 0;
vm[4] = r_refdef.playerview->vw_axis[1][0];
vm[5] = r_refdef.playerview->vw_axis[1][1];
vm[6] = r_refdef.playerview->vw_axis[1][2];
vm[7] = 0;
vm[8] = r_refdef.playerview->vw_axis[2][0];
vm[9] = r_refdef.playerview->vw_axis[2][1];
vm[10] = r_refdef.playerview->vw_axis[2][2];
vm[11] = 0;
vm[12] = r_refdef.playerview->vw_origin[0];
vm[13] = r_refdef.playerview->vw_origin[1];
vm[14] = r_refdef.playerview->vw_origin[2];
vm[15] = 1;
}
em[0] = e->axis[0][0];
em[1] = e->axis[0][1];
em[2] = e->axis[0][2];
em[3] = 0;
em[4] = e->axis[1][0];
em[5] = e->axis[1][1];
em[6] = e->axis[1][2];
em[7] = 0;
em[8] = e->axis[2][0];
em[9] = e->axis[2][1];
em[10] = e->axis[2][2];
em[11] = 0;
em[12] = e->origin[0];
em[13] = e->origin[1];
em[14] = e->origin[2];
em[15] = 1;
Matrix4_Multiply(vm, em, m);
}
else
{
m[0] = e->axis[0][0];
m[1] = e->axis[0][1];
m[2] = e->axis[0][2];
m[3] = 0;
m[4] = e->axis[1][0];
m[5] = e->axis[1][1];
m[6] = e->axis[1][2];
m[7] = 0;
m[8] = e->axis[2][0];
m[9] = e->axis[2][1];
m[10] = e->axis[2][2];
m[11] = 0;
m[12] = e->origin[0];
m[13] = e->origin[1];
m[14] = e->origin[2];
m[15] = 1;
}
if (e->scale != 1 && e->scale != 0) //hexen 2 stuff
{
#ifdef HEXEN2
float z;
float escale;
escale = e->scale;
switch(e->drawflags&SCALE_TYPE_MASK)
{
default:
case SCALE_TYPE_UNIFORM:
VectorScale((m+0), escale, (m+0));
VectorScale((m+4), escale, (m+4));
VectorScale((m+8), escale, (m+8));
break;
case SCALE_TYPE_XYONLY:
VectorScale((m+0), escale, (m+0));
VectorScale((m+4), escale, (m+4));
break;
case SCALE_TYPE_ZONLY:
VectorScale((m+8), escale, (m+8));
break;
}
if (mod && (e->drawflags&SCALE_TYPE_MASK) != SCALE_TYPE_XYONLY)
{
switch(e->drawflags&SCALE_ORIGIN_MASK)
{
case SCALE_ORIGIN_CENTER:
z = ((mod->maxs[2] + mod->mins[2]) * (1-escale))/2;
VectorMA((m+12), z, e->axis[2], (m+12));
break;
case SCALE_ORIGIN_BOTTOM:
VectorMA((m+12), mod->mins[2]*(1-escale), e->axis[2], (m+12));
break;
case SCALE_ORIGIN_TOP:
VectorMA((m+12), -mod->maxs[2], e->axis[2], (m+12));
break;
}
}
#else
VectorScale((m+0), e->scale, (m+0));
VectorScale((m+4), e->scale, (m+4));
VectorScale((m+8), e->scale, (m+8));
#endif
}
else if (mod && !strcmp(mod->name, "progs/eyes.mdl"))
{
/*resize eyes, to make them easier to see*/
m[14] -= (22 + 8);
VectorScale((m+0), 2, (m+0));
VectorScale((m+4), 2, (m+4));
VectorScale((m+8), 2, (m+8));
}
if (mod && !ruleset_allow_larger_models.ival && mod->clampscale != 1)
{ //possibly this should be on a per-frame basis, but that's a real pain to do
Con_DPrintf("Rescaling %s by %f\n", mod->name, mod->clampscale);
VectorScale((m+0), mod->clampscale, (m+0));
VectorScale((m+4), mod->clampscale, (m+4));
VectorScale((m+8), mod->clampscale, (m+8));
}
{
float modelview[16];
Matrix4_Multiply(r_refdef.m_view, m, modelview);
Matrix4_Multiply(proj, modelview, cbe->m_modelviewproj);
}
memcpy(cbe->m_model, m, sizeof(cbe->m_model));
Matrix4_Invert(modelmatrix, cbe->m_modelinv);
Matrix4x4_CM_Transform3(cbe->m_modelinv, r_origin, cbe->e_eyepos);
cbe->e_time = shaderstate.curtime = r_refdef.time - shaderstate.curentity->shaderTime;
VectorCopy(e->light_avg, cbe->e_light_ambient); cbe->pad1 = 0;
VectorCopy(e->light_dir, cbe->e_light_dir); cbe->pad2 = 0;
VectorCopy(e->light_range, cbe->e_light_mul); cbe->pad3 = 0;
for (i = 0; i < MAXRLIGHTMAPS ; i++)
{
//FIXME: this is fucked, the batch isn't known yet.
#if 0
extern cvar_t gl_overbright;
unsigned char s = shaderstate.curbatch?shaderstate.curbatch->lmlightstyle[i]:0;
float sc;
if (s == 255)
{
if (i == 0)
{
if (shaderstate.curentity->model && shaderstate.curentity->model->engineflags & MDLF_NEEDOVERBRIGHT)
sc = (1<<bound(0, gl_overbright.ival, 2)) * shaderstate.identitylighting;
else
sc = shaderstate.identitylighting;
cbe->e_lmscale[i][0] = sc;
cbe->e_lmscale[i][1] = sc;
cbe->e_lmscale[i][2] = sc;
cbe->e_lmscale[i][3] = 1;
i++;
}
for (; i < MAXRLIGHTMAPS ; i++)
{
cbe->e_lmscale[i][0] = 0;
cbe->e_lmscale[i][1] = 0;
cbe->e_lmscale[i][2] = 0;
cbe->e_lmscale[i][3] = 1;
}
break;
}
#else
float sc = 1;
#endif
if (shaderstate.curentity->model && shaderstate.curentity->model->engineflags & MDLF_NEEDOVERBRIGHT)
sc = (1<<bound(0, gl_overbright.ival, 2)) * shaderstate.identitylighting;
else
sc = shaderstate.identitylighting;
// sc *= d_lightstylevalue[s]/256.0f;
Vector4Set(cbe->e_lmscale[i], sc, sc, sc, 1);
}
R_FetchPlayerColour(e->topcolour, cbe->e_uppercolour); cbe->pad4 = 0;
R_FetchPlayerColour(e->bottomcolour, cbe->e_lowercolour); cbe->pad5 = 0;
VectorCopy(e->glowmod, cbe->e_glowmod); cbe->pad6 = 0;
if (shaderstate.flags & BEF_FORCECOLOURMOD)
Vector4Copy(e->shaderRGBAf, cbe->e_colourident);
else
Vector4Set(cbe->e_colourident, 1, 1, 1, e->shaderRGBAf[3]);
VectorCopy(r_refdef.globalfog.colour, cbe->w_fogcolours);
cbe->w_fogcolours[3] = r_refdef.globalfog.alpha;
cbe->w_fogdensity = r_refdef.globalfog.density;
cbe->w_fogdepthbias = r_refdef.globalfog.depthbias;
Vector2Set(cbe->pad7, 0, 0);
/*ndr = (e->flags & RF_DEPTHHACK)?0.333:1;
if (ndr != shaderstate.rc.depthrange)
{
VkViewport viewport;
shaderstate.rc.depthrange = ndr;
viewport.x = r_refdef.pxrect.x;
viewport.y = r_refdef.pxrect.y;
viewport.width = r_refdef.pxrect.width;
viewport.height = r_refdef.pxrect.height;
viewport.minDepth = 0;
viewport.maxDepth = ndr;
vkCmdSetViewport(vk.rendertarg->cbuf, 0, 1, &viewport);
}*/
}
void VKBE_SubmitBatch(batch_t *batch)
{
shader_t *shader = batch->shader;
unsigned int bf;
shaderstate.nummeshes = batch->meshes - batch->firstmesh;
if (!shaderstate.nummeshes)
return;
shaderstate.curbatch = batch;
shaderstate.batchvbo = batch->vbo;
shaderstate.meshlist = batch->mesh + batch->firstmesh;
shaderstate.curshader = shader->remapto;
bf = batch->flags | shaderstate.forcebeflags;
if (shaderstate.curentity != batch->ent || shaderstate.flags != bf)
{
shaderstate.flags = bf;
BE_RotateForEntity(batch->ent, batch->ent->model);
shaderstate.curtime = r_refdef.time - shaderstate.curentity->shaderTime;
}
if (batch->skin)
shaderstate.curtexnums = batch->skin;
else if (shader->numdefaulttextures)
shaderstate.curtexnums = shader->defaulttextures + ((int)(shader->defaulttextures_fps * shaderstate.curtime) % shader->numdefaulttextures);
else
shaderstate.curtexnums = shader->defaulttextures;
BE_DrawMeshChain_Internal();
}
void VKBE_DrawMesh_List(shader_t *shader, int nummeshes, mesh_t **meshlist, vbo_t *vbo, texnums_t *texnums, unsigned int beflags)
{
shaderstate.curbatch = &shaderstate.dummybatch;
shaderstate.batchvbo = vbo;
shaderstate.curshader = shader->remapto;
if (texnums)
shaderstate.curtexnums = texnums;
else if (shader->numdefaulttextures)
shaderstate.curtexnums = shader->defaulttextures + ((int)(shader->defaulttextures_fps * shaderstate.curtime) % shader->numdefaulttextures);
else
shaderstate.curtexnums = shader->defaulttextures;
shaderstate.meshlist = meshlist;
shaderstate.nummeshes = nummeshes;
shaderstate.flags = beflags | shaderstate.forcebeflags;
BE_DrawMeshChain_Internal();
}
void VKBE_DrawMesh_Single(shader_t *shader, mesh_t *meshchain, vbo_t *vbo, unsigned int beflags)
{
shaderstate.curbatch = &shaderstate.dummybatch;
shaderstate.batchvbo = vbo;
shaderstate.curtime = realtime;
shaderstate.curshader = shader->remapto;
if (shader->numdefaulttextures)
shaderstate.curtexnums = shader->defaulttextures + ((int)(shader->defaulttextures_fps * shaderstate.curtime) % shader->numdefaulttextures);
else
shaderstate.curtexnums = shader->defaulttextures;
shaderstate.meshlist = &meshchain;
shaderstate.nummeshes = 1;
shaderstate.flags = beflags | shaderstate.forcebeflags;
BE_DrawMeshChain_Internal();
}
void VKBE_RT_Destroy(struct vk_rendertarg *targ)
{
if (targ->framebuffer)
{
vkDestroyFramebuffer(vk.device, targ->framebuffer, vkallocationcb);
VK_DestroyVkTexture(&targ->depth);
VK_DestroyVkTexture(&targ->colour);
}
memset(targ, 0, sizeof(*targ));
}
struct vkbe_rtpurge
{
VkFramebuffer framebuffer;
vk_image_t colour;
vk_image_t depth;
};
static void VKBE_RT_Purge(void *ptr)
{
struct vkbe_rtpurge *ctx = ptr;
vkDestroyFramebuffer(vk.device, ctx->framebuffer, vkallocationcb);
VK_DestroyVkTexture(&ctx->depth);
VK_DestroyVkTexture(&ctx->colour);
}
void VKBE_RT_Gen(struct vk_rendertarg *targ, uint32_t width, uint32_t height, qboolean clear, unsigned int flags)
{
//sooooo much work...
VkImageCreateInfo colour_imginfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
VkImageCreateInfo depth_imginfo;
struct vkbe_rtpurge *purge;
static VkClearValue clearvalues[2];
if (clear)
targ->restartinfo.renderPass = vk.renderpass[2];
else
targ->restartinfo.renderPass = vk.renderpass[1]; //don't care
targ->restartinfo.clearValueCount = 2;
targ->depthcleared = true; //will be once its activated.
if (targ->width == width && targ->height == height && targ->q_colour.flags == flags)
return; //no work to do.
if (targ->framebuffer)
{ //schedule the old one to be destroyed at the end of the current frame. DIE OLD ONE, DIE!
purge = VK_AtFrameEnd(VKBE_RT_Purge, NULL, sizeof(*purge));
purge->framebuffer = targ->framebuffer;
purge->colour = targ->colour;
purge->depth = targ->depth;
memset(&targ->colour, 0, sizeof(targ->colour));
memset(&targ->depth, 0, sizeof(targ->depth));
targ->framebuffer = VK_NULL_HANDLE;
}
targ->q_colour.vkimage = &targ->colour;
targ->q_depth.vkimage = &targ->depth;
targ->q_colour.status = TEX_LOADED;
targ->q_colour.width = width;
targ->q_colour.height = height;
targ->q_colour.flags = flags;
targ->width = width;
targ->height = height;
if (width == 0 && height == 0)
return; //destroyed
colour_imginfo.format = vk.backbufformat;
colour_imginfo.flags = 0;
colour_imginfo.imageType = VK_IMAGE_TYPE_2D;
colour_imginfo.extent.width = width;
colour_imginfo.extent.height = height;
colour_imginfo.extent.depth = 1;
colour_imginfo.mipLevels = 1;
colour_imginfo.arrayLayers = 1;
colour_imginfo.samples = VK_SAMPLE_COUNT_1_BIT;
colour_imginfo.tiling = VK_IMAGE_TILING_OPTIMAL;
colour_imginfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_SAMPLED_BIT;
colour_imginfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
colour_imginfo.queueFamilyIndexCount = 0;
colour_imginfo.pQueueFamilyIndices = NULL;
colour_imginfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
VkAssert(vkCreateImage(vk.device, &colour_imginfo, vkallocationcb, &targ->colour.image));
depth_imginfo = colour_imginfo;
depth_imginfo.format = VK_FORMAT_D32_SFLOAT;
depth_imginfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT|VK_IMAGE_USAGE_SAMPLED_BIT;
VkAssert(vkCreateImage(vk.device, &depth_imginfo, vkallocationcb, &targ->depth.image));
VK_AllocateBindImageMemory(&targ->colour, true);
VK_AllocateBindImageMemory(&targ->depth, true);
// set_image_layout(vk.frame->cbuf, targ->colour.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
// set_image_layout(vk.frame->cbuf, targ->depth.image, VK_IMAGE_ASPECT_DEPTH_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
{
VkImageViewCreateInfo ivci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
ivci.components.r = VK_COMPONENT_SWIZZLE_R;
ivci.components.g = VK_COMPONENT_SWIZZLE_G;
ivci.components.b = VK_COMPONENT_SWIZZLE_B;
ivci.components.a = VK_COMPONENT_SWIZZLE_A;
ivci.subresourceRange.baseMipLevel = 0;
ivci.subresourceRange.levelCount = 1;
ivci.subresourceRange.baseArrayLayer = 0;
ivci.subresourceRange.layerCount = 1;
ivci.viewType = VK_IMAGE_VIEW_TYPE_2D;
ivci.flags = 0;
ivci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
ivci.format = colour_imginfo.format;
ivci.image = targ->colour.image;
VkAssert(vkCreateImageView(vk.device, &ivci, vkallocationcb, &targ->colour.view));
ivci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
ivci.format = depth_imginfo.format;
ivci.image = targ->depth.image;
VkAssert(vkCreateImageView(vk.device, &ivci, vkallocationcb, &targ->depth.view));
}
{
VkSamplerCreateInfo lmsampinfo = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
lmsampinfo.minFilter = lmsampinfo.magFilter = (flags&IF_NEAREST)?VK_FILTER_NEAREST:VK_FILTER_LINEAR;
lmsampinfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
lmsampinfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
lmsampinfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
lmsampinfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
lmsampinfo.mipLodBias = 0.0;
lmsampinfo.anisotropyEnable = VK_FALSE;
lmsampinfo.maxAnisotropy = 1.0;
lmsampinfo.compareOp = VK_COMPARE_OP_LESS_OR_EQUAL;
lmsampinfo.minLod = 0;
lmsampinfo.maxLod = 0;
lmsampinfo.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
lmsampinfo.unnormalizedCoordinates = VK_FALSE;
lmsampinfo.compareEnable = VK_FALSE;
VkAssert(vkCreateSampler(vk.device, &lmsampinfo, NULL, &targ->colour.sampler));
lmsampinfo.compareEnable = VK_TRUE;
VkAssert(vkCreateSampler(vk.device, &lmsampinfo, NULL, &targ->depth.sampler));
}
targ->colour.layout = VK_IMAGE_LAYOUT_UNDEFINED;
targ->mscolour.layout = VK_IMAGE_LAYOUT_UNDEFINED;
targ->depth.layout = VK_IMAGE_LAYOUT_UNDEFINED;
{
VkFramebufferCreateInfo fbinfo = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO};
VkImageView attachments[3] = {targ->colour.view, targ->depth.view, targ->mscolour.view};
fbinfo.flags = 0;
fbinfo.renderPass = vk.renderpass[2];
fbinfo.attachmentCount = (vk.multisamplebits!=VK_SAMPLE_COUNT_1_BIT)?3:2;
fbinfo.pAttachments = attachments;
fbinfo.width = width;
fbinfo.height = height;
fbinfo.layers = 1;
VkAssert(vkCreateFramebuffer(vk.device, &fbinfo, vkallocationcb, &targ->framebuffer));
}
targ->restartinfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
targ->restartinfo.pNext = NULL;
targ->restartinfo.framebuffer = targ->framebuffer;
targ->restartinfo.renderArea.offset.x = 0;
targ->restartinfo.renderArea.offset.y = 0;
targ->restartinfo.renderArea.extent.width = width;
targ->restartinfo.renderArea.extent.height = height;
targ->restartinfo.pClearValues = clearvalues;
clearvalues[1].depthStencil.depth = 1;
}
struct vkbe_rtpurge_cube
{
vk_image_t colour;
vk_image_t depth;
struct
{
VkFramebuffer framebuffer;
VkImageView iv[2];
} face[6];
};
static void VKBE_RT_Purge_Cube(void *ptr)
{
uint32_t f;
struct vkbe_rtpurge_cube *ctx = ptr;
for (f = 0; f < 6; f++)
{
vkDestroyFramebuffer(vk.device, ctx->face[f].framebuffer, vkallocationcb);
vkDestroyImageView(vk.device, ctx->face[f].iv[0], vkallocationcb);
vkDestroyImageView(vk.device, ctx->face[f].iv[1], vkallocationcb);
}
VK_DestroyVkTexture(&ctx->depth);
VK_DestroyVkTexture(&ctx->colour);
}
//generate a cubemap-compatible 2d array, set up 6 render targets that render to their own views
void VKBE_RT_Gen_Cube(struct vk_rendertarg_cube *targ, uint32_t size, qboolean clear)
{
VkImageCreateInfo colour_imginfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
VkImageCreateInfo depth_imginfo;
struct vkbe_rtpurge_cube *purge;
uint32_t f;
static VkClearValue clearvalues[2];
for (f = 0; f < 6; f++)
{
if (clear)
targ->face[f].restartinfo.renderPass = vk.renderpass[2];
else
targ->face[f].restartinfo.renderPass = vk.renderpass[1]; //don't care
targ->face[f].restartinfo.clearValueCount = 2;
}
if (targ->size == size)
return; //no work to do.
if (targ->size)
{ //schedule the old one to be destroyed at the end of the current frame. DIE OLD ONE, DIE!
purge = VK_AtFrameEnd(VKBE_RT_Purge_Cube, NULL, sizeof(*purge));
for (f = 0; f < 6; f++)
{
purge->face[f].framebuffer = targ->face[f].framebuffer;
targ->face[f].framebuffer = VK_NULL_HANDLE;
purge->face[f].iv[0] = targ->face[f].colour.view;
purge->face[f].iv[1] = targ->face[f].depth.view;
targ->face[f].colour.view = VK_NULL_HANDLE;
targ->face[f].depth.view = VK_NULL_HANDLE;
}
purge->colour = targ->colour;
purge->depth = targ->depth;
memset(&targ->colour, 0, sizeof(targ->colour));
memset(&targ->depth, 0, sizeof(targ->depth));
}
targ->size = size;
if (!size)
return;
targ->q_colour.vkimage = &targ->colour;
targ->q_depth.vkimage = &targ->depth;
colour_imginfo.format = VK_FORMAT_R8G8B8A8_UNORM;
colour_imginfo.flags = VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
colour_imginfo.imageType = VK_IMAGE_TYPE_2D;
colour_imginfo.extent.width = size;
colour_imginfo.extent.height = size;
colour_imginfo.mipLevels = 1;
colour_imginfo.arrayLayers = 6;
colour_imginfo.samples = VK_SAMPLE_COUNT_1_BIT;
colour_imginfo.tiling = VK_IMAGE_TILING_OPTIMAL;
colour_imginfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_SAMPLED_BIT;
colour_imginfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
colour_imginfo.queueFamilyIndexCount = 0;
colour_imginfo.pQueueFamilyIndices = NULL;
colour_imginfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
VkAssert(vkCreateImage(vk.device, &colour_imginfo, vkallocationcb, &targ->colour.image));
depth_imginfo = colour_imginfo;
depth_imginfo.format = vk.depthformat;
depth_imginfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT|VK_IMAGE_USAGE_SAMPLED_BIT;
VkAssert(vkCreateImage(vk.device, &depth_imginfo, vkallocationcb, &targ->depth.image));
VK_AllocateBindImageMemory(&targ->colour, true);
VK_AllocateBindImageMemory(&targ->depth, true);
// set_image_layout(vk.frame->cbuf, targ->colour.image, VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
// set_image_layout(vk.frame->cbuf, targ->depth.image, VK_IMAGE_ASPECT_DEPTH_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
//public sampler
{
VkSamplerCreateInfo lmsampinfo = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
lmsampinfo.minFilter = lmsampinfo.magFilter = VK_FILTER_LINEAR;
lmsampinfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
lmsampinfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
lmsampinfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
lmsampinfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
lmsampinfo.mipLodBias = 0.0;
lmsampinfo.anisotropyEnable = VK_FALSE;
lmsampinfo.maxAnisotropy = 1.0;
lmsampinfo.compareOp = VK_COMPARE_OP_LESS_OR_EQUAL;
lmsampinfo.minLod = 0;
lmsampinfo.maxLod = 0;
lmsampinfo.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
lmsampinfo.unnormalizedCoordinates = VK_FALSE;
lmsampinfo.compareEnable = VK_FALSE;
VkAssert(vkCreateSampler(vk.device, &lmsampinfo, NULL, &targ->colour.sampler));
lmsampinfo.compareEnable = VK_TRUE;
VkAssert(vkCreateSampler(vk.device, &lmsampinfo, NULL, &targ->depth.sampler));
}
//public cubemap views
{
VkImageViewCreateInfo ivci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
ivci.components.r = VK_COMPONENT_SWIZZLE_R;
ivci.components.g = VK_COMPONENT_SWIZZLE_G;
ivci.components.b = VK_COMPONENT_SWIZZLE_B;
ivci.components.a = VK_COMPONENT_SWIZZLE_A;
ivci.subresourceRange.baseMipLevel = 0;
ivci.subresourceRange.levelCount = 1;
ivci.subresourceRange.baseArrayLayer = 0;
ivci.subresourceRange.layerCount = 6;
ivci.viewType = VK_IMAGE_VIEW_TYPE_CUBE;
ivci.flags = 0;
ivci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
ivci.format = colour_imginfo.format;
ivci.image = targ->colour.image;
VkAssert(vkCreateImageView(vk.device, &ivci, vkallocationcb, &targ->colour.view));
ivci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
ivci.format = depth_imginfo.format;
ivci.image = targ->depth.image;
VkAssert(vkCreateImageView(vk.device, &ivci, vkallocationcb, &targ->depth.view));
}
for (f = 0; f < 6; f++)
{
targ->face[f].width = targ->face[f].height = size;
//per-face view for the framebuffer
{
VkImageViewCreateInfo ivci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
ivci.components.r = VK_COMPONENT_SWIZZLE_R;
ivci.components.g = VK_COMPONENT_SWIZZLE_G;
ivci.components.b = VK_COMPONENT_SWIZZLE_B;
ivci.components.a = VK_COMPONENT_SWIZZLE_A;
ivci.subresourceRange.baseMipLevel = 0;
ivci.subresourceRange.levelCount = 1;
ivci.subresourceRange.baseArrayLayer = f;
ivci.subresourceRange.layerCount = 1;
ivci.viewType = VK_IMAGE_VIEW_TYPE_2D;
ivci.flags = 0;
ivci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
ivci.format = colour_imginfo.format;
ivci.image = targ->colour.image;
VkAssert(vkCreateImageView(vk.device, &ivci, vkallocationcb, &targ->face[f].colour.view));
ivci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
ivci.format = depth_imginfo.format;
ivci.image = targ->depth.image;
VkAssert(vkCreateImageView(vk.device, &ivci, vkallocationcb, &targ->face[f].depth.view));
}
targ->colour.layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
targ->depth.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
{
VkFramebufferCreateInfo fbinfo = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO};
VkImageView attachments[2] = {targ->face[f].colour.view, targ->face[f].depth.view};
fbinfo.flags = 0;
fbinfo.renderPass = vk.renderpass[2];
fbinfo.attachmentCount = countof(attachments);
fbinfo.pAttachments = attachments;
fbinfo.width = size;
fbinfo.height = size;
fbinfo.layers = 1;
VkAssert(vkCreateFramebuffer(vk.device, &fbinfo, vkallocationcb, &targ->face[f].framebuffer));
}
targ->face[f].restartinfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
targ->face[f].restartinfo.pNext = NULL;
targ->face[f].restartinfo.framebuffer = targ->face[f].framebuffer;
targ->face[f].restartinfo.renderArea.offset.x = 0;
targ->face[f].restartinfo.renderArea.offset.y = 0;
targ->face[f].restartinfo.renderArea.extent.width = size;
targ->face[f].restartinfo.renderArea.extent.height = size;
targ->face[f].restartinfo.pClearValues = clearvalues;
}
clearvalues[1].depthStencil.depth = 1;
}
void VKBE_RT_Begin(struct vk_rendertarg *targ)
{
if (vk.rendertarg == targ)
return;
r_refdef.pxrect.x = 0;
r_refdef.pxrect.y = 0;
r_refdef.pxrect.width = targ->width;
r_refdef.pxrect.height = targ->height;
r_refdef.pxrect.maxheight = targ->height;
vid.fbpwidth = targ->width;
vid.fbpheight = targ->height;
#if 0
targ->cbuf = vk.rendertarg->cbuf;
if (vk.rendertarg)
vkCmdEndRenderPass(vk.rendertarg->cbuf);
#else
shaderstate.rc.activepipeline = VK_NULL_HANDLE;
targ->cbuf = VK_AllocFrameCBuf();
{
VkCommandBufferBeginInfo begininf = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO};
VkCommandBufferInheritanceInfo inh = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO};
begininf.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
begininf.pInheritanceInfo = &inh;
inh.renderPass = VK_NULL_HANDLE; //unused
inh.subpass = 0; //unused
inh.framebuffer = VK_NULL_HANDLE; //unused
inh.occlusionQueryEnable = VK_FALSE;
inh.queryFlags = 0;
inh.pipelineStatistics = 0;
vkBeginCommandBuffer(targ->cbuf, &begininf);
}
#endif
targ->prevtarg = vk.rendertarg;
vk.rendertarg = targ;
vkCmdBeginRenderPass(vk.rendertarg->cbuf, &targ->restartinfo, VK_SUBPASS_CONTENTS_INLINE);
//future reuse shouldn't clear stuff
if (targ->restartinfo.clearValueCount)
{
targ->depthcleared = true;
targ->restartinfo.renderPass = vk.renderpass[0];
targ->restartinfo.clearValueCount = 0;
}
{
VkRect2D wrekt;
VkViewport viewport;
viewport.x = r_refdef.pxrect.x;
viewport.y = r_refdef.pxrect.y;
viewport.width = r_refdef.pxrect.width;
viewport.height = r_refdef.pxrect.height;
viewport.minDepth = 0;
viewport.maxDepth = 1;
vkCmdSetViewport(vk.rendertarg->cbuf, 0, 1, &viewport);
wrekt.offset.x = viewport.x;
wrekt.offset.y = viewport.y;
wrekt.extent.width = viewport.width;
wrekt.extent.height = viewport.height;
vkCmdSetScissor(vk.rendertarg->cbuf, 0, 1, &wrekt);
}
}
void VKBE_RT_End(struct vk_rendertarg *targ)
{
if (R2D_Flush)
R2D_Flush();
vk.rendertarg = vk.rendertarg->prevtarg;
vid.fbpwidth = vk.rendertarg->width;
vid.fbpheight = vk.rendertarg->height;
#if 0
#else
shaderstate.rc.activepipeline = VK_NULL_HANDLE;
vkCmdEndRenderPass(targ->cbuf);
vkEndCommandBuffer(targ->cbuf);
VK_Submit_Work(targ->cbuf, VK_NULL_HANDLE, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_NULL_HANDLE, VK_NULL_HANDLE, NULL, NULL);
// VK_Submit_Work(VkCommandBuffer cmdbuf, VkSemaphore semwait, VkPipelineStageFlags semwaitstagemask, VkSemaphore semsignal, VkFence fencesignal, struct vkframe *presentframe, struct vk_fencework *fencedwork)
#endif
}
static qboolean BE_GenerateRefraction(batch_t *batch, shader_t *bs)
{
float oldil;
int oldbem;
// struct vk_rendertarg *targ;
//these flags require rendering some view as an fbo
if (r_refdef.recurse)
return false;
if (r_refdef.recurse == r_portalrecursion.ival || r_refdef.recurse == R_MAX_RECURSE)
return false;
if (shaderstate.mode != BEM_STANDARD && shaderstate.mode != BEM_DEPTHDARK)
return false;
if (vk.multisamplebits != VK_SAMPLE_COUNT_1_BIT)
return false; //multisample rendering can't deal with this.
oldbem = shaderstate.mode;
oldil = shaderstate.identitylighting;
// targ = vk.rendertarg;
if (bs->flags & SHADER_HASREFLECT)
{
vrect_t orect = r_refdef.vrect;
pxrect_t oprect = r_refdef.pxrect;
r_refdef.vrect.x = 0;
r_refdef.vrect.y = 0;
r_refdef.vrect.width = max(1, vid.fbvwidth*bs->portalfboscale);
r_refdef.vrect.height = max(1, vid.fbvheight*bs->portalfboscale);
VKBE_RT_Gen(&shaderstate.rt_reflection, r_refdef.vrect.width, r_refdef.vrect.height, false, RT_IMAGEFLAGS);
VKBE_RT_Begin(&shaderstate.rt_reflection);
R_DrawPortal(batch, cl.worldmodel->batches, NULL, 1);
VKBE_RT_End(&shaderstate.rt_reflection);
r_refdef.vrect = orect;
r_refdef.pxrect = oprect;
}
if (bs->flags & (SHADER_HASREFRACT|SHADER_HASREFRACTDEPTH))
{
extern cvar_t r_refract_fbo;
if (r_refract_fbo.ival || (bs->flags & SHADER_HASREFRACTDEPTH))
{
vrect_t ovrect = r_refdef.vrect;
pxrect_t oprect = r_refdef.pxrect;
r_refdef.vrect.x = 0;
r_refdef.vrect.y = 0;
r_refdef.vrect.width = vid.fbvwidth/2;
r_refdef.vrect.height = vid.fbvheight/2;
VKBE_RT_Gen(&shaderstate.rt_refraction, r_refdef.vrect.width, r_refdef.vrect.height, false, RT_IMAGEFLAGS);
VKBE_RT_Begin(&shaderstate.rt_refraction);
R_DrawPortal(batch, cl.worldmodel->batches, NULL, ((bs->flags & SHADER_HASREFRACTDEPTH)?3:2)); //fixme
VKBE_RT_End(&shaderstate.rt_refraction);
r_refdef.vrect = ovrect;
r_refdef.pxrect = oprect;
shaderstate.tex_refraction = &shaderstate.rt_refraction.q_colour;
}
else
{
R_DrawPortal(batch, cl.worldmodel->batches, NULL, 3);
T_Gen_CurrentRender();
shaderstate.tex_refraction = shaderstate.tex_currentrender;
}
}
/*
if (bs->flags & SHADER_HASRIPPLEMAP)
{
vrect_t orect = r_refdef.vrect;
pxrect_t oprect = r_refdef.pxrect;
r_refdef.vrect.x = 0;
r_refdef.vrect.y = 0;
r_refdef.vrect.width = vid.fbvwidth/2;
r_refdef.vrect.height = vid.fbvheight/2;
r_refdef.pxrect.x = 0;
r_refdef.pxrect.y = 0;
r_refdef.pxrect.width = vid.fbpwidth/2;
r_refdef.pxrect.height = vid.fbpheight/2;
if (!shaderstate.tex_ripplemap)
{
//FIXME: can we use RGB8 instead?
shaderstate.tex_ripplemap = Image_CreateTexture("***tex_ripplemap***", NULL, 0);
if (!shaderstate.tex_ripplemap->num)
qglGenTextures(1, &shaderstate.tex_ripplemap->num);
}
if (shaderstate.tex_ripplemap->width != r_refdef.pxrect.width || shaderstate.tex_ripplemap->height != r_refdef.pxrect.height)
{
shaderstate.tex_ripplemap->width = r_refdef.pxrect.width;
shaderstate.tex_ripplemap->height = r_refdef.pxrect.height;
GL_MTBind(0, GL_TEXTURE_2D, shaderstate.tex_ripplemap);
qglTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, r_refdef.pxrect.width, r_refdef.pxrect.height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
qglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
qglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
qglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
qglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
}
oldfbo = GLBE_FBO_Update(&shaderstate.fbo_reflectrefrac, 0, &shaderstate.tex_ripplemap, 1, r_nulltex, r_refdef.pxrect.width, r_refdef.pxrect.height, 0);
r_refdef.pxrect.maxheight = shaderstate.fbo_reflectrefrac.rb_size[1];
GL_ViewportUpdate();
qglClearColor(0, 0, 0, 1);
qglClear(GL_COLOR_BUFFER_BIT);
r_refdef.vrect.x = 0;
r_refdef.vrect.y = 0;
r_refdef.vrect.width = vid.fbvwidth;
r_refdef.vrect.height = vid.fbvheight;
BE_RT_Begin(&shaderstate.rt_refraction, vid.fbpwidth, vid.fbpheight);
r_refdef.recurse+=1; //paranoid, should stop potential infinite loops
GLBE_SubmitMeshes(cl.worldmodel->batches, SHADER_SORT_RIPPLE, SHADER_SORT_RIPPLE);
r_refdef.recurse-=1;
r_refdef.vrect = orect;
r_refdef.pxrect = oprect;
BE_RT_End();
}
*/
VKBE_SelectMode(oldbem);
shaderstate.identitylighting = oldil;
return true;
}
static void BE_SubmitMeshesSortList(batch_t *sortlist)
{
batch_t *batch;
for (batch = sortlist; batch; batch = batch->next)
{
if (batch->meshes == batch->firstmesh)
continue;
if (batch->buildmeshes)
batch->buildmeshes(batch);
if (batch->shader->flags & SHADER_NODLIGHT)
if (shaderstate.mode == BEM_LIGHT)
continue;
if (batch->shader->flags & SHADER_SKY)
{
if (shaderstate.mode == BEM_STANDARD || shaderstate.mode == BEM_DEPTHDARK)
{
if (R_DrawSkyChain (batch))
continue;
}
else if (shaderstate.mode != BEM_FOG && shaderstate.mode != BEM_CREPUSCULAR && shaderstate.mode != BEM_WIREFRAME)
continue;
}
if ((batch->shader->flags & (SHADER_HASREFLECT | SHADER_HASREFRACT | SHADER_HASRIPPLEMAP)) && shaderstate.mode != BEM_WIREFRAME)
if (!BE_GenerateRefraction(batch, batch->shader))
continue;
VKBE_SubmitBatch(batch);
}
}
/*generates a new modelview matrix, as well as vpn vectors*/
static void R_MirrorMatrix(plane_t *plane)
{
float mirror[16];
float view[16];
float result[16];
vec3_t pnorm;
VectorNegate(plane->normal, pnorm);
mirror[0] = 1-2*pnorm[0]*pnorm[0];
mirror[1] = -2*pnorm[0]*pnorm[1];
mirror[2] = -2*pnorm[0]*pnorm[2];
mirror[3] = 0;
mirror[4] = -2*pnorm[1]*pnorm[0];
mirror[5] = 1-2*pnorm[1]*pnorm[1];
mirror[6] = -2*pnorm[1]*pnorm[2] ;
mirror[7] = 0;
mirror[8] = -2*pnorm[2]*pnorm[0];
mirror[9] = -2*pnorm[2]*pnorm[1];
mirror[10] = 1-2*pnorm[2]*pnorm[2];
mirror[11] = 0;
mirror[12] = -2*pnorm[0]*plane->dist;
mirror[13] = -2*pnorm[1]*plane->dist;
mirror[14] = -2*pnorm[2]*plane->dist;
mirror[15] = 1;
view[0] = vpn[0];
view[1] = vpn[1];
view[2] = vpn[2];
view[3] = 0;
view[4] = -vright[0];
view[5] = -vright[1];
view[6] = -vright[2];
view[7] = 0;
view[8] = vup[0];
view[9] = vup[1];
view[10] = vup[2];
view[11] = 0;
view[12] = r_refdef.vieworg[0];
view[13] = r_refdef.vieworg[1];
view[14] = r_refdef.vieworg[2];
view[15] = 1;
VectorMA(r_refdef.vieworg, 0.25, plane->normal, r_refdef.pvsorigin);
Matrix4_Multiply(mirror, view, result);
vpn[0] = result[0];
vpn[1] = result[1];
vpn[2] = result[2];
vright[0] = -result[4];
vright[1] = -result[5];
vright[2] = -result[6];
vup[0] = result[8];
vup[1] = result[9];
vup[2] = result[10];
r_refdef.vieworg[0] = result[12];
r_refdef.vieworg[1] = result[13];
r_refdef.vieworg[2] = result[14];
}
static entity_t *R_NearestPortal(plane_t *plane)
{
int i;
entity_t *best = NULL;
float dist, bestd = 0;
//for q3-compat, portals on world scan for a visedict to use for their view.
for (i = 0; i < cl_numvisedicts; i++)
{
if (cl_visedicts[i].rtype == RT_PORTALSURFACE)
{
dist = DotProduct(cl_visedicts[i].origin, plane->normal)-plane->dist;
dist = fabs(dist);
if (dist < 64 && (!best || dist < bestd))
best = &cl_visedicts[i];
}
}
return best;
}
static void TransformCoord(vec3_t in, vec3_t planea[3], vec3_t planeo, vec3_t viewa[3], vec3_t viewo, vec3_t result)
{
int i;
vec3_t local;
vec3_t transformed;
float d;
local[0] = in[0] - planeo[0];
local[1] = in[1] - planeo[1];
local[2] = in[2] - planeo[2];
VectorClear(transformed);
for ( i = 0 ; i < 3 ; i++ )
{
d = DotProduct(local, planea[i]);
VectorMA(transformed, d, viewa[i], transformed);
}
result[0] = transformed[0] + viewo[0];
result[1] = transformed[1] + viewo[1];
result[2] = transformed[2] + viewo[2];
}
static void TransformDir(vec3_t in, vec3_t planea[3], vec3_t viewa[3], vec3_t result)
{
int i;
float d;
vec3_t tmp;
VectorCopy(in, tmp);
VectorClear(result);
for ( i = 0 ; i < 3 ; i++ )
{
d = DotProduct(tmp, planea[i]);
VectorMA(result, d, viewa[i], result);
}
}
void R_ObliqueNearClip(float *viewmat, mplane_t *wplane);
void CL_DrawDebugPlane(float *normal, float dist, float r, float g, float b, qboolean enqueue);
static void R_DrawPortal(batch_t *batch, batch_t **blist, batch_t *depthmasklist[2], int portaltype)
{
entity_t *view;
plane_t plane, oplane;
float vmat[16];
refdef_t oldrefdef;
vec3_t r;
int i;
mesh_t *mesh = batch->mesh[batch->firstmesh];
pvsbuffer_t newvis;
float ivmat[16], trmat[16];
if (r_refdef.recurse >= R_MAX_RECURSE-1)
return;
if (!mesh->xyz_array)
return;
if (!mesh->normals_array)
{
VectorSet(plane.normal, 0, 0, 1);
}
else
{
VectorCopy(mesh->normals_array[0], plane.normal);
}
if (batch->ent == &r_worldentity)
{
plane.dist = DotProduct(mesh->xyz_array[0], plane.normal);
}
else
{
vec3_t point;
VectorCopy(plane.normal, oplane.normal);
//rotate the surface normal around its entity's matrix
plane.normal[0] = oplane.normal[0]*batch->ent->axis[0][0] + oplane.normal[1]*batch->ent->axis[1][0] + oplane.normal[2]*batch->ent->axis[2][0];
plane.normal[1] = oplane.normal[0]*batch->ent->axis[0][1] + oplane.normal[1]*batch->ent->axis[1][1] + oplane.normal[2]*batch->ent->axis[2][1];
plane.normal[2] = oplane.normal[0]*batch->ent->axis[0][2] + oplane.normal[1]*batch->ent->axis[1][2] + oplane.normal[2]*batch->ent->axis[2][2];
//rotate some point on the mesh around its entity's matrix
point[0] = mesh->xyz_array[0][0]*batch->ent->axis[0][0] + mesh->xyz_array[0][1]*batch->ent->axis[1][0] + mesh->xyz_array[0][2]*batch->ent->axis[2][0] + batch->ent->origin[0];
point[1] = mesh->xyz_array[0][0]*batch->ent->axis[0][1] + mesh->xyz_array[0][1]*batch->ent->axis[1][1] + mesh->xyz_array[0][2]*batch->ent->axis[2][1] + batch->ent->origin[1];
point[2] = mesh->xyz_array[0][0]*batch->ent->axis[0][2] + mesh->xyz_array[0][1]*batch->ent->axis[1][2] + mesh->xyz_array[0][2]*batch->ent->axis[2][2] + batch->ent->origin[2];
//now we can figure out the plane dist
plane.dist = DotProduct(point, plane.normal);
}
//if we're too far away from the surface, don't draw anything
if (batch->shader->flags & SHADER_AGEN_PORTAL)
{
/*there's a portal alpha blend on that surface, that fades out after this distance*/
if (DotProduct(r_refdef.vieworg, plane.normal)-plane.dist > batch->shader->portaldist)
return;
}
//if we're behind it, then also don't draw anything. for our purposes, behind is when the entire near clipplane is behind.
if (DotProduct(r_refdef.vieworg, plane.normal)-plane.dist < -r_refdef.mindist)
return;
TRACE(("R_DrawPortal: portal type %i\n", portaltype));
oldrefdef = r_refdef;
r_refdef.recurse+=1;
r_refdef.externalview = true;
switch(portaltype)
{
case 1: /*fbo explicit mirror (fucked depth, working clip plane)*/
//fixme: pvs is surely wrong?
// r_refdef.flipcull ^= SHADER_CULL_FLIP;
R_MirrorMatrix(&plane);
Matrix4x4_CM_ModelViewMatrixFromAxis(vmat, vpn, vright, vup, r_refdef.vieworg);
VectorCopy(mesh->xyz_array[0], r_refdef.pvsorigin);
for (i = 1; i < mesh->numvertexes; i++)
VectorAdd(r_refdef.pvsorigin, mesh->xyz_array[i], r_refdef.pvsorigin);
VectorScale(r_refdef.pvsorigin, 1.0/mesh->numvertexes, r_refdef.pvsorigin);
break;
case 2: /*fbo refraction (fucked depth, working clip plane)*/
case 3: /*screen copy refraction (screen depth, fucked clip planes)*/
/*refraction image (same view, just with things culled*/
r_refdef.externalview = oldrefdef.externalview;
VectorNegate(plane.normal, plane.normal);
plane.dist = -plane.dist;
//use the player's origin for r_viewleaf, because there's not much we can do anyway*/
VectorCopy(r_origin, r_refdef.pvsorigin);
if (cl.worldmodel && cl.worldmodel->funcs.ClusterPVS && !r_novis.ival)
{
int clust, i, j;
float d;
vec3_t point;
r_refdef.forcevis = true;
r_refdef.forcedvis = NULL;
newvis.buffer = alloca(newvis.buffersize=cl.worldmodel->pvsbytes);
for (i = batch->firstmesh; i < batch->meshes; i++)
{
mesh = batch->mesh[i];
VectorClear(point);
for (j = 0; j < mesh->numvertexes; j++)
VectorAdd(point, mesh->xyz_array[j], point);
VectorScale(point, 1.0f/mesh->numvertexes, point);
d = DotProduct(point, plane.normal) - plane.dist;
d += 0.1; //an epsilon on the far side
VectorMA(point, d, plane.normal, point);
clust = cl.worldmodel->funcs.ClusterForPoint(cl.worldmodel, point);
if (i == batch->firstmesh)
r_refdef.forcedvis = cl.worldmodel->funcs.ClusterPVS(cl.worldmodel, clust, &newvis, PVM_REPLACE);
else
r_refdef.forcedvis = cl.worldmodel->funcs.ClusterPVS(cl.worldmodel, clust, &newvis, PVM_MERGE);
}
// memset(newvis, 0xff, pvsbytes);
}
Matrix4x4_CM_ModelViewMatrixFromAxis(vmat, vpn, vright, vup, r_refdef.vieworg);
break;
case 0: /*q3 portal*/
default:
#ifdef CSQC_DAT
if (CSQC_SetupToRenderPortal(batch->ent->keynum))
{
oplane = plane;
//transform the old surface plane into the new view matrix
Matrix4_Invert(r_refdef.m_view, ivmat);
Matrix4x4_CM_ModelViewMatrixFromAxis(vmat, vpn, vright, vup, r_refdef.vieworg);
Matrix4_Multiply(ivmat, vmat, trmat);
plane.normal[0] = -(oplane.normal[0] * trmat[0] + oplane.normal[1] * trmat[1] + oplane.normal[2] * trmat[2]);
plane.normal[1] = -(oplane.normal[0] * trmat[4] + oplane.normal[1] * trmat[5] + oplane.normal[2] * trmat[6]);
plane.normal[2] = -(oplane.normal[0] * trmat[8] + oplane.normal[1] * trmat[9] + oplane.normal[2] * trmat[10]);
plane.dist = -oplane.dist + trmat[12]*oplane.normal[0] + trmat[13]*oplane.normal[1] + trmat[14]*oplane.normal[2];
if (Cvar_Get("temp_useplaneclip", "1", 0, "temp")->ival)
portaltype = 1; //make sure the near clipplane is used.
}
else
#endif
if (!(view = R_NearestPortal(&plane)) || VectorCompare(view->origin, view->oldorigin))
{
//a portal with no portal entity, or a portal rentity with an origin equal to its oldorigin, is a mirror.
// r_refdef.flipcull ^= SHADER_CULL_FLIP;
R_MirrorMatrix(&plane);
Matrix4x4_CM_ModelViewMatrixFromAxis(vmat, vpn, vright, vup, r_refdef.vieworg);
VectorCopy(mesh->xyz_array[0], r_refdef.pvsorigin);
for (i = 1; i < mesh->numvertexes; i++)
VectorAdd(r_refdef.pvsorigin, mesh->xyz_array[i], r_refdef.pvsorigin);
VectorScale(r_refdef.pvsorigin, 1.0/mesh->numvertexes, r_refdef.pvsorigin);
portaltype = 1;
}
else
{
float d;
vec3_t paxis[3], porigin, vaxis[3], vorg;
void PerpendicularVector( vec3_t dst, const vec3_t src );
oplane = plane;
/*calculate where the surface is meant to be*/
VectorCopy(mesh->normals_array[0], paxis[0]);
PerpendicularVector(paxis[1], paxis[0]);
CrossProduct(paxis[0], paxis[1], paxis[2]);
d = DotProduct(view->origin, plane.normal) - plane.dist;
VectorMA(view->origin, -d, paxis[0], porigin);
/*grab the camera origin*/
VectorNegate(view->axis[0], vaxis[0]);
VectorNegate(view->axis[1], vaxis[1]);
VectorCopy(view->axis[2], vaxis[2]);
VectorCopy(view->oldorigin, vorg);
VectorCopy(vorg, r_refdef.pvsorigin);
/*rotate it a bit*/
if (view->framestate.g[FS_REG].frame[1]) //oldframe
{
if (view->framestate.g[FS_REG].frame[0]) //newframe
d = realtime * view->framestate.g[FS_REG].frame[0]; //newframe
else
d = view->skinnum + sin(realtime)*4;
}
else
d = view->skinnum;
if (d)
{
vec3_t rdir;
VectorCopy(vaxis[1], rdir);
RotatePointAroundVector(vaxis[1], vaxis[0], rdir, d);
CrossProduct(vaxis[0], vaxis[1], vaxis[2]);
}
TransformCoord(oldrefdef.vieworg, paxis, porigin, vaxis, vorg, r_refdef.vieworg);
TransformDir(vpn, paxis, vaxis, vpn);
TransformDir(vright, paxis, vaxis, vright);
TransformDir(vup, paxis, vaxis, vup);
Matrix4x4_CM_ModelViewMatrixFromAxis(vmat, vpn, vright, vup, r_refdef.vieworg);
//transform the old surface plane into the new view matrix
if (Matrix4_Invert(r_refdef.m_view, ivmat))
{
Matrix4_Multiply(ivmat, vmat, trmat);
plane.normal[0] = -(oplane.normal[0] * trmat[0] + oplane.normal[1] * trmat[1] + oplane.normal[2] * trmat[2]);
plane.normal[1] = -(oplane.normal[0] * trmat[4] + oplane.normal[1] * trmat[5] + oplane.normal[2] * trmat[6]);
plane.normal[2] = -(oplane.normal[0] * trmat[8] + oplane.normal[1] * trmat[9] + oplane.normal[2] * trmat[10]);
plane.dist = -oplane.dist + trmat[12]*oplane.normal[0] + trmat[13]*oplane.normal[1] + trmat[14]*oplane.normal[2];
portaltype = 1;
}
}
break;
}
/*FIXME: can we get away with stenciling the screen?*/
/*Add to frustum culling instead of clip planes?*/
/* if (qglClipPlane && portaltype)
{
GLdouble glplane[4];
glplane[0] = plane.normal[0];
glplane[1] = plane.normal[1];
glplane[2] = plane.normal[2];
glplane[3] = plane.dist;
qglClipPlane(GL_CLIP_PLANE0, glplane);
qglEnable(GL_CLIP_PLANE0);
}
*/ //fixme: we can probably scissor a smaller frusum
R_SetFrustum (r_refdef.m_projection_std, vmat);
if (r_refdef.frustum_numplanes < MAXFRUSTUMPLANES)
{
extern int SignbitsForPlane (mplane_t *out);
mplane_t fp;
VectorCopy(plane.normal, fp.normal);
fp.dist = plane.dist;
// if (DotProduct(fp.normal, vpn) < 0)
// {
// VectorNegate(fp.normal, fp.normal);
// fp.dist *= -1;
// }
fp.type = PLANE_ANYZ;
fp.signbits = SignbitsForPlane (&fp);
if (portaltype == 1 || portaltype == 2)
R_ObliqueNearClip(vmat, &fp);
//our own culling should be an epsilon forwards so we don't still draw things behind the line due to precision issues.
fp.dist += 0.01;
r_refdef.frustum[r_refdef.frustum_numplanes++] = fp;
}
//force culling to update to match the new front face.
// memcpy(r_refdef.m_view, vmat, sizeof(float)*16);
#if 0
if (depthmasklist)
{
/*draw already-drawn portals as depth-only, to ensure that their contents are not harmed*/
/*we can only do this AFTER the oblique perspective matrix is calculated, to avoid depth inconsistancies, while we still have the old view matrix*/
int i;
batch_t *dmask = NULL;
//portals to mask are relative to the old view still.
GLBE_SelectEntity(&r_worldentity);
currententity = NULL;
if (gl_config.arb_depth_clamp)
qglEnable(GL_DEPTH_CLAMP_ARB); //ignore the near clip plane(ish), this means nearer portals can still mask further ones.
GL_ForceDepthWritable();
GLBE_SelectMode(BEM_DEPTHONLY);
for (i = 0; i < 2; i++)
{
for (dmask = depthmasklist[i]; dmask; dmask = dmask->next)
{
if (dmask == batch)
continue;
if (dmask->meshes == dmask->firstmesh)
continue;
GLBE_SubmitBatch(dmask);
}
}
GLBE_SelectMode(BEM_STANDARD);
if (gl_config.arb_depth_clamp)
qglDisable(GL_DEPTH_CLAMP_ARB);
currententity = NULL;
}
#endif
currententity = NULL;
//now determine the stuff the backend will use.
memcpy(r_refdef.m_view, vmat, sizeof(float)*16);
VectorAngles(vpn, vup, r_refdef.viewangles, false);
VectorCopy(r_refdef.vieworg, r_origin);
//determine r_refdef.flipcull & SHADER_CULL_FLIP based upon whether right is right or not.
CrossProduct(vpn, vup, r);
if (DotProduct(r, vright) < 0)
r_refdef.flipcull |= SHADER_CULL_FLIP;
else
r_refdef.flipcull &= ~SHADER_CULL_FLIP;
if (r_refdef.m_projection_std[5]<0)
r_refdef.flipcull ^= SHADER_CULL_FLIP;
VKBE_SelectEntity(&r_worldentity);
Surf_SetupFrame();
Surf_DrawWorld();
//FIXME: just call Surf_DrawWorld instead?
// R_RenderScene();
#if 0
if (r_portaldrawplanes.ival)
{
//the front of the plane should generally point away from the camera, and will be drawn in bright green. woo
CL_DrawDebugPlane(plane.normal, plane.dist+0.01, 0.0, 0.5, 0.0, false);
CL_DrawDebugPlane(plane.normal, plane.dist-0.01, 0.0, 0.5, 0.0, false);
//the back of the plane points towards the camera, and will be drawn in blue, for the luls
VectorNegate(plane.normal, plane.normal);
plane.dist *= -1;
CL_DrawDebugPlane(plane.normal, plane.dist+0.01, 0.0, 0.0, 0.2, false);
CL_DrawDebugPlane(plane.normal, plane.dist-0.01, 0.0, 0.0, 0.2, false);
}
#endif
r_refdef = oldrefdef;
/*broken stuff*/
AngleVectors (r_refdef.viewangles, vpn, vright, vup);
VectorCopy (r_refdef.vieworg, r_origin);
VKBE_SelectEntity(&r_worldentity);
TRACE(("GLR_DrawPortal: portal drawn\n"));
currententity = NULL;
}
static void BE_SubmitMeshesPortals(batch_t **worldlist, batch_t *dynamiclist)
{
batch_t *batch, *old;
int i;
/*attempt to draw portal shaders*/
if (shaderstate.mode == BEM_STANDARD)
{
for (i = 0; i < 2; i++)
{
for (batch = i?dynamiclist:worldlist[SHADER_SORT_PORTAL]; batch; batch = batch->next)
{
if (batch->meshes == batch->firstmesh)
continue;
if (batch->buildmeshes)
batch->buildmeshes(batch);
/*draw already-drawn portals as depth-only, to ensure that their contents are not harmed*/
VKBE_SelectMode(BEM_DEPTHONLY);
for (old = worldlist[SHADER_SORT_PORTAL]; old && old != batch; old = old->next)
{
if (old->meshes == old->firstmesh)
continue;
VKBE_SubmitBatch(old);
}
if (!old)
{
for (old = dynamiclist; old != batch; old = old->next)
{
if (old->meshes == old->firstmesh)
continue;
VKBE_SubmitBatch(old);
}
}
VKBE_SelectMode(BEM_STANDARD);
R_DrawPortal(batch, worldlist, NULL, 0);
{
VkClearAttachment clr;
VkClearRect rect;
clr.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
clr.clearValue.depthStencil.depth = 1;
clr.clearValue.depthStencil.stencil = 0;
clr.colorAttachment = 1;
rect.rect.offset.x = r_refdef.pxrect.x;
rect.rect.offset.y = r_refdef.pxrect.y;
rect.rect.extent.width = r_refdef.pxrect.width;
rect.rect.extent.height = r_refdef.pxrect.height;
rect.layerCount = 1;
rect.baseArrayLayer = 0;
vkCmdClearAttachments(vk.rendertarg->cbuf, 1, &clr, 1, &rect);
}
VKBE_SelectMode(BEM_DEPTHONLY);
VKBE_SubmitBatch(batch);
VKBE_SelectMode(BEM_STANDARD);
}
}
}
}
void VKBE_SubmitMeshes (batch_t **worldbatches, batch_t **blist, int first, int stop)
{
int i;
for (i = first; i < stop; i++)
{
if (worldbatches)
{
if (i == SHADER_SORT_PORTAL && !r_refdef.recurse)
BE_SubmitMeshesPortals(worldbatches, blist[i]);
BE_SubmitMeshesSortList(worldbatches[i]);
}
BE_SubmitMeshesSortList(blist[i]);
}
}
#ifdef RTLIGHTS
//FIXME: needs context for threading
void VKBE_BaseEntTextures(qbyte *scenepvs)
{
batch_t *batches[SHADER_SORT_COUNT];
BE_GenModelBatches(batches, shaderstate.curdlight, shaderstate.mode, scenepvs);
VKBE_SubmitMeshes(NULL, batches, SHADER_SORT_PORTAL, SHADER_SORT_SEETHROUGH+1);
VKBE_SelectEntity(&r_worldentity);
}
struct vk_shadowbuffer
{
qboolean isstatic;
VkBuffer vbuffer;
VkDeviceSize voffset;
vk_poolmem_t vmemory;
unsigned int numverts;
VkBuffer ibuffer;
VkDeviceSize ioffset;
vk_poolmem_t imemory;
unsigned int numindicies;
};
//FIXME: needs context for threading
struct vk_shadowbuffer *VKBE_GenerateShadowBuffer(vecV_t *verts, int numverts, index_t *indicies, int numindicies, qboolean istemp)
{
static struct vk_shadowbuffer tempbuf;
if (!numverts || !numindicies)
return NULL;
if (istemp)
{
struct vk_shadowbuffer *buf = &tempbuf;
void *map;
map = VKBE_AllocateBufferSpace(DB_VBO, sizeof(*verts)*numverts, &buf->vbuffer, &buf->voffset);
memcpy(map, verts, sizeof(*verts)*numverts);
buf->numverts = numverts;
map = VKBE_AllocateBufferSpace(DB_EBO, sizeof(*indicies)*numindicies, &buf->ibuffer, &buf->ioffset);
memcpy(map, indicies, sizeof(*indicies)*numindicies);
buf->numindicies = numindicies;
return buf;
}
else
{
//FIXME: these buffers should really be some subsection of a larger buffer
struct vk_shadowbuffer *buf = BZ_Malloc(sizeof(*buf));
struct stagingbuf vbuf;
void *map;
buf->isstatic = true;
map = VKBE_CreateStagingBuffer(&vbuf, sizeof(*verts) * numverts, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
memcpy(map, verts, sizeof(*verts) * numverts);
buf->vbuffer = VKBE_FinishStaging(&vbuf, &buf->vmemory);
buf->voffset = 0;
buf->numverts = numverts;
map = VKBE_CreateStagingBuffer(&vbuf, sizeof(*indicies) * numindicies, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
memcpy(map, indicies, sizeof(*indicies) * numindicies);
buf->ibuffer = VKBE_FinishStaging(&vbuf, &buf->imemory);
buf->ioffset = 0;
buf->numindicies = numindicies;
return buf;
}
}
static void VKBE_DestroyShadowBuffer_Delayed(void *ctx)
{
struct vk_shadowbuffer *buf = ctx;
vkDestroyBuffer(vk.device, buf->vbuffer, vkallocationcb);
vkDestroyBuffer(vk.device, buf->ibuffer, vkallocationcb);
VK_ReleasePoolMemory(&buf->vmemory);
VK_ReleasePoolMemory(&buf->imemory);
}
void VKBE_DestroyShadowBuffer(struct vk_shadowbuffer *buf)
{
if (buf && buf->isstatic)
{
VK_AtFrameEnd(VKBE_DestroyShadowBuffer_Delayed, buf, sizeof(*buf));
Z_Free(buf);
}
}
//draws all depth-only surfaces from the perspective of the light.
//FIXME: needs context for threading
void VKBE_RenderShadowBuffer(struct vk_shadowbuffer *buf)
{
shader_t *depthonlyshader;
if (!buf)
return;
depthonlyshader = R_RegisterShader("depthonly", SUF_NONE,
"{\n"
"program depthonly\n"
"{\n"
"depthwrite\n"
"maskcolor\n"
"}\n"
"}\n"
);
vkCmdBindVertexBuffers(vk.rendertarg->cbuf, 0, 1, &buf->vbuffer, &buf->voffset);
vkCmdBindIndexBuffer(vk.rendertarg->cbuf, buf->ibuffer, buf->ioffset, VK_INDEX_TYPE);
if (BE_SetupMeshProgram(depthonlyshader->prog, depthonlyshader->passes, 0, buf->numindicies))
vkCmdDrawIndexed(vk.rendertarg->cbuf, buf->numindicies, 1, 0, 0, 0);
}
static void VK_TerminateShadowMap(void)
{
struct shadowmaps_s *shad;
unsigned int sbuf, i;
if (vk.shadow_renderpass != VK_NULL_HANDLE)
{
vkDestroyRenderPass(vk.device, vk.shadow_renderpass, vkallocationcb);
vk.shadow_renderpass = VK_NULL_HANDLE;
}
for (sbuf = 0; sbuf < countof(shaderstate.shadow); sbuf++)
{
shad = &shaderstate.shadow[sbuf];
if (!shad->image)
continue;
for (i = 0; i < countof(shad->buf); i++)
{
vkDestroyImageView(vk.device, shad->buf[i].vimage.view, vkallocationcb);
vkDestroySampler(vk.device, shad->buf[i].vimage.sampler, vkallocationcb);
vkDestroyFramebuffer(vk.device, shad->buf[i].framebuffer, vkallocationcb);
}
vkDestroyImage(vk.device, shad->image, vkallocationcb);
vkFreeMemory(vk.device, shad->memory, vkallocationcb);
shad->width = 0;
shad->height = 0;
}
}
qboolean VKBE_BeginShadowmap(qboolean isspot, uint32_t width, uint32_t height)
{
struct shadowmaps_s *shad = &shaderstate.shadow[isspot];
unsigned int sbuf;
// const qboolean altqueue = false;
// if (!altqueue)
vkCmdEndRenderPass(vk.rendertarg->cbuf);
if (shad->width != width || shad->height != height)
{
//actually, this will really only happen once per.
//so we can be lazy and not free here... check out validation/leak warnings if this changes...
unsigned int i;
VkFramebufferCreateInfo fbinfo = {VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO};
VkImageCreateInfo imginfo = {VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO};
imginfo.format = VK_FORMAT_D32_SFLOAT;
imginfo.flags = 0;
imginfo.imageType = VK_IMAGE_TYPE_2D;
imginfo.extent.width = width;
imginfo.extent.height = height;
imginfo.extent.depth = 1;
imginfo.mipLevels = 1;
imginfo.arrayLayers = countof(shad->buf);
imginfo.samples = VK_SAMPLE_COUNT_1_BIT;
imginfo.tiling = VK_IMAGE_TILING_OPTIMAL;
imginfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT|VK_IMAGE_USAGE_SAMPLED_BIT;
imginfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
imginfo.queueFamilyIndexCount = 0;
imginfo.pQueueFamilyIndices = NULL;
imginfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
VkAssert(vkCreateImage(vk.device, &imginfo, vkallocationcb, &shad->image));
{
VkMemoryRequirements mem_reqs;
VkMemoryAllocateInfo memAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
vkGetImageMemoryRequirements(vk.device, shad->image, &mem_reqs);
memAllocInfo.allocationSize = mem_reqs.size;
memAllocInfo.memoryTypeIndex = vk_find_memory_try(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
if (memAllocInfo.memoryTypeIndex == ~0)
memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, 0);
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &shad->memory));
VkAssert(vkBindImageMemory(vk.device, shad->image, shad->memory, 0));
}
if (vk.shadow_renderpass == VK_NULL_HANDLE)
{
VkAttachmentReference depth_reference;
VkAttachmentDescription attachments[1] = {{0}};
VkSubpassDescription subpass = {0};
VkRenderPassCreateInfo rp_info = {VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO};
depth_reference.attachment = 0;
depth_reference.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[depth_reference.attachment].format = imginfo.format;
attachments[depth_reference.attachment].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[depth_reference.attachment].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachments[depth_reference.attachment].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[depth_reference.attachment].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[depth_reference.attachment].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
attachments[depth_reference.attachment].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
attachments[depth_reference.attachment].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass.flags = 0;
subpass.inputAttachmentCount = 0;
subpass.pInputAttachments = NULL;
subpass.colorAttachmentCount = 0;
subpass.pColorAttachments = NULL;
subpass.pResolveAttachments = NULL;
subpass.pDepthStencilAttachment = &depth_reference;
subpass.preserveAttachmentCount = 0;
subpass.pPreserveAttachments = NULL;
rp_info.attachmentCount = countof(attachments);
rp_info.pAttachments = attachments;
rp_info.subpassCount = 1;
rp_info.pSubpasses = &subpass;
rp_info.dependencyCount = 0;
rp_info.pDependencies = NULL;
VkAssert(vkCreateRenderPass(vk.device, &rp_info, vkallocationcb, &vk.shadow_renderpass));
}
fbinfo.flags = 0;
fbinfo.renderPass = vk.shadow_renderpass;
fbinfo.attachmentCount = 1;
fbinfo.width = width;
fbinfo.height = height;
fbinfo.layers = 1;
for (i = 0; i < countof(shad->buf); i++)
{
VkImageViewCreateInfo ivci = {VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO};
ivci.format = imginfo.format;
ivci.components.r = VK_COMPONENT_SWIZZLE_R;
ivci.components.g = VK_COMPONENT_SWIZZLE_G;
ivci.components.b = VK_COMPONENT_SWIZZLE_B;
ivci.components.a = VK_COMPONENT_SWIZZLE_A;
ivci.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
ivci.subresourceRange.baseMipLevel = 0;
ivci.subresourceRange.levelCount = 1;
ivci.subresourceRange.baseArrayLayer = i;
ivci.subresourceRange.layerCount = 1;
ivci.viewType = VK_IMAGE_VIEW_TYPE_2D;
ivci.flags = 0;
ivci.image = shad->image;
shad->buf[i].vimage.image = shad->image;
VkAssert(vkCreateImageView(vk.device, &ivci, vkallocationcb, &shad->buf[i].vimage.view));
{
VkSamplerCreateInfo lmsampinfo = {VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO};
lmsampinfo.minFilter = lmsampinfo.magFilter = VK_FILTER_LINEAR;
lmsampinfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
lmsampinfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
lmsampinfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
lmsampinfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
lmsampinfo.mipLodBias = 0.0;
lmsampinfo.anisotropyEnable = VK_FALSE;
lmsampinfo.maxAnisotropy = 1.0;
lmsampinfo.compareEnable = VK_TRUE;
lmsampinfo.compareOp = VK_COMPARE_OP_LESS_OR_EQUAL;
lmsampinfo.minLod = 0;
lmsampinfo.maxLod = 0;
lmsampinfo.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
lmsampinfo.unnormalizedCoordinates = VK_FALSE;
VkAssert(vkCreateSampler(vk.device, &lmsampinfo, NULL, &shad->buf[i].vimage.sampler));
}
shad->buf[i].qimage.vkimage = &shad->buf[i].vimage;
shad->buf[i].vimage.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
fbinfo.pAttachments = &shad->buf[i].vimage.view;
VkAssert(vkCreateFramebuffer(vk.device, &fbinfo, vkallocationcb, &shad->buf[i].framebuffer));
}
shad->width = width;
shad->height = height;
}
sbuf = shad->seq++%countof(shad->buf);
shaderstate.currentshadowmap = &shad->buf[sbuf].qimage;
{
VkImageMemoryBarrier imgbarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
imgbarrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
imgbarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
imgbarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; //we don't actually care because we'll be clearing it anyway, making this more of a no-op than anything else.
imgbarrier.newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
imgbarrier.image = shad->buf[sbuf].vimage.image;
imgbarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
imgbarrier.subresourceRange.baseMipLevel = 0;
imgbarrier.subresourceRange.levelCount = 1;
imgbarrier.subresourceRange.baseArrayLayer = sbuf;
imgbarrier.subresourceRange.layerCount = 1;
imgbarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imgbarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
vkCmdPipelineBarrier(vk.rendertarg->cbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, NULL, 0, NULL, 1, &imgbarrier);
}
{
VkClearValue clearval;
VkRenderPassBeginInfo rpass = {VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO};
clearval.depthStencil.depth = 1;
clearval.depthStencil.stencil = 0;
rpass.renderPass = vk.shadow_renderpass;
rpass.framebuffer = shad->buf[sbuf].framebuffer;
rpass.renderArea.offset.x = 0;
rpass.renderArea.offset.y = 0;
rpass.renderArea.extent.width = width;
rpass.renderArea.extent.height = height;
rpass.clearValueCount = 1;
rpass.pClearValues = &clearval;
vkCmdBeginRenderPass(vk.rendertarg->cbuf, &rpass, VK_SUBPASS_CONTENTS_INLINE);
}
//viewport+scissor will be done elsewhere
//that wasn't too painful, was it?...
return true;
}
void VKBE_DoneShadows(void)
{
// struct shadowmaps_s *shad = &shaderstate.shadow[isspot];
VkViewport viewport;
// const qboolean altqueue = false;
//we've rendered the shadowmap, but now we need to blit it to the screen
//so set stuff back to the main view. FIXME: do these in batches to ease the load on tilers.
vkCmdEndRenderPass(vk.rendertarg->cbuf);
/*if (altqueue)
{
vkCmdSetEvent(alt, shadowcompleteevent);
VKBE_FlushDynamicBuffers();
VK_Submit_Work();
vkCmdWaitEvents(main, 1, &shadowcompleteevent, barrierstuff);
vkCmdResetEvent(main, shadowcompleteevent);
}
else*/
{
/*
set_image_layout(vk.frame->cbuf, shad->image, VK_IMAGE_ASPECT_DEPTH_BIT, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_READ_BIT);
{
VkImageMemoryBarrier imgbarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER};
imgbarrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
imgbarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
imgbarrier.oldLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
imgbarrier.newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL;
imgbarrier.image = image;
imgbarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
imgbarrier.subresourceRange.baseMipLevel = 0;
imgbarrier.subresourceRange.levelCount = 1;
imgbarrier.subresourceRange.baseArrayLayer = 0;
imgbarrier.subresourceRange.layerCount = 1;
imgbarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
imgbarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, NULL, 0, NULL, 1, &imgbarrier);
}
*/
vkCmdBeginRenderPass(vk.rendertarg->cbuf, &vk.rendertarg->restartinfo, VK_SUBPASS_CONTENTS_INLINE);
viewport.x = r_refdef.pxrect.x;
viewport.y = r_refdef.pxrect.y;//r_refdef.pxrect.maxheight - (r_refdef.pxrect.y+r_refdef.pxrect.height); //silly GL...
viewport.width = r_refdef.pxrect.width;
viewport.height = r_refdef.pxrect.height;
viewport.minDepth = 0;
viewport.maxDepth = 1;
vkCmdSetViewport(vk.rendertarg->cbuf, 0, 1, &viewport);
}
VKBE_SelectEntity(&r_worldentity);
}
void VKBE_SetupForShadowMap(dlight_t *dl, int texwidth, int texheight, float shadowscale)
{
#define SHADOWMAP_SIZE 512
extern cvar_t r_shadow_shadowmapping_nearclip, r_shadow_shadowmapping_bias;
float nc = dl->nearclip?dl->nearclip:r_shadow_shadowmapping_nearclip.value;
float bias = r_shadow_shadowmapping_bias.value;
//much of the projection matrix cancels out due to symmetry and stuff
//we need to scale between -0.5,0.5 within the sub-image. the fragment shader will center on the subimage based upon the major axis.
//in d3d, the depth value is scaled between 0 and 1 (gl is -1 to 1).
//d3d's framebuffer is upside down or something annoying like that.
shaderstate.lightshadowmapproj[0] = shadowscale * (1.0-(1.0/texwidth)) * 0.5/3.0; //pinch x inwards
shaderstate.lightshadowmapproj[1] = -shadowscale * (1.0-(1.0/texheight)) * 0.5/2.0; //pinch y inwards
shaderstate.lightshadowmapproj[2] = 0.5*(dl->radius+nc)/(nc-dl->radius); //proj matrix 10
shaderstate.lightshadowmapproj[3] = (dl->radius*nc)/(nc-dl->radius) - bias*nc*(1024/texheight); //proj matrix 14
shaderstate.lightshadowmapscale[0] = 1.0/(SHADOWMAP_SIZE*3);
shaderstate.lightshadowmapscale[1] = -1.0/(SHADOWMAP_SIZE*2);
}
//FIXME: needs context for threading
void VKBE_BeginShadowmapFace(void)
{
VkRect2D wrekt;
VkViewport viewport;
viewport.x = r_refdef.pxrect.x;
viewport.y = r_refdef.pxrect.maxheight - (r_refdef.pxrect.y+r_refdef.pxrect.height); //silly GL...
viewport.width = r_refdef.pxrect.width;
viewport.height = r_refdef.pxrect.height;
viewport.minDepth = 0;
viewport.maxDepth = 1;
vkCmdSetViewport(vk.rendertarg->cbuf, 0, 1, &viewport);
wrekt.offset.x = viewport.x;
wrekt.offset.y = viewport.y;
wrekt.extent.width = viewport.width;
wrekt.extent.height = viewport.height;
vkCmdSetScissor(vk.rendertarg->cbuf, 0, 1, &wrekt);
}
#endif
void VKBE_DrawWorld (batch_t **worldbatches)
{
batch_t *batches[SHADER_SORT_COUNT];
RSpeedLocals();
shaderstate.curentity = NULL;
{
VkViewport viewport;
viewport.x = r_refdef.pxrect.x;
viewport.y = r_refdef.pxrect.y;
viewport.width = r_refdef.pxrect.width;
viewport.height = r_refdef.pxrect.height;
viewport.minDepth = 0;
viewport.maxDepth = 1;
vkCmdSetViewport(vk.rendertarg->cbuf, 0, 1, &viewport);
}
if (!r_refdef.recurse)
{
if (shaderstate.wbatch > shaderstate.maxwbatches)
{
int newm = shaderstate.wbatch;
Z_Free(shaderstate.wbatches);
shaderstate.wbatches = Z_Malloc(newm * sizeof(*shaderstate.wbatches));
memset(shaderstate.wbatches + shaderstate.maxwbatches, 0, (newm - shaderstate.maxwbatches) * sizeof(*shaderstate.wbatches));
shaderstate.maxwbatches = newm;
}
shaderstate.wbatch = 0;
}
RSpeedRemark();
shaderstate.curdlight = NULL;
//fixme: figure out some way to safely orphan this data so that we can throw the rest to a worker.
BE_GenModelBatches(batches, shaderstate.curdlight, BEM_STANDARD, r_refdef.scenevis);
BE_UploadLightmaps(false);
if (r_refdef.scenevis)
{
//make sure the world draws correctly
r_worldentity.shaderRGBAf[0] = 1;
r_worldentity.shaderRGBAf[1] = 1;
r_worldentity.shaderRGBAf[2] = 1;
r_worldentity.shaderRGBAf[3] = 1;
r_worldentity.axis[0][0] = 1;
r_worldentity.axis[1][1] = 1;
r_worldentity.axis[2][2] = 1;
#ifdef RTLIGHTS
if (r_refdef.scenevis && r_shadow_realtime_world.ival)
shaderstate.identitylighting = r_shadow_realtime_world_lightmaps.value;
else
#endif
shaderstate.identitylighting = r_lightmap_scale.value;
shaderstate.identitylighting *= r_refdef.hdr_value;
shaderstate.identitylightmap = shaderstate.identitylighting / (1<<gl_overbright.ival);
if (r_lightprepass)
{
//set up render target for gbuffer
//draw opaque gbuffers
//switch render targets to lighting (renderpasses?)
//draw lpp lights
//revert to screen
//draw opaques again.
}
else
{
VKBE_SelectMode(BEM_STANDARD);
VKBE_SubmitMeshes(worldbatches, batches, SHADER_SORT_PORTAL, SHADER_SORT_SEETHROUGH+1);
RSpeedEnd(RSPEED_OPAQUE);
#ifdef RTLIGHTS
RSpeedRemark();
VKBE_SelectEntity(&r_worldentity);
Sh_DrawLights(r_refdef.scenevis);
RSpeedEnd(RSPEED_RTLIGHTS);
#endif
}
RSpeedRemark();
VKBE_SubmitMeshes(worldbatches, batches, SHADER_SORT_SEETHROUGH+1, SHADER_SORT_COUNT);
RSpeedEnd(RSPEED_TRANSPARENTS);
if (r_wireframe.ival)
{
VKBE_SelectMode(BEM_WIREFRAME);
VKBE_SubmitMeshes(worldbatches, batches, SHADER_SORT_PORTAL, SHADER_SORT_NEAREST);
VKBE_SelectMode(BEM_STANDARD);
}
}
else
{
shaderstate.identitylighting = 1;
shaderstate.identitylightmap = 1;
VKBE_SubmitMeshes(NULL, batches, SHADER_SORT_PORTAL, SHADER_SORT_COUNT);
RSpeedEnd(RSPEED_TRANSPARENTS);
}
R_RenderDlights ();
if (r_refdef.recurse)
RQ_RenderBatch();
else
RQ_RenderBatchClear();
shaderstate.identitylighting = 1;
BE_RotateForEntity(&r_worldentity, NULL);
}
void VKBE_VBO_Begin(vbobctx_t *ctx, size_t maxsize)
{
struct stagingbuf *n = Z_Malloc(sizeof(*n));
ctx->vboptr[0] = n;
ctx->maxsize = maxsize;
ctx->pos = 0;
ctx->fallback = VKBE_CreateStagingBuffer(n, maxsize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
//preallocate the target buffer, so we can prematurely refer to it.
{
VkBufferCreateInfo bufinf = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
bufinf.flags = 0;
bufinf.size = n->size;
bufinf.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT;
bufinf.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
bufinf.queueFamilyIndexCount = 0;
bufinf.pQueueFamilyIndices = NULL;
vkCreateBuffer(vk.device, &bufinf, vkallocationcb, &n->retbuf);
}
}
void VKBE_VBO_Data(vbobctx_t *ctx, void *data, size_t size, vboarray_t *varray)
{
struct stagingbuf *n = ctx->vboptr[0];
varray->vk.offs = ctx->pos;
varray->vk.buff = n->retbuf;
ctx->pos += size;
memcpy((char*)ctx->fallback + varray->vk.offs, data, size);
}
void VKBE_VBO_Finish(vbobctx_t *ctx, void *edata, size_t esize, vboarray_t *earray, void **vbomem, void **ebomem)
{
struct stagingbuf *n;
struct stagingbuf ebo;
vk_poolmem_t *poolmem;
index_t *map = VKBE_CreateStagingBuffer(&ebo, esize, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
memcpy(map, edata, esize);
*ebomem = poolmem = Z_Malloc(sizeof(*poolmem));
earray->vk.buff = VKBE_FinishStaging(&ebo, poolmem);
earray->vk.offs = 0;
if (ctx)
{
n = ctx->vboptr[0];
*vbomem = poolmem = Z_Malloc(sizeof(*poolmem));
/*buffer was pre-created*/VKBE_FinishStaging(n, poolmem);
Z_Free(n);
}
}
void VKBE_VBO_Destroy(vboarray_t *vearray, void *mem)
{
vk_poolmem_t *poolmem = mem;
struct fencedbufferwork *fence;
if (!vearray->vk.buff)
return; //not actually allocated...
fence = VK_AtFrameEnd(VKBE_DoneBufferStaging, NULL, sizeof(*fence));
fence->buf = vearray->vk.buff;
fence->mem = *poolmem;
Z_Free(poolmem);
}
void VKBE_Scissor(srect_t *rect)
{
VkRect2D wrekt;
if (rect)
{
wrekt.offset.x = rect->x * vid.fbpwidth;
wrekt.offset.y = (1 - (rect->height + rect->y))*vid.fbpheight; //our api was made for gl. :(
wrekt.extent.width = rect->width * vid.fbpwidth;
wrekt.extent.height = rect->height * vid.fbpheight;
if (wrekt.offset.x+wrekt.extent.width > vid.fbpwidth)
wrekt.extent.width = vid.fbpwidth - wrekt.offset.x;
if (wrekt.offset.y+wrekt.extent.height > vid.fbpheight)
wrekt.extent.height = vid.fbpheight - wrekt.offset.y;
if (wrekt.offset.x < 0)
{
wrekt.extent.width += wrekt.offset.x;
wrekt.offset.x = 0;
}
if (wrekt.offset.y < 0)
{
wrekt.extent.height += wrekt.offset.x;
wrekt.offset.y = 0;
}
}
else
{
wrekt.offset.x = 0;
wrekt.offset.y = 0;
wrekt.extent.width = vid.fbpwidth;
wrekt.extent.height = vid.fbpheight;
}
vkCmdSetScissor(vk.rendertarg->cbuf, 0, 1, &wrekt);
}
#endif