1
0
Fork 0
forked from fte/fteqw

Add vk_khr_ray_query support as an option instead of using shadowmaps.

Currently limited to world-only shadows, and inefficient to compute.
This commit is contained in:
Shpoike 2024-04-13 22:13:06 +01:00
parent 1a446879d8
commit f61f98fe6f
24 changed files with 6649 additions and 7264 deletions

View file

@ -785,7 +785,7 @@ static void MenuDrawItems(int xpos, int ypos, menuoption_t *option, emenu_t *men
if (option->check.text)
{
Draw_FunStringWidth(x, y, option->check.text, option->check.textwidth, true, !menu->cursoritem && menu->selecteditem == option);
Draw_FunStringWidth(x, y, option->check.text, option->check.textwidth, true, (!menu->cursoritem && menu->selecteditem == option) | ((option->check.var && (option->check.var->flags&CVAR_RENDEREROVERRIDE))?4:0));
x += option->check.textwidth + 3*8;
}
#if 0

View file

@ -1613,7 +1613,7 @@ void M_Menu_FPS_f (void)
emenu_t *menu;
// fpsmenuinfo_t *info;
extern cvar_t v_contentblend, show_fps, cl_r2g, cl_gibfilter, cl_expsprite, cl_deadbodyfilter, cl_lerp_players, cl_nolerp, cl_maxfps, cl_yieldcpu;
extern cvar_t v_contentblend, show_fps, cl_r2g, cl_gibfilter, cl_expsprite, cl_deadbodyfilter, cl_lerp_players, cl_nolerp, cl_maxfps, cl_yieldcpu, r_halfrate;
static menuresel_t resel;
int y;
menu = M_Options_Title(&y, 0);
@ -1633,6 +1633,7 @@ void M_Menu_FPS_f (void)
MB_COMBOCVAR("Show FPS", show_fps, fpsopts, fpsvalues, "Display FPS or frame millisecond values on screen. Settings except immediate are for values across 1 second."),
MB_EDITCVARSLIM("Framerate Limiter", cl_maxfps.name, "Limits the maximum framerate. Set to 0 for none."),
MB_CHECKBOXCVARTIP("Yield CPU", cl_yieldcpu, 1, "Reduce CPU usage between frames.\nShould probably be off when using vsync."),
MB_CHECKBOXCVARTIP("Half-Rate Shading", r_halfrate, 0, "Reduce the number of shader invocations to save gpu time (doesn't harm edges)."),
MB_COMBOCVAR("Player lerping", cl_lerp_players, playerlerpopts, values_0_1, "Smooth movement of other players, but will increase effective latency. Does not affect all network protocols."),
MB_COMBOCVAR("Entity lerping", cl_nolerp, entlerpopts, values_0_1_2, "Smooth movement of entities, but will increase effective latency."),
MB_CHECKBOXCVAR("Content Blend", v_contentblend, 0),
@ -2089,6 +2090,9 @@ void M_Menu_Lighting_f (void)
MB_COMBORETURN("Lighting Mode", lightingopts, lightselect, info->lightcombo, "Selects method used for world lighting. Realtime lighting requires appropriate realtime lighting files for maps."),
MB_COMBORETURN("Dynamic Lighting Mode", dlightopts, dlightselect, info->dlightcombo, "Selects method used for dynamic lighting such as explosion lights and muzzle flashes."),
#ifdef RTLIGHTS
#ifdef VKQUAKE
MB_CHECKBOXCVARTIP("Raytrace Shadows", r_shadow_raytrace, 0, "Enables raytraced shadows when supported by hardware+drivers. Consider combining with half-rate shading."),
#endif
MB_CHECKBOXCVARTIP("Soft Shadows", r_shadow_shadowmapping, 0, "Enables softer shadows instead of course-edged pixelated shadows."),
MB_CMD("Apply Lighting", M_VideoApplyShadowLighting, "Applies set lighting modes and restarts video."),
MB_SPACING(4),
@ -3030,7 +3034,7 @@ void M_Menu_Video_f (void)
"OpenGL",
#endif
#ifdef VKQUAKE
"Vulkan (Experimental)",
"Vulkan",
#endif
#ifdef D3D8QUAKE
"Direct3D 8 (limited)",

View file

@ -674,7 +674,9 @@ extern cvar_t r_shadow_realtime_dlight_diffuse;
extern cvar_t r_shadow_realtime_dlight_specular;
extern cvar_t r_shadow_realtime_world, r_shadow_realtime_world_shadows, r_shadow_realtime_world_lightmaps, r_shadow_realtime_world_importlightentitiesfrommap;
extern float r_shadow_realtime_world_lightmaps_force;
extern cvar_t r_shadow_raytrace;
extern cvar_t r_shadow_shadowmapping;
extern cvar_t r_halfrate;
extern cvar_t r_mirroralpha;
extern cvar_t r_wateralpha;
extern cvar_t r_lavaalpha;

View file

@ -1506,6 +1506,8 @@ qboolean R_ApplyRenderer_Load (rendererstate_t *newr)
vid.dpi_x = 96;
vid.dpi_y = 96;
Cvar_ApplyLatches(CVAR_RENDEREROVERRIDE, true);
#ifndef CLIENTONLY
sv.world.lastcheckpvs = NULL;
#endif
@ -2355,6 +2357,8 @@ static void R_EnumeratedRenderer(void *ctx, const char *devname, const char *out
rendererinfo_t *r = ctx;
char quoteddesc[1024];
qboolean iscurrent = (currentrendererstate.renderer == r && (!*devname || !strcmp(devname, currentrendererstate.subrenderer)));
const char *dev;
if (*outputname)
dev = va("%s %s %s", r->name[0], devname, outputname);
@ -2368,14 +2372,14 @@ static void R_EnumeratedRenderer(void *ctx, const char *devname, const char *out
Con_Printf("^[%s (%s, %s)\\type\\/setrenderer %s^]^7: %s%s\n",
r->name[0], devname, outputname, //link text
dev, //link itself.
desc, (currentrendererstate.renderer == r)?" ^2(current)":"");
desc, iscurrent?" ^2(current)":"");
else if (*devname)
Con_Printf("^[%s (%s)\\type\\/setrenderer %s^]^7: %s%s\n",
r->name[0], devname, //link text
dev, //link itself.
desc, (currentrendererstate.renderer == r)?" ^2(current)":"");
desc, iscurrent?" ^2(current)":"");
else
Con_Printf("^[%s\\type\\/setrenderer %s^]^7: %s%s\n", r->name[0], dev, r->description, (currentrendererstate.renderer == r)?" ^2(current)":"");
Con_Printf("^[%s\\type\\/setrenderer %s^]^7: %s%s\n", r->name[0], dev, r->description, iscurrent?" ^2(current)":"");
}
void R_SetRenderer_f (void)

View file

@ -233,7 +233,10 @@ void Draw_FunStringWidthFont(struct font_s *font, float x, float y, const void *
//be generous and round up, to avoid too many issues with truncations
width = ceil((width*(float)vid.rotpixelwidth)/vid.width);
codeflags = (highlight&1)?CON_ALTMASK:CON_WHITEMASK;
if (highlight&4)
codeflags = COLOR_GREY<<CON_FGSHIFT;
else
codeflags = (highlight&1)?CON_ALTMASK:CON_WHITEMASK;
if (highlight&2)
codeflags |= CON_BLINKTEXT;
COM_ParseFunString(codeflags, str, buffer, sizeof(buffer), false);

View file

@ -247,6 +247,8 @@ static char *Cvar_FlagToName(int flag)
return "";
case CVAR_NORESET:
return "noreset";
case CVAR_RENDEREROVERRIDE:
return "rendereroverride";
}
return NULL;
@ -929,6 +931,8 @@ static cvar_t *Cvar_SetCore (cvar_t *var, const char *value, qboolean force)
latch = "variable %s will be changed after a vid_restart\n";
else if (var->flags & CVAR_RENDERERLATCH && qrenderer != QR_NONE)
latch = "variable %s will be changed after a vid_reload\n";
else if (var->flags & CVAR_RENDEREROVERRIDE && qrenderer != QR_NONE)
latch = "variable %s is not supported by the current renderer/gpu/drivers\n";
#endif
else if (var->flags & CVAR_RULESETLATCH)
latch = "variable %s is latched due to current ruleset\n";
@ -1463,6 +1467,7 @@ void Cvar_LockFromServer(cvar_t *var, const char *str)
Con_DPrintf("Server taking control of cvar %s (%s)\n", var->name, str);
var->flags |= CVAR_SERVEROVERRIDE;
}
var->flags &= ~CVAR_RENDEREROVERRIDE;
oldlatch = var->latched_string;
if (oldlatch) //maintaining control
@ -1477,6 +1482,20 @@ void Cvar_LockFromServer(cvar_t *var, const char *str)
var->latched_string = oldlatch; //keep track of the original value.
}
//not every renderer can honour every cvar.
//forces 'var' to the value 'str', marks it with CVAR_RENDEREROVERRIDE, and remembers the value it held beforehand in var->latched_string.
//does nothing at all when the cvar already carries a latched string.
void Cvar_LockUnsupportedRendererCvar(cvar_t *var, const char *str)
{
	char *previous;

	if (var->latched_string)
		return;	//something is latched already, so leave the cvar exactly as it is.

	//or-ing the bit in is harmless when it is already set.
	var->flags |= CVAR_RENDEREROVERRIDE;

	//take the copy of the current string before the forced set below.
	previous = (char*)Z_StrDup(var->string);
	Cvar_SetCore (var, str, true);
	var->latched_string = previous;	//the original value stays reachable through the latch.
}
/*
============

View file

@ -141,6 +141,7 @@ typedef struct cvar_group_s
#define CVAR_WATCHED (1<<22) //report any attempts to change this cvar.
#define CVAR_VIDEOLATCH (1<<23)
#define CVAR_WARNONCHANGE (1<<24) //print a warning when changed to a value other than its default.
#define CVAR_RENDEREROVERRIDE (1<<25) //the renderer has forced the cvar to indicate that only that value is supported
//NOTE(review): CVAR_LASTFLAG still aliases bit 23 even though bits 24 and 25 are defined just above.
//confirm whether the code that uses CVAR_LASTFLAG is meant to see those two flags as well.
#define CVAR_LASTFLAG CVAR_VIDEOLATCH
@ -154,6 +155,7 @@ cvar_t *Cvar_Get2 (const char *var_name, const char *value, int flags, const cha
#define Cvar_Get(n,v,f,g) Cvar_Get2(n,v,f,NULL,g)
void Cvar_LockFromServer(cvar_t *var, const char *str);
void Cvar_LockUnsupportedRendererCvar(cvar_t *var, const char *str);
qboolean Cvar_Register (cvar_t *variable, const char *cvargroup);
// registers a cvar that already has the name, string, and optionally the

View file

@ -1949,7 +1949,7 @@ void QCBUILTIN PF_cvar_type (pubprogfuncs_t *prinst, struct globalvars_s *pr_glo
ret |= CVAR_TYPEFLAG_ENGINE;
if (v->description)
ret |= CVAR_TYPEFLAG_HASDESCRIPTION;
if (v->flags & CVAR_NOSET)
if (v->flags & (CVAR_NOSET|CVAR_RENDEREROVERRIDE))
ret |= CVAR_TYPEFLAG_READONLY;
}
G_FLOAT(OFS_RETURN) = ret;

View file

@ -88,8 +88,8 @@ static struct {
// int vbo_texcoords[SHADER_PASS_MAX];
// int vbo_deforms; //holds verticies... in case you didn't realise.
const shader_t *shader_light[1u<<LSHADER_MODES];
qboolean inited_shader_light[1u<<LSHADER_MODES];
const shader_t *shader_light[LSHADER_MODES];
qboolean inited_shader_light[LSHADER_MODES];
const shader_t *crepskyshader;
const shader_t *crepopaqueshader;

View file

@ -24,6 +24,9 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include "quakedef.h"
#ifndef SERVERONLY
#include "glquake.h"
#ifdef VKQUAKE
#include "../vk/vkrenderer.h"
#endif
#include "shader.h"
#include "hash.h"
@ -46,7 +49,7 @@ cvar_t r_forceprogramify = CVARAFD("r_forceprogramify", "0", "dpcompat_makeshitu
cvar_t dpcompat_nopremulpics = CVARFD("dpcompat_nopremulpics", "0", CVAR_SHADERSYSTEM, "By default FTE uses premultiplied alpha for hud/2d images, while DP does not (which results in halos with low-res content). Unfortunately DDS files would need to be recompressed, resulting in visible issues.");
#endif
cvar_t r_glsl_precache = CVARFD("r_glsl_precache", "0", CVAR_SHADERSYSTEM, "Force all relevant glsl permutations to load upfront.");
cvar_t r_halfrate = CVARFD("r_halfrate", "0", CVAR_SHADERSYSTEM, "Use half-rate shading (where supported by gpu).");
cvar_t r_halfrate = CVARFD("r_halfrate", "0", CVAR_ARCHIVE|CVAR_SHADERSYSTEM, "Use half-rate shading (where supported by gpu).");
extern cvar_t r_glsl_offsetmapping_reliefmapping;
extern cvar_t r_drawflat;
@ -1569,19 +1572,18 @@ static qboolean Shader_LoadPermutations(char *name, program_t *prog, char *scrip
size_t offset = 0;
#endif
#ifdef VKQUAKE
if (qrenderer == QR_VULKAN && (qrtype == QR_VULKAN || qrtype == QR_OPENGL))
{ //vulkan can potentially load glsl, f it has the extensions enabled.
if (qrtype == QR_VULKAN && VK_LoadBlob(prog, script, name))
return true;
}
else
#endif
if (qrenderer != qrtype)
{
return false;
}
#ifdef VKQUAKE
if (qrenderer == QR_VULKAN && qrtype == QR_VULKAN)
{ //vulkan 'scripts' are just blobs. could maybe base64 the spirv but eww.
return VK_LoadBlob(prog, script, name);
}
#endif
#if defined(GLQUAKE) || defined(D3DQUAKE)
ver = 0;
@ -1597,6 +1599,7 @@ static qboolean Shader_LoadPermutations(char *name, program_t *prog, char *scrip
prog->name = Z_StrDup(name);
prog->geom = false;
prog->tess = false;
prog->rayquery = false;
prog->calcgens = false;
prog->numsamplers = 0;
prog->defaulttextures = 0;
@ -2852,11 +2855,17 @@ static void Shader_BEMode(parsestate_t *ps, const char **ptr)
//shorthand for rtlights
for (mode = 0; mode < LSHADER_MODES; mode++)
{
if ((mode & LSHADER_RAYQUERY) && !r_shadow_raytrace.ival)
continue; //no. just no.
if ((mode & LSHADER_SMAP) && r_shadow_raytrace.ival)
continue; //don't waste time.
if ((mode & LSHADER_CUBE) && (mode & (LSHADER_SPOT|LSHADER_ORTHO)))
continue; //cube projections don't make sense when the light isn't projecting a cube
if ((mode & LSHADER_ORTHO) && (mode & LSHADER_SPOT))
continue; //ortho+spot are mutually exclusive.
Q_snprintfz(subname, sizeof(subname), "%s%s%s%s%s%s", tokencopy,
Q_snprintfz(subname, sizeof(subname), "%s%s%s%s%s%s%s",
(mode & LSHADER_RAYQUERY)?"rq_":"",
tokencopy,
(mode & LSHADER_SMAP)?"#PCF":"",
(mode & LSHADER_SPOT)?"#SPOT":"",
(mode & LSHADER_CUBE)?"#CUBE":"",

View file

@ -65,6 +65,7 @@ cvar_t r_shadow_realtime_dlight_ambient = CVAR ("r_shadow_realtime_dlight_ambie
cvar_t r_shadow_realtime_dlight_diffuse = CVAR ("r_shadow_realtime_dlight_diffuse", "1");
cvar_t r_shadow_realtime_dlight_specular = CVAR ("r_shadow_realtime_dlight_specular", "4"); //excessive, but noticable. its called stylized, okay? shiesh, some people
cvar_t r_shadow_playershadows = CVARD ("r_shadow_playershadows", "1", "Controls the presence of shadows on the local player.");
cvar_t r_shadow_raytrace = CVARFD ("r_shadow_raytrace", "0", CVAR_ARCHIVE, "Enables use of hardware raytracing for shadows. Consider also using with r_halfrate.");
cvar_t r_shadow_shadowmapping = CVARFD ("r_shadow_shadowmapping", "1", CVAR_ARCHIVE, "Enables soft shadows instead of stencil shadows.");
cvar_t r_shadow_shadowmapping_precision = CVARD ("r_shadow_shadowmapping_precision", "1", "Scales the shadowmap detail level up or down.");
static cvar_t r_shadow_shadowmapping_depthbits = CVARD ("r_shadow_shadowmapping_depthbits", "16", "Shadowmap depth bits. 16, 24, or 32.");
@ -3628,7 +3629,7 @@ qboolean Sh_CullLight(dlight_t *dl, qbyte *vvis)
return false; //please draw this...
}
static void Sh_DrawShadowlessLight(dlight_t *dl, vec3_t colour, vec3_t axis[3], qbyte *vvis)
static void Sh_DrawShadowlessLight(dlight_t *dl, vec3_t colour, vec3_t axis[3], qbyte *vvis, unsigned int lshaderflags)
{
vec3_t mins, maxs;
srect_t rect;
@ -3691,7 +3692,9 @@ static void Sh_DrawShadowlessLight(dlight_t *dl, vec3_t colour, vec3_t axis[3],
RQuantAdd(RQUANT_RTLIGHT_DRAWN, 1);
BE_SelectDLight(dl, colour, axis, dl->fov?LSHADER_SPOT:LSHADER_STANDARD);
if (dl->fov)
lshaderflags |= LSHADER_SPOT;
BE_SelectDLight(dl, colour, axis, lshaderflags);
BE_SelectMode(BEM_LIGHT);
Sh_DrawEntLighting(dl, colour, vvis);
}
@ -3874,6 +3877,8 @@ void Sh_PreGenerateLights(void)
if (((!dl->die)?!r_shadow_realtime_world_shadows.ival:!r_shadow_realtime_dlight_shadows.ival) || (dl->flags & LFLAG_NOSHADOWS))
shadowtype = SMT_SHADOWLESS;
else if (r_shadow_raytrace.ival)
shadowtype = SMT_SHADOWLESS; //shadows are done via acceleration structures set up by the backend. don't need to worry about them here, they're effectively shadowless.
else if (dl->flags & LFLAG_SHADOWMAP || r_shadow_shadowmapping.ival)
shadowtype = SMT_SHADOWMAP;
else
@ -3917,7 +3922,8 @@ void Com_ParseVector(char *str, vec3_t out)
void Sh_CheckSettings(void)
{
extern cvar_t r_shadows;
qboolean canstencil = false, cansmap = false, canshadowless = false;
qboolean canstencil = false, cansmap = false, canshadowless = false, canraytrace = false;
r_shadow_raytrace.ival = r_shadow_raytrace.value;
r_shadow_shadowmapping.ival = r_shadow_shadowmapping.value;
r_shadow_realtime_world.ival = r_shadow_realtime_world.value;
r_shadow_realtime_dlight.ival = r_shadow_realtime_dlight.value;
@ -3930,6 +3936,7 @@ void Sh_CheckSettings(void)
case QR_VULKAN:
canshadowless = true;
cansmap = vk.multisamplebits==VK_SAMPLE_COUNT_1_BIT; //FIXME - we need to render shadowmaps without needing to restart the current scene.
canraytrace = vk.khr_ray_query;
canstencil = false;
break;
#endif
@ -3982,28 +3989,35 @@ void Sh_CheckSettings(void)
Con_Printf("Missing rendering features: realtime %s lighting is not possible.\n", r_shadow_realtime_world.ival?"world":"dynamic");
r_shadow_realtime_world.ival = 0;
r_shadow_realtime_dlight.ival = 0;
r_shadow_raytrace.ival = 0;
}
else if (!canstencil && !cansmap)
{
//no shadow methods available at all.
if ((r_shadow_realtime_world.ival&&r_shadow_realtime_world_shadows.ival)||(r_shadow_realtime_dlight.ival&&r_shadow_realtime_dlight_shadows.ival))
Con_Printf("Missing rendering features: realtime shadows are not possible.\n");
r_shadow_realtime_world_shadows.ival = 0;
r_shadow_realtime_dlight_shadows.ival = 0;
}
else if (!canstencil || !cansmap)
{
//only one shadow method
if (!!r_shadow_shadowmapping.ival != cansmap)
if (canraytrace) //just silently force raytrace on if we're not allowed stencil nor shadowmaps, but can use rt...
r_shadow_raytrace.ival = true;
else
{
if (r_shadow_shadowmapping.ival && ((r_shadow_realtime_world.ival&&r_shadow_realtime_world_shadows.ival)||(r_shadow_realtime_dlight.ival&&r_shadow_realtime_dlight_shadows.ival)))
Con_Printf("Missing rendering features: forcing shadowmapping %s.\n", cansmap?"on":"off");
r_shadow_shadowmapping.ival = cansmap;
//no shadow methods available at all.
if ((r_shadow_realtime_world.ival&&r_shadow_realtime_world_shadows.ival)||(r_shadow_realtime_dlight.ival&&r_shadow_realtime_dlight_shadows.ival))
Con_Printf("Missing rendering features: realtime shadows are not possible.\n");
r_shadow_realtime_world_shadows.ival = 0;
r_shadow_realtime_dlight_shadows.ival = 0;
r_shadow_raytrace.ival = 0;
}
}
else
{
//both shadow methods available.
r_shadow_raytrace.ival = r_shadow_raytrace.ival && canraytrace;
if (!canstencil || !cansmap)
{
//only one shadow method
if (!!r_shadow_shadowmapping.ival != cansmap)
{
if (!r_shadow_raytrace.ival && r_shadow_shadowmapping.ival && ((r_shadow_realtime_world.ival&&r_shadow_realtime_world_shadows.ival)||(r_shadow_realtime_dlight.ival&&r_shadow_realtime_dlight_shadows.ival)))
Con_Printf("Missing rendering features: forcing shadowmapping %s.\n", cansmap?"on":"off");
r_shadow_shadowmapping.ival = cansmap;
}
}
}
cansmap = cansmap && (r_shadows.ival==2);
@ -4234,8 +4248,10 @@ void Sh_DrawLights(qbyte *vis)
((i >= RTL_FIRST)?!r_shadow_realtime_world_shadows.ival:!r_shadow_realtime_dlight_shadows.ival) || //force shadowless when configured that way...
ignoreflags==LFLAG_LIGHTMAP) //scenecache fallback...
{
Sh_DrawShadowlessLight(dl, colour, axis, vis);
Sh_DrawShadowlessLight(dl, colour, axis, vis, LSHADER_STANDARD);
}
else if (r_shadow_raytrace.ival)
Sh_DrawShadowlessLight(dl, colour, axis, vis, LSHADER_RAYQUERY);
else if ((dl->flags & LFLAG_SHADOWMAP) || r_shadow_shadowmapping.ival)
{
Sh_DrawShadowMapLight(dl, colour, axis, vis);
@ -4322,6 +4338,7 @@ void Sh_RegisterCvars(void)
Cvar_Register (&r_shadow_realtime_dlight_shadows, REALTIMELIGHTING);
Cvar_Register (&r_shadow_realtime_world_lightmaps, REALTIMELIGHTING);
Cvar_Register (&r_shadow_playershadows, REALTIMELIGHTING);
Cvar_Register (&r_shadow_raytrace, REALTIMELIGHTING);
Cvar_Register (&r_shadow_shadowmapping, REALTIMELIGHTING);
Cvar_Register (&r_shadow_shadowmapping_precision, REALTIMELIGHTING);
Cvar_Register (&r_shadow_shadowmapping_nearclip, REALTIMELIGHTING);

View file

@ -1409,6 +1409,9 @@ static qboolean GL_CheckExtensions (void *(*getglfunction) (char *name))
qglGenVertexArrays = NULL;
qglBindVertexArray = NULL;
}
Cvar_LockUnsupportedRendererCvar(&r_halfrate, "0");
Cvar_LockUnsupportedRendererCvar(&r_shadow_raytrace, "0");
return true; //all okay.
}

File diff suppressed because it is too large Load diff

View file

@ -544,6 +544,7 @@ typedef struct programshared_s
unsigned explicitsyms:1; //avoid defining symbol names that'll conflict with other glsl (any fte-specific names must have an fte_ prefix)
unsigned tess:1; //has a tessellation control+evaluation shader
unsigned geom:1; //has a geometry shader
unsigned rayquery:1; //needs a top-level acceleration structure.
unsigned warned:1; //one of the permutations of this shader has already been warned about. don't warn about all of them because that's potentially spammy.
unsigned short numsamplers; //shader system can strip any passes above this
unsigned int defaulttextures; //diffuse etc
@ -577,12 +578,13 @@ enum
LSHADER_CUBE=1u<<0, //has a cubemap filter (FIXME: use custom 2d filter on spot lights)
LSHADER_SMAP=1u<<1, //filter based upon a shadowmap instead of stencil/unlit
LSHADER_SPOT=1u<<2, //filter based upon a single spotlight shadowmap
LSHADER_RAYQUERY=1u<<3, //hardware raytrace.
#ifdef LFLAG_ORTHO
LSHADER_ORTHO=1u<<3, //uses a parallel projection(ortho) matrix, with the light source being an entire plane instead of a singular point. which is weird. read: infinitely far away sunlight
LSHADER_MODES=1u<<4,
LSHADER_ORTHO=1u<<4, //uses a parallel projection(ortho) matrix, with the light source being an entire plane instead of a singular point. which is weird. read: infinitely far away sunlight
LSHADER_MODES=1u<<5,
#else
LSHADER_ORTHO=0, //so bitmasks return false
LSHADER_MODES=1u<<3,
LSHADER_MODES=1u<<4,
#endif
LSHADER_FAKESHADOWS=1u<<10, //special 'light' type that isn't a light but still needs a shadowmap. ignores world+bsp shadows.

View file

@ -16,6 +16,7 @@ ALLNAMES:=$(realpath $(ALLNAMES))
VKNAMES=$(realpath $(foreach v,$(NAMES),vulkan/$v.glsl))
VKNAMES+=postproc_fisheye
VKNAMES+=rq_rtlight
VKNAMES:=$(foreach v,$(VKNAMES),vulkanblobs/$(notdir $(basename $v)).fvb)
ALLNAMES+=$(VKNAMES)
@ -25,8 +26,11 @@ generatebuiltinsl: generatebuiltinsl.c
makevulkanblob: makevulkanblob.c
$(CC) $< -o $@
#ray-query blobs (rq_NAME.fvb) are built from vulkan/NAME.glsl, the same source pattern the plain vulkanblobs/%.fvb rule uses.
#the only difference in the recipe is the extra 'rq' argument handed to makevulkanblob.
vulkanblobs/rq_%.fvb: vulkan/%.glsl makevulkanblob vulkan/sys/defs.h vulkan/sys/fog.h vulkan/sys/offsetmapping.h vulkan/sys/skeletal.h
@echo Making $@ from $<
@PATH=$(PATH):$(VKSDKPATH) ./makevulkanblob $< $@ rq
vulkanblobs/%.fvb: vulkan/%.glsl makevulkanblob vulkan/sys/defs.h vulkan/sys/fog.h vulkan/sys/offsetmapping.h vulkan/sys/skeletal.h
@echo Making $<
@echo Making $@ from $<
@PATH=$(PATH):$(VKSDKPATH) ./makevulkanblob $< $@
#vulkanblobs/%.fvb: glsl/%.glsl makevulkanblob vulkan/sys/defs.h vulkan/sys/fog.h vulkan/sys/offsetmapping.h vulkan/sys/skeletal.h

View file

@ -44,6 +44,7 @@ char shaders[][64] =
"menutint",
"terrain",
"rtlight",
"rq_rtlight",
""
};

View file

@ -89,7 +89,7 @@ void main ()
#else
//_DOES_ get darker in the shade, despite the light not lighting it at all....
float d = dot(n,e_light_dir);
if (d < 0)
if (d < 0.0)
d *= 13.0/44.0; //a wtfery factor to approximate glquake's anorm_dots.h
light.rgb += d * e_light_mul;
#endif

View file

@ -74,7 +74,7 @@ void generateprogsblob(struct blobheader *prototype, FILE *out, FILE *vert, FILE
}
int generatevulkanblobs(struct blobheader *blob, size_t maxblobsize, const char *glslname)
int generatevulkanblobs(struct blobheader *blob, size_t maxblobsize, const char *glslname, int rayquery)
{
char command[1024];
char tempname[256];
@ -101,6 +101,7 @@ int generatevulkanblobs(struct blobheader *blob, size_t maxblobsize, const char
const char *tmppath = "/tmp/";
char customsamplerlines[16][256];
FILE *glsl, *temp;
snprintf(tempname, sizeof(tempname), "%stemp.tmp", tmppath);
snprintf(tempvert, sizeof(tempvert), "%stemp.vert", tmppath);
@ -120,13 +121,19 @@ int generatevulkanblobs(struct blobheader *blob, size_t maxblobsize, const char
blob->cvarsoffset = sizeof(*blob);
blob->cvarslength = 0;
FILE *glsl = fopen(glslname, "rt");
if (!strncmp(glslname, "rq_", 3))
{ //hack, to avoid copypasta
rayquery = 2;
glsl = fopen(glslname+3, "rt");
}
else
glsl = fopen(glslname, "rt");
if (!glsl)
{
printf("Unable to read %s\n", glslname);
return 0;
}
FILE *temp = fopen(tempname, "wt");
temp = fopen(tempname, "wt");
if (!temp)
printf("Unable to write %s\n", tempname);
while(fgets(command, sizeof(command), glsl))
@ -194,6 +201,12 @@ int generatevulkanblobs(struct blobheader *blob, size_t maxblobsize, const char
blob->cvarslength = cb - ((unsigned char*)blob + blob->cvarsoffset);
constid += size;
}
else if (!strncmp(command, "!!tess", 6))
printf("!!tess not supported\n");
else if (!strncmp(command, "!!geom", 6))
printf("!!geom not supported\n");
else if (!strncmp(command, "!!rayquery", 6))
rayquery = true;
else if (!strncmp(command, "!!permu", 7))
{
char *arg = strtok(command+7, " ,\r\n");
@ -324,12 +337,18 @@ int generatevulkanblobs(struct blobheader *blob, size_t maxblobsize, const char
"uniform sampler2D s_deluxmap2;\n",
"uniform sampler2D s_deluxmap3;\n"
};
int binding = 2;
int binding = 2; //defined in sys/defs.h
inheader = 0;
if (rayquery == 2)
blob->defaulttextures &= ~(1u<<S_SHADOWMAP); //part of the earlier hack.
if (rayquery)
fprintf(temp, "#define RAY_QUERY\n");
fprintf(temp, "#define s_deluxmap s_deluxemap\n");
fprintf(temp, "#define OFFSETMAPPING (cvar_r_glsl_offsetmapping>0)\n");
fprintf(temp, "#define SPECULAR (cvar_gl_specular>0)\n");
fprintf(temp, "#ifdef FRAGMENT_SHADER\n");
if (rayquery)
fprintf(temp, "layout(set=0, binding=%u) uniform accelerationStructureEXT toplevelaccel;\n", binding++);
for (i = 0; i < sizeof(specialnames)/sizeof(specialnames[0]); i++)
{
if (blob->defaulttextures & (1u<<i))
@ -454,31 +473,31 @@ int generatevulkanblobs(struct blobheader *blob, size_t maxblobsize, const char
fclose(temp);
fclose(glsl);
temp = fopen(tempvert, "wt");
fprintf(temp, "#version 460 core\n");
fclose(temp);
temp = fopen(tempfrag, "wt");
fprintf(temp, "#version 460 core\n");
if (rayquery)
fprintf(temp, "#extension GL_EXT_ray_query : require\n");
fclose(temp);
snprintf(command, sizeof(command),
/*preprocess the vertex shader*/
#ifdef _WIN32
"echo #version 450 core > %s && "
#else
"echo \"#version 450 core\" > %s && "
#endif
"cpp %s -I%s -DVULKAN -DVERTEX_SHADER -P >> %s && "
/*preprocess the fragment shader*/
#ifdef _WIN32
"echo #version 450 core > %s && "
#else
"echo \"#version 450 core\" > %s && "
#endif
"cpp %s -I%s -DVULKAN -DFRAGMENT_SHADER -P >> %s && "
/*convert to spir-v (annoyingly we have no control over the output file names*/
"glslangValidator -V -l -d %s %s"
"glslangValidator -g0 -V -l -d %s %s"
/*strip stuff out, so drivers don't glitch out from stuff that we don't use*/
// " && spirv-remap -i vert.spv frag.spv -o vulkan/remap"
,tempvert, tempname, incpath, tempvert //vertex shader args
,tempfrag, tempname, incpath, tempfrag //fragment shader args
,tempname, incpath, tempvert //vertex shader args
,tempname, incpath, tempfrag //fragment shader args
,tempvert, tempfrag); //compile/link args.
system(command);
@ -487,6 +506,8 @@ int generatevulkanblobs(struct blobheader *blob, size_t maxblobsize, const char
// remove(tempvert);
// remove(tempfrag);
if (rayquery)
blob->permutations |= 1u<<31;
return 1;
}
@ -497,6 +518,7 @@ int main(int argc, const char **argv)
FILE *v, *f, *o;
char proto[8192];
char line[256];
int rayquery = (argc>=4)?!!strstr(argv[3], "rq"):0;
int r = 1;
if (argc == 1)
@ -505,7 +527,7 @@ int main(int argc, const char **argv)
return 1;
}
if (!generatevulkanblobs((struct blobheader*)proto, sizeof(proto), inname))
if (!generatevulkanblobs((struct blobheader*)proto, sizeof(proto), inname, rayquery))
return 1;
//should have generated two files
v = fopen("vert.spv", "rb");

View file

@ -63,6 +63,9 @@ layout(location = 1) varying vec3 lightvector;
layout(location = 2) varying vec3 eyevector;
layout(location = 3) varying vec4 vtexprojcoord;
layout(location = 4) varying mat3 invsurface;
#ifdef RAY_QUERY
layout(location = 7) varying vec3 wcoord;
#endif
#ifdef VERTEX_SHADER
@ -101,6 +104,9 @@ void main ()
//for texture projections/shadowmapping on dlights
vtexprojcoord = l_cubematrix*m_model*vec4(w.xyz, 1.0);
}
#ifdef RAY_QUERY
wcoord = vec3(m_model*vec4(w+n*0.1, 1.0)); //push it half a qu away from the face, so we're less likely to get precision errors in the rays.
#endif
}
#endif
@ -110,6 +116,18 @@ void main ()
#ifdef FRAGMENT_SHADER
#include "sys/fog.h"
#ifdef RAY_QUERY
//shadow term for raytraced lights.
//fires a single ray from l_lightposition towards wcoord through toplevelaccel.
//yields 1.0 when the query commits no intersection, and 0.0 otherwise.
float RayQueryFilter(void)
{
	//FIXME: no ortho
#define l_origin e_eyepos
	//the direction is left unnormalised, so the 0.0..1.0 parametric range spans light->wcoord.
	vec3 lighttosurf = wcoord-l_lightposition;
	bool unblocked;
	rayQueryEXT rq;

	rayQueryInitializeEXT(rq, toplevelaccel, gl_RayFlagsTerminateOnFirstHitEXT, 0xFF, l_lightposition, 0.0, lighttosurf, 1.0);
	rayQueryProceedEXT(rq);
	//TODO: filter it through blended stuff, and alpha-tested stuff.
	unblocked = (rayQueryGetIntersectionTypeEXT(rq, true) == gl_RayQueryCommittedIntersectionNoneEXT);
	return float(unblocked);
}
#else
//uniform vec4 l_shadowmapproj; //light projection matrix info
//uniform vec2 l_shadowmapscale; //xy are the texture scale, z is 1, w is the scale.
vec3 ShadowmapCoord(void)
@ -217,6 +235,7 @@ float ShadowmapFilter(void)
}
#endif
}
#endif
#include "sys/offsetmapping.h"
@ -295,6 +314,9 @@ void main ()
colorscale*=1.0-(dot(spot,spot));
}
#ifdef RAY_QUERY
colorscale *= RayQueryFilter();
#else
if (arg_pcf)
{
/*filter the light by the shadowmap. logically a boolean, but we allow fractions for softer shadows*/
@ -302,6 +324,7 @@ void main ()
colorscale *= ShadowmapFilter();
// diff = ShadowmapCoord();
}
#endif
#if defined(PROJECTION)
/*2d projection, not used*/

View file

@ -302,6 +302,11 @@ typedef struct
texid_t currentshadowmap;
VkDescriptorSetLayout textureLayout;
#ifdef VK_KHR_acceleration_structure
qboolean needtlas; //frame delay, urgh...
VkAccelerationStructureKHR tlas;
#endif
} vkbackend_t;
#define VERTEXSTREAMSIZE (1024*1024*2) //2mb = 1 PAE jumbo page
@ -439,7 +444,7 @@ static void VK_FinishProg(program_t *prog, const char *name)
{
VkDescriptorSetLayout desclayout;
VkDescriptorSetLayoutCreateInfo descSetLayoutCreateInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
VkDescriptorSetLayoutBinding dbs[2+MAX_TMUS], *db = dbs;
VkDescriptorSetLayoutBinding dbs[3+MAX_TMUS], *db = dbs;
uint32_t i;
//VkSampler samp = VK_GetSampler(0);
@ -457,6 +462,18 @@ static void VK_FinishProg(program_t *prog, const char *name)
db->pImmutableSamplers = NULL;
db++;
#ifdef VK_KHR_acceleration_structure
if (prog->rayquery)
{
db->binding = db-dbs;
db->descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR;
db->descriptorCount = 1;
db->stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
db->pImmutableSamplers = NULL;
db++;
}
#endif
for (i = 0; i < 32; i++)
{
if (!(prog->defaulttextures & (1u<<i)))
@ -525,6 +542,36 @@ qboolean VK_LoadBlob(program_t *prog, void *blobdata, const char *name)
return false;
}
prog->supportedpermutations = blob->permutations;
#define VKPERMUTATION_RAYQUERY (1u<<31)
if (blob->permutations&~((PERMUTATIONS-1)|VKPERMUTATION_RAYQUERY))
{
Con_Printf("Blob %s has unknown permutations\n", name);
return false;
}
if (prog->supportedpermutations&VKPERMUTATION_RAYQUERY)
{ //not really a permutation.
#ifndef VK_KHR_ray_query
Con_Printf(CON_ERROR"Blob %s requires vk_khr_ray_query\n", name);
return false;
#else
if (!vk.khr_ray_query)
{ //the actual spv extension. let compiling catch it?
Con_Printf(CON_ERROR"Blob %s requires vk_khr_ray_query\n", name);
return false;
}
if (!vk.khr_acceleration_structure)
{ //what we're meant to be using to feed it... *sigh*
Con_Printf(CON_ERROR"Blob %s requires vk_khr_acceleration_structure\n", name);
return false;
}
prog->supportedpermutations&=~VKPERMUTATION_RAYQUERY;
prog->rayquery = true;
#endif
}
else
prog->rayquery = false;
info.flags = 0;
info.codeSize = blob->vertlength;
info.pCode = (uint32_t*)((char*)blob+blob->vertoffset);
@ -541,7 +588,6 @@ qboolean VK_LoadBlob(program_t *prog, void *blobdata, const char *name)
prog->frag = frag;
prog->numsamplers = blob->numtextures;
prog->defaulttextures = blob->defaulttextures;
prog->supportedpermutations = blob->permutations;
if (blob->cvarslength)
{
@ -684,6 +730,14 @@ static const char LIGHTPASS_SHADER[] = "\
blendfunc add\n\
}\n\
}";
//shader script text for the ray-query light pass: a single pass using the rq_rtlight program, with nodepth and additive blending.
//written as adjacent literals, one per script line; the compiler joins them into one string.
static const char LIGHTPASS_SHADER_RQ[] =
	"{\n"
	"program rq_rtlight\n"
	"{\n"
	"nodepth\n"
	"blendfunc add\n"
	"}\n"
	"}";
void VKBE_Init(void)
{
@ -794,17 +848,28 @@ static struct descpool *VKBE_CreateDescriptorPool(void)
struct descpool *np = Z_Malloc(sizeof(*np));
VkDescriptorPoolCreateInfo dpi = {VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
VkDescriptorPoolSize dpisz[2];
VkDescriptorPoolSize dpisz[3];
dpi.flags = 0;
dpi.maxSets = np->totalsets = 512;
dpi.poolSizeCount = countof(dpisz);
dpi.poolSizeCount = 0;
dpi.pPoolSizes = dpisz;
dpisz[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
dpisz[0].descriptorCount = 2*dpi.maxSets;
dpisz[dpi.poolSizeCount].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
dpisz[dpi.poolSizeCount].descriptorCount = 2*dpi.maxSets;
dpi.poolSizeCount++;
dpisz[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
dpisz[1].descriptorCount = MAX_TMUS*dpi.maxSets;
dpisz[dpi.poolSizeCount].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
dpisz[dpi.poolSizeCount].descriptorCount = MAX_TMUS*dpi.maxSets;
dpi.poolSizeCount++;
#ifdef VK_KHR_acceleration_structure
if (vk.khr_acceleration_structure)
{
dpisz[dpi.poolSizeCount].type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR;
dpisz[dpi.poolSizeCount].descriptorCount = dpi.maxSets;
dpi.poolSizeCount++;
}
#endif
VkAssert(vkCreateDescriptorPool(vk.device, &dpi, NULL, &np->pool));
@ -834,13 +899,32 @@ static VkDescriptorSet VKBE_TempDescriptorSet(VkDescriptorSetLayout layout)
return ret;
}
//creates a new dynamic buffer for us to use while streaming. because spoons.
static struct dynbuffer *VKBE_AllocNewBuffer(struct dynbuffer **link, enum dynbuf_e type, VkDeviceSize minsize)
static const struct
{
const char *name;
VkBufferUsageFlags usage;
qboolean nomap;
VkDeviceSize align;
} dynbuf_info[DB_MAX] =
{ //FIXME: set alignment properly.
{"DB_VBO", VK_BUFFER_USAGE_VERTEX_BUFFER_BIT},
{"DB_EBO", VK_BUFFER_USAGE_INDEX_BUFFER_BIT},
{"DB_UBO", VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT},
{"DB_STAGING", VK_BUFFER_USAGE_TRANSFER_SRC_BIT},
#ifdef VK_KHR_acceleration_structure
{"DB_ACCELERATIONSTRUCT", VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR|VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, true, 256},
{"DB_ACCELERATIONSCRATCH", VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, true},
{"DB_ACCELERATIONMESHDATA", VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT},
{"DB_ACCELERATIONINSTANCE", VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT},
#endif
};
//creates a new dynamic buffer for us to use while streaming. because spoons.
static struct dynbuffer *VKBE_AllocNewStreamingBuffer(struct dynbuffer **link, enum dynbuf_e type, VkDeviceSize minsize)
{
VkBufferUsageFlags ufl[] = {VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT};
VkBufferCreateInfo bufinf = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
VkMemoryRequirements mem_reqs;
VkMemoryAllocateInfo memAllocInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
VkMemoryAllocateFlagsInfo memAllocFlagsInfo = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO};
struct dynbuffer *n = Z_Malloc(sizeof(*n));
qboolean usestaging = (vk_usedynamicstaging & (1u<<type))!=0;
@ -857,27 +941,31 @@ static struct dynbuffer *VKBE_AllocNewBuffer(struct dynbuffer **link, enum dynbu
n->size = bufinf.size;
bufinf.usage = dynbuf_info[type].usage;
if (type != DB_STAGING && usestaging)
{
//create two buffers, one staging/host buffer and one device buffer
bufinf.usage = ufl[type]|VK_BUFFER_USAGE_TRANSFER_DST_BIT;
bufinf.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
vkCreateBuffer(vk.device, &bufinf, vkallocationcb, &n->devicebuf);
DebugSetName(VK_OBJECT_TYPE_BUFFER, (uint64_t)n->devicebuf, dynbuf_info[type].name);
bufinf.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
vkCreateBuffer(vk.device, &bufinf, vkallocationcb, &n->stagingbuf);
DebugSetName(VK_OBJECT_TYPE_BUFFER, (uint64_t)n->devicebuf, "DB_AUTOSTAGING");
vkGetBufferMemoryRequirements(vk.device, n->devicebuf, &mem_reqs);
n->align = mem_reqs.alignment-1;
memAllocInfo.allocationSize = mem_reqs.size;
memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &n->devicememory));
DebugSetName(VK_OBJECT_TYPE_DEVICE_MEMORY, (uint64_t)n->devicememory, "DB_AUTOSTAGING");
VkAssert(vkBindBufferMemory(vk.device, n->devicebuf, n->devicememory, 0));
n->renderbuf = n->devicebuf;
}
else
{ //single buffer. we'll write directly to the buffer.
bufinf.usage = ufl[type];
vkCreateBuffer(vk.device, &bufinf, vkallocationcb, &n->stagingbuf);
DebugSetName(VK_OBJECT_TYPE_BUFFER, (uint64_t)n->stagingbuf, dynbuf_info[type].name);
n->renderbuf = n->stagingbuf;
}
@ -887,6 +975,8 @@ static struct dynbuffer *VKBE_AllocNewBuffer(struct dynbuffer **link, enum dynbu
n->align = mem_reqs.alignment-1;
memAllocInfo.allocationSize = mem_reqs.size;
memAllocInfo.memoryTypeIndex = ~0;
if (memAllocInfo.memoryTypeIndex == ~0 && dynbuf_info[type].nomap)
memAllocInfo.memoryTypeIndex = vk_find_memory_try(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
// if (memAllocInfo.memoryTypeIndex == ~0)
// memAllocInfo.memoryTypeIndex = vk_find_memory_try(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
if (memAllocInfo.memoryTypeIndex == ~0 && n->renderbuf == n->stagingbuf) //probably won't get anything, but whatever.
@ -901,22 +991,46 @@ static struct dynbuffer *VKBE_AllocNewBuffer(struct dynbuffer **link, enum dynbu
usestaging = true;
continue;
}
memAllocFlagsInfo.flags = 0;
if (bufinf.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)
memAllocFlagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
if (memAllocFlagsInfo.flags)
memAllocInfo.pNext = &memAllocFlagsInfo;
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &n->stagingmemory));
DebugSetName(VK_OBJECT_TYPE_DEVICE_MEMORY, (uint64_t)n->stagingmemory, dynbuf_info[type].name);
VkAssert(vkBindBufferMemory(vk.device, n->stagingbuf, n->stagingmemory, 0));
VkAssert(vkMapMemory(vk.device, n->stagingmemory, 0, n->size, 0, &n->ptr)); //persistent-mapped.
if (dynbuf_info[type].nomap)
{
n->ptr = NULL; //don't want to map this.
n->stagingcoherent = true;
}
else
{
VkAssert(vkMapMemory(vk.device, n->stagingmemory, 0, n->size, 0, &n->ptr)); //persistent-mapped.
n->stagingcoherent = !!(vk.memory_properties.memoryTypes[memAllocInfo.memoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
n->stagingcoherent = !!(vk.memory_properties.memoryTypes[memAllocInfo.memoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
}
n->next = *link;
*link = n;
return n;
}
}
static void *fte_restrict VKBE_AllocateBufferSpace(enum dynbuf_e type, size_t datasize, VkBuffer *buf, VkDeviceSize *offset)
static void *fte_restrict VKBE_AllocateStreamingSpace(enum dynbuf_e type, size_t datasize, VkBuffer *buf, VkDeviceSize *offset)
{ //FIXME: ubos need alignment
struct dynbuffer *b = vk.dynbuf[type];
void *ret;
if (b->offset + datasize > b->size)
if (!b)
{
if (!vk.frame->dynbufs[type])
VKBE_AllocNewStreamingBuffer(&vk.frame->dynbufs[type], type, datasize);
b = vk.dynbuf[type] = vk.frame->dynbufs[type];
b->offset = b->flushed = 0;
}
if (offset? //urgh...
b->offset + datasize > b->size: //regular offsetable buffer...
(b->offset || datasize > b->size)) //stoopid buffer space that must have the whole buffer to itself for some reason.
{
//flush the old one, just in case.
if (!b->stagingcoherent)
@ -940,17 +1054,23 @@ static void *fte_restrict VKBE_AllocateBufferSpace(enum dynbuf_e type, size_t da
}
if (!b->next)
VKBE_AllocNewBuffer(&b->next, type, datasize);
VKBE_AllocNewStreamingBuffer(&b->next, type, datasize);
b = vk.dynbuf[type] = b->next;
b->offset = 0;
b->flushed = 0;
}
*buf = b->renderbuf;
*offset = b->offset;
if (offset)
*offset = b->offset;
ret = (qbyte*)b->ptr + b->offset;
b->offset += datasize; //FIXME: alignment
if (dynbuf_info[type].align)
{
b->offset += dynbuf_info[type].align;
b->offset &= ~(dynbuf_info[type].align-1);
}
return ret;
}
@ -960,10 +1080,7 @@ void VKBE_InitFramePools(struct vkframe *frame)
{
uint32_t i;
for (i = 0; i < DB_MAX; i++)
{
frame->dynbufs[i] = NULL;
VKBE_AllocNewBuffer(&frame->dynbufs[i], i, 0);
}
frame->descpools = vk.khr_push_descriptor?NULL:VKBE_CreateDescriptorPool();
@ -997,7 +1114,7 @@ void VKBE_FlushDynamicBuffers(void)
for (i = 0; i < DB_MAX; i++)
{
d = vk.dynbuf[i];
if (d->flushed == d->offset)
if (!d || d->flushed == d->offset)
continue;
if (!d->stagingcoherent)
@ -1034,16 +1151,337 @@ void VKBE_Set2D(qboolean twodee)
shaderstate.curtime = realtime;
}
#ifdef VK_KHR_acceleration_structure
static void VKBE_DestroyTLAS(void *ctx)
{
VkAccelerationStructureKHR *tlas = ctx;
vkDestroyAccelerationStructureKHR(vk.device, *tlas, vkallocationcb);
}
//returns the gpu-side address of the start of the given buffer, as reported by vkGetBufferDeviceAddress.
static VkDeviceAddress VKBE_GetBufferDeviceAddress(VkBuffer buf)
{
	VkBufferDeviceAddressInfo query;
	query.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO;
	query.pNext = NULL;
	query.buffer = buf;
	return vkGetBufferDeviceAddress(vk.device, &query);
}
//describes where the triangle data for one bottom-level acceleration structure was written.
//filled in by VKBE_GenerateAccelerationMesh_BSP and consumed by VKBE_GenerateBLAS.
//note: VKBE_GenerateBLAS initialises this positionally, so keep the member order.
struct blasgeom_s
{
	VkBuffer vertbuf;			//buffer holding the vec3_t positions
	VkBuffer idxbuf;			//buffer holding the 32bit indexes
	VkDeviceSize vertoffset;	//byte offset of the first position within vertbuf
	VkDeviceSize idxoffset;		//byte offset of the first index within idxbuf
	size_t numverts;			//number of positions written
	size_t numtris;				//number of triangles written (indexes/3)
};
//Flattens the surfaces of a bsp (sub)model into a single indexed triangle list in DB_ACCELERATIONMESHDATA streaming space.
//Surfaces flagged SURF_DRAWSKY or SURF_DRAWTURB are left out.
//Surfaces with a prebuilt mesh use it directly; otherwise the polygon is rebuilt from the bsp edge list and triangulated as a fan.
//On success fills in geom (buffers, offsets, counts) and returns true. Returns false (geom untouched) if there is nothing to emit.
static qboolean VKBE_GenerateAccelerationMesh_BSP(model_t *mod, struct blasgeom_s *geom)
{
	msurface_t *face;
	mesh_t *m;
	unsigned int f, k;
	unsigned int vertbase = 0;	//verts counted/written so far
	unsigned int idxbase = 0;	//indexes counted/written so far
	unsigned int *idxout;
	vec3_t *xyzout;

	//first pass: work out how much space is needed.
	for (f = 0; f < mod->nummodelsurfaces; f++)
	{
		face = &mod->surfaces[f+mod->firstmodelsurface];
		if (face->flags & (SURF_DRAWSKY|SURF_DRAWTURB))
			continue;

		m = face->mesh;
		if (m)
		{
			vertbase += m->numvertexes;
			idxbase += m->numindexes;
		}
		else if (face->numedges > 2)
		{
			vertbase += face->numedges;
			idxbase += (face->numedges-2) * 3;
		}
	}
	if (idxbase == 0)
		return false;

	//indexes first, then positions, both from the same streaming buffer type.
	geom->vertoffset = 0;
	geom->idxoffset = 0;
	idxout = VKBE_AllocateStreamingSpace(DB_ACCELERATIONMESHDATA, sizeof(*idxout)*idxbase, &geom->idxbuf, &geom->idxoffset);
	xyzout = VKBE_AllocateStreamingSpace(DB_ACCELERATIONMESHDATA, sizeof(*xyzout)*vertbase, &geom->vertbuf, &geom->vertoffset);

	//second pass: actually write the data, using the same filtering as above.
	vertbase = 0;
	idxbase = 0;
	for (f = 0; f < mod->nummodelsurfaces; f++)
	{
		face = &mod->surfaces[f+mod->firstmodelsurface];
		if (face->flags & (SURF_DRAWSKY|SURF_DRAWTURB))
			continue;

		m = face->mesh;
		if (m)
		{
			for (k = 0; k < m->numvertexes; k++)
				VectorCopy(m->xyz_array[k], xyzout[vertbase+k]);

			for (k = 0; k < m->numindexes; k+=3)
			{
				//each triangle is emitted with its winding reversed.
				unsigned int *tri = &idxout[idxbase+k];
				tri[0] = vertbase+m->indexes[k+2];
				tri[1] = vertbase+m->indexes[k+1];
				tri[2] = vertbase+m->indexes[k+0];
			}
			vertbase += m->numvertexes;
			idxbase += k;
		}
		else if (face->numedges > 2)
		{
			for (k = 0; k < face->numedges; k++)
			{
				//positive surfedges use the edge's first vertex, others use the negated edge's second vertex.
				int e = mod->surfedges[face->firstedge + k];
				float *pos;
				if (e > 0)
					pos = mod->vertexes[mod->edges[e].v[0]].position;
				else
					pos = mod->vertexes[mod->edges[-e].v[1]].position;
				VectorCopy(pos, xyzout[vertbase+k]);
			}

			//triangle fan around the polygon's first vertex, again with reversed winding.
			for (k = 2; k < face->numedges; k++)
			{
				idxout[idxbase+0] = vertbase+k;
				idxout[idxbase+1] = vertbase+k-1;
				idxout[idxbase+2] = vertbase;
				idxbase += 3;
			}
			vertbase += face->numedges;
		}
	}

	geom->numverts = vertbase;
	geom->numtris = idxbase/3;
	return true;
}
//Builds a bottom-level acceleration structure holding the (static) triangles of the given bsp model.
//The geometry, transform, scratch and storage all come from per-frame streaming space, and the build is recorded into vk.rendertarg->cbuf.
//Returns the new acceleration structure (the caller is responsible for destroying it),
//or VK_NULL_HANDLE if the model produced no triangles, in which case nothing is recorded.
static VkAccelerationStructureKHR VKBE_GenerateBLAS(model_t *mod)
{
	struct blasgeom_s geom = {VK_NULL_HANDLE,VK_NULL_HANDLE,0,0};
	VkAccelerationStructureCreateInfoKHR asci = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR};
	VkAccelerationStructureBuildGeometryInfoKHR asbgi = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
	uint32_t maxPrimitiveCounts[1] = {0};
	VkAccelerationStructureGeometryKHR asg[1];
	VkAccelerationStructureBuildRangeInfoKHR asbri[1];
	VkAccelerationStructureBuildRangeInfoKHR const *const asbrip = {asbri};
	VkAccelerationStructureBuildSizesInfoKHR asbsi = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR};
	VkBuffer transformbuf, scratchbuf;
	VkDeviceSize transformoffset = 0;

	//this is stupid. oh well.
	//(an identity 3*4 transform, uploaded just so the build has one to read)
	VkTransformMatrixKHR *transform;
	transform = VKBE_AllocateStreamingSpace(DB_ACCELERATIONINSTANCE, sizeof(*transform), &transformbuf, &transformoffset);
	Vector4Set(transform->matrix[0], 1,0,0,0);
	Vector4Set(transform->matrix[1], 0,1,0,0);
	Vector4Set(transform->matrix[2], 0,0,1,0);

	//FIXME: use of VKBE_AllocateStreamingSpace on the geomdata, transform, and blas storage itself mean we can only use this for a single frame, regenerating each time. which is wasteful for a blas that contains the entire worldmodel.
	if (!VKBE_GenerateAccelerationMesh_BSP(mod, &geom))
		return VK_NULL_HANDLE;	//no triangles: geom's buffers were never assigned, so there is nothing valid to build from.

	asg[0].sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
	asg[0].pNext = NULL;
	asg[0].flags = VK_GEOMETRY_OPAQUE_BIT_KHR;
	asg[0].geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
	asg[0].geometry.triangles.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR;
	asg[0].geometry.triangles.pNext = NULL;
	asg[0].geometry.triangles.vertexFormat = VK_FORMAT_R32G32B32_SFLOAT;
	//point straight at our first vertex instead of expressing the byte offset as a vertex index:
	//that avoids any reliance on vertoffset being a multiple of sizeof(vec3_t), and makes maxVertex unambiguous.
	asg[0].geometry.triangles.vertexData.deviceAddress = VKBE_GetBufferDeviceAddress(geom.vertbuf) + geom.vertoffset;
	asg[0].geometry.triangles.vertexStride = sizeof(vec3_t);
	asg[0].geometry.triangles.maxVertex = geom.numverts-1;	//highest addressable vertex index, not a count. numverts is non-zero here.
	asg[0].geometry.triangles.indexType = VK_INDEX_TYPE_UINT32;
	asg[0].geometry.triangles.indexData.deviceAddress = VKBE_GetBufferDeviceAddress(geom.idxbuf);
	asg[0].geometry.triangles.transformData.deviceAddress = VKBE_GetBufferDeviceAddress(transformbuf);

	asbri[0].firstVertex = 0;					//already folded into vertexData above
	asbri[0].primitiveCount = maxPrimitiveCounts[0] = geom.numtris;
	asbri[0].primitiveOffset = geom.idxoffset;	//bytes into indexData
	asbri[0].transformOffset = transformoffset;	//bytes into transformData

	asci.createFlags = 0;
	asci.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
	asci.deviceAddress = 0; //no overriding here.

	asbgi.type = asci.type;
	asbgi.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR /* | VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR*/;
	asbgi.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
	asbgi.srcAccelerationStructure = VK_NULL_HANDLE; //ignored here
	asbgi.dstAccelerationStructure = VK_NULL_HANDLE; //filled in later
	asbgi.geometryCount = countof(asg);
	asbgi.pGeometries = asg;
	asbgi.ppGeometries = NULL; //too much indirection! oh noes!

	//make sure the cpu-written build inputs are visible before the gpu gets to read them.
	VKBE_FlushDynamicBuffers();

	//ask how much storage+scratch the build needs, then carve both out of streaming space.
	vkGetAccelerationStructureBuildSizesKHR(vk.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &asbgi, maxPrimitiveCounts, &asbsi);
	VKBE_AllocateStreamingSpace(DB_ACCELERATIONSTRUCT, asci.size = asbsi.accelerationStructureSize, &asci.buffer, &asci.offset);
	VKBE_AllocateStreamingSpace(DB_ACCELERATIONSCRATCH, asbsi.buildScratchSize, &scratchbuf, NULL);	//NULL offset: scratch gets a buffer to itself
	asbgi.scratchData.deviceAddress = VKBE_GetBufferDeviceAddress(scratchbuf);

	VkAssert(vkCreateAccelerationStructureKHR(vk.device, &asci, vkallocationcb, &asbgi.dstAccelerationStructure));
	DebugSetName(VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, (uint64_t)asbgi.dstAccelerationStructure, "ShadowBLAS");

	vkCmdBuildAccelerationStructuresKHR(vk.rendertarg->cbuf, 1, &asbgi, &asbrip);

	{
		//later acceleration structure builds (ie: the tlas that references us) must wait for this build's writes.
		VkMemoryBarrier membarrier = {VK_STRUCTURE_TYPE_MEMORY_BARRIER};
		membarrier.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
		membarrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		vkCmdPipelineBarrier(vk.rendertarg->cbuf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, 0, 1, &membarrier, 0, NULL, 0, NULL);
	}

	return asbgi.dstAccelerationStructure;
}
//Builds this frame's top-level acceleration structure, currently containing a single instance: a freshly built blas of the world model.
//The build is recorded into vk.rendertarg->cbuf, and both the tlas and the blas are queued for destruction via VK_AtFrameEnd.
//Returns the tlas handle, or VK_NULL_HANDLE if no blas could be built (so there is nothing to trace against).
static VkAccelerationStructureKHR VKBE_GenerateTLAS(void)
{
	VkAccelerationStructureKHR blas = VKBE_GenerateBLAS(r_worldentity.model);
	VkAccelerationStructureDeviceAddressInfoKHR blasinfo = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, NULL, blas};
	VkAccelerationStructureCreateInfoKHR asci = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR};
	VkAccelerationStructureBuildGeometryInfoKHR asbgi = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR};
	uint32_t maxPrimitiveCounts[1] = {0};
	VkAccelerationStructureGeometryKHR asg[1];
	VkAccelerationStructureBuildRangeInfoKHR asbri[1];
	VkAccelerationStructureBuildRangeInfoKHR const *const asbrip = {asbri};
	VkAccelerationStructureBuildSizesInfoKHR asbsi = {VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR};
	VkBuffer instancesbuf, scratchbuf;
	VkDeviceSize instancesofs = 0;
	size_t numinstances = 1;
	VkAccelerationStructureInstanceKHR *instances = VKBE_AllocateStreamingSpace(DB_ACCELERATIONINSTANCE, sizeof(*instances)*numinstances, &instancesbuf, &instancesofs);
#if 0
	batch_t **worldbatches = r_worldentity.model->batches; //FIXME
	batch_t *batch;
	int i, id = 0;
	for (i = 0; i < SHADER_SORT_COUNT; i++)
	{
		if (worldbatches)
		{
			for (batch = worldbatches[i]; batch; batch = batch->next)
			{
				if (batch->meshes == batch->firstmesh)
					continue; //nothing to do...
				if (batch->buildmeshes)
					batch->buildmeshes(batch);
				{
					shader_t *shader = batch->shader;
					unsigned int bf;
					unsigned int nummeshes = batch->meshes - batch->firstmesh;
					if (!nummeshes)
						continue;
					//ubo[id].stuff = ...;
					VectorCopy(batch->ent->axis[0], instances->transform.matrix[0]); instances->transform.matrix[0][3] = batch->ent->origin[0];
					VectorCopy(batch->ent->axis[1], instances->transform.matrix[1]); instances->transform.matrix[1][3] = batch->ent->origin[1];
					VectorCopy(batch->ent->axis[2], instances->transform.matrix[2]); instances->transform.matrix[2][3] = batch->ent->origin[2];
					instances->instanceCustomIndex = id++; //extra info
					if (batch->shader->flags & SHADER_SKY)
						instances->mask = 0x2;
					else if (batch->shader->sort > SHADER_SORT_OPAQUE)
						instances->mask = 0x4;
					else
						instances->mask = 0x1;
					instances->instanceShaderBindingTableRecordOffset = shader->id; //material id
					if (shader->flags & SHADER_CULL_FRONT)
						instances->flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR;
					else if (shader->flags & SHADER_CULL_BACK)
						instances->flags = 0;
					else
						instances->flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
					instances->accelerationStructureReference = batch->blas; //no half measures
				}
			}
		}
		//and non-world too... may require temporary blas for lerping/skeletal models.
	}
#else
	//without a blas there is nothing to reference (and querying the address of a null handle is not allowed).
	//the caller then ends up with a null tlas, which ray-query programs already treat as 'not this frame'.
	if (blas == VK_NULL_HANDLE)
		return VK_NULL_HANDLE;

	//a single untransformed instance of the world's blas.
	Vector4Set(instances->transform.matrix[0], 1,0,0,0);
	Vector4Set(instances->transform.matrix[1], 0,1,0,0);
	Vector4Set(instances->transform.matrix[2], 0,0,1,0);
	instances->instanceCustomIndex = 0; //index into our ssbo... if we had one...
	instances->mask = 0x01;
	instances->instanceShaderBindingTableRecordOffset = 0; //FIXME: alphamasked stuff needs a texture somehow
	instances->flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR; //FIXME: optimise
	instances->accelerationStructureReference = vkGetAccelerationStructureDeviceAddressKHR(vk.device, &blasinfo);
#endif

	asg[0].sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
	asg[0].pNext = NULL;
	asg[0].flags = VK_GEOMETRY_OPAQUE_BIT_KHR;
	asg[0].geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
	asg[0].geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR;
	asg[0].geometry.instances.pNext = NULL;
	asg[0].geometry.instances.arrayOfPointers = false;
	asg[0].geometry.instances.data.deviceAddress = VKBE_GetBufferDeviceAddress(instancesbuf);

	asbri[0].firstVertex = 0;
	asbri[0].primitiveCount = maxPrimitiveCounts[0] = numinstances;
	asbri[0].primitiveOffset = instancesofs;	//bytes into the instance buffer
	asbri[0].transformOffset = 0;

	asci.createFlags = 0;
	asci.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
	asci.deviceAddress = 0; //no overriding here.

	asbgi.type = asci.type;
	asbgi.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR /* | VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR*/;
	asbgi.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
	asbgi.srcAccelerationStructure = VK_NULL_HANDLE; //ignored here
	asbgi.dstAccelerationStructure = VK_NULL_HANDLE; //filled in later
	asbgi.geometryCount = countof(asg);
	asbgi.pGeometries = asg;
	asbgi.ppGeometries = NULL; //too much indirection! oh noes!

	//make sure the cpu-written instance data is visible before the gpu gets to read it.
	VKBE_FlushDynamicBuffers();

	//ask how much storage+scratch the build needs, then carve both out of streaming space.
	vkGetAccelerationStructureBuildSizesKHR(vk.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &asbgi, maxPrimitiveCounts, &asbsi);
	VKBE_AllocateStreamingSpace(DB_ACCELERATIONSTRUCT, asci.size = asbsi.accelerationStructureSize, &asci.buffer, &asci.offset);
	VKBE_AllocateStreamingSpace(DB_ACCELERATIONSCRATCH, asbsi.buildScratchSize, &scratchbuf, NULL);	//NULL offset: scratch gets a buffer to itself
	asbgi.scratchData.deviceAddress = VKBE_GetBufferDeviceAddress(scratchbuf);

	vkCreateAccelerationStructureKHR(vk.device, &asci, vkallocationcb, &asbgi.dstAccelerationStructure);
	DebugSetName(VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, (uint64_t)asbgi.dstAccelerationStructure, "ShadowTLAS");

	vkCmdBuildAccelerationStructuresKHR(vk.rendertarg->cbuf, 1, &asbgi, &asbrip);

	VK_AtFrameEnd(VKBE_DestroyTLAS, &asbgi.dstAccelerationStructure, sizeof(asbgi.dstAccelerationStructure)); //clean up the tlas, each frame gets a new one.
	VK_AtFrameEnd(VKBE_DestroyTLAS, &blas, sizeof(blas)); //and the blas it references, which is also rebuilt each frame.

	{
		//the tlas is consumed by ray queries inside the light shaders rather than by another build,
		//so the destination scope has to be the shader stages or the draws are not ordered after the build.
		//NOTE(review): assumes the ray-query programs only have vertex+fragment stages - widen if other stages ever trace.
		VkMemoryBarrier membarrier = {VK_STRUCTURE_TYPE_MEMORY_BARRIER};
		membarrier.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
		membarrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		vkCmdPipelineBarrier(vk.rendertarg->cbuf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT|VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 1, &membarrier, 0, NULL, 0, NULL);
	}

	//FIXME: use a compute cbuf, add a fence to block the rtlight queries until this BVH is done.
	//vkResetEvent
	//vkCmdSetEvent
	//vkCmdPipelineBarrier

	return asbgi.dstAccelerationStructure;
}
#endif
//called at the start of each frame
//resets the working dynamic buffers to this frame's storage, to avoid stepping on frames owned by the gpu
void VKBE_RestartFrame(void)
{
uint32_t i;
for (i = 0; i < DB_MAX; i++)
{
vk.dynbuf[i] = vk.frame->dynbufs[i];
vk.dynbuf[i]->offset = vk.dynbuf[i]->flushed = 0;
}
vk.dynbuf[i] = NULL;
shaderstate.rc.activepipeline = VK_NULL_HANDLE;
vk.descpool = vk.frame->descpools;
@ -1052,6 +1490,14 @@ void VKBE_RestartFrame(void)
vkResetDescriptorPool(vk.device, vk.descpool->pool, 0);
vk.descpool->availsets = vk.descpool->totalsets;
}
#ifdef VK_KHR_acceleration_structure
if (vk.khr_ray_query && r_worldentity.model && shaderstate.needtlas)
shaderstate.tlas = VKBE_GenerateTLAS();
else
shaderstate.tlas = VK_NULL_HANDLE;
shaderstate.needtlas = false;
#endif
}
void VKBE_ShutdownFramePools(struct vkframe *frame)
@ -1674,7 +2120,7 @@ static void BE_GenerateColourMods(unsigned int vertcount, const shaderpass_t *pa
//we can at least ensure that the data is written in one go to aid cpu cache.
vec4_t *fte_restrict map;
unsigned int mno;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec4_t), buffer, offset);
map = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vec4_t), buffer, offset);
if (m->colors4f_array[0])
{
for (mno = 0; mno < shaderstate.nummeshes; mno++)
@ -1706,7 +2152,7 @@ static void BE_GenerateColourMods(unsigned int vertcount, const shaderpass_t *pa
{
vec4_t *fte_restrict map;
unsigned int mno;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec4_t), buffer, offset);
map = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vec4_t), buffer, offset);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
@ -2520,9 +2966,9 @@ static void BE_CreatePipeline(program_t *p, unsigned int shaderflags, unsigned i
#ifdef VK_KHR_fragment_shading_rate
if (vk.khr_fragment_shading_rate)
{
//three ways to specify rates... we need to set which one wins here.
shadingrate.combinerOps[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR;//pipeline vs primitive
shadingrate.combinerOps[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR;//previous vs attachment
//three ways to specify rates... we need to set which one wins here. we only do pipeline rates.
shadingrate.combinerOps[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR;//pipeline vs primitive
shadingrate.combinerOps[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR;//previous vs attachment
if (blendflags & SBITS_MISC_FULLRATE)
{
shadingrate.fragmentSize.width = 1;
@ -2622,6 +3068,24 @@ static void BE_SetupUBODescriptor(VkDescriptorSet set, VkWriteDescriptorSet *fir
desc->pBufferInfo = info;
desc->pTexelBufferView = NULL;
}
#ifdef VK_KHR_acceleration_structure
//Fills in one descriptor write that binds the current frame's tlas (shaderstate.tlas).
//set: destination set. firstdesc: start of the caller's write array, so the binding index is desc's position within it.
//desc: the write to fill in. descas: caller-provided storage for the acceleration-structure extension struct that gets chained onto desc.
static void BE_SetupAccelerationDescriptor(VkDescriptorSet set, VkWriteDescriptorSet *firstdesc, VkWriteDescriptorSet *desc, VkWriteDescriptorSetAccelerationStructureKHR *descas)
{
	//the extension struct carries the actual handle.
	descas->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR;
	descas->pNext = NULL;
	descas->accelerationStructureCount = 1;
	descas->pAccelerationStructures = &shaderstate.tlas;

	//the write itself has no image/buffer payload, only the chained struct.
	desc->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
	desc->pNext = descas;
	desc->dstSet = set;
	desc->dstBinding = desc-firstdesc;
	desc->dstArrayElement = 0;
	desc->descriptorCount = 1;
	desc->descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR;
	desc->pImageInfo = NULL;
	desc->pBufferInfo = NULL;
	desc->pTexelBufferView = NULL;
}
#endif
static qboolean BE_SetupMeshProgram(program_t *p, shaderpass_t *pass, unsigned int shaderbits, unsigned int idxcount)
{
@ -2652,6 +3116,9 @@ static qboolean BE_SetupMeshProgram(program_t *p, shaderpass_t *pass, unsigned i
{
VkDescriptorSet set = shaderstate.rc.descriptorsets[0] = vk.khr_push_descriptor?VK_NULL_HANDLE:VKBE_TempDescriptorSet(p->desclayout);
VkWriteDescriptorSet descs[MAX_TMUS], *desc = descs;
#ifdef VK_KHR_acceleration_structure
VkWriteDescriptorSetAccelerationStructureKHR descas;
#endif
VkDescriptorImageInfo imgs[MAX_TMUS], *img = imgs;
unsigned int i;
texid_t t;
@ -2660,6 +3127,15 @@ static qboolean BE_SetupMeshProgram(program_t *p, shaderpass_t *pass, unsigned i
//light / scene
BE_SetupUBODescriptor(set, descs, desc++, &shaderstate.ubo_entity);
BE_SetupUBODescriptor(set, descs, desc++, &shaderstate.ubo_light);
#ifdef VK_KHR_acceleration_structure
if (p->rayquery) //an alternative to shadowmaps...
{
shaderstate.needtlas = true;
if (!shaderstate.tlas)
return false; //nope... maybe next frame
BE_SetupAccelerationDescriptor(set, descs, desc++, &descas);
}
#endif
if (p->defaulttextures & (1u<<S_SHADOWMAP))
BE_SetupTextureDescriptor(shaderstate.currentshadowmap, r_whiteimage, set, descs, desc++, img++);
if (p->defaulttextures & (1u<<S_PROJECTIONMAP))
@ -2870,7 +3346,7 @@ static void BE_DrawMeshChain_Internal(void)
m = shaderstate.meshlist[mno];
idxcount += m->numindexes;
}
map = VKBE_AllocateBufferSpace(DB_EBO, idxcount * sizeof(*map), &buf, &offset);
map = VKBE_AllocateStreamingSpace(DB_EBO, idxcount * sizeof(*map), &buf, &offset);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
@ -2895,7 +3371,7 @@ static void BE_DrawMeshChain_Internal(void)
idxcount += m->numindexes;
}
map = VKBE_AllocateBufferSpace(DB_EBO, idxcount * sizeof(*map), &buf, &offset);
map = VKBE_AllocateStreamingSpace(DB_EBO, idxcount * sizeof(*map), &buf, &offset);
for (mno = 0, vertcount = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
@ -2926,7 +3402,7 @@ static void BE_DrawMeshChain_Internal(void)
unsigned int mno;
unsigned int i;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vecV_t), &vertexbuffers[VK_BUFF_POS], &vertexoffsets[VK_BUFF_POS]);
map = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vecV_t), &vertexbuffers[VK_BUFF_POS], &vertexoffsets[VK_BUFF_POS]);
if (vblends)
{
@ -3013,7 +3489,7 @@ static void BE_DrawMeshChain_Internal(void)
if (shaderstate.meshlist[0]->normals_array[0])
{
vec4_t *fte_restrict map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec3_t), &vertexbuffers[VK_BUFF_NORM], &vertexoffsets[VK_BUFF_NORM]);
vec4_t *fte_restrict map = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vec3_t), &vertexbuffers[VK_BUFF_NORM], &vertexoffsets[VK_BUFF_NORM]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
@ -3029,7 +3505,7 @@ static void BE_DrawMeshChain_Internal(void)
if (shaderstate.meshlist[0]->snormals_array[0])
{
vec4_t *fte_restrict map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec3_t), &vertexbuffers[VK_BUFF_SDIR], &vertexoffsets[VK_BUFF_SDIR]);
vec4_t *fte_restrict map = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vec3_t), &vertexbuffers[VK_BUFF_SDIR], &vertexoffsets[VK_BUFF_SDIR]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
@ -3045,7 +3521,7 @@ static void BE_DrawMeshChain_Internal(void)
if (shaderstate.meshlist[0]->tnormals_array[0])
{
vec4_t *fte_restrict map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec3_t), &vertexbuffers[VK_BUFF_TDIR], &vertexoffsets[VK_BUFF_TDIR]);
vec4_t *fte_restrict map = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vec3_t), &vertexbuffers[VK_BUFF_TDIR], &vertexoffsets[VK_BUFF_TDIR]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
@ -3061,7 +3537,7 @@ static void BE_DrawMeshChain_Internal(void)
if (shaderstate.meshlist[0]->colors4f_array[0])
{
vec4_t *fte_restrict map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec4_t), &vertexbuffers[VK_BUFF_COL], &vertexoffsets[VK_BUFF_COL]);
vec4_t *fte_restrict map = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vec4_t), &vertexbuffers[VK_BUFF_COL], &vertexoffsets[VK_BUFF_COL]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
@ -3071,7 +3547,7 @@ static void BE_DrawMeshChain_Internal(void)
}
else if (shaderstate.meshlist[0]->colors4b_array)
{
vec4_t *fte_restrict map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec4_t), &vertexbuffers[VK_BUFF_COL], &vertexoffsets[VK_BUFF_COL]);
vec4_t *fte_restrict map = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vec4_t), &vertexbuffers[VK_BUFF_COL], &vertexoffsets[VK_BUFF_COL]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
@ -3084,7 +3560,7 @@ static void BE_DrawMeshChain_Internal(void)
}
else
{ //FIXME: use some predefined buffer
vec4_t *fte_restrict map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec4_t), &vertexbuffers[VK_BUFF_COL], &vertexoffsets[VK_BUFF_COL]);
vec4_t *fte_restrict map = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vec4_t), &vertexbuffers[VK_BUFF_COL], &vertexoffsets[VK_BUFF_COL]);
for (i = 0; i < vertcount; i++)
{
Vector4Set(map[i], 1, 1, 1, 1);
@ -3093,8 +3569,8 @@ static void BE_DrawMeshChain_Internal(void)
if (shaderstate.meshlist[0]->lmst_array[0])
{
vec2_t *fte_restrict map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_TC], &vertexoffsets[VK_BUFF_TC]);
vec2_t *fte_restrict lmmap = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_LMTC], &vertexoffsets[VK_BUFF_LMTC]);
vec2_t *fte_restrict map = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_TC], &vertexoffsets[VK_BUFF_TC]);
vec2_t *fte_restrict lmmap = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_LMTC], &vertexoffsets[VK_BUFF_LMTC]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
@ -3106,7 +3582,7 @@ static void BE_DrawMeshChain_Internal(void)
}
else
{
vec2_t *fte_restrict map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_TC], &vertexoffsets[VK_BUFF_TC]);
vec2_t *fte_restrict map = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_TC], &vertexoffsets[VK_BUFF_TC]);
for (mno = 0; mno < shaderstate.nummeshes; mno++)
{
m = shaderstate.meshlist[mno];
@ -3155,7 +3631,7 @@ static void BE_DrawMeshChain_Internal(void)
else
{
float *map;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_TC], &vertexoffsets[VK_BUFF_TC]);
map = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_TC], &vertexoffsets[VK_BUFF_TC]);
BE_GenerateTCMods(p, map);
vertexbuffers[VK_BUFF_LMTC] = vertexbuffers[VK_BUFF_TC];
@ -3195,7 +3671,7 @@ static void BE_DrawMeshChain_Internal(void)
else
{
float *map;
map = VKBE_AllocateBufferSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_TC], &vertexoffsets[VK_BUFF_TC]);
map = VKBE_AllocateStreamingSpace(DB_VBO, vertcount * sizeof(vec2_t), &vertexbuffers[VK_BUFF_TC], &vertexoffsets[VK_BUFF_TC]);
BE_GenerateTCMods(p, map);
}
@ -3274,7 +3750,15 @@ qboolean VKBE_GenerateRTLightShader(unsigned int lmode)
{
if (!shaderstate.shader_rtlight[lmode])
{
shaderstate.shader_rtlight[lmode] = R_RegisterShader(va("rtlight%s%s%s",
#ifdef VK_KHR_acceleration_structure
if (lmode & LSHADER_RAYQUERY)
shaderstate.shader_rtlight[lmode] = R_RegisterShader(va("rq_rtlight%s%s",
(lmode & LSHADER_SPOT)?"#SPOT=1":"#SPOT=0",
(lmode & LSHADER_CUBE)?"#CUBE=1":"#CUBE=0")
, SUF_NONE, LIGHTPASS_SHADER_RQ);
else
#endif
shaderstate.shader_rtlight[lmode] = R_RegisterShader(va("rtlight%s%s%s",
(lmode & LSHADER_SMAP)?"#PCF=1":"#PCF=0",
(lmode & LSHADER_SPOT)?"#SPOT=1":"#SPOT=0",
(lmode & LSHADER_CUBE)?"#CUBE=1":"#CUBE=0")
@ -3338,6 +3822,7 @@ void *VKBE_CreateStagingBuffer(struct stagingbuf *n, size_t size, VkBufferUsageF
Sys_Error("Unable to allocate buffer memory");
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &n->mem.memory));
DebugSetName(VK_OBJECT_TYPE_DEVICE_MEMORY, (uint64_t)n->mem.memory, "VKBE_CreateStagingBuffer");
VkAssert(vkBindBufferMemory(vk.device, n->buf, n->mem.memory, n->mem.offset));
VkAssert(vkMapMemory(vk.device, n->mem.memory, 0, n->size, 0, &ptr));
@ -3408,6 +3893,7 @@ VkBuffer VKBE_FinishStaging(struct stagingbuf *n, vk_poolmem_t *mem)
mem->memory = VK_NULL_HANDLE;
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &mem->memory));
DebugSetName(VK_OBJECT_TYPE_DEVICE_MEMORY, (uint64_t)mem->memory, "VKBE_FinishStaging");
}
VkAssert(vkBindBufferMemory(vk.device, retbuf, mem->memory, mem->offset));
}
@ -3636,7 +4122,7 @@ void VK_UploadLightmap(lightmapinfo_t *lm)
size_t x = lm->rectchange.l, w = lm->rectchange.r - lm->rectchange.l;
size_t y = lm->rectchange.t, h = lm->rectchange.b - lm->rectchange.t, i;
data = VKBE_AllocateBufferSpace(DB_STAGING, w * h * 4, &buf, &bic.bufferOffset);
data = VKBE_AllocateStreamingSpace(DB_STAGING, w * h * 4, &buf, &bic.bufferOffset);
bic.bufferRowLength = w;
bic.bufferImageHeight = h;
bic.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
@ -3776,7 +4262,7 @@ static void VKBE_SetupLightCBuffer(dlight_t *dl, vec3_t colour, vec3_t axis[3])
#ifdef RTLIGHTS
extern cvar_t gl_specular;
#endif
vkcbuf_light_t *cbl = VKBE_AllocateBufferSpace(DB_UBO, (sizeof(*cbl) + 0x0ff) & ~0xff, &shaderstate.ubo_light.buffer, &shaderstate.ubo_light.offset);
vkcbuf_light_t *cbl = VKBE_AllocateStreamingSpace(DB_UBO, (sizeof(*cbl) + 0x0ff) & ~0xff, &shaderstate.ubo_light.buffer, &shaderstate.ubo_light.offset);
shaderstate.ubo_light.range = sizeof(*cbl);
if (!dl)
@ -3843,7 +4329,7 @@ static void BE_RotateForEntity (const entity_t *fte_restrict e, const model_t *f
float modelmatrix[16];
float *m = modelmatrix;
float *proj;
vkcbuf_entity_t *fte_restrict cbe = VKBE_AllocateBufferSpace(DB_UBO, (sizeof(*cbe) + 0x0ff) & ~0xff, &shaderstate.ubo_entity.buffer, &shaderstate.ubo_entity.offset);
vkcbuf_entity_t *fte_restrict cbe = VKBE_AllocateStreamingSpace(DB_UBO, (sizeof(*cbe) + 0x0ff) & ~0xff, &shaderstate.ubo_entity.buffer, &shaderstate.ubo_entity.offset);
shaderstate.ubo_entity.range = sizeof(*cbe);
shaderstate.curentity = e;
@ -5432,11 +5918,11 @@ struct vk_shadowbuffer *VKBE_GenerateShadowBuffer(vecV_t *verts, int numverts, i
struct vk_shadowbuffer *buf = &tempbuf;
void *fte_restrict map;
map = VKBE_AllocateBufferSpace(DB_VBO, sizeof(*verts)*numverts, &buf->vbuffer, &buf->voffset);
map = VKBE_AllocateStreamingSpace(DB_VBO, sizeof(*verts)*numverts, &buf->vbuffer, &buf->voffset);
memcpy(map, verts, sizeof(*verts)*numverts);
buf->numverts = numverts;
map = VKBE_AllocateBufferSpace(DB_EBO, sizeof(*indicies)*numindicies, &buf->ibuffer, &buf->ioffset);
map = VKBE_AllocateStreamingSpace(DB_EBO, sizeof(*indicies)*numindicies, &buf->ibuffer, &buf->ioffset);
memcpy(map, indicies, sizeof(*indicies)*numindicies);
buf->numindicies = numindicies;
return buf;
@ -5575,6 +6061,7 @@ qboolean VKBE_BeginShadowmap(qboolean isspot, uint32_t width, uint32_t height)
if (memAllocInfo.memoryTypeIndex == ~0)
memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, 0);
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &shad->memory));
DebugSetName(VK_OBJECT_TYPE_DEVICE_MEMORY, (uint64_t)shad->memory, "VKBE_BeginShadowmap");
VkAssert(vkBindImageMemory(vk.device, shad->image, shad->memory, 0));
}

View file

@ -7,8 +7,6 @@
#include "vr.h"
#define VK_API_MAX_VERSION VK_API_VERSION_1_0
extern qboolean vid_isfullscreen;
cvar_t vk_stagingbuffers = CVARFD ("vk_stagingbuffers", "", CVAR_RENDERERLATCH, "Configures which dynamic buffers are copied into gpu memory for rendering, instead of reading from shared memory. Empty for default settings.\nAccepted chars are u(niform), e(lements), v(ertex), 0(none).");
@ -28,6 +26,9 @@ static cvar_t vK_khr_fragment_shading_rate = CVARFD("vK_khr_fragment_shading_ra
#ifdef VK_EXT_astc_decode_mode
static cvar_t vk_ext_astc_decode_mode = CVARFD("vk_ext_astc_decode_mode", "", CVAR_VIDEOLATCH, "Enables reducing texture cache sizes for LDR ASTC-compressed textures.");
#endif
#ifdef VK_KHR_ray_query
static cvar_t vk_khr_ray_query = CVARFD("vk_khr_ray_query", "", CVAR_VIDEOLATCH, "Required for the use of hardware raytraced shadows.");
#endif
extern cvar_t vid_srgb, vid_vsync, vid_triplebuffer, r_stereo_method, vid_multisample, vid_bpp;
texid_t r_blackcubeimage, r_whitecubeimage;
@ -54,6 +55,9 @@ void VK_RegisterVulkanCvars(void)
#ifdef VK_EXT_astc_decode_mode
Cvar_Register (&vk_ext_astc_decode_mode, VKRENDEREROPTIONS);
#endif
#ifdef VK_KHR_ray_query
Cvar_Register (&vk_khr_ray_query, VKRENDEREROPTIONS);
#endif
}
void R2D_Console_Resize(void);
static void VK_DestroySampler(VkSampler s);
@ -65,7 +69,7 @@ extern qboolean scr_con_forcedraw;
#define Sys_UnlockConditional(c)
#endif
const char *vklayerlist[] =
static const char *vklayerlist[] =
{
#if 1
"VK_LAYER_KHRONOS_validation"
@ -264,6 +268,9 @@ char *DebugAnnotObjectToString(VkObjectType t)
#ifdef VK_NV_ray_tracing
case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV: return "VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV";
#endif
#ifdef VK_KHR_acceleration_structure
case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR: return "VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR";
#endif
// case VK_OBJECT_TYPE_RANGE_SIZE:
case VK_OBJECT_TYPE_MAX_ENUM:
break;
@ -659,6 +666,7 @@ static qboolean VK_CreateSwapChain(void)
}
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &memories[i]));
DebugSetName(VK_OBJECT_TYPE_DEVICE_MEMORY, (uint64_t)memories[i], "VK_CreateSwapChain");
VkAssert(vkBindImageMemory(vk.device, images[i], memories[i], 0));
}
}
@ -1341,6 +1349,7 @@ qboolean VK_AllocatePoolMemory(uint32_t pooltype, VkDeviceSize memsize, VkDevice
Z_Free(p);
return false;
}
DebugSetName(VK_OBJECT_TYPE_DEVICE_MEMORY, (uint64_t)p->memory, "VK_AllocatePoolMemory");
p->next = vk.mempools;
vk.mempools = p;
}
@ -1426,6 +1435,7 @@ qboolean VK_AllocateImageMemory(VkImage image, qboolean dedicated, vk_poolmem_t
err = vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &mem->memory);
if (err != VK_SUCCESS)
return false;
DebugSetName(VK_OBJECT_TYPE_DEVICE_MEMORY, (uint64_t)mem->memory, "VK_AllocateImageMemory");
return true;
}
}
@ -2099,6 +2109,7 @@ qboolean VK_LoadTextureMips (texid_t tex, const struct pendingtextureinfo *mips)
VK_FencedSubmit(fence);
return false; //some sort of oom error?
}
DebugSetName(VK_OBJECT_TYPE_DEVICE_MEMORY, (uint64_t)fence->stagingmemory, "VK_LoadTextureMips");
VkAssert(vkBindBufferMemory(vk.device, fence->stagingbuffer, fence->stagingmemory, 0));
VkAssert(vkMapMemory(vk.device, fence->stagingmemory, 0, bci.size, 0, &mapdata));
if (!mapdata)
@ -3247,6 +3258,7 @@ void VKVID_QueueGetRGBData (void (*gotrgbdata) (void *rgbdata, intptr_t bytest
if (memAllocInfo.memoryTypeIndex == ~0u)
memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &capt->memory));
DebugSetName(VK_OBJECT_TYPE_DEVICE_MEMORY, (uint64_t)capt->memory, "VKVID_QueueGetRGBData");
VkAssert(vkBindBufferMemory(vk.device, capt->buffer, capt->memory, 0));
set_image_layout(vk.rendertarg->cbuf, vk.frame->backbuf->colour.image, VK_IMAGE_ASPECT_COLOR_BIT,
@ -3334,6 +3346,7 @@ char *VKVID_GetRGBInfo (int *bytestride, int *truevidwidth, int *truevidheight
memAllocInfo.allocationSize = mem_reqs.size;
memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, 0);
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &tempmemory));
DebugSetName(VK_OBJECT_TYPE_DEVICE_MEMORY, (uint64_t)tempmemory, "VKVID_GetRGBInfo staging");
VkAssert(vkBindImageMemory(vk.device, tempimage, tempmemory, 0));
bci.flags = 0;
@ -3344,12 +3357,14 @@ char *VKVID_GetRGBInfo (int *bytestride, int *truevidwidth, int *truevidheight
bci.pQueueFamilyIndices = NULL;
VkAssert(vkCreateBuffer(vk.device, &bci, vkallocationcb, &tempbuffer));
DebugSetName(VK_OBJECT_TYPE_BUFFER, (uint64_t)tempbuffer, "VKVID_GetRGBInfo buffer");
vkGetBufferMemoryRequirements(vk.device, tempbuffer, &mem_reqs);
memAllocInfo.allocationSize = mem_reqs.size;
memAllocInfo.memoryTypeIndex = vk_find_memory_try(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT|VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
if (memAllocInfo.memoryTypeIndex == ~0u)
memAllocInfo.memoryTypeIndex = vk_find_memory_require(mem_reqs.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
VkAssert(vkAllocateMemory(vk.device, &memAllocInfo, vkallocationcb, &tempbufmemory));
DebugSetName(VK_OBJECT_TYPE_DEVICE_MEMORY, (uint64_t)tempbufmemory, "VKVID_GetRGBInfo buffer");
VkAssert(vkBindBufferMemory(vk.device, tempbuffer, tempbufmemory, 0));
@ -3732,8 +3747,6 @@ qboolean VK_SCR_GrabBackBuffer(void)
vkBeginCommandBuffer(vk.rendertarg->cbuf, &begininf);
}
VKBE_RestartFrame();
// VK_DebugFramerate();
// vkCmdWriteTimestamp(vk.frame->cbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, querypool, vk.bufferidx*2+0);
@ -3780,6 +3793,7 @@ qboolean VK_SCR_GrabBackBuffer(void)
imgbarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
vkCmdPipelineBarrier(vk.rendertarg->cbuf, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, NULL, 0, NULL, 1, &imgbarrier);
}
VKBE_RestartFrame();
{
int rp = vk.frame->backbuf->rpassflags;
@ -4649,7 +4663,7 @@ qboolean VK_EnumerateDevices (void *usercontext, void(*callback)(void *context,
app.applicationVersion = revision_number(false);
app.pEngineName = "FTE Quake";
app.engineVersion = VK_MAKE_VERSION(FTE_VER_MAJOR, FTE_VER_MINOR, 0);
app.apiVersion = VK_API_MAX_VERSION;
app.apiVersion = VK_API_VERSION_1_0; //make sure it works...
memset(&inst_info, 0, sizeof(inst_info));
inst_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
@ -4737,6 +4751,13 @@ qboolean VK_Init(rendererstate_t *info, const char **sysextnames, qboolean (*cre
#endif
#ifdef VK_EXT_astc_decode_mode
{&vk.ext_astc_decode_mode, VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME, &vk_ext_astc_decode_mode, true, NULL, NULL},
#endif
//extensions needed for raytraced shadows. all three entries are controlled by the single vk_khr_ray_query cvar.
//NOTE(review): vk_khr_ray_query is declared under '#ifdef VK_KHR_ray_query' but is referenced here under VK_KHR_acceleration_structure too - confirm the headers always define both together.
#ifdef VK_KHR_acceleration_structure
{&vk.khr_deferred_host_operations, VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME, &vk_khr_ray_query, true, NULL, NULL}, //dependency of khr_acceleration_structure
{&vk.khr_acceleration_structure, VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, &vk_khr_ray_query, true, NULL, NULL},
#endif
#ifdef VK_KHR_ray_query
{&vk.khr_ray_query, VK_KHR_RAY_QUERY_EXTENSION_NAME, &vk_khr_ray_query, true, NULL, NULL},
#endif
};
size_t e;
@ -4836,7 +4857,18 @@ qboolean VK_Init(rendererstate_t *info, const char **sysextnames, qboolean (*cre
app.applicationVersion = revision_number(false);
app.pEngineName = "FTE Quake";
app.engineVersion = VK_MAKE_VERSION(FTE_VER_MAJOR, FTE_VER_MINOR, 0);
app.apiVersion = VK_API_MAX_VERSION;
app.apiVersion = VK_API_VERSION_1_0;
if (vkEnumerateInstanceVersion)
{
vkEnumerateInstanceVersion(&app.apiVersion);
#ifdef VK_API_VERSION_1_2
if (app.apiVersion > VK_API_VERSION_1_2)
app.apiVersion = VK_API_VERSION_1_2;
#else
if (app.apiVersion > VK_API_VERSION_1_0)
app.apiVersion = VK_API_VERSION_1_0;
#endif
}
memset(&inst_info, 0, sizeof(inst_info));
inst_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
@ -5044,10 +5076,31 @@ qboolean VK_Init(rendererstate_t *info, const char **sysextnames, qboolean (*cre
{
char *vendor, *type;
VkPhysicalDeviceProperties props;
vkGetPhysicalDeviceProperties(vk.gpu, &props);
#ifdef VK_API_VERSION_1_2
VkPhysicalDeviceVulkan12Properties props12 = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES};
VkPhysicalDeviceProperties2 props = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, &props12};
#else
struct {VkPhysicalDeviceProperties properties;} props;
#endif
vkGetPhysicalDeviceProperties(vk.gpu, &props.properties); //legacy
vk.apiversion = props.properties.apiVersion;
if (vk.apiversion > app.apiVersion)
vk.apiversion = app.apiVersion; //cap it to the instance version...
#ifdef VK_API_VERSION_1_2
if (vk.apiversion >= VK_API_VERSION_1_2)
{
PFN_vkGetPhysicalDeviceProperties2 vkGetPhysicalDeviceProperties2 = (PFN_vkGetPhysicalDeviceProperties2)vkGetInstanceProcAddr(vk.instance, "vkGetPhysicalDeviceProperties2");
if (vkGetPhysicalDeviceProperties2)
vkGetPhysicalDeviceProperties2(vk.gpu, &props);
}
switch(props.vendorID)
if (*props12.driverName)
vendor = props12.driverName;
else
#endif
switch(props.properties.vendorID)
{
//explicit registered vendors
case 0x10001: vendor = "Vivante"; break;
@ -5079,10 +5132,10 @@ qboolean VK_Init(rendererstate_t *info, const char **sysextnames, qboolean (*cre
case 0x11E2: vendor = "Samsung"; break;
case 0x1249: vendor = "Samsung"; break;
default: vendor = va("VEND_%x", props.vendorID); break;
default: vendor = va("VEND_%x", props.properties.vendorID); break;
}
switch(props.deviceType)
switch(props.properties.deviceType)
{
default:
case VK_PHYSICAL_DEVICE_TYPE_OTHER: type = "(other)"; break;
@ -5092,10 +5145,24 @@ qboolean VK_Init(rendererstate_t *info, const char **sysextnames, qboolean (*cre
case VK_PHYSICAL_DEVICE_TYPE_CPU: type = "software"; break;
}
Con_TPrintf("Vulkan %u.%u.%u: GPU%i %s %s %s (%u.%u.%u)\n", VK_VERSION_MAJOR(props.apiVersion), VK_VERSION_MINOR(props.apiVersion), VK_VERSION_PATCH(props.apiVersion),
gpuidx, type, vendor, props.deviceName,
VK_VERSION_MAJOR(props.driverVersion), VK_VERSION_MINOR(props.driverVersion), VK_VERSION_PATCH(props.driverVersion)
);
#ifdef VK_API_VERSION_1_2
if (*props12.driverInfo)
{
Con_TPrintf("Vulkan Driver Name: %s\n"
"Vulkan Device (GPU%i): %s\n"
"Vulkan Driver Info: %s\n",
vendor,
gpuidx, props.properties.deviceName,
props12.driverInfo );
}
else
#endif
{
Con_TPrintf("Vulkan %u.%u.%u: GPU%i %s %s %s (%u.%u.%u)\n", VK_VERSION_MAJOR(props.properties.apiVersion), VK_VERSION_MINOR(props.properties.apiVersion), VK_VERSION_PATCH(props.properties.apiVersion),
gpuidx, type, vendor, props.properties.deviceName,
VK_VERSION_MAJOR(props.properties.driverVersion), VK_VERSION_MINOR(props.properties.driverVersion), VK_VERSION_PATCH(props.properties.driverVersion)
);
}
}
//figure out which of the device's queues we're going to use
@ -5196,6 +5263,20 @@ qboolean VK_Init(rendererstate_t *info, const char **sysextnames, qboolean (*cre
}
free(ext);
}
//sanity-check the optional extension flags against each other and against the capped api version.
//ray query and acceleration structure are only kept as a pair, and all three features below are dropped on anything older than vulkan 1.2.
//NOTE(review): vk.khr_deferred_host_operations is not cleared here when its dependants are dropped - confirm that is intended.
#ifdef VK_KHR_ray_query
//ray queries are pointless without an acceleration structure to trace against.
if ((vk.khr_ray_query && !vk.khr_acceleration_structure) || vk.apiversion < VK_API_VERSION_1_2)
vk.khr_ray_query = false; //doesn't make sense.
#endif
#ifdef VK_KHR_acceleration_structure
//this runs after the ray query check above, so it also catches ray query having just been disabled.
if ((vk.khr_acceleration_structure && !vk.khr_ray_query) || vk.apiversion < VK_API_VERSION_1_2)
vk.khr_acceleration_structure = false; //not useful.
#endif
#ifdef VK_KHR_fragment_shading_rate
if (vk.apiversion < VK_API_VERSION_1_2) //too lazy to check its requisite extensions. vk12 is enough.
vk.khr_fragment_shading_rate = false;
#endif
{
const char *devextensions[1+countof(knowndevexts)];
size_t numdevextensions = 0;
@ -5204,6 +5285,19 @@ qboolean VK_Init(rendererstate_t *info, const char **sysextnames, qboolean (*cre
VkDeviceCreateInfo devinf = {VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO};
VkPhysicalDeviceFeatures features;
VkPhysicalDeviceFeatures avail;
void *next = NULL;
#ifdef VK_KHR_fragment_shading_rate
VkPhysicalDeviceFragmentShadingRateFeaturesKHR shadingrate = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR};
#endif
#ifdef VK_KHR_ray_query
VkPhysicalDeviceRayQueryFeaturesKHR rayquery = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR};
#endif
#ifdef VK_KHR_acceleration_structure
VkPhysicalDeviceAccelerationStructureFeaturesKHR accelstruct = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR};
#endif
#ifdef VK_API_VERSION_1_2
VkPhysicalDeviceVulkan12Features vk12features = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES};
#endif
memset(&features, 0, sizeof(features));
vkGetPhysicalDeviceFeatures(vk.gpu, &avail);
@ -5280,6 +5374,47 @@ qboolean VK_Init(rendererstate_t *info, const char **sysextnames, qboolean (*cre
devinf.ppEnabledExtensionNames = devextensions;
devinf.pEnabledFeatures = &features;
//build the pNext chain of optional feature structs for the device create info.
//each enabled struct is pushed onto the front of the chain through 'next', and the final head is stored in devinf.pNext.
//NOTE(review): the feature bits are requested purely from the extension flags; nothing visible here queries whether the device reports them as supported - confirm.
#ifdef VK_KHR_fragment_shading_rate
if (vk.khr_fragment_shading_rate)
{
shadingrate.pNext = next;
next = &shadingrate; //now linked
//only the per-pipeline rate is requested; the per-primitive and attachment-based rates are left off.
shadingrate.pipelineFragmentShadingRate = true;
shadingrate.primitiveFragmentShadingRate = false;
shadingrate.attachmentFragmentShadingRate = false;
}
#endif
#ifdef VK_KHR_ray_query
if (vk.khr_ray_query)
{
rayquery.pNext = next;
next = &rayquery; //now linked
rayquery.rayQuery = true;
}
#endif
#ifdef VK_KHR_acceleration_structure
if (vk.khr_acceleration_structure)
{
accelstruct.pNext = next;
next = &accelstruct; //now linked
//only the basic feature is requested, every optional extra is explicitly left disabled.
accelstruct.accelerationStructure = true;
accelstruct.accelerationStructureCaptureReplay = false;
accelstruct.accelerationStructureIndirectBuild = false;
accelstruct.accelerationStructureHostCommands = false;
accelstruct.descriptorBindingAccelerationStructureUpdateAfterBind = false;
//this is set on the vk12 struct, which only gets linked by the apiversion check below.
vk12features.bufferDeviceAddress = true; //we also need this feature.
}
#endif
#ifdef VK_API_VERSION_1_2
if (vk.apiversion >= VK_API_VERSION_1_2)
{
vk12features.pNext = next;
next = &vk12features;
}
#endif
devinf.pNext = next;
#if 0
if (vkEnumeratePhysicalDeviceGroupsKHR && vk_afr.ival)
{
@ -5342,6 +5477,9 @@ qboolean VK_Init(rendererstate_t *info, const char **sysextnames, qboolean (*cre
vkGetDeviceProcAddr = (PFN_vkGetDeviceProcAddr)vkGetInstanceProcAddr(vk.instance, "vkGetDeviceProcAddr");
#define VKFunc(n) vk##n = (PFN_vk##n)vkGetDeviceProcAddr(vk.device, "vk"#n);
VKDevFuncs
#ifdef VK_KHR_acceleration_structure
if (vk.khr_acceleration_structure) { VKAccelStructFuncs }
#endif
#undef VKFunc
#endif
@ -5448,6 +5586,12 @@ qboolean VK_Init(rendererstate_t *info, const char **sysextnames, qboolean (*cre
if (info->srgb > 0 && (vid.flags & VID_SRGB_FB))
vid.flags |= VID_SRGBAWARE;
Q_snprintfz(info->subrenderer, sizeof(info->subrenderer), "GPU%i", gpuidx);
if (!vk.khr_fragment_shading_rate)
Cvar_LockUnsupportedRendererCvar(&r_halfrate, "0");
if (!vk.khr_ray_query)
Cvar_LockUnsupportedRendererCvar(&r_shadow_raytrace, "0");
return true;
}
void VK_Shutdown(void)

View file

@ -62,6 +62,7 @@
#define VKInstFuncs \
VKFunc(EnumerateInstanceLayerProperties) \
VKFunc(EnumerateInstanceExtensionProperties) \
VKFunc(EnumerateInstanceVersion) \
VKFunc(CreateInstance)
//funcs specific to an instance
@ -175,11 +176,25 @@
VKFunc(CreateImageView) \
VKFunc(DestroyImageView)
//funcs for ray query's acceleration structures
//like the other lists in this header, each entry is wrapped in VKFunc(), which the user of the list defines before expanding it.
//GetBufferDeviceAddress is the only entry without a KHR suffix (marked as 1.2).
//when the vulkan headers lack VK_KHR_acceleration_structure the list is defined empty, so it can still be named unconditionally.
#ifdef VK_KHR_acceleration_structure
#define VKAccelStructFuncs \
VKFunc(GetBufferDeviceAddress)/*1.2*/ \
VKFunc(GetAccelerationStructureBuildSizesKHR) \
VKFunc(CreateAccelerationStructureKHR) \
VKFunc(GetAccelerationStructureDeviceAddressKHR) \
VKFunc(DestroyAccelerationStructureKHR) \
VKFunc(CmdBuildAccelerationStructuresKHR)
#else
#define VKAccelStructFuncs
#endif
//all vulkan funcs
#define VKFuncs \
VKInstFuncs \
VKInst2Funcs \
VKDevFuncs \
VKAccelStructFuncs \
VKFunc(GetInstanceProcAddr)\
VKFunc(GetDeviceProcAddr)
@ -233,6 +248,12 @@ enum dynbuf_e
DB_EBO,
DB_UBO,
DB_STAGING,
#ifdef VK_KHR_acceleration_structure
DB_ACCELERATIONSTRUCT,
DB_ACCELERATIONSCRATCH,
DB_ACCELERATIONMESHDATA,
DB_ACCELERATIONINSTANCE,
#endif
DB_MAX
};
struct vk_rendertarg
@ -280,14 +301,24 @@ extern struct vulkaninfo_s
qboolean khr_dedicated_allocation; //standardised version of the above where the driver decides whether a resource is worth a dedicated allocation.
qboolean khr_push_descriptor; //more efficient descriptor streaming
qboolean amd_rasterization_order; //allows primitives to draw in any order
#ifdef VK_EXT_astc_decode_mode
qboolean ext_astc_decode_mode; //small perf boost
#endif
#ifdef VK_KHR_fragment_shading_rate
qboolean khr_fragment_shading_rate; //small perf boost. probably more useful for battery.
#endif
#ifdef VK_KHR_ray_query
qboolean khr_ray_query;
#endif
#ifdef VK_KHR_acceleration_structure
qboolean khr_acceleration_structure;
qboolean khr_deferred_host_operations; //need to enable it, we don't make use of it though.
#endif
VkInstance instance;
VkDevice device;
VkPhysicalDevice gpu;
uint32_t apiversion; //the device api, capped by instance version, capped by our own version... sigh
VkSurfaceKHR surface;
uint32_t queuefam[VQ_COUNT];
uint32_t queuenum[VQ_COUNT];

View file

@ -11,7 +11,6 @@ class options_effects : mitem_exmenu
{
if (key == "r_replacemodels") //convert from boolean to some arbitrary list.
newval = stof(newval)?"iqm md5mesh md3":"";
super::set(key, newval);
};
};
@ -83,6 +82,8 @@ nonstatic void(mitem_desktop desktop) M_Options_Effects =
fr.add(menuitemcheck_spawn(_("Relief Mapping"), "r_glsl_offsetmapping", '280 8'), fl, [0, pos], [0, 8]); pos += 8;
fr.add(menuitemcheck_spawn(_("RT Dynamic Lights"), "r_shadow_realtime_dlight", '280 8'), fl, [0, pos], [0, 8]); pos += 8;
fr.add(menuitemcheck_spawn(_("RT World Lighting"), "r_shadow_realtime_world", '280 8'), fl, [0, pos], [0, 8]); pos += 8;
fr.add(menuitemcheck_spawn(_("Raytraced Shadows"), "r_shadow_raytrace", '280 8'), fl, [0, pos], [0, 8]); pos += 8;
fr.add(menuitemcheck_spawn(_("Half-Rate Shading"), "r_halfrate", '280 8'), fl, [0, pos], [0, 8]); pos += 8;
fr.add(menuitemcombo_spawn(_("Water Effects"), "r_waterstyle", '280 8', _(
"1 \"Classic\" "

View file

@ -267,7 +267,10 @@ float(string key) mitem::isvalid =
if (this.item_parent)
return this.item_parent.isvalid(key);
else //no parent, just assume its a cvar.
return cvar_type(key);
{
float t = cvar_type(key);
return (t & CVAR_TYPEFLAG_EXISTS) && !(t & CVAR_TYPEFLAG_READONLY);
}
};