1
0
Fork 0
forked from fte/fteqw

Attempt to fix the VK_NOT_READY issue on AMD Vulkan drivers, as well as the Xlib errors that also occur on AMD.

git-svn-id: https://svn.code.sf.net/p/fteqw/code/trunk@5710 fc73d0e0-1445-4013-8a0c-d673dee63da5
This commit is contained in:
Spoike 2020-06-27 19:31:55 +00:00
parent 08f8cb2d35
commit 571d16b14f
5 changed files with 112 additions and 56 deletions

View file

@ -175,6 +175,7 @@ static struct
int (*pXGetWindowProperty)(Display *display, Window w, Atom property, long long_offset, long long_length, Bool delete, Atom req_type, Atom *actual_type_return, int *actual_format_return, unsigned long *nitems_return, unsigned long *bytes_after_return, unsigned char **prop_return);
int (*pXGrabKeyboard)(Display *display, Window grab_window, Bool owner_events, int pointer_mode, int keyboard_mode, Time time);
int (*pXGrabPointer)(Display *display, Window grab_window, Bool owner_events, unsigned int event_mask, int pointer_mode, int keyboard_mode, Window confine_to, Cursor cursor, Time time);
Status (*pXInitThreads)(void);
Atom (*pXInternAtom)(Display *display, char *atom_name, Bool only_if_exists);
KeySym (*pXLookupKeysym)(XKeyEvent *key_event, int index);
int (*pXLookupString)(XKeyEvent *event_struct, char *buffer_return, int bytes_buffer, KeySym *keysym_return, XComposeStatus *status_in_out);
@ -277,6 +278,7 @@ static qboolean x11_initlib(void)
{(void**)&x11.pXGetWindowProperty, "XGetWindowProperty"},
{(void**)&x11.pXGrabKeyboard, "XGrabKeyboard"},
{(void**)&x11.pXGrabPointer, "XGrabPointer"},
{(void**)&x11.pXInitThreads, "XInitThreads"},
{(void**)&x11.pXInternAtom, "XInternAtom"},
{(void**)&x11.pXLookupKeysym, "XLookupKeysym"},
{(void**)&x11.pXLookupString, "XLookupString"},
@ -2575,28 +2577,20 @@ static void X_KeyEvent(XKeyEvent *ev, qboolean pressed, qboolean filtered)
case XK_Tab: key = K_TAB; break;
case XK_F1: key = K_F1; break;
case XK_F2: key = K_F2; break;
case XK_F3: key = K_F3; break;
case XK_F4: key = K_F4; break;
case XK_F5: key = K_F5; break;
case XK_F6: key = K_F6; break;
case XK_F7: key = K_F7; break;
case XK_F8: key = K_F8; break;
case XK_F9: key = K_F9; break;
case XK_F10: key = K_F10; break;
case XK_F11: key = K_F11; break;
case XK_F12: key = K_F12; break;
case XK_F13: key = K_F13; break;
case XK_F14: key = K_F14; break;
case XK_F15: key = K_F15; break;
case XK_BackSpace: key = K_BACKSPACE; break;
@ -4093,6 +4087,9 @@ static qboolean X11VID_Init (rendererstate_t *info, unsigned char *palette, int
return false;
}
}
//"Some implementations may require threads to implement some presentation modes so applications must call XInitThreads() before calling any other Xlib functions."
x11.pXInitThreads();
}
break;
#endif

View file

@ -898,9 +898,9 @@ static void Win32NVVK_DoPresent(struct vkframe *theframe)
0, 1, 1, 0); //stst (remember that gl textures are meant to be upside down)
//and tell our code to expect it.
vk.acquirebufferidx[vk.aquirelast%ACQUIRELIMIT] = vk.aquirelast%vk.backbuf_count;
fence = vk.acquirefences[vk.aquirelast%ACQUIRELIMIT];
vk.aquirelast++;
vk.acquirebufferidx[vk.acquirelast%ACQUIRELIMIT] = vk.acquirelast%vk.backbuf_count;
fence = vk.acquirefences[vk.acquirelast%ACQUIRELIMIT];
vk.acquirelast++;
//and actually signal it, so our code can wake up.
qglSignalVkFenceNV((GLuint64)fence);

View file

@ -4790,7 +4790,10 @@ void VKBE_RT_Gen(struct vk_rendertarg *targ, vk_image_t *colour, uint32_t width,
if (targ->externalimage)
targ->colour.image = colour->image;
else
{
VkAssert(vkCreateImage(vk.device, &colour_imginfo, vkallocationcb, &targ->colour.image));
DebugSetName(VK_OBJECT_TYPE_IMAGE, (uint64_t)targ->colour.image, "RT Colour");
}
depth_imginfo = colour_imginfo;
depth_imginfo.format = vk.depthformat;
@ -4800,9 +4803,11 @@ void VKBE_RT_Gen(struct vk_rendertarg *targ, vk_image_t *colour, uint32_t width,
mscolour_imginfo = colour_imginfo;
depth_imginfo.samples = mscolour_imginfo.samples = vk.multisamplebits;
VkAssert(vkCreateImage(vk.device, &mscolour_imginfo, vkallocationcb, &targ->mscolour.image));
DebugSetName(VK_OBJECT_TYPE_IMAGE, (uint64_t)targ->mscolour.image, "RT MS Colour");
VK_AllocateBindImageMemory(&targ->mscolour, true);
}
VkAssert(vkCreateImage(vk.device, &depth_imginfo, vkallocationcb, &targ->depth.image));
DebugSetName(VK_OBJECT_TYPE_IMAGE, (uint64_t)targ->depth.image, "RT Depth");
if (targ->externalimage) //an external image is assumed to already have memory bound. don't allocate it elsewhere.
memset(&targ->colour.mem, 0, sizeof(targ->colour.mem));
@ -4980,11 +4985,13 @@ void VKBE_RT_Gen_Cube(struct vk_rendertarg_cube *targ, uint32_t size, qboolean c
colour_imginfo.pQueueFamilyIndices = NULL;
colour_imginfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
VkAssert(vkCreateImage(vk.device, &colour_imginfo, vkallocationcb, &targ->colour.image));
DebugSetName(VK_OBJECT_TYPE_IMAGE, (uint64_t)targ->colour.image, "RT Cube Colour");
depth_imginfo = colour_imginfo;
depth_imginfo.format = vk.depthformat;
depth_imginfo.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT|VK_IMAGE_USAGE_SAMPLED_BIT;
VkAssert(vkCreateImage(vk.device, &depth_imginfo, vkallocationcb, &targ->depth.image));
DebugSetName(VK_OBJECT_TYPE_IMAGE, (uint64_t)targ->depth.image, "RT Cube Depth");
VK_AllocateBindImageMemory(&targ->colour, true);
VK_AllocateBindImageMemory(&targ->depth, true);
@ -6062,6 +6069,7 @@ qboolean VKBE_BeginShadowmap(qboolean isspot, uint32_t width, uint32_t height)
imginfo.pQueueFamilyIndices = NULL;
imginfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
VkAssert(vkCreateImage(vk.device, &imginfo, vkallocationcb, &shad->image));
DebugSetName(VK_OBJECT_TYPE_IMAGE, (uint64_t)shad->image, "Shadowmap");
{
VkMemoryRequirements mem_reqs;

View file

@ -59,6 +59,8 @@ extern qboolean scr_con_forcedraw;
const char *vklayerlist[] =
{
#if 1
"VK_LAYER_KHRONOS_validation"
#elif 1
"VK_LAYER_LUNARG_standard_validation"
#else
//older versions of the sdk were crashing out on me,
@ -185,7 +187,7 @@ char *VK_VKErrorToString(VkResult err)
//irrelevant parts of the enum
case VK_RESULT_RANGE_SIZE:
case VK_RESULT_MAX_ENUM:
//default:
default:
break;
}
return va("%d", (int)err);
@ -244,8 +246,10 @@ char *DebugAnnotObjectToString(VkObjectType t)
case VK_OBJECT_TYPE_DISPLAY_KHR: return "VK_OBJECT_TYPE_DISPLAY_KHR";
case VK_OBJECT_TYPE_DISPLAY_MODE_KHR: return "VK_OBJECT_TYPE_DISPLAY_MODE_KHR";
case VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT: return "VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT";
#ifdef VK_NVX_device_generated_commands
case VK_OBJECT_TYPE_OBJECT_TABLE_NVX: return "VK_OBJECT_TYPE_OBJECT_TABLE_NVX";
case VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX: return "VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX";
#endif
case VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT: return "VK_OBJECT_TYPE_DEBUG_UTILS_MESSENGER_EXT";
case VK_OBJECT_TYPE_VALIDATION_CACHE_EXT: return "VK_OBJECT_TYPE_VALIDATION_CACHE_EXT";
#ifdef VK_NV_ray_tracing
@ -254,6 +258,8 @@ char *DebugAnnotObjectToString(VkObjectType t)
case VK_OBJECT_TYPE_RANGE_SIZE:
case VK_OBJECT_TYPE_MAX_ENUM:
break;
default:
break;
}
return "UNKNOWNTYPE";
}
@ -474,16 +480,19 @@ static void VK_DestroySwapChain(void)
vk.backbufs[i].colour.view = VK_NULL_HANDLE;
VK_DestroyVkTexture(&vk.backbufs[i].depth);
VK_DestroyVkTexture(&vk.backbufs[i].mscolour);
vkDestroySemaphore(vk.device, vk.backbufs[i].presentsemaphore, vkallocationcb);
}
if (vk.dopresent)
vk.dopresent(NULL);
while (vk.aquirenext < vk.aquirelast)
//clean up our acquires so we know the driver isn't going to update anything.
while (vk.acquirenext < vk.acquirelast)
{
if (vk.acquirefences[vk.aquirenext%ACQUIRELIMIT])
VkWarnAssert(vkWaitForFences(vk.device, 1, &vk.acquirefences[vk.aquirenext%ACQUIRELIMIT], VK_FALSE, UINT64_MAX));
vk.aquirenext++;
if (vk.acquirefences[vk.acquirenext%ACQUIRELIMIT])
VkWarnAssert(vkWaitForFences(vk.device, 1, &vk.acquirefences[vk.acquirenext%ACQUIRELIMIT], VK_FALSE, UINT64_MAX));
vk.acquirenext++;
}
//wait for it to all finish.
if (vk.device)
vkDeviceWaitIdle(vk.device);
for (i = 0; i < ACQUIRELIMIT; i++)
@ -568,7 +577,7 @@ static qboolean VK_CreateSwapChain(void)
memories = malloc(sizeof(VkDeviceMemory)*vk.backbuf_count);
memset(memories, 0, sizeof(VkDeviceMemory)*vk.backbuf_count);
vk.aquirelast = vk.aquirenext = 0;
vk.acquirelast = vk.acquirenext = 0;
for (i = 0; i < ACQUIRELIMIT; i++)
{
if (1)
@ -582,11 +591,12 @@ static qboolean VK_CreateSwapChain(void)
{
VkSemaphoreCreateInfo sci = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
VkAssert(vkCreateSemaphore(vk.device, &sci, vkallocationcb, &vk.acquiresemaphores[i]));
DebugSetName(VK_OBJECT_TYPE_SEMAPHORE, (uint64_t)vk.acquiresemaphores[i], "vk.acquiresemaphores");
vk.acquirefences[i] = VK_NULL_HANDLE;
}
vk.acquirebufferidx[vk.aquirelast%ACQUIRELIMIT] = vk.aquirelast%vk.backbuf_count;
vk.aquirelast++;
vk.acquirebufferidx[vk.acquirelast%ACQUIRELIMIT] = vk.acquirelast%vk.backbuf_count;
vk.acquirelast++;
}
for (i = 0; i < vk.backbuf_count; i++)
@ -880,7 +890,7 @@ static qboolean VK_CreateSwapChain(void)
memories = NULL;
VkAssert(vkGetSwapchainImagesKHR(vk.device, vk.swapchain, &vk.backbuf_count, images));
vk.aquirelast = vk.aquirenext = 0;
vk.acquirelast = vk.acquirenext = 0;
for (i = 0; i < ACQUIRELIMIT; i++)
{
if (vk_waitfence.ival || !*vk_waitfence.string)
@ -893,18 +903,19 @@ static qboolean VK_CreateSwapChain(void)
{
VkSemaphoreCreateInfo sci = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
VkAssert(vkCreateSemaphore(vk.device, &sci, vkallocationcb, &vk.acquiresemaphores[i]));
DebugSetName(VK_OBJECT_TYPE_SEMAPHORE, (uint64_t)vk.acquiresemaphores[i], "vk.acquiresemaphores");
vk.acquirefences[i] = VK_NULL_HANDLE;
}
}
if (!vk_submissionthread.value && *vk_submissionthread.string)
preaquirecount = 1;
preaquirecount = 1; //no real point asking for more.
else
preaquirecount = vk.backbuf_count;
/*-1 to hide any weird thread issues*/
while (vk.aquirelast < ACQUIRELIMIT-1 && vk.aquirelast < preaquirecount && vk.aquirelast <= vk.backbuf_count-surfcaps.minImageCount)
while (vk.acquirelast < ACQUIRELIMIT-1 && vk.acquirelast < preaquirecount && vk.acquirelast <= vk.backbuf_count-surfcaps.minImageCount)
{
VkAssert(vkAcquireNextImageKHR(vk.device, vk.swapchain, UINT64_MAX, vk.acquiresemaphores[vk.aquirelast%ACQUIRELIMIT], vk.acquirefences[vk.aquirelast%ACQUIRELIMIT], &vk.acquirebufferidx[vk.aquirelast%ACQUIRELIMIT]));
vk.aquirelast++;
VkAssert(vkAcquireNextImageKHR(vk.device, vk.swapchain, UINT64_MAX, vk.acquiresemaphores[vk.acquirelast%ACQUIRELIMIT], vk.acquirefences[vk.acquirelast%ACQUIRELIMIT], &vk.acquirebufferidx[vk.acquirelast%ACQUIRELIMIT]));
vk.acquirelast++;
}
}
@ -1087,6 +1098,7 @@ static qboolean VK_CreateSwapChain(void)
{
VkSemaphoreCreateInfo seminfo = {VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO};
VkAssert(vkCreateSemaphore(vk.device, &seminfo, vkallocationcb, &vk.backbufs[i].presentsemaphore));
DebugSetName(VK_OBJECT_TYPE_SEMAPHORE, (uint64_t)vk.backbufs[i].presentsemaphore, "vk.backbufs.presentsemaphore");
}
}
free(images);
@ -1608,20 +1620,20 @@ vk_image_t VK_CreateTexture2DArray(uint32_t width, uint32_t height, uint32_t lay
break;
#ifdef VK_EXT_astc_decode_mode
case PTI_ASTC_4X4: //set these to use rgba8 decoding, because we know they're not hdr and the format is basically 8bit anyway.
case PTI_ASTC_5X4: //we do NOT do this for the hdr, as that would cause data loss.
case PTI_ASTC_5X5: //we do NOT do this for sRGB because its pointless.
case PTI_ASTC_6X5:
case PTI_ASTC_6X6:
case PTI_ASTC_8X5:
case PTI_ASTC_8X6:
case PTI_ASTC_8X8:
case PTI_ASTC_10X5:
case PTI_ASTC_10X6:
case PTI_ASTC_10X8:
case PTI_ASTC_10X10:
case PTI_ASTC_12X10:
case PTI_ASTC_12X12:
case PTI_ASTC_4X4_LDR: //set these to use rgba8 decoding, because we know they're not hdr and the format is basically 8bit anyway.
case PTI_ASTC_5X4_LDR: //we do NOT do this for the hdr, as that would cause data loss.
case PTI_ASTC_5X5_LDR: //we do NOT do this for sRGB because its pointless.
case PTI_ASTC_6X5_LDR:
case PTI_ASTC_6X6_LDR:
case PTI_ASTC_8X5_LDR:
case PTI_ASTC_8X6_LDR:
case PTI_ASTC_8X8_LDR:
case PTI_ASTC_10X5_LDR:
case PTI_ASTC_10X6_LDR:
case PTI_ASTC_10X8_LDR:
case PTI_ASTC_10X10_LDR:
case PTI_ASTC_12X10_LDR:
case PTI_ASTC_12X12_LDR:
viewInfo.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
viewInfo.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
viewInfo.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
@ -3421,7 +3433,7 @@ qboolean VK_SCR_GrabBackBuffer(void)
vk.unusedframes = newframe;
}
while (vk.aquirenext == vk.aquirelast)
while (vk.acquirenext == vk.acquirelast)
{ //we're still waiting for the render thread to increment acquirelast.
//shouldn't really happen, but can if the gpu is slow.
if (vk.neednewswapchain)
@ -3447,14 +3459,14 @@ qboolean VK_SCR_GrabBackBuffer(void)
#endif
}
if (vk.acquirefences[vk.aquirenext%ACQUIRELIMIT] != VK_NULL_HANDLE)
if (vk.acquirefences[vk.acquirenext%ACQUIRELIMIT] != VK_NULL_HANDLE)
{
//wait for the queued acquire to actually finish
if (vk_busywait.ival)
{ //busy wait, to try to get the highest fps possible
for (;;)
{
switch(vkGetFenceStatus(vk.device, vk.acquirefences[vk.aquirenext%ACQUIRELIMIT]))
switch(vkGetFenceStatus(vk.device, vk.acquirefences[vk.acquirenext%ACQUIRELIMIT]))
{
case VK_SUCCESS:
break; //hurrah
@ -3476,7 +3488,7 @@ qboolean VK_SCR_GrabBackBuffer(void)
int failures = 0;
for(;;)
{
VkResult err = vkWaitForFences(vk.device, 1, &vk.acquirefences[vk.aquirenext%ACQUIRELIMIT], VK_FALSE, 1000000000);
VkResult err = vkWaitForFences(vk.device, 1, &vk.acquirefences[vk.acquirenext%ACQUIRELIMIT], VK_FALSE, 1000000000);
if (err == VK_SUCCESS)
break;
@ -3493,12 +3505,12 @@ qboolean VK_SCR_GrabBackBuffer(void)
return false;
}
}
VkAssert(vkResetFences(vk.device, 1, &vk.acquirefences[vk.aquirenext%ACQUIRELIMIT]));
VkAssert(vkResetFences(vk.device, 1, &vk.acquirefences[vk.acquirenext%ACQUIRELIMIT]));
}
vk.bufferidx = vk.acquirebufferidx[vk.aquirenext%ACQUIRELIMIT];
vk.bufferidx = vk.acquirebufferidx[vk.acquirenext%ACQUIRELIMIT];
sem = vk.acquiresemaphores[vk.aquirenext%ACQUIRELIMIT];
vk.aquirenext++;
sem = vk.acquiresemaphores[vk.acquirenext%ACQUIRELIMIT];
vk.acquirenext++;
//grab the first unused
Sys_LockConditional(vk.submitcondition);
@ -3975,6 +3987,8 @@ VkRenderPass VK_GetRenderPass(int pass)
attachments[depth_reference.attachment].initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
// attachments[color_reference.attachment].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
attachments[depth_reference.attachment].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
attachments[depth_reference.attachment].finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
}
VkAssert(vkCreateRenderPass(vk.device, &rp_info, vkallocationcb, &vk.renderpass[pass]));
@ -4015,14 +4029,49 @@ void VK_DoPresent(struct vkframe *theframe)
}
else
{
err = vkAcquireNextImageKHR(vk.device, vk.swapchain, 0, vk.acquiresemaphores[vk.aquirelast%ACQUIRELIMIT], vk.acquirefences[vk.aquirelast%ACQUIRELIMIT], &vk.acquirebufferidx[vk.aquirelast%ACQUIRELIMIT]);
if (err)
int r = vk.acquirelast%ACQUIRELIMIT;
uint64_t timeout = (vk.acquirelast==vk.acquirenext)?UINT64_MAX:0; //
err = vkAcquireNextImageKHR(vk.device, vk.swapchain, timeout, vk.acquiresemaphores[r], vk.acquirefences[r], &vk.acquirebufferidx[r]);
switch(err)
{
case VK_SUBOPTIMAL_KHR: //success, but with a warning.
vk.neednewswapchain = true;
vk.acquirelast++;
break;
case VK_SUCCESS: //success
vk.acquirelast++;
break;
//we gave the presentation engine an image, but it's refusing to give us one back.
//logically this means the implementation lied about its VkSurfaceCapabilitiesKHR::minImageCount
case VK_TIMEOUT: //'success', yet still no result
case VK_NOT_READY:
//no idea how to handle. let it slip?
if (vk.acquirelast == vk.acquirenext)
vk.neednewswapchain = true; //slipped too much
break;
case VK_ERROR_OUT_OF_DATE_KHR:
//unable to present, but we at least don't need to throw everything away.
vk.neednewswapchain = true;
break;
case VK_ERROR_DEVICE_LOST:
case VK_ERROR_OUT_OF_HOST_MEMORY:
case VK_ERROR_OUT_OF_DEVICE_MEMORY:
case VK_ERROR_SURFACE_LOST_KHR:
//something really bad happened.
Con_Printf("ERROR: vkAcquireNextImageKHR: %s\n", VK_VKErrorToString(err));
vk.neednewswapchain = true;
vk.devicelost |= (err == VK_ERROR_DEVICE_LOST);
vk.devicelost = true;
break;
default:
//case VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT:
//we don't know why we're getting this. vendor problem.
Con_Printf("ERROR: vkAcquireNextImageKHR: undocumented/extended %s\n", VK_VKErrorToString(err));
vk.neednewswapchain = true;
vk.devicelost = true; //this might be an infinite loop... no idea how to handle it.
break;
}
vk.aquirelast++;
}
RSpeedEnd(RSPEED_ACQUIRE);
}

View file

@ -295,12 +295,14 @@ extern struct vulkaninfo_s
VkCommandPool cmdpool;
VkPhysicalDeviceLimits limits;
#define ACQUIRELIMIT 8 //don't run more than this many frames behind
//we have a ringbuffer for acquires
#define ACQUIRELIMIT 8
VkSemaphore acquiresemaphores[ACQUIRELIMIT];
VkFence acquirefences[ACQUIRELIMIT];
uint32_t acquirebufferidx[ACQUIRELIMIT];
unsigned int aquirenext;
volatile unsigned int aquirelast; //set inside the submission thread
unsigned int acquirenext; //first usable buffer, but we still need to wait on its fence (accessed on main thread).
volatile unsigned int acquirelast; //last buffer that we have successfully asked to acquire (set inside the submission thread).
//acquirenext <= acquirelast, acquirelast-acquirenext<=ACQUIRELIMIT
VkPipelineCache pipelinecache;