From fe91066f4673f7a3ee87235f08b72db4910eb01c Mon Sep 17 00:00:00 2001 From: Wollnashorn <Wollnashorn@users.noreply.github.com> Date: Wed, 5 Apr 2023 03:02:24 +0200 Subject: [PATCH] video_core: Enable ImageGather with subpixel offset on Intel --- src/shader_recompiler/profile.h | 6 +++--- src/video_core/renderer_opengl/gl_device.cpp | 1 - src/video_core/renderer_opengl/gl_device.h | 9 ++++----- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 4 +++- src/video_core/vulkan_common/vulkan_device.cpp | 1 - src/video_core/vulkan_common/vulkan_device.h | 5 ----- 7 files changed, 11 insertions(+), 17 deletions(-) diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 31390e869b..9f88fb4407 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -52,9 +52,9 @@ struct Profile { bool need_declared_frag_colors{}; /// Prevents fast math optimizations that may cause inaccuracies bool need_fastmath_off{}; - /// Some GPU vendors use a lower fixed point format of 16.8 when calculating pixel coordinates - /// in the ImageGather instruction than the Maxwell architecture does. Applying an offset does - /// fix this mismatching rounding behaviour. + /// Some GPU vendors use a different rounding precision when calculating texture pixel + /// coordinates with the 16.8 format in the ImageGather instruction than the Maxwell + /// architecture. Applying an offset does fix this mismatching rounding behaviour. bool need_gather_subpixel_offset{}; /// OpFClamp is broken and OpFMax + OpFMin should be used instead diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index d36a0a7a1d..22ed16ebfb 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -169,7 +169,6 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { has_draw_texture = GLAD_GL_NV_draw_texture; warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; need_fastmath_off = is_nvidia; - need_gather_subpixel_offset = is_amd; can_report_memory = GLAD_GL_NVX_gpu_memory_info; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index e8104c4def..cc0b95f1a5 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -160,10 +160,6 @@ public: return need_fastmath_off; } - bool NeedsGatherSubpixelOffset() const { - return need_gather_subpixel_offset; - } - bool HasCbufFtouBug() const { return has_cbuf_ftou_bug; } @@ -180,6 +176,10 @@ public: return vendor_name == "ATI Technologies Inc."; } + bool IsIntel() const { + return vendor_name == "Intel"; + } + bool CanReportMemoryUsage() const { return can_report_memory; } @@ -229,7 +229,6 @@ private: bool has_draw_texture{}; bool warp_size_potentially_larger_than_guest{}; bool need_fastmath_off{}; - bool need_gather_subpixel_offset{}; bool has_cbuf_ftou_bug{}; bool has_bool_ref_bug{}; bool can_report_memory{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index b40aa6f5e6..6ecda29842 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -218,7 +218,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo .lower_left_origin_mode = true, .need_declared_frag_colors = true, .need_fastmath_off = device.NeedsFastmathOff(), - .need_gather_subpixel_offset = device.NeedsGatherSubpixelOffset(), + .need_gather_subpixel_offset = device.IsAmd() || device.IsIntel(), .has_broken_spirv_clamp = true, .has_broken_unsigned_image_offsets = true, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f512572673..8963b6a666 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -329,7 +329,9 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .lower_left_origin_mode = false, .need_declared_frag_colors = false, - .need_gather_subpixel_offset = device.NeedsGatherSubpixelOffset(), + .need_gather_subpixel_offset = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY || + driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS || + driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA, .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS, .has_broken_spirv_position_input = driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 0939b62c92..6f288b3f8a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -431,7 +431,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR "AMD GCN4 and earlier have broken VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"); has_broken_cube_compatibility = true; } - need_gather_subpixel_offset = true; } if (extensions.sampler_filter_minmax && is_amd) { // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 50e95bccab..41b5da18ac 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -554,10 +554,6 @@ public: return features.robustness2.nullDescriptor; } - bool NeedsGatherSubpixelOffset() const { - return need_gather_subpixel_offset; - } - u32 GetMaxVertexInputAttributes() const { return properties.properties.limits.maxVertexInputAttributes; } @@ -668,7 +664,6 @@ private: bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3. bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3. - bool need_gather_subpixel_offset{}; ///< Needs offset at ImageGather for correct rounding. u64 device_access_memory{}; ///< Total size of device local memory in bytes. u32 sets_per_pool{}; ///< Sets per Description Pool