gpu_hw.cpp (162574B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 #include "gpu_hw.h" 5 #include "cpu_core.h" 6 #include "cpu_pgxp.h" 7 #include "gpu_hw_shadergen.h" 8 #include "gpu_sw_backend.h" 9 #include "host.h" 10 #include "settings.h" 11 #include "system.h" 12 13 #include "util/imgui_manager.h" 14 #include "util/postprocessing.h" 15 #include "util/state_wrapper.h" 16 17 #include "common/align.h" 18 #include "common/assert.h" 19 #include "common/error.h" 20 #include "common/gsvector_formatter.h" 21 #include "common/log.h" 22 #include "common/scoped_guard.h" 23 #include "common/string_util.h" 24 #include "common/timer.h" 25 26 #include "IconsFontAwesome5.h" 27 #include "IconsEmoji.h" 28 #include "imgui.h" 29 30 #include <cmath> 31 #include <limits> 32 #include <sstream> 33 #include <tuple> 34 35 Log_SetChannel(GPU_HW); 36 37 // TODO: instead of full state restore, only restore what changed 38 39 static constexpr GPUTexture::Format VRAM_RT_FORMAT = GPUTexture::Format::RGBA8; 40 static constexpr GPUTexture::Format VRAM_DS_FORMAT = GPUTexture::Format::D16; 41 static constexpr GPUTexture::Format VRAM_DS_DEPTH_FORMAT = GPUTexture::Format::D32F; 42 static constexpr GPUTexture::Format VRAM_DS_COLOR_FORMAT = GPUTexture::Format::R32F; 43 44 #ifdef _DEBUG 45 46 static u32 s_draw_number = 0; 47 48 static constexpr const std::array s_transparency_modes = { 49 "HalfBackgroundPlusHalfForeground", 50 "BackgroundPlusForeground", 51 "BackgroundMinusForeground", 52 "BackgroundPlusQuarterForeground", 53 "Disabled", 54 }; 55 56 static constexpr const std::array s_batch_texture_modes = { 57 "Palette4Bit", "Palette8Bit", "Direct16Bit", "Disabled", 58 "SpritePalette4Bit", "SpritePalette8Bit", "SpriteDirect16Bit", 59 }; 60 61 static constexpr const std::array s_batch_render_modes = { 62 "TransparencyDisabled", "TransparentAndOpaque", "OnlyOpaque", "OnlyTransparent", "ShaderBlend", 63 }; 64 65 #endif 66 67 /// 
Returns the distance between two rectangles. 68 ALWAYS_INLINE static float RectDistance(const GSVector4i lhs, const GSVector4i rhs) 69 { 70 const s32 lcx = (lhs.left + ((lhs.right - lhs.left) / 2)); 71 const s32 lcy = (lhs.top + ((lhs.bottom - lhs.top) / 2)); 72 const s32 rcx = (rhs.left + ((rhs.right - rhs.left) / 2)); 73 const s32 rcy = (rhs.top + ((rhs.bottom - rhs.top) / 2)); 74 const s32 dx = (lcx - rcx); 75 const s32 dy = (lcy - rcy); 76 const s32 distsq = (dx * dx) + (dy * dy); 77 return std::sqrt(static_cast<float>(distsq)); 78 } 79 80 ALWAYS_INLINE static u32 GetMaxResolutionScale() 81 { 82 return g_gpu_device->GetMaxTextureSize() / VRAM_WIDTH; 83 } 84 85 ALWAYS_INLINE_RELEASE static u32 GetBoxDownsampleScale(u32 resolution_scale) 86 { 87 u32 scale = std::min<u32>(resolution_scale, g_settings.gpu_downsample_scale); 88 while ((resolution_scale % scale) != 0) 89 scale--; 90 return scale; 91 } 92 93 ALWAYS_INLINE static bool ShouldClampUVs(GPUTextureFilter texture_filter) 94 { 95 // We only need UV limits if PGXP is enabled, or texture filtering is enabled. 96 return g_settings.gpu_pgxp_enable || texture_filter != GPUTextureFilter::Nearest; 97 } 98 99 ALWAYS_INLINE static bool ShouldAllowSpriteMode(u8 resolution_scale, GPUTextureFilter texture_filter, 100 GPUTextureFilter sprite_texture_filter) 101 { 102 // Use sprite shaders/mode when texcoord rounding is forced, or if the filters are different. 103 return (sprite_texture_filter != texture_filter || (resolution_scale > 1 && g_settings.gpu_force_round_texcoords)); 104 } 105 106 ALWAYS_INLINE static bool ShouldDisableColorPerspective() 107 { 108 return g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_texture_correction && !g_settings.gpu_pgxp_color_correction; 109 } 110 111 /// Returns true if the specified texture filtering mode requires dual-source blending. 
112 ALWAYS_INLINE static bool IsBlendedTextureFiltering(GPUTextureFilter filter) 113 { 114 // return (filter == GPUTextureFilter::Bilinear || filter == GPUTextureFilter::JINC2 || filter == 115 // GPUTextureFilter::xBR); 116 static_assert(((static_cast<u8>(GPUTextureFilter::Nearest) & 1u) == 0u) && 117 ((static_cast<u8>(GPUTextureFilter::Bilinear) & 1u) == 1u) && 118 ((static_cast<u8>(GPUTextureFilter::BilinearBinAlpha) & 1u) == 0u) && 119 ((static_cast<u8>(GPUTextureFilter::JINC2) & 1u) == 1u) && 120 ((static_cast<u8>(GPUTextureFilter::JINC2BinAlpha) & 1u) == 0u) && 121 ((static_cast<u8>(GPUTextureFilter::xBR) & 1u) == 1u) && 122 ((static_cast<u8>(GPUTextureFilter::xBRBinAlpha) & 1u) == 0u)); 123 return ((static_cast<u8>(filter) & 1u) == 1u); 124 } 125 126 /// Computes the area affected by a VRAM transfer, including wrap-around of X. 127 ALWAYS_INLINE_RELEASE static GSVector4i GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height) 128 { 129 GSVector4i ret; 130 ret.left = x % VRAM_WIDTH; 131 ret.top = y % VRAM_HEIGHT; 132 ret.right = ret.left + width; 133 ret.bottom = ret.top + height; 134 if (ret.right > static_cast<s32>(VRAM_WIDTH)) 135 { 136 ret.left = 0; 137 ret.right = static_cast<s32>(VRAM_WIDTH); 138 } 139 if (ret.bottom > static_cast<s32>(VRAM_HEIGHT)) 140 { 141 ret.top = 0; 142 ret.bottom = static_cast<s32>(VRAM_HEIGHT); 143 } 144 return ret; 145 } 146 147 namespace { 148 class ShaderCompileProgressTracker 149 { 150 public: 151 ShaderCompileProgressTracker(std::string title, u32 total) 152 : m_title(std::move(title)), m_min_time(Common::Timer::ConvertSecondsToValue(1.0)), 153 m_update_interval(Common::Timer::ConvertSecondsToValue(0.1)), m_start_time(Common::Timer::GetCurrentValue()), 154 m_last_update_time(0), m_progress(0), m_total(total) 155 { 156 } 157 ~ShaderCompileProgressTracker() = default; 158 159 void Increment(u32 progress = 1) 160 { 161 m_progress += progress; 162 163 const u64 tv = Common::Timer::GetCurrentValue(); 164 if ((tv - 
m_start_time) >= m_min_time && (tv - m_last_update_time) >= m_update_interval) 165 { 166 Host::DisplayLoadingScreen(m_title.c_str(), 0, static_cast<int>(m_total), static_cast<int>(m_progress)); 167 m_last_update_time = tv; 168 } 169 } 170 171 private: 172 std::string m_title; 173 u64 m_min_time; 174 u64 m_update_interval; 175 u64 m_start_time; 176 u64 m_last_update_time; 177 u32 m_progress; 178 u32 m_total; 179 }; 180 } // namespace 181 182 GPU_HW::GPU_HW() : GPU() 183 { 184 #ifdef _DEBUG 185 s_draw_number = 0; 186 #endif 187 } 188 189 GPU_HW::~GPU_HW() 190 { 191 if (m_sw_renderer) 192 { 193 m_sw_renderer->Shutdown(); 194 m_sw_renderer.reset(); 195 } 196 } 197 198 ALWAYS_INLINE void GPU_HW::BatchVertex::Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, 199 u16 packed_texcoord, u32 uv_limits_) 200 { 201 Set(x_, y_, z_, w_, color_, texpage_, packed_texcoord & 0xFF, (packed_texcoord >> 8), uv_limits_); 202 } 203 204 ALWAYS_INLINE void GPU_HW::BatchVertex::Set(float x_, float y_, float z_, float w_, u32 color_, u32 texpage_, u16 u_, 205 u16 v_, u32 uv_limits_) 206 { 207 x = x_; 208 y = y_; 209 z = z_; 210 w = w_; 211 color = color_; 212 texpage = texpage_; 213 u = u_; 214 v = v_; 215 uv_limits = uv_limits_; 216 } 217 218 ALWAYS_INLINE u32 GPU_HW::BatchVertex::PackUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v) 219 { 220 return min_u | (min_v << 8) | (max_u << 16) | (max_v << 24); 221 } 222 223 ALWAYS_INLINE void GPU_HW::BatchVertex::SetUVLimits(u32 min_u, u32 max_u, u32 min_v, u32 max_v) 224 { 225 uv_limits = PackUVLimits(min_u, max_u, min_v, max_v); 226 } 227 228 const Threading::Thread* GPU_HW::GetSWThread() const 229 { 230 return m_sw_renderer ? 
m_sw_renderer->GetThread() : nullptr; 231 } 232 233 bool GPU_HW::IsHardwareRenderer() const 234 { 235 return true; 236 } 237 238 bool GPU_HW::Initialize() 239 { 240 if (!GPU::Initialize()) 241 return false; 242 243 const GPUDevice::Features features = g_gpu_device->GetFeatures(); 244 245 m_resolution_scale = Truncate8(CalculateResolutionScale()); 246 m_multisamples = Truncate8(std::min<u32>(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); 247 m_texture_filtering = g_settings.gpu_texture_filter; 248 m_sprite_texture_filtering = g_settings.gpu_sprite_texture_filter; 249 m_line_detect_mode = (m_resolution_scale > 1) ? g_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; 250 m_downsample_mode = GetDownsampleMode(m_resolution_scale); 251 m_wireframe_mode = g_settings.gpu_wireframe_mode; 252 m_supports_dual_source_blend = features.dual_source_blend; 253 m_supports_framebuffer_fetch = features.framebuffer_fetch; 254 m_true_color = g_settings.gpu_true_color; 255 m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer(); 256 m_clamp_uvs = ShouldClampUVs(m_texture_filtering) || ShouldClampUVs(m_sprite_texture_filtering); 257 m_compute_uv_range = m_clamp_uvs; 258 m_allow_sprite_mode = ShouldAllowSpriteMode(m_resolution_scale, m_texture_filtering, m_sprite_texture_filtering); 259 260 CheckSettings(); 261 262 UpdateSoftwareRenderer(false); 263 264 PrintSettingsToLog(); 265 266 Error error; 267 if (!CompilePipelines(&error)) 268 { 269 ERROR_LOG("Failed to compile pipelines: {}", error.GetDescription()); 270 return false; 271 } 272 273 if (!CreateBuffers()) 274 { 275 ERROR_LOG("Failed to create framebuffer"); 276 return false; 277 } 278 279 UpdateDownsamplingLevels(); 280 RestoreDeviceContext(); 281 return true; 282 } 283 284 void GPU_HW::Reset(bool clear_vram) 285 { 286 if (m_batch_vertex_ptr) 287 UnmapGPUBuffer(0, 0); 288 289 GPU::Reset(clear_vram); 290 291 if (m_sw_renderer) 292 m_sw_renderer->Reset(); 293 294 m_batch = {}; 295 m_batch_ubo_data = {}; 
296 m_batch_ubo_dirty = true; 297 m_current_depth = 1; 298 SetClampedDrawingArea(); 299 300 if (clear_vram) 301 ClearFramebuffer(); 302 } 303 304 bool GPU_HW::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_display) 305 { 306 // Need to download local VRAM copy before calling the base class, because it serializes this. 307 if (m_sw_renderer) 308 { 309 m_sw_renderer->Sync(true); 310 } 311 else if (sw.IsWriting() && !host_texture) 312 { 313 // If SW renderer readbacks aren't enabled, the CLUT won't be populated, which means it'll be invalid if the user 314 // loads this state with software instead of hardware renderers. So force-update the CLUT. 315 ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); 316 if (IsCLUTValid()) 317 GPU::ReadCLUT(g_gpu_clut, GPUTexturePaletteReg{Truncate16(m_current_clut_reg_bits)}, m_current_clut_is_8bit); 318 } 319 320 if (!GPU::DoState(sw, host_texture, update_display)) 321 return false; 322 323 if (host_texture) 324 { 325 GPUTexture* tex = *host_texture; 326 if (sw.IsReading()) 327 { 328 if (tex->GetWidth() != m_vram_texture->GetWidth() || tex->GetHeight() != m_vram_texture->GetHeight() || 329 tex->GetSamples() != m_vram_texture->GetSamples()) 330 { 331 return false; 332 } 333 334 g_gpu_device->CopyTextureRegion(m_vram_texture.get(), 0, 0, 0, 0, tex, 0, 0, 0, 0, tex->GetWidth(), 335 tex->GetHeight()); 336 } 337 else 338 { 339 if (!tex || tex->GetWidth() != m_vram_texture->GetWidth() || tex->GetHeight() != m_vram_texture->GetHeight() || 340 tex->GetSamples() != m_vram_texture->GetSamples()) 341 { 342 delete tex; 343 344 // We copy to/from the save state texture, but we can't have multisampled non-RTs. 345 tex = g_gpu_device 346 ->FetchTexture( 347 m_vram_texture->GetWidth(), m_vram_texture->GetHeight(), 1, 1, m_vram_texture->GetSamples(), 348 m_vram_texture->IsMultisampled() ? 
GPUTexture::Type::RenderTarget : GPUTexture::Type::Texture, 349 GPUTexture::Format::RGBA8, nullptr, 0) 350 .release(); 351 *host_texture = tex; 352 if (!tex) 353 return false; 354 } 355 356 g_gpu_device->CopyTextureRegion(tex, 0, 0, 0, 0, m_vram_texture.get(), 0, 0, 0, 0, tex->GetWidth(), 357 tex->GetHeight()); 358 } 359 } 360 else if (sw.IsReading()) 361 { 362 // Need to update the VRAM copy on the GPU with the state data. 363 UpdateVRAMOnGPU(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, VRAM_WIDTH * sizeof(u16), false, false, VRAM_SIZE_RECT); 364 } 365 366 // invalidate the whole VRAM read texture when loading state 367 if (sw.IsReading()) 368 { 369 DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr); 370 ClearVRAMDirtyRectangle(); 371 SetFullVRAMDirtyRectangle(); 372 ResetBatchVertexDepth(); 373 } 374 375 return true; 376 } 377 378 void GPU_HW::RestoreDeviceContext() 379 { 380 g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler()); 381 SetVRAMRenderTarget(); 382 g_gpu_device->SetViewport(m_vram_texture->GetRect()); 383 SetScissor(); 384 m_batch_ubo_dirty = true; 385 } 386 387 void GPU_HW::UpdateSettings(const Settings& old_settings) 388 { 389 GPU::UpdateSettings(old_settings); 390 391 const GPUDevice::Features features = g_gpu_device->GetFeatures(); 392 393 const u8 resolution_scale = Truncate8(CalculateResolutionScale()); 394 const u8 multisamples = Truncate8(std::min<u32>(g_settings.gpu_multisamples, g_gpu_device->GetMaxMultisamples())); 395 const bool clamp_uvs = ShouldClampUVs(m_texture_filtering) || ShouldClampUVs(m_sprite_texture_filtering); 396 const bool framebuffer_changed = (m_resolution_scale != resolution_scale || m_multisamples != multisamples || 397 g_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() || 398 m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()); 399 const bool shaders_changed = 400 (m_resolution_scale != resolution_scale || m_multisamples != multisamples || 401 
m_true_color != g_settings.gpu_true_color || g_settings.gpu_debanding != old_settings.gpu_debanding || 402 (multisamples > 0 && g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading) || 403 (resolution_scale > 1 && g_settings.gpu_scaled_dithering != old_settings.gpu_scaled_dithering) || 404 (resolution_scale > 1 && g_settings.gpu_texture_filter == GPUTextureFilter::Nearest && 405 g_settings.gpu_force_round_texcoords != old_settings.gpu_force_round_texcoords) || 406 g_settings.IsUsingAccurateBlending() != old_settings.IsUsingAccurateBlending() || 407 m_texture_filtering != g_settings.gpu_texture_filter || 408 m_sprite_texture_filtering != g_settings.gpu_sprite_texture_filter || m_clamp_uvs != clamp_uvs || 409 (resolution_scale > 1 && (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || 410 (m_downsample_mode == GPUDownsampleMode::Box && 411 g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale))) || 412 (features.geometry_shaders && g_settings.gpu_wireframe_mode != old_settings.gpu_wireframe_mode) || 413 m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer() || 414 (features.noperspective_interpolation && 415 ShouldDisableColorPerspective() != old_settings.gpu_pgxp_color_correction) || 416 m_allow_sprite_mode != 417 ShouldAllowSpriteMode(m_resolution_scale, g_settings.gpu_texture_filter, g_settings.gpu_sprite_texture_filter)); 418 419 if (m_resolution_scale != resolution_scale) 420 { 421 Host::AddIconOSDMessage( 422 "ResolutionScaleChanged", ICON_FA_PAINT_BRUSH, 423 fmt::format(TRANSLATE_FS("GPU_HW", "Resolution scale set to {0}x (display {1}x{2}, VRAM {3}x{4})"), 424 resolution_scale, m_crtc_state.display_vram_width * resolution_scale, 425 resolution_scale * m_crtc_state.display_vram_height, VRAM_WIDTH * resolution_scale, 426 VRAM_HEIGHT * resolution_scale), 427 Host::OSD_INFO_DURATION); 428 } 429 430 if (m_multisamples != multisamples || g_settings.gpu_per_sample_shading != old_settings.gpu_per_sample_shading) 
431 { 432 if (g_settings.gpu_per_sample_shading && features.per_sample_shading) 433 { 434 Host::AddIconOSDMessage( 435 "MultisamplingChanged", ICON_FA_PAINT_BRUSH, 436 fmt::format(TRANSLATE_FS("GPU_HW", "Multisample anti-aliasing set to {}x (SSAA)."), multisamples), 437 Host::OSD_INFO_DURATION); 438 } 439 else 440 { 441 Host::AddIconOSDMessage( 442 "MultisamplingChanged", ICON_FA_PAINT_BRUSH, 443 fmt::format(TRANSLATE_FS("GPU_HW", "Multisample anti-aliasing set to {}x."), multisamples), 444 Host::OSD_INFO_DURATION); 445 } 446 } 447 448 // Back up VRAM if we're recreating the framebuffer. 449 if (framebuffer_changed) 450 { 451 RestoreDeviceContext(); 452 ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); 453 DestroyBuffers(); 454 } 455 456 m_resolution_scale = resolution_scale; 457 m_multisamples = multisamples; 458 m_texture_filtering = g_settings.gpu_texture_filter; 459 m_sprite_texture_filtering = g_settings.gpu_sprite_texture_filter; 460 m_line_detect_mode = (m_resolution_scale > 1) ? g_settings.gpu_line_detect_mode : GPULineDetectMode::Disabled; 461 m_downsample_mode = GetDownsampleMode(resolution_scale); 462 m_wireframe_mode = g_settings.gpu_wireframe_mode; 463 m_true_color = g_settings.gpu_true_color; 464 m_clamp_uvs = clamp_uvs; 465 m_compute_uv_range = m_clamp_uvs; 466 m_allow_sprite_mode = ShouldAllowSpriteMode(resolution_scale, m_texture_filtering, m_sprite_texture_filtering); 467 m_batch.sprite_mode = (m_allow_sprite_mode && m_batch.sprite_mode); 468 469 const bool depth_buffer_changed = (m_pgxp_depth_buffer != g_settings.UsingPGXPDepthBuffer()); 470 if (depth_buffer_changed) 471 { 472 m_pgxp_depth_buffer = g_settings.UsingPGXPDepthBuffer(); 473 m_batch.use_depth_buffer = false; 474 m_depth_was_copied = false; 475 } 476 477 CheckSettings(); 478 479 UpdateSoftwareRenderer(true); 480 481 PrintSettingsToLog(); 482 483 if (shaders_changed) 484 { 485 DestroyPipelines(); 486 487 Error error; 488 if (!CompilePipelines(&error)) 489 { 490 ERROR_LOG("Failed to recompile 
pipelines: {}", error.GetDescription()); 491 Panic("Failed to recompile pipelines."); 492 } 493 } 494 495 if (framebuffer_changed) 496 { 497 // When using very high upscaling, it's possible that we don't have enough VRAM for two sets of buffers. 498 // Purge the pool, and idle the GPU so that all video memory is freed prior to creating the new buffers. 499 g_gpu_device->PurgeTexturePool(); 500 g_gpu_device->ExecuteAndWaitForGPUIdle(); 501 502 if (!CreateBuffers()) 503 Panic("Failed to recreate buffers."); 504 505 UpdateDownsamplingLevels(); 506 RestoreDeviceContext(); 507 UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, false, false); 508 if (m_write_mask_as_depth) 509 UpdateDepthBufferFromMaskBit(); 510 UpdateDisplay(); 511 } 512 else if (m_vram_depth_texture && depth_buffer_changed) 513 { 514 if (m_pgxp_depth_buffer) 515 ClearDepthBuffer(); 516 else if (m_write_mask_as_depth) 517 UpdateDepthBufferFromMaskBit(); 518 } 519 520 if (g_settings.gpu_downsample_mode != old_settings.gpu_downsample_mode || 521 (g_settings.gpu_downsample_mode == GPUDownsampleMode::Box && 522 g_settings.gpu_downsample_scale != old_settings.gpu_downsample_scale)) 523 { 524 UpdateDownsamplingLevels(); 525 } 526 } 527 528 void GPU_HW::CheckSettings() 529 { 530 const GPUDevice::Features features = g_gpu_device->GetFeatures(); 531 532 if (m_multisamples != g_settings.gpu_multisamples) 533 { 534 Host::AddIconOSDMessage("MSAAUnsupported", ICON_EMOJI_WARNING, 535 fmt::format(TRANSLATE_FS("GPU_HW", "{}x MSAA is not supported, using {}x instead."), 536 g_settings.gpu_multisamples, m_multisamples), 537 Host::OSD_CRITICAL_ERROR_DURATION); 538 } 539 else 540 { 541 Host::RemoveKeyedOSDMessage("MSAAUnsupported"); 542 } 543 544 if (g_settings.gpu_per_sample_shading && !features.per_sample_shading) 545 { 546 Host::AddIconOSDMessage("SSAAUnsupported", ICON_EMOJI_WARNING, 547 TRANSLATE_STR("GPU_HW", "SSAA is not supported, using MSAA instead."), 548 Host::OSD_ERROR_DURATION); 549 } 550 if 
(!features.dual_source_blend && !features.framebuffer_fetch && 551 (IsBlendedTextureFiltering(m_texture_filtering) || IsBlendedTextureFiltering(m_sprite_texture_filtering))) 552 { 553 Host::AddIconOSDMessage( 554 "TextureFilterUnsupported", ICON_EMOJI_WARNING, 555 fmt::format(TRANSLATE_FS("GPU_HW", "Texture filter '{}/{}' is not supported with the current renderer."), 556 Settings::GetTextureFilterDisplayName(m_texture_filtering), 557 Settings::GetTextureFilterName(m_sprite_texture_filtering), Host::OSD_ERROR_DURATION)); 558 m_texture_filtering = GPUTextureFilter::Nearest; 559 m_sprite_texture_filtering = GPUTextureFilter::Nearest; 560 m_allow_sprite_mode = ShouldAllowSpriteMode(m_resolution_scale, m_texture_filtering, m_sprite_texture_filtering); 561 } 562 563 if (g_settings.IsUsingAccurateBlending() && !m_supports_framebuffer_fetch && !features.feedback_loops && 564 !features.raster_order_views) 565 { 566 // m_allow_shader_blend/m_prefer_shader_blend will be cleared in pipeline compile. 
567 Host::AddIconOSDMessage( 568 "AccurateBlendingUnsupported", ICON_EMOJI_WARNING, 569 TRANSLATE_STR("GPU_HW", "Accurate blending is not supported by your current GPU.\nIt requires framebuffer fetch, " 570 "feedback loops, or rasterizer order views."), 571 Host::OSD_WARNING_DURATION); 572 } 573 else if (IsUsingMultisampling() && !features.framebuffer_fetch && 574 ((g_settings.IsUsingAccurateBlending() && features.raster_order_views) || 575 (m_pgxp_depth_buffer && features.raster_order_views && !features.feedback_loops))) 576 { 577 Host::AddIconOSDMessage( 578 "AccurateBlendingUnsupported", ICON_EMOJI_WARNING, 579 TRANSLATE_STR("GPU_HW", "Multisample anti-aliasing is not supported when using ROV blending."), 580 Host::OSD_WARNING_DURATION); 581 m_multisamples = 1; 582 } 583 584 if (m_pgxp_depth_buffer && !features.feedback_loops && !features.framebuffer_fetch && !features.raster_order_views) 585 { 586 Host::AddIconOSDMessage( 587 "AccurateBlendingUnsupported", ICON_EMOJI_WARNING, 588 TRANSLATE_STR("GPU_HW", "PGXP depth buffer is not supported by your current GPU or renderer.\nIt requires " 589 "framebuffer fetch, feedback loops, or rasterizer order views."), 590 Host::OSD_WARNING_DURATION); 591 m_pgxp_depth_buffer = false; 592 } 593 594 if (!features.noperspective_interpolation && !ShouldDisableColorPerspective()) 595 WARNING_LOG("Disable color perspective not supported, but should be used."); 596 597 if (!features.geometry_shaders && m_wireframe_mode != GPUWireframeMode::Disabled) 598 { 599 Host::AddIconOSDMessage( 600 "GeometryShadersUnsupported", ICON_EMOJI_WARNING, 601 TRANSLATE("GPU_HW", "Geometry shaders are not supported by your GPU, and are required for wireframe rendering."), 602 Host::OSD_CRITICAL_ERROR_DURATION); 603 m_wireframe_mode = GPUWireframeMode::Disabled; 604 } 605 606 if (m_downsample_mode == GPUDownsampleMode::Box) 607 { 608 const u32 resolution_scale = CalculateResolutionScale(); 609 const u32 box_downscale = 
GetBoxDownsampleScale(resolution_scale); 610 if (box_downscale != g_settings.gpu_downsample_scale || box_downscale == resolution_scale) 611 { 612 Host::AddIconOSDMessage( 613 "BoxDownsampleUnsupported", ICON_FA_PAINT_BRUSH, 614 fmt::format(TRANSLATE_FS( 615 "GPU_HW", "Resolution scale {0}x is not divisible by downsample scale {1}x, using {2}x instead."), 616 resolution_scale, g_settings.gpu_downsample_scale, box_downscale), 617 Host::OSD_WARNING_DURATION); 618 } 619 else 620 { 621 Host::RemoveKeyedOSDMessage("BoxDownsampleUnsupported"); 622 } 623 624 if (box_downscale == g_settings.gpu_resolution_scale) 625 m_downsample_mode = GPUDownsampleMode::Disabled; 626 } 627 } 628 629 u32 GPU_HW::CalculateResolutionScale() const 630 { 631 const u32 max_resolution_scale = GetMaxResolutionScale(); 632 633 u32 scale; 634 if (g_settings.gpu_resolution_scale != 0) 635 { 636 scale = std::clamp<u32>(g_settings.gpu_resolution_scale, 1, max_resolution_scale); 637 } 638 else 639 { 640 // Auto scaling. When the system is starting and all borders crop is enabled, the registers are zero, and 641 // display_height therefore is also zero. Use the default size from the region in this case. 642 const s32 height = (m_crtc_state.display_height != 0) ? 643 static_cast<s32>(m_crtc_state.display_height) : 644 (m_console_is_pal ? (PAL_VERTICAL_ACTIVE_END - PAL_VERTICAL_ACTIVE_START) : 645 (NTSC_VERTICAL_ACTIVE_END - NTSC_VERTICAL_ACTIVE_START)); 646 647 float widescreen_multiplier = 1.0f; 648 if (g_settings.gpu_widescreen_hack) 649 { 650 // Multiply scale factor by aspect ratio relative to 4:3, so that widescreen resolution is as close as possible to 651 // native screen resolution. Otherwise, anamorphic stretching would result in increasingly less horizontal 652 // resolution (relative to native screen resolution) as the aspect ratio gets wider. 
653 widescreen_multiplier = std::max(1.0f, (static_cast<float>(g_gpu_device->GetWindowWidth()) / 654 static_cast<float>(g_gpu_device->GetWindowHeight())) / 655 (4.0f / 3.0f)); 656 } 657 658 const s32 preferred_scale = 659 static_cast<s32>(std::ceil(static_cast<float>(g_gpu_device->GetWindowHeight() * widescreen_multiplier) / height)); 660 VERBOSE_LOG("Height = {}, preferred scale = {}", height, preferred_scale); 661 662 scale = static_cast<u32>(std::clamp<s32>(preferred_scale, 1, max_resolution_scale)); 663 } 664 665 if (g_settings.gpu_downsample_mode == GPUDownsampleMode::Adaptive && scale > 1 && !Common::IsPow2(scale)) 666 { 667 const u32 new_scale = Common::PreviousPow2(scale); 668 WARNING_LOG("Resolution scale {}x not supported for adaptive downsampling, using {}x", scale, new_scale); 669 670 if (g_settings.gpu_resolution_scale != 0) 671 { 672 Host::AddIconOSDMessage( 673 "ResolutionNotPow2", ICON_FA_PAINT_BRUSH, 674 fmt::format( 675 TRANSLATE_FS("GPU_HW", "Resolution scale {0}x not supported for adaptive downsampling, using {1}x."), scale, 676 new_scale), 677 Host::OSD_WARNING_DURATION); 678 } 679 680 scale = new_scale; 681 } 682 683 return scale; 684 } 685 686 void GPU_HW::UpdateResolutionScale() 687 { 688 GPU::UpdateResolutionScale(); 689 690 if (CalculateResolutionScale() != m_resolution_scale) 691 UpdateSettings(g_settings); 692 } 693 694 GPUDownsampleMode GPU_HW::GetDownsampleMode(u32 resolution_scale) const 695 { 696 return (resolution_scale == 1) ? 
GPUDownsampleMode::Disabled : g_settings.gpu_downsample_mode; 697 } 698 699 bool GPU_HW::IsUsingMultisampling() const 700 { 701 return m_multisamples > 1; 702 } 703 704 bool GPU_HW::IsUsingDownsampling() const 705 { 706 return (m_downsample_mode != GPUDownsampleMode::Disabled && !m_GPUSTAT.display_area_color_depth_24); 707 } 708 709 void GPU_HW::SetFullVRAMDirtyRectangle() 710 { 711 m_vram_dirty_draw_rect = VRAM_SIZE_RECT; 712 m_draw_mode.SetTexturePageChanged(); 713 } 714 715 void GPU_HW::ClearVRAMDirtyRectangle() 716 { 717 m_vram_dirty_draw_rect = INVALID_RECT; 718 m_vram_dirty_write_rect = INVALID_RECT; 719 } 720 721 void GPU_HW::AddWrittenRectangle(const GSVector4i rect) 722 { 723 m_vram_dirty_write_rect = m_vram_dirty_write_rect.runion(rect); 724 SetTexPageChangedOnOverlap(m_vram_dirty_write_rect); 725 } 726 727 void GPU_HW::AddDrawnRectangle(const GSVector4i rect) 728 { 729 // Normally, we would check for overlap here. But the GPU's texture cache won't actually reload until the page 730 // changes, or it samples a larger region, so we can get away without doing so. This reduces copies considerably in 731 // games like Mega Man Legends 2. 732 m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(rect); 733 } 734 735 void GPU_HW::AddUnclampedDrawnRectangle(const GSVector4i rect) 736 { 737 m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(rect); 738 SetTexPageChangedOnOverlap(m_vram_dirty_draw_rect); 739 } 740 741 void GPU_HW::SetTexPageChangedOnOverlap(const GSVector4i update_rect) 742 { 743 // the vram area can include the texture page, but the game can leave it as-is. 
in this case, set it as dirty so the 744 // shadow texture is updated 745 if (!m_draw_mode.IsTexturePageChanged() && m_batch.texture_mode != BatchTextureMode::Disabled && 746 (m_draw_mode.mode_reg.GetTexturePageRectangle().rintersects(update_rect) || 747 (m_draw_mode.mode_reg.IsUsingPalette() && 748 m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode).rintersects(update_rect)))) 749 { 750 m_draw_mode.SetTexturePageChanged(); 751 } 752 } 753 754 std::tuple<u32, u32> GPU_HW::GetEffectiveDisplayResolution(bool scaled /* = true */) 755 { 756 const u32 scale = scaled ? m_resolution_scale : 1u; 757 return std::make_tuple(m_crtc_state.display_vram_width * scale, m_crtc_state.display_vram_height * scale); 758 } 759 760 std::tuple<u32, u32> GPU_HW::GetFullDisplayResolution(bool scaled /* = true */) 761 { 762 const u32 scale = scaled ? m_resolution_scale : 1u; 763 return std::make_tuple(m_crtc_state.display_width * scale, m_crtc_state.display_height * scale); 764 } 765 766 void GPU_HW::PrintSettingsToLog() 767 { 768 INFO_LOG("Resolution Scale: {} ({}x{}), maximum {}", m_resolution_scale, VRAM_WIDTH * m_resolution_scale, 769 VRAM_HEIGHT * m_resolution_scale, GetMaxResolutionScale()); 770 INFO_LOG("Multisampling: {}x{}", m_multisamples, 771 (g_settings.gpu_per_sample_shading && g_gpu_device->GetFeatures().per_sample_shading) ? 772 " (per sample shading)" : 773 ""); 774 INFO_LOG("Dithering: {}{}", m_true_color ? "Disabled" : "Enabled", 775 (!m_true_color && g_settings.gpu_scaled_dithering) ? 776 " (Scaled)" : 777 ((m_true_color && g_settings.gpu_debanding) ? " (Debanding)" : "")); 778 INFO_LOG("Force round texture coordinates: {}", 779 (m_resolution_scale > 1 && g_settings.gpu_force_round_texcoords) ? 
"Enabled" : "Disabled"); 780 INFO_LOG("Texture Filtering: {}/{}", Settings::GetTextureFilterDisplayName(m_texture_filtering), 781 Settings::GetTextureFilterDisplayName(m_sprite_texture_filtering)); 782 INFO_LOG("Dual-source blending: {}", m_supports_dual_source_blend ? "Supported" : "Not supported"); 783 INFO_LOG("Clamping UVs: {}", m_clamp_uvs ? "YES" : "NO"); 784 INFO_LOG("Depth buffer: {}", m_pgxp_depth_buffer ? "YES" : "NO"); 785 INFO_LOG("Downsampling: {}", Settings::GetDownsampleModeDisplayName(m_downsample_mode)); 786 INFO_LOG("Wireframe rendering: {}", Settings::GetGPUWireframeModeDisplayName(m_wireframe_mode)); 787 INFO_LOG("Line detection: {}", Settings::GetLineDetectModeDisplayName(m_line_detect_mode)); 788 INFO_LOG("Using software renderer for readbacks: {}", m_sw_renderer ? "YES" : "NO"); 789 INFO_LOG("Separate sprite shaders: {}", m_allow_sprite_mode ? "YES" : "NO"); 790 } 791 792 GPUTexture::Format GPU_HW::GetDepthBufferFormat() const 793 { 794 // Use 32-bit depth for PGXP depth buffer, otherwise 16-bit for mask bit. 795 return m_pgxp_depth_buffer ? (m_use_rov_for_shader_blend ? VRAM_DS_COLOR_FORMAT : VRAM_DS_DEPTH_FORMAT) : 796 VRAM_DS_FORMAT; 797 } 798 799 bool GPU_HW::CreateBuffers() 800 { 801 DestroyBuffers(); 802 803 // scale vram size to internal resolution 804 const u32 texture_width = VRAM_WIDTH * m_resolution_scale; 805 const u32 texture_height = VRAM_HEIGHT * m_resolution_scale; 806 const u8 samples = static_cast<u8>(m_multisamples); 807 const bool needs_depth_buffer = m_write_mask_as_depth || m_pgxp_depth_buffer; 808 809 // Needed for Metal resolve. 810 const GPUTexture::Type read_texture_type = (g_gpu_device->GetRenderAPI() == RenderAPI::Metal && m_multisamples > 1) ? 811 GPUTexture::Type::RWTexture : 812 GPUTexture::Type::Texture; 813 const GPUTexture::Type vram_texture_type = 814 m_use_rov_for_shader_blend ? 
GPUTexture::Type::RWTexture : GPUTexture::Type::RenderTarget; 815 const GPUTexture::Type depth_texture_type = 816 m_use_rov_for_shader_blend ? GPUTexture::Type::RWTexture : GPUTexture::Type::DepthStencil; 817 818 if (!(m_vram_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, vram_texture_type, 819 VRAM_RT_FORMAT)) || 820 (needs_depth_buffer && 821 !(m_vram_depth_texture = g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, 822 depth_texture_type, GetDepthBufferFormat()))) || 823 (m_pgxp_depth_buffer && !(m_vram_depth_copy_texture = 824 g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, samples, 825 GPUTexture::Type::RenderTarget, VRAM_DS_COLOR_FORMAT))) || 826 !(m_vram_read_texture = 827 g_gpu_device->FetchTexture(texture_width, texture_height, 1, 1, 1, read_texture_type, VRAM_RT_FORMAT)) || 828 !(m_vram_readback_texture = g_gpu_device->FetchTexture(VRAM_WIDTH / 2, VRAM_HEIGHT, 1, 1, 1, 829 GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT))) 830 { 831 return false; 832 } 833 834 GL_OBJECT_NAME(m_vram_texture, "VRAM Texture"); 835 if (m_vram_depth_texture) 836 GL_OBJECT_NAME(m_vram_depth_texture, "VRAM Depth Texture"); 837 GL_OBJECT_NAME(m_vram_read_texture, "VRAM Read Texture"); 838 GL_OBJECT_NAME(m_vram_readback_texture, "VRAM Readback Texture"); 839 840 if (g_gpu_device->GetFeatures().memory_import) 841 { 842 DEV_LOG("Trying to import guest VRAM buffer for downloads..."); 843 m_vram_readback_download_texture = g_gpu_device->CreateDownloadTexture( 844 m_vram_readback_texture->GetWidth(), m_vram_readback_texture->GetHeight(), m_vram_readback_texture->GetFormat(), 845 g_vram, sizeof(g_vram), VRAM_WIDTH * sizeof(u16)); 846 if (!m_vram_readback_download_texture) 847 ERROR_LOG("Failed to create imported readback buffer"); 848 } 849 if (!m_vram_readback_download_texture) 850 { 851 m_vram_readback_download_texture = g_gpu_device->CreateDownloadTexture( 852 m_vram_readback_texture->GetWidth(), 
m_vram_readback_texture->GetHeight(), m_vram_readback_texture->GetFormat()); 853 if (!m_vram_readback_download_texture) 854 { 855 ERROR_LOG("Failed to create readback download texture"); 856 return false; 857 } 858 } 859 860 if (g_gpu_device->GetFeatures().supports_texture_buffers) 861 { 862 if (!(m_vram_upload_buffer = 863 g_gpu_device->CreateTextureBuffer(GPUTextureBuffer::Format::R16UI, GPUDevice::MIN_TEXEL_BUFFER_ELEMENTS))) 864 { 865 return false; 866 } 867 868 GL_OBJECT_NAME(m_vram_upload_buffer, "VRAM Upload Buffer"); 869 } 870 871 INFO_LOG("Created HW framebuffer of {}x{}", texture_width, texture_height); 872 873 SetVRAMRenderTarget(); 874 SetFullVRAMDirtyRectangle(); 875 return true; 876 } 877 878 void GPU_HW::ClearFramebuffer() 879 { 880 g_gpu_device->ClearRenderTarget(m_vram_texture.get(), 0); 881 if (m_vram_depth_texture) 882 { 883 if (m_use_rov_for_shader_blend) 884 g_gpu_device->ClearRenderTarget(m_vram_depth_texture.get(), 0xFF); 885 else 886 g_gpu_device->ClearDepth(m_vram_depth_texture.get(), m_pgxp_depth_buffer ? 1.0f : 0.0f); 887 } 888 ClearVRAMDirtyRectangle(); 889 m_last_depth_z = 1.0f; 890 } 891 892 void GPU_HW::SetVRAMRenderTarget() 893 { 894 if (m_use_rov_for_shader_blend) 895 { 896 GPUTexture* rts[2] = {m_vram_texture.get(), m_vram_depth_texture.get()}; 897 const u32 num_rts = m_pgxp_depth_buffer ? 2 : 1; 898 g_gpu_device->SetRenderTargets( 899 rts, num_rts, nullptr, m_rov_active ? GPUPipeline::BindRenderTargetsAsImages : GPUPipeline::NoRenderPassFlags); 900 } 901 else 902 { 903 g_gpu_device->SetRenderTarget(m_vram_texture.get(), m_vram_depth_texture.get(), 904 ((m_allow_shader_blend && !m_use_rov_for_shader_blend) ? 
                                     GPUPipeline::ColorFeedbackLoop :
                                     GPUPipeline::NoRenderPassFlags));
  }
}

// Drops out of ROV (bind-as-image) mode and rebinds the VRAM render target
// with conventional render-pass flags. No-op when ROV is not active.
void GPU_HW::DeactivateROV()
{
  if (!m_rov_active)
    return;

  GL_INS("Deactivating ROV.");
  m_rov_active = false;
  SetVRAMRenderTarget();
}

// Releases all VRAM textures/buffers created by CreateBuffers(). Textures go
// back to the device pool via RecycleTexture() rather than being freed outright.
void GPU_HW::DestroyBuffers()
{
  ClearDisplayTexture();

  // Vertex/index pointers are mapped and unmapped together; flush any open map.
  DebugAssert((m_batch_vertex_ptr != nullptr) == (m_batch_index_ptr != nullptr));
  if (m_batch_vertex_ptr)
    UnmapGPUBuffer(0, 0);

  m_vram_upload_buffer.reset();
  m_vram_readback_download_texture.reset();
  g_gpu_device->RecycleTexture(std::move(m_downsample_texture));
  g_gpu_device->RecycleTexture(std::move(m_vram_extract_depth_texture));
  g_gpu_device->RecycleTexture(std::move(m_vram_extract_texture));
  g_gpu_device->RecycleTexture(std::move(m_vram_read_texture));
  g_gpu_device->RecycleTexture(std::move(m_vram_depth_copy_texture));
  g_gpu_device->RecycleTexture(std::move(m_vram_depth_texture));
  g_gpu_device->RecycleTexture(std::move(m_vram_texture));
  g_gpu_device->RecycleTexture(std::move(m_vram_readback_texture));
}

// Compiles every shader/pipeline combination used by the hardware renderer.
// Also decides the shader-blending strategy (FBFetch / feedback loop / ROV) for
// the current device features and settings. Returns false (with 'error' set by
// the failing device call) on any compile failure.
bool GPU_HW::CompilePipelines(Error* error)
{
  const GPUDevice::Features features = g_gpu_device->GetFeatures();
  const bool per_sample_shading = g_settings.gpu_per_sample_shading && features.per_sample_shading;
  const bool force_round_texcoords = (m_resolution_scale > 1 && m_texture_filtering == GPUTextureFilter::Nearest &&
                                      g_settings.gpu_force_round_texcoords);

  // Determine when to use shader blending.
  // FBFetch is free, we need it for filtering without DSB, or when accurate blending is forced.
  // But, don't bother with accurate blending if true colour is on. The result will be the same.
  // Prefer ROV over barriers/feedback loops without FBFetch, it'll be faster.
  // Abuse the depth buffer for the mask bit when it's free (FBFetch), or PGXP depth buffering is enabled.
  m_allow_shader_blend = features.framebuffer_fetch ||
                         ((features.feedback_loops || features.raster_order_views) &&
                          (m_pgxp_depth_buffer || g_settings.IsUsingAccurateBlending() ||
                           (!m_supports_dual_source_blend && (IsBlendedTextureFiltering(m_texture_filtering) ||
                                                              IsBlendedTextureFiltering(m_sprite_texture_filtering)))));
  m_prefer_shader_blend = (m_allow_shader_blend && g_settings.IsUsingAccurateBlending());
  m_use_rov_for_shader_blend = (m_allow_shader_blend && !features.framebuffer_fetch && features.raster_order_views &&
                                (m_prefer_shader_blend || !features.feedback_loops));
  m_write_mask_as_depth = (!m_pgxp_depth_buffer && !features.framebuffer_fetch && !m_prefer_shader_blend);

  // ROV doesn't support MSAA in DirectX.
  Assert(!m_use_rov_for_shader_blend || !IsUsingMultisampling());

  const bool needs_depth_buffer = (m_pgxp_depth_buffer || m_write_mask_as_depth);
  // With ROV, PGXP "depth" lives in a second color target instead of a real depth buffer.
  const bool needs_rov_depth = (m_pgxp_depth_buffer && m_use_rov_for_shader_blend);
  const bool needs_real_depth_buffer = (needs_depth_buffer && !needs_rov_depth);
  const bool needs_feedback_loop = (m_allow_shader_blend && features.feedback_loops && !m_use_rov_for_shader_blend);
  const GPUTexture::Format depth_buffer_format =
    needs_depth_buffer ? GetDepthBufferFormat() : GPUTexture::Format::Unknown;

  // Logging in case something goes wrong.
  INFO_LOG("Shader blending allowed: {}", m_allow_shader_blend ? "YES" : "NO");
  INFO_LOG("Shader blending preferred: {}", m_prefer_shader_blend ? "YES" : "NO");
  INFO_LOG("Use ROV for shader blending: {}", m_use_rov_for_shader_blend ? "YES" : "NO");
  INFO_LOG("Write mask as depth: {}", m_write_mask_as_depth ? "YES" : "NO");
  INFO_LOG("Depth buffer is {}needed in {}.", needs_depth_buffer ? "" : "NOT ",
           GPUTexture::GetFormatName(GetDepthBufferFormat()));
  INFO_LOG("Using ROV depth: {}", needs_rov_depth ? "YES" : "NO");
  INFO_LOG("Using real depth buffer: {}", needs_real_depth_buffer ? "YES" : "NO");
  INFO_LOG("Using feedback loops: {}", needs_feedback_loop ? "YES" : "NO");

  // Start generating shaders.
  GPU_HW_ShaderGen shadergen(g_gpu_device->GetRenderAPI(), m_resolution_scale, m_multisamples, per_sample_shading,
                             m_true_color, (m_resolution_scale > 1 && g_settings.gpu_scaled_dithering),
                             m_write_mask_as_depth, ShouldDisableColorPerspective(), m_supports_dual_source_blend,
                             m_supports_framebuffer_fetch, g_settings.gpu_true_color && g_settings.gpu_debanding);

  // Without sprite mode, only the modes below BatchTextureMode::SpriteStart are compiled.
  const u32 active_texture_modes =
    m_allow_sprite_mode ? NUM_TEXTURE_MODES :
                          (NUM_TEXTURE_MODES - (NUM_TEXTURE_MODES - static_cast<u32>(BatchTextureMode::SpriteStart)));
  // Upper bound on compile steps, used only to size the progress bar; the loops
  // below Increment() by the skipped amount when a combination is not generated.
  const u32 total_pipelines =
    (m_allow_sprite_mode ? 5 : 3) +                                                    // vertex shaders
    (active_texture_modes * 5 * 9 * 2 * 2 * 2 * (1 + BoolToUInt32(needs_rov_depth))) + // fragment shaders
    ((m_pgxp_depth_buffer ? 2 : 1) * 5 * 5 * active_texture_modes * 2 * 2 * 2) +       // batch pipelines
    ((m_wireframe_mode != GPUWireframeMode::Disabled) ? 1 : 0) +                       // wireframe
    1 +                                                                                // fullscreen quad VS
    (2 * 2) +                                                                          // vram fill
    (1 + BoolToUInt32(m_write_mask_as_depth)) +                                        // vram copy
    (1 + BoolToUInt32(m_write_mask_as_depth)) +                                        // vram write
    1 +                                                                                // vram write replacement
    (m_write_mask_as_depth ? 1 : 0) +                                                  // mask -> depth
    1 +                                                                                // vram read
    2 +                                                                                // extract/display
    ((m_downsample_mode != GPUDownsampleMode::Disabled) ? 1 : 0);                      // downsample

  ShaderCompileProgressTracker progress("Compiling Pipelines", total_pipelines);

  // vertex shaders - [textured/palette/sprite]
  // fragment shaders - [depth_test][render_mode][transparency_mode][texture_mode][check_mask][dithering][interlacing]
  static constexpr auto destroy_shader = [](std::unique_ptr<GPUShader>& s) { s.reset(); };
  DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 2> batch_vertex_shaders{};
  DimensionalArray<std::unique_ptr<GPUShader>, 2, 2, 2, NUM_TEXTURE_MODES, 5, 5, 2> batch_fragment_shaders{};
  // Shaders are only needed until the pipelines are linked; free them on all exit paths.
  ScopedGuard batch_shader_guard([&batch_vertex_shaders, &batch_fragment_shaders]() {
    batch_vertex_shaders.enumerate(destroy_shader);
    batch_fragment_shaders.enumerate(destroy_shader);
  });

  for (u8 textured = 0; textured < 2; textured++)
  {
    for (u8 palette = 0; palette < (textured ? 2 : 1); palette++)
    {
      for (u8 sprite = 0; sprite < (textured ? 2 : 1); sprite++)
      {
        const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering);
        const std::string vs = shadergen.GenerateBatchVertexShader(
          textured != 0, palette != 0, uv_limits, !sprite && force_round_texcoords, m_pgxp_depth_buffer);
        if (!(batch_vertex_shaders[textured][palette][sprite] =
                g_gpu_device->CreateShader(GPUShaderStage::Vertex, shadergen.GetLanguage(), vs, error)))
        {
          return false;
        }

        progress.Increment();
      }
    }
  }

  for (u8 depth_test = 0; depth_test < 2; depth_test++)
  {
    if (depth_test && !needs_rov_depth)
    {
      // Don't need to do depth testing in the shader.
      continue;
    }

    for (u8 render_mode = 0; render_mode < 5; render_mode++)
    {
      for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
      {
        if (
          // Can't generate shader blending.
          ((render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend) && !m_allow_shader_blend) ||
           (render_mode != static_cast<u8>(BatchRenderMode::ShaderBlend) &&
            transparency_mode != static_cast<u8>(GPUTransparencyMode::Disabled))) ||
          // Don't need multipass shaders if we're preferring shader blend or have (free) FBFetch.
          ((m_supports_framebuffer_fetch || m_prefer_shader_blend) &&
           (render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque) ||
            render_mode == static_cast<u8>(BatchRenderMode::OnlyTransparent))) ||
          // If using ROV depth, we only draw with shader blending.
          (needs_rov_depth && render_mode != static_cast<u8>(BatchRenderMode::ShaderBlend)))
        {
          // Skipped combination: advance progress by the inner loop count.
          progress.Increment(active_texture_modes * 2 * 2 * 2);
          continue;
        }

        for (u8 texture_mode = 0; texture_mode < active_texture_modes; texture_mode++)
        {
          for (u8 check_mask = 0; check_mask < 2; check_mask++)
          {
            if (check_mask && render_mode != static_cast<u8>(BatchRenderMode::ShaderBlend))
            {
              // mask bit testing is only valid with shader blending.
              progress.Increment(2 * 2);
              continue;
            }

            for (u8 dithering = 0; dithering < 2; dithering++)
            {
              for (u8 interlacing = 0; interlacing < 2; interlacing++)
              {
                // Sprite modes reuse the base texture-mode shaders with the
                // sprite filtering/clamping options applied instead.
                const bool sprite = (static_cast<BatchTextureMode>(texture_mode) >= BatchTextureMode::SpriteStart);
                const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering);
                const BatchTextureMode shader_texmode = static_cast<BatchTextureMode>(
                  texture_mode - (sprite ? static_cast<u8>(BatchTextureMode::SpriteStart) : 0));
                const bool use_rov =
                  (render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend) && m_use_rov_for_shader_blend);
                const std::string fs = shadergen.GenerateBatchFragmentShader(
                  static_cast<BatchRenderMode>(render_mode), static_cast<GPUTransparencyMode>(transparency_mode),
                  shader_texmode, sprite ? m_sprite_texture_filtering : m_texture_filtering, uv_limits,
                  !sprite && force_round_texcoords, ConvertToBoolUnchecked(dithering),
                  ConvertToBoolUnchecked(interlacing), ConvertToBoolUnchecked(check_mask), use_rov, needs_rov_depth,
                  (depth_test != 0));

                if (!(batch_fragment_shaders[depth_test][render_mode][transparency_mode][texture_mode][check_mask]
                                            [dithering][interlacing] = g_gpu_device->CreateShader(
                        GPUShaderStage::Fragment, shadergen.GetLanguage(), fs, error)))
                {
                  return false;
                }

                progress.Increment();
              }
            }
          }
        }
      }
    }
  }

  // Vertex layout for BatchVertex; only a prefix of this table is bound,
  // depending on textured/uv_limits (see the NUM_BATCH_* counts below).
  static constexpr GPUPipeline::VertexAttribute vertex_attributes[] = {
    GPUPipeline::VertexAttribute::Make(0, GPUPipeline::VertexAttribute::Semantic::Position, 0,
                                       GPUPipeline::VertexAttribute::Type::Float, 4, OFFSETOF(BatchVertex, x)),
    GPUPipeline::VertexAttribute::Make(1, GPUPipeline::VertexAttribute::Semantic::Color, 0,
                                       GPUPipeline::VertexAttribute::Type::UNorm8, 4, OFFSETOF(BatchVertex, color)),
    GPUPipeline::VertexAttribute::Make(2, GPUPipeline::VertexAttribute::Semantic::TexCoord, 0,
                                       GPUPipeline::VertexAttribute::Type::UInt32, 1, OFFSETOF(BatchVertex, u)),
    GPUPipeline::VertexAttribute::Make(3, GPUPipeline::VertexAttribute::Semantic::TexCoord, 1,
                                       GPUPipeline::VertexAttribute::Type::UInt32, 1, OFFSETOF(BatchVertex, texpage)),
    GPUPipeline::VertexAttribute::Make(4, GPUPipeline::VertexAttribute::Semantic::TexCoord, 2,
                                       GPUPipeline::VertexAttribute::Type::UNorm8, 4, OFFSETOF(BatchVertex, uv_limits)),
  };
  static constexpr u32 NUM_BATCH_VERTEX_ATTRIBUTES = 2;
  static constexpr u32 NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES = 4;
  static constexpr u32 NUM_BATCH_TEXTURED_LIMITS_VERTEX_ATTRIBUTES = 5;

  GPUPipeline::GraphicsConfig plconfig = {};
  plconfig.layout = GPUPipeline::Layout::SingleTextureAndUBO;
  plconfig.input_layout.vertex_stride = sizeof(BatchVertex);
  plconfig.rasterization = GPUPipeline::RasterizationState::GetNoCullState();
  plconfig.primitive = GPUPipeline::Primitive::Triangles;
  plconfig.geometry_shader = nullptr;
  plconfig.samples = m_multisamples;
  plconfig.per_sample_shading = per_sample_shading;
  plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();

  // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask]
  for (u8 depth_test = 0; depth_test < 2; depth_test++)
  {
    // The depth_test=1 variants are only used with PGXP depth buffering.
    if (depth_test && !m_pgxp_depth_buffer)
    {
      // Not used.
      continue;
    }

    for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
    {
      for (u8 render_mode = 0; render_mode < 5; render_mode++)
      {
        // Mirrors the skip conditions used when generating the fragment shaders above.
        if (
          // Can't generate shader blending.
          (render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend) && !m_allow_shader_blend) ||
          // Don't need multipass shaders.
          ((m_supports_framebuffer_fetch || m_prefer_shader_blend) &&
           (render_mode == static_cast<u8>(BatchRenderMode::OnlyOpaque) ||
            render_mode == static_cast<u8>(BatchRenderMode::OnlyTransparent))) ||
          // If using ROV depth, we only draw with shader blending.
          (needs_rov_depth && render_mode != static_cast<u8>(BatchRenderMode::ShaderBlend)))
        {
          progress.Increment(9 * 2 * 2 * 2);
          continue;
        }

        for (u8 texture_mode = 0; texture_mode < active_texture_modes; texture_mode++)
        {
          for (u8 dithering = 0; dithering < 2; dithering++)
          {
            for (u8 interlacing = 0; interlacing < 2; interlacing++)
            {
              for (u8 check_mask = 0; check_mask < 2; check_mask++)
              {
                const bool textured = (static_cast<BatchTextureMode>(texture_mode) != BatchTextureMode::Disabled);
                const bool palette =
                  (static_cast<BatchTextureMode>(texture_mode) == BatchTextureMode::Palette4Bit ||
                   static_cast<BatchTextureMode>(texture_mode) == BatchTextureMode::Palette8Bit ||
                   static_cast<BatchTextureMode>(texture_mode) == BatchTextureMode::SpritePalette4Bit ||
                   static_cast<BatchTextureMode>(texture_mode) == BatchTextureMode::SpritePalette8Bit);
                const bool sprite = (static_cast<BatchTextureMode>(texture_mode) >= BatchTextureMode::SpriteStart);
                const bool uv_limits = ShouldClampUVs(sprite ? m_sprite_texture_filtering : m_texture_filtering);
                const bool use_shader_blending = (render_mode == static_cast<u8>(BatchRenderMode::ShaderBlend));
                const bool use_rov = (use_shader_blending && m_use_rov_for_shader_blend);
                // Bind only the attribute prefix the shader actually consumes.
                plconfig.input_layout.vertex_attributes =
                  textured ?
                    (uv_limits ? std::span<const GPUPipeline::VertexAttribute>(
                                   vertex_attributes, NUM_BATCH_TEXTURED_LIMITS_VERTEX_ATTRIBUTES) :
                                 std::span<const GPUPipeline::VertexAttribute>(vertex_attributes,
                                                                               NUM_BATCH_TEXTURED_VERTEX_ATTRIBUTES)) :
                    std::span<const GPUPipeline::VertexAttribute>(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES);

                plconfig.vertex_shader =
                  batch_vertex_shaders[BoolToUInt8(textured)][BoolToUInt8(palette)][BoolToUInt8(sprite)].get();
                // Non-shader-blend pipelines share the transparency-disabled /
                // no-check-mask shader variants (those were not generated per-mode).
                plconfig.fragment_shader =
                  batch_fragment_shaders[BoolToUInt8(depth_test && needs_rov_depth)][render_mode]
                                        [use_shader_blending ? transparency_mode :
                                                               static_cast<u8>(GPUTransparencyMode::Disabled)]
                                        [texture_mode][use_shader_blending ? check_mask : 0][dithering][interlacing]
                                          .get();
                Assert(plconfig.vertex_shader && plconfig.fragment_shader);

                if (needs_real_depth_buffer)
                {
                  // PGXP: LessEqual against real geometry depth. Mask-bit mode:
                  // GreaterEqual against the mask stored in depth (check_mask).
                  plconfig.depth.depth_test =
                    m_pgxp_depth_buffer ?
                      (depth_test ? GPUPipeline::DepthFunc::LessEqual : GPUPipeline::DepthFunc::Always) :
                      (check_mask ? GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always);

                  // Don't write for transparent, but still test.
                  plconfig.depth.depth_write =
                    !m_pgxp_depth_buffer ||
                    (depth_test && transparency_mode == static_cast<u8>(GPUTransparencyMode::Disabled));
                }

                plconfig.SetTargetFormats(use_rov ? GPUTexture::Format::Unknown : VRAM_RT_FORMAT,
                                          needs_rov_depth ? GPUTexture::Format::Unknown : depth_buffer_format);
                plconfig.color_formats[1] = needs_rov_depth ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown;
                plconfig.render_pass_flags =
                  use_rov ? GPUPipeline::BindRenderTargetsAsImages :
                            (needs_feedback_loop ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags);

                plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();

                if (use_rov)
                {
                  // ROV writes the color itself from the shader; disable output merger writes.
                  plconfig.blend.write_mask = 0;
                }
                else if (!use_shader_blending &&
                         ((static_cast<GPUTransparencyMode>(transparency_mode) != GPUTransparencyMode::Disabled &&
                           (static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
                            static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque)) ||
                          (textured &&
                           IsBlendedTextureFiltering(sprite ? m_sprite_texture_filtering : m_texture_filtering))))
                {
                  plconfig.blend.enable = true;
                  plconfig.blend.src_alpha_blend = GPUPipeline::BlendFunc::One;
                  plconfig.blend.dst_alpha_blend = GPUPipeline::BlendFunc::Zero;
                  plconfig.blend.alpha_blend_op = GPUPipeline::BlendOp::Add;

                  if (m_supports_dual_source_blend)
                  {
                    plconfig.blend.src_blend = GPUPipeline::BlendFunc::One;
                    plconfig.blend.dst_blend = GPUPipeline::BlendFunc::SrcAlpha1;
                    plconfig.blend.blend_op =
                      (static_cast<GPUTransparencyMode>(transparency_mode) ==
                         GPUTransparencyMode::BackgroundMinusForeground &&
                       static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
                       static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque) ?
                        GPUPipeline::BlendOp::ReverseSubtract :
                        GPUPipeline::BlendOp::Add;
                  }
                  else
                  {
                    // TODO: This isn't entirely accurate, 127.5 versus 128.
                    // But if we use fbfetch on Mali, it doesn't matter.
                    plconfig.blend.src_blend = GPUPipeline::BlendFunc::One;
                    plconfig.blend.dst_blend = GPUPipeline::BlendFunc::One;
                    // Average mode approximated with a 0.5 constant blend factor.
                    if (static_cast<GPUTransparencyMode>(transparency_mode) ==
                        GPUTransparencyMode::HalfBackgroundPlusHalfForeground)
                    {
                      plconfig.blend.dst_blend = GPUPipeline::BlendFunc::ConstantColor;
                      plconfig.blend.dst_alpha_blend = GPUPipeline::BlendFunc::ConstantColor;
                      plconfig.blend.constant = 0x00808080u;
                    }

                    plconfig.blend.blend_op =
                      (static_cast<GPUTransparencyMode>(transparency_mode) ==
                         GPUTransparencyMode::BackgroundMinusForeground &&
                       static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
                       static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque) ?
                        GPUPipeline::BlendOp::ReverseSubtract :
                        GPUPipeline::BlendOp::Add;
                  }
                }

                if (!(m_batch_pipelines[depth_test][transparency_mode][render_mode][texture_mode][dithering]
                                       [interlacing][check_mask] = g_gpu_device->CreatePipeline(plconfig, error)))
                {
                  return false;
                }

                progress.Increment();
              }
            }
          }
        }
      }
    }
  }

  plconfig.SetTargetFormats(VRAM_RT_FORMAT, needs_rov_depth ? GPUTexture::Format::Unknown : depth_buffer_format);
  plconfig.render_pass_flags = needs_feedback_loop ? GPUPipeline::ColorFeedbackLoop : GPUPipeline::NoRenderPassFlags;

  // Optional wireframe overlay/replacement pipeline; expands triangles in a geometry shader.
  if (m_wireframe_mode != GPUWireframeMode::Disabled)
  {
    std::unique_ptr<GPUShader> gs = g_gpu_device->CreateShader(GPUShaderStage::Geometry, shadergen.GetLanguage(),
                                                               shadergen.GenerateWireframeGeometryShader(), error);
    std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
                                                               shadergen.GenerateWireframeFragmentShader(), error);
    if (!gs || !fs)
      return false;

    GL_OBJECT_NAME(gs, "Batch Wireframe Geometry Shader");
    GL_OBJECT_NAME(fs, "Batch Wireframe Fragment Shader");

    plconfig.input_layout.vertex_attributes =
      std::span<const GPUPipeline::VertexAttribute>(vertex_attributes, NUM_BATCH_VERTEX_ATTRIBUTES);
    plconfig.blend = (m_wireframe_mode == GPUWireframeMode::OverlayWireframe) ?
                       GPUPipeline::BlendState::GetAlphaBlendingState() :
                       GPUPipeline::BlendState::GetNoBlendingState();
    plconfig.blend.write_mask = 0x7;
    plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
    plconfig.vertex_shader = batch_vertex_shaders[0][0][0].get();
    plconfig.geometry_shader = gs.get();
    plconfig.fragment_shader = fs.get();

    if (!(m_wireframe_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
      return false;

    GL_OBJECT_NAME(m_wireframe_pipeline, "Batch Wireframe Pipeline");

    plconfig.vertex_shader = nullptr;
    plconfig.geometry_shader = nullptr;
    plconfig.fragment_shader = nullptr;

    progress.Increment();
  }

  // Batch shaders are linked into pipelines now; release them early.
  batch_shader_guard.Run();

  // use a depth of 1, that way writes will reset the depth
  std::unique_ptr<GPUShader> fullscreen_quad_vertex_shader = g_gpu_device->CreateShader(
    GPUShaderStage::Vertex, shadergen.GetLanguage(), shadergen.GenerateScreenQuadVertexShader(1.0f), error);
  if (!fullscreen_quad_vertex_shader)
    return false;

  progress.Increment();

  // common state
  plconfig.input_layout.vertex_attributes = {};
  plconfig.input_layout.vertex_stride = 0;
  plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;
  plconfig.per_sample_shading = false;
  plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
  plconfig.vertex_shader = fullscreen_quad_vertex_shader.get();
  plconfig.color_formats[1] = needs_rov_depth ? VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown;

  // VRAM fill
  for (u8 wrapped = 0; wrapped < 2; wrapped++)
  {
    for (u8 interlaced = 0; interlaced < 2; interlaced++)
    {
      std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
        GPUShaderStage::Fragment, shadergen.GetLanguage(),
        shadergen.GenerateVRAMFillFragmentShader(ConvertToBoolUnchecked(wrapped), ConvertToBoolUnchecked(interlaced)),
        error);
      if (!fs)
        return false;

      plconfig.fragment_shader = fs.get();
      plconfig.depth = needs_real_depth_buffer ? GPUPipeline::DepthState::GetAlwaysWriteState() :
                                                 GPUPipeline::DepthState::GetNoTestsState();

      if (!(m_vram_fill_pipelines[wrapped][interlaced] = g_gpu_device->CreatePipeline(plconfig, error)))
        return false;

      progress.Increment();
    }
  }

  // VRAM copy
  {
    std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
                                                               shadergen.GenerateVRAMCopyFragmentShader(), error);
    if (!fs)
      return false;

    plconfig.fragment_shader = fs.get();
    for (u8 depth_test = 0; depth_test < 2; depth_test++)
    {
      // depth_test=1 variant only exists when the mask bit is stored in depth.
      if (depth_test && !m_write_mask_as_depth)
        continue;

      plconfig.depth.depth_write = needs_real_depth_buffer;
      plconfig.depth.depth_test =
        (depth_test != 0) ?
GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always; 1385 1386 if (!(m_vram_copy_pipelines[depth_test] = g_gpu_device->CreatePipeline(plconfig), error)) 1387 return false; 1388 1389 GL_OBJECT_NAME_FMT(m_vram_copy_pipelines[depth_test], "VRAM Write Pipeline, depth={}", depth_test); 1390 1391 progress.Increment(); 1392 } 1393 } 1394 1395 // VRAM write 1396 { 1397 const bool use_buffer = features.supports_texture_buffers; 1398 const bool use_ssbo = features.texture_buffers_emulated_with_ssbo; 1399 std::unique_ptr<GPUShader> fs = 1400 g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), 1401 shadergen.GenerateVRAMWriteFragmentShader(use_buffer, use_ssbo), error); 1402 if (!fs) 1403 return false; 1404 1405 plconfig.layout = use_buffer ? GPUPipeline::Layout::SingleTextureBufferAndPushConstants : 1406 GPUPipeline::Layout::SingleTextureAndPushConstants; 1407 plconfig.fragment_shader = fs.get(); 1408 for (u8 depth_test = 0; depth_test < 2; depth_test++) 1409 { 1410 if (depth_test && !m_write_mask_as_depth) 1411 continue; 1412 1413 plconfig.depth.depth_write = needs_real_depth_buffer; 1414 plconfig.depth.depth_test = 1415 (depth_test != 0) ? 
GPUPipeline::DepthFunc::GreaterEqual : GPUPipeline::DepthFunc::Always;

      if (!(m_vram_write_pipelines[depth_test] = g_gpu_device->CreatePipeline(plconfig, error)))
        return false;

      GL_OBJECT_NAME_FMT(m_vram_write_pipelines[depth_test], "VRAM Write Pipeline, depth={}", depth_test);

      progress.Increment();
    }
  }

  plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants;

  // VRAM write replacement
  {
    std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
                                                               shadergen.GenerateCopyFragmentShader(), error);
    if (!fs)
      return false;

    plconfig.fragment_shader = fs.get();
    plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
    if (!(m_vram_write_replacement_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
      return false;

    progress.Increment();
  }

  // VRAM update depth: regenerates the depth buffer (mask bits) from VRAM alpha.
  if (m_write_mask_as_depth)
  {
    std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(
      GPUShaderStage::Fragment, shadergen.GetLanguage(), shadergen.GenerateVRAMUpdateDepthFragmentShader(), error);
    if (!fs)
      return false;

    plconfig.fragment_shader = fs.get();
    plconfig.SetTargetFormats(GPUTexture::Format::Unknown, depth_buffer_format);
    plconfig.depth = GPUPipeline::DepthState::GetAlwaysWriteState();
    // Depth-only pass; suppress all color writes.
    plconfig.blend.write_mask = 0;

    if (!(m_vram_update_depth_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
      return false;

    GL_OBJECT_NAME(m_vram_update_depth_pipeline, "VRAM Update Depth Pipeline");

    progress.Increment();
  }

  // Remaining pipelines render at 1x sample count with no depth or blending.
  plconfig.SetTargetFormats(VRAM_RT_FORMAT);
  plconfig.render_pass_flags = GPUPipeline::NoRenderPassFlags;
  plconfig.depth = GPUPipeline::DepthState::GetNoTestsState();
  plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState();
  plconfig.samples = 1;
  plconfig.per_sample_shading = false;

  // VRAM read: packs scaled VRAM back into 16-bit pixels for readback.
  {
    std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
                                                               shadergen.GenerateVRAMReadFragmentShader(), error);
    if (!fs)
      return false;

    plconfig.fragment_shader = fs.get();

    if (!(m_vram_readback_pipeline = g_gpu_device->CreatePipeline(plconfig, error)))
      return false;

    GL_OBJECT_NAME(m_vram_readback_pipeline, "VRAM Read Pipeline");
    progress.Increment();
  }

  // Display: three extract variants — 0 = 15-bit, 1 = 24-bit, 2 = depth extract.
  {
    for (u8 shader = 0; shader < 3; shader++)
    {
      // 24-bit doesn't give you a depth buffer.
      const bool color_24bit = (shader == 1);
      const bool depth_extract = (shader == 2);
      // Depth extraction is only relevant with PGXP depth buffering.
      if (depth_extract && !m_pgxp_depth_buffer)
        continue;

      std::unique_ptr<GPUShader> fs =
        g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(),
                                   shadergen.GenerateVRAMExtractFragmentShader(color_24bit, depth_extract), error);
      if (!fs)
        return false;

      plconfig.fragment_shader = fs.get();

      plconfig.layout = depth_extract ? GPUPipeline::Layout::MultiTextureAndPushConstants :
                                        GPUPipeline::Layout::SingleTextureAndPushConstants;
      plconfig.color_formats[1] = depth_extract ?
VRAM_DS_COLOR_FORMAT : GPUTexture::Format::Unknown; 1508 1509 if (!(m_vram_extract_pipeline[shader] = g_gpu_device->CreatePipeline(plconfig, error))) 1510 return false; 1511 1512 progress.Increment(); 1513 } 1514 } 1515 1516 plconfig.layout = GPUPipeline::Layout::SingleTextureAndPushConstants; 1517 1518 if (m_pgxp_depth_buffer) 1519 { 1520 std::unique_ptr<GPUShader> fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), 1521 shadergen.GenerateCopyFragmentShader(), error); 1522 if (!fs) 1523 return false; 1524 1525 plconfig.fragment_shader = fs.get(); 1526 plconfig.SetTargetFormats(VRAM_DS_COLOR_FORMAT); 1527 if (!(m_copy_depth_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) 1528 return false; 1529 } 1530 1531 plconfig.SetTargetFormats(VRAM_RT_FORMAT); 1532 1533 if (m_downsample_mode == GPUDownsampleMode::Adaptive) 1534 { 1535 std::unique_ptr<GPUShader> vs = g_gpu_device->CreateShader( 1536 GPUShaderStage::Vertex, shadergen.GetLanguage(), shadergen.GenerateAdaptiveDownsampleVertexShader(), error); 1537 std::unique_ptr<GPUShader> fs = 1538 g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), 1539 shadergen.GenerateAdaptiveDownsampleMipFragmentShader(true), error); 1540 if (!vs || !fs) 1541 return false; 1542 GL_OBJECT_NAME(fs, "Downsample Vertex Shader"); 1543 GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader"); 1544 plconfig.vertex_shader = vs.get(); 1545 plconfig.fragment_shader = fs.get(); 1546 if (!(m_downsample_first_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) 1547 return false; 1548 GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline"); 1549 1550 fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), 1551 shadergen.GenerateAdaptiveDownsampleMipFragmentShader(false), error); 1552 if (!fs) 1553 return false; 1554 GL_OBJECT_NAME(fs, "Downsample Mid Pass Fragment Shader"); 1555 plconfig.fragment_shader = fs.get(); 1556 
if (!(m_downsample_mid_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) 1557 return false; 1558 GL_OBJECT_NAME(m_downsample_mid_pass_pipeline, "Downsample Mid Pass Pipeline"); 1559 1560 fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), 1561 shadergen.GenerateAdaptiveDownsampleBlurFragmentShader(), error); 1562 if (!fs) 1563 return false; 1564 GL_OBJECT_NAME(fs, "Downsample Blur Pass Fragment Shader"); 1565 plconfig.fragment_shader = fs.get(); 1566 plconfig.SetTargetFormats(GPUTexture::Format::R8); 1567 if (!(m_downsample_blur_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) 1568 return false; 1569 GL_OBJECT_NAME(m_downsample_blur_pass_pipeline, "Downsample Blur Pass Pipeline"); 1570 1571 fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), 1572 shadergen.GenerateAdaptiveDownsampleCompositeFragmentShader(), error); 1573 if (!fs) 1574 return false; 1575 GL_OBJECT_NAME(fs, "Downsample Composite Pass Fragment Shader"); 1576 plconfig.layout = GPUPipeline::Layout::MultiTextureAndPushConstants; 1577 plconfig.fragment_shader = fs.get(); 1578 plconfig.SetTargetFormats(VRAM_RT_FORMAT); 1579 if (!(m_downsample_composite_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) 1580 return false; 1581 GL_OBJECT_NAME(m_downsample_composite_pass_pipeline, "Downsample Blur Pass Pipeline"); 1582 1583 GPUSampler::Config config = GPUSampler::GetLinearConfig(); 1584 config.min_lod = 0; 1585 config.max_lod = GPUSampler::Config::LOD_MAX; 1586 if (!(m_downsample_lod_sampler = g_gpu_device->CreateSampler(config))) 1587 { 1588 Error::SetStringView(error, "Failed to create downsample LOD sampler."); 1589 return false; 1590 } 1591 GL_OBJECT_NAME(m_downsample_lod_sampler, "Downsample LOD Sampler"); 1592 config.mip_filter = GPUSampler::Filter::Linear; 1593 if (!(m_downsample_composite_sampler = g_gpu_device->CreateSampler(config))) 1594 { 1595 Error::SetStringView(error, "Failed to create 
downsample composite sampler."); 1596 return false; 1597 } 1598 GL_OBJECT_NAME(m_downsample_composite_sampler, "Downsample Trilinear Sampler"); 1599 progress.Increment(); 1600 } 1601 else if (m_downsample_mode == GPUDownsampleMode::Box) 1602 { 1603 std::unique_ptr<GPUShader> fs = 1604 g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), 1605 shadergen.GenerateBoxSampleDownsampleFragmentShader( 1606 m_resolution_scale / GetBoxDownsampleScale(m_resolution_scale)), 1607 error); 1608 if (!fs) 1609 return false; 1610 1611 GL_OBJECT_NAME(fs, "Downsample First Pass Fragment Shader"); 1612 plconfig.fragment_shader = fs.get(); 1613 1614 if (!(m_downsample_first_pass_pipeline = g_gpu_device->CreatePipeline(plconfig, error))) 1615 return false; 1616 1617 GL_OBJECT_NAME(m_downsample_first_pass_pipeline, "Downsample First Pass Pipeline"); 1618 progress.Increment(); 1619 } 1620 1621 #undef UPDATE_PROGRESS 1622 1623 return true; 1624 } 1625 1626 void GPU_HW::DestroyPipelines() 1627 { 1628 static constexpr auto destroy = [](std::unique_ptr<GPUPipeline>& p) { p.reset(); }; 1629 1630 m_wireframe_pipeline.reset(); 1631 1632 m_batch_pipelines.enumerate(destroy); 1633 1634 m_vram_fill_pipelines.enumerate(destroy); 1635 1636 for (std::unique_ptr<GPUPipeline>& p : m_vram_write_pipelines) 1637 destroy(p); 1638 1639 for (std::unique_ptr<GPUPipeline>& p : m_vram_copy_pipelines) 1640 destroy(p); 1641 1642 for (std::unique_ptr<GPUPipeline>& p : m_vram_extract_pipeline) 1643 destroy(p); 1644 1645 destroy(m_vram_readback_pipeline); 1646 destroy(m_vram_update_depth_pipeline); 1647 destroy(m_vram_write_replacement_pipeline); 1648 1649 destroy(m_downsample_first_pass_pipeline); 1650 destroy(m_downsample_mid_pass_pipeline); 1651 destroy(m_downsample_blur_pass_pipeline); 1652 destroy(m_downsample_composite_pass_pipeline); 1653 m_downsample_composite_sampler.reset(); 1654 1655 m_copy_depth_pipeline.reset(); 1656 } 1657 1658 GPU_HW::BatchRenderMode 
GPU_HW::BatchConfig::GetRenderMode() const
{
  // Only two modes are derivable from the batch config itself: everything else (OnlyOpaque/OnlyTransparent/
  // ShaderBlend) is selected by the caller at draw time.
  return transparency_mode == GPUTransparencyMode::Disabled ? BatchRenderMode::TransparencyDisabled :
                                                              BatchRenderMode::TransparentAndOpaque;
}

void GPU_HW::UpdateVRAMReadTexture(bool drawn, bool written)
{
  GL_SCOPE("UpdateVRAMReadTexture()");

  // Copies one dirty rectangle from the VRAM draw texture into the read texture (resolving when MSAA is active),
  // clears the corresponding texpage-dirty bit, and resets the rect to INVALID_RECT.
  const auto update = [this](GSVector4i& rect, u8 dbit) {
    if (m_texpage_dirty & dbit)
    {
      m_texpage_dirty &= ~dbit;
      if (!m_texpage_dirty)
        GL_INS_FMT("{} texpage is no longer dirty", (dbit & TEXPAGE_DIRTY_DRAWN_RECT) ? "DRAW" : "WRITE");
    }

    // Dirty rects are tracked in native VRAM coordinates; scale up to the internal resolution.
    const GSVector4i scaled_rect = rect.mul32l(GSVector4i(m_resolution_scale));
    if (m_vram_texture->IsMultisampled())
    {
      if (g_gpu_device->GetFeatures().partial_msaa_resolve)
      {
        g_gpu_device->ResolveTextureRegion(m_vram_read_texture.get(), scaled_rect.left, scaled_rect.top, 0, 0,
                                           m_vram_texture.get(), scaled_rect.left, scaled_rect.top, scaled_rect.width(),
                                           scaled_rect.height());
      }
      else
      {
        // Backend can't resolve a sub-rectangle; resolve the whole texture instead.
        g_gpu_device->ResolveTextureRegion(m_vram_read_texture.get(), 0, 0, 0, 0, m_vram_texture.get(), 0, 0,
                                           m_vram_texture->GetWidth(), m_vram_texture->GetHeight());
      }
    }
    else
    {
      g_gpu_device->CopyTextureRegion(m_vram_read_texture.get(), scaled_rect.left, scaled_rect.top, 0, 0,
                                      m_vram_texture.get(), scaled_rect.left, scaled_rect.top, 0, 0,
                                      scaled_rect.width(), scaled_rect.height());
    }

    // m_counters.num_read_texture_updates++;
    rect = INVALID_RECT;
  };

  if (drawn)
  {
    DebugAssert(!m_vram_dirty_draw_rect.eq(INVALID_RECT));
    GL_INS_FMT("Updating draw rect {}", m_vram_dirty_draw_rect);

    u8 dbits = TEXPAGE_DIRTY_DRAWN_RECT;
    if (written && m_vram_dirty_draw_rect.rintersects(m_vram_dirty_write_rect))
    {
      // The written rect overlaps the drawn rect: fold it into the draw rect so a single copy services both,
      // and clear both dirty bits at once.
      DebugAssert(!m_vram_dirty_write_rect.eq(INVALID_RECT));
      GL_INS_FMT("Including write rect {}", m_vram_dirty_write_rect);
      m_vram_dirty_draw_rect = m_vram_dirty_draw_rect.runion(m_vram_dirty_write_rect);
      m_vram_dirty_write_rect = INVALID_RECT;
      dbits = TEXPAGE_DIRTY_DRAWN_RECT | TEXPAGE_DIRTY_WRITTEN_RECT;
      written = false;
    }

    update(m_vram_dirty_draw_rect, dbits);
  }
  if (written)
  {
    GL_INS_FMT("Updating write rect {}", m_vram_dirty_write_rect);
    update(m_vram_dirty_write_rect, TEXPAGE_DIRTY_WRITTEN_RECT);
  }
}

void GPU_HW::UpdateDepthBufferFromMaskBit()
{
  // Regenerates the depth buffer from the VRAM texture via a fullscreen pass; only valid when mask-bit
  // emulation is done through depth writes and the PGXP depth buffer is not in use.
  DebugAssert(!m_pgxp_depth_buffer && m_vram_depth_texture && m_write_mask_as_depth);

  // Viewport should already be set full, only need to fudge the scissor.
  g_gpu_device->SetScissor(m_vram_texture->GetRect());
  g_gpu_device->InvalidateRenderTarget(m_vram_depth_texture.get());
  g_gpu_device->SetRenderTargets(nullptr, 0, m_vram_depth_texture.get());
  g_gpu_device->SetPipeline(m_vram_update_depth_pipeline.get());
  g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler());
  g_gpu_device->Draw(3, 0);

  // Restore.
  g_gpu_device->SetTextureSampler(0, m_vram_read_texture.get(), g_gpu_device->GetNearestSampler());
  SetVRAMRenderTarget();
  SetScissor();
}

void GPU_HW::CopyAndClearDepthBuffer()
{
  if (!m_depth_was_copied)
  {
    // Take a copy of the current depth buffer so it can be used when the previous frame/buffer gets scanned out.
    // Don't bother when we're not postprocessing, it'd just be a wasted copy.
    if (PostProcessing::InternalChain.NeedsDepthBuffer())
    {
      // TODO: Shrink this to only the active area.
      GL_SCOPE("Copy Depth Buffer");

      m_vram_texture->MakeReadyForSampling();
      g_gpu_device->InvalidateRenderTarget(m_vram_depth_copy_texture.get());
      g_gpu_device->SetRenderTarget(m_vram_depth_copy_texture.get());
      g_gpu_device->SetViewportAndScissor(0, 0, m_vram_depth_texture->GetWidth(), m_vram_depth_texture->GetHeight());
      g_gpu_device->SetTextureSampler(0, m_vram_depth_texture.get(), g_gpu_device->GetNearestSampler());
      g_gpu_device->SetPipeline(m_copy_depth_pipeline.get());

      // Presumably the source UV rect for the copy shader (whole texture) — see GenerateCopyFragmentShader.
      const float uniforms[4] = {0.0f, 0.0f, 1.0f, 1.0f};
      g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
      g_gpu_device->Draw(3, 0);
      RestoreDeviceContext();
    }

    m_depth_was_copied = true;
  }

  ClearDepthBuffer();
}

void GPU_HW::ClearDepthBuffer()
{
  GL_SCOPE("GPU_HW::ClearDepthBuffer()");
  DebugAssert(m_pgxp_depth_buffer);
  if (m_use_rov_for_shader_blend)
    g_gpu_device->ClearRenderTarget(m_vram_depth_texture.get(), 0xFF); // ROV path stores depth in a colour target
  else
    g_gpu_device->ClearDepth(m_vram_depth_texture.get(), 1.0f);
  m_last_depth_z = 1.0f;
}

void GPU_HW::SetScissor()
{
  // Drawing area is tracked at native resolution; scale to the internal render resolution.
  g_gpu_device->SetScissor(m_clamped_drawing_area.mul32l(GSVector4i(m_resolution_scale)));
}

void GPU_HW::MapGPUBuffer(u32 required_vertices, u32 required_indices)
{
  DebugAssert(!m_batch_vertex_ptr && m_batch_index_ptr == nullptr ? !m_batch_index_ptr : !m_batch_index_ptr);
  DebugAssert(!m_batch_vertex_ptr && !m_batch_index_ptr);

  void* vb_map;
  u32 vb_space;
  g_gpu_device->MapVertexBuffer(sizeof(BatchVertex), required_vertices, &vb_map, &vb_space, &m_batch_base_vertex);
  m_batch_vertex_ptr = static_cast<BatchVertex*>(vb_map);
  // Space counters are u16; clamp the mapped space so the truncation below cannot wrap.
  m_batch_vertex_space = Truncate16(std::min<u32>(vb_space, std::numeric_limits<u16>::max()));

  u32 ib_space;
  g_gpu_device->MapIndexBuffer(required_indices, &m_batch_index_ptr, &ib_space, &m_batch_base_index);
  m_batch_index_space = Truncate16(std::min<u32>(ib_space, std::numeric_limits<u16>::max()));
}

void GPU_HW::UnmapGPUBuffer(u32 used_vertices, u32 used_indices) 1807 { 1808 DebugAssert(m_batch_vertex_ptr && m_batch_index_ptr); 1809 g_gpu_device->UnmapVertexBuffer(sizeof(BatchVertex), used_vertices); 1810 g_gpu_device->UnmapIndexBuffer(used_indices); 1811 m_batch_vertex_ptr = nullptr; 1812 m_batch_vertex_count = 0; 1813 m_batch_vertex_space = 0; 1814 m_batch_index_ptr = nullptr; 1815 m_batch_index_count = 0; 1816 m_batch_index_space = 0; 1817 } 1818 1819 ALWAYS_INLINE_RELEASE void GPU_HW::DrawBatchVertices(BatchRenderMode render_mode, u32 num_indices, u32 base_index, 1820 u32 base_vertex) 1821 { 1822 // [depth_test][transparency_mode][render_mode][texture_mode][dithering][interlacing][check_mask] 1823 const u8 texture_mode = static_cast<u8>(m_batch.texture_mode) + 1824 ((m_batch.texture_mode != BatchTextureMode::Disabled && m_batch.sprite_mode) ? 1825 static_cast<u8>(BatchTextureMode::SpriteStart) : 1826 0); 1827 const u8 depth_test = BoolToUInt8(m_batch.use_depth_buffer); 1828 const u8 check_mask = BoolToUInt8(m_batch.check_mask_before_draw); 1829 g_gpu_device->SetPipeline(m_batch_pipelines[depth_test][static_cast<u8>(m_batch.transparency_mode)][static_cast<u8>( 1830 render_mode)][texture_mode][BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)][check_mask] 1831 .get()); 1832 1833 GL_INS_FMT("Texture mode: {}", s_batch_texture_modes[texture_mode]); 1834 GL_INS_FMT("Transparency mode: {}", s_transparency_modes[static_cast<u8>(m_batch.transparency_mode)]); 1835 GL_INS_FMT("Render mode: {}", s_batch_render_modes[static_cast<u8>(render_mode)]); 1836 GL_INS_FMT("Mask bit test: {}", m_batch.check_mask_before_draw); 1837 GL_INS_FMT("Interlacing: {}", m_batch.check_mask_before_draw); 1838 1839 // Activating ROV? 
1840 if (render_mode == BatchRenderMode::ShaderBlend) 1841 { 1842 if (m_use_rov_for_shader_blend) 1843 { 1844 if (!m_rov_active) 1845 { 1846 GL_INS("Activating ROV."); 1847 m_rov_active = true; 1848 SetVRAMRenderTarget(); 1849 } 1850 1851 g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex); 1852 } 1853 else if (m_supports_framebuffer_fetch) 1854 { 1855 // No barriers needed for FBFetch. 1856 g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex); 1857 } 1858 else 1859 { 1860 // Barriers. Yucky. 1861 g_gpu_device->DrawIndexedWithBarrier(num_indices, base_index, base_vertex, GPUDevice::DrawBarrier::Full); 1862 } 1863 } 1864 else 1865 { 1866 g_gpu_device->DrawIndexed(num_indices, base_index, base_vertex); 1867 } 1868 } 1869 1870 ALWAYS_INLINE_RELEASE void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) 1871 { 1872 // Taken from beetle-psx gpu_polygon.cpp 1873 // For X/Y flipped 2D sprites, PSX games rely on a very specific rasterization behavior. If U or V is decreasing in X 1874 // or Y, and we use the provided U/V as is, we will sample the wrong texel as interpolation covers an entire pixel, 1875 // while PSX samples its interpolation essentially in the top-left corner and splats that interpolant across the 1876 // entire pixel. While we could emulate this reasonably well in native resolution by shifting our vertex coords by 1877 // 0.5, this breaks in upscaling scenarios, because we have several samples per native sample and we need NN rules to 1878 // hit the same UV every time. 
  // One approach here is to use interpolate at offset or similar tricks to generalize the
  // PSX interpolation patterns, but the problem is that vertices sharing an edge will no longer see the same UV (due to
  // different plane derivatives), we end up sampling outside the intended boundary and artifacts are inevitable, so the
  // only case where we can apply this fixup is for "sprites" or similar which should not share edges, which leads to
  // this unfortunate code below.

  // It might be faster to do more direct checking here, but the code below handles primitives in any order and
  // orientation, and is far more SIMD-friendly if needed.
  // Edge vectors of the first triangle (A->B, B->C, C->A).
  const float abx = vertices[1].x - vertices[0].x;
  const float aby = vertices[1].y - vertices[0].y;
  const float bcx = vertices[2].x - vertices[1].x;
  const float bcy = vertices[2].y - vertices[1].y;
  const float cax = vertices[0].x - vertices[2].x;
  const float cay = vertices[0].y - vertices[2].y;

  // Hack for Wild Arms 2: The player sprite is drawn one line at a time with a quad, but the bottom V coordinates
  // are set to a large distance from the top V coordinate. When upscaling, this means that the coordinate is
  // interpolated between these two values, result in out-of-bounds sampling. At native, it's fine, because at the
  // top of the primitive, no amount is added to the coordinates. So, in this case, just set all coordinates to the
  // same value, from the first vertex, ensuring no interpolation occurs. Gate it based on the Y distance being one
  // pixel, limiting the risk of false positives.
  if (m_line_detect_mode == GPULineDetectMode::Quads &&
      (std::max(vertices[0].y, std::max(vertices[1].y, std::max(vertices[2].y, vertices[3].y))) -
       std::min(vertices[0].y, std::min(vertices[1].y, std::min(vertices[2].y, vertices[3].y)))) == 1.0f) [[unlikely]]
  {
    GL_INS_FMT("HLineQuad detected at [{},{}={},{} {},{}={},{} {},{}={},{} {},{}={},{}", vertices[0].x, vertices[0].y,
               vertices[0].u, vertices[0].v, vertices[1].x, vertices[1].y, vertices[1].u, vertices[1].v, vertices[2].x,
               vertices[2].y, vertices[2].u, vertices[2].v, vertices[3].x, vertices[3].y, vertices[3].u, vertices[3].v);
    vertices[1].v = vertices[0].v;
    vertices[2].v = vertices[0].v;
    vertices[3].v = vertices[0].v;
  }

  // Compute static derivatives, just assume W is uniform across the primitive and that the plane equation remains the
  // same across the quad. (which it is, there is no Z.. yet).
  const float dudx = -aby * static_cast<float>(vertices[2].u) - bcy * static_cast<float>(vertices[0].u) -
                     cay * static_cast<float>(vertices[1].u);
  const float dvdx = -aby * static_cast<float>(vertices[2].v) - bcy * static_cast<float>(vertices[0].v) -
                     cay * static_cast<float>(vertices[1].v);
  const float dudy = +abx * static_cast<float>(vertices[2].u) + bcx * static_cast<float>(vertices[0].u) +
                     cax * static_cast<float>(vertices[1].u);
  const float dvdy = +abx * static_cast<float>(vertices[2].v) + bcx * static_cast<float>(vertices[0].v) +
                     cax * static_cast<float>(vertices[1].v);
  const float area = bcx * cay - bcy * cax;

  // Detect and reject any triangles with 0 size texture area
  const s32 texArea = (vertices[1].u - vertices[0].u) * (vertices[2].v - vertices[0].v) -
                      (vertices[2].u - vertices[0].u) * (vertices[1].v - vertices[0].v);

  // Shouldn't matter as degenerate primitives will be culled anyways.
  if (area == 0.0f || texArea == 0)
    return;

  // Use floats here as it'll be faster than integer divides.
  const float rcp_area = 1.0f / area;
  const float dudx_area = dudx * rcp_area;
  const float dudy_area = dudy * rcp_area;
  const float dvdx_area = dvdx * rcp_area;
  const float dvdy_area = dvdy * rcp_area;
  const bool neg_dudx = dudx_area < 0.0f;
  const bool neg_dudy = dudy_area < 0.0f;
  const bool neg_dvdx = dvdx_area < 0.0f;
  const bool neg_dvdy = dvdy_area < 0.0f;
  const bool zero_dudx = dudx_area == 0.0f;
  const bool zero_dudy = dudy_area == 0.0f;
  const bool zero_dvdx = dvdx_area == 0.0f;
  const bool zero_dvdy = dvdy_area == 0.0f;

  // If we have negative dU or dV in any direction, increment the U or V to work properly with nearest-neighbor in
  // this impl. If we don't have 1:1 pixel correspondence, this creates a slight "shift" in the sprite, but we
  // guarantee that we don't sample garbage at least. Overall, this is kinda hacky because there can be legitimate,
  // rare cases where 3D meshes hit this scenario, and a single texel offset can pop in, but this is way better than
  // having borked 2D overall.
  //
  // TODO: If perf becomes an issue, we can probably SIMD the 8 comparisons above,
  // create an 8-bit code, and use a LUT to get the offsets.
  // Case 1: U is decreasing in X, but no change in Y.
  // Case 2: U is decreasing in Y, but no change in X.
  // Case 3: V is decreasing in X, but no change in Y.
  // Case 4: V is decreasing in Y, but no change in X.
  if ((neg_dudx && zero_dudy) || (neg_dudy && zero_dudx))
  {
    vertices[0].u++;
    vertices[1].u++;
    vertices[2].u++;
    vertices[3].u++;
  }

  if ((neg_dvdx && zero_dvdy) || (neg_dvdy && zero_dvdx))
  {
    vertices[0].v++;
    vertices[1].v++;
    vertices[2].v++;
    vertices[3].v++;
  }

  // 2D polygons should have zero change in V on the X axis, and vice versa.
  if (m_allow_sprite_mode)
    SetBatchSpriteMode(zero_dudy && zero_dvdx);
}

bool GPU_HW::IsPossibleSpritePolygon(const BatchVertex* vertices) const
{
  // Same derivative test as HandleFlippedQuadTextureCoordinates(), but non-mutating: a polygon is
  // sprite-like when U does not vary with Y and V does not vary with X.
  const float abx = vertices[1].x - vertices[0].x;
  const float aby = vertices[1].y - vertices[0].y;
  const float bcx = vertices[2].x - vertices[1].x;
  const float bcy = vertices[2].y - vertices[1].y;
  const float cax = vertices[0].x - vertices[2].x;
  const float cay = vertices[0].y - vertices[2].y;
  const float dvdx = -aby * static_cast<float>(vertices[2].v) - bcy * static_cast<float>(vertices[0].v) -
                     cay * static_cast<float>(vertices[1].v);
  const float dudy = +abx * static_cast<float>(vertices[2].u) + bcx * static_cast<float>(vertices[0].u) +
                     cax * static_cast<float>(vertices[1].u);
  const float area = bcx * cay - bcy * cax;
  const s32 texArea = (vertices[1].u - vertices[0].u) * (vertices[2].v - vertices[0].v) -
                      (vertices[2].u - vertices[0].u) * (vertices[1].v - vertices[0].v);

  // Doesn't matter. Degenerate primitive: keep whatever mode the batch is already in.
  if (area == 0.0f || texArea == 0)
    return m_batch.sprite_mode;

  const float rcp_area = 1.0f / area;
  const bool zero_dudy = ((dudy * rcp_area) == 0.0f);
  const bool zero_dvdx = ((dvdx * rcp_area) == 0.0f);
  return (zero_dudy && zero_dvdx);
}

ALWAYS_INLINE_RELEASE bool GPU_HW::ExpandLineTriangles(BatchVertex* vertices)
{
  // Line expansion inspired by beetle-psx.
  // vshort/vlong end up pointing at the two non-corner vertices of the detected line-like triangle;
  // the corner vertex plus these two are mirrored into a quad below.
  BatchVertex *vshort, *vlong;
  bool vertical, horizontal;

  if (m_line_detect_mode == GPULineDetectMode::BasicTriangles)
  {
    // Given a tall/one-pixel-wide triangle, determine which vertex is the corner with axis-aligned edges.
    // Two vertices sharing identical UVs mark the short (one-pixel) edge.
    BatchVertex* vcorner;
    if (vertices[0].u == vertices[1].u && vertices[0].v == vertices[1].v)
    {
      // A,B,C
      vcorner = &vertices[0];
      vshort = &vertices[1];
      vlong = &vertices[2];
    }
    else if (vertices[1].u == vertices[2].u && vertices[1].v == vertices[2].v)
    {
      // B,C,A
      vcorner = &vertices[1];
      vshort = &vertices[2];
      vlong = &vertices[0];
    }
    else if (vertices[2].u == vertices[0].u && vertices[2].v == vertices[0].v)
    {
      // C,A,B
      vcorner = &vertices[2];
      vshort = &vertices[0];
      vlong = &vertices[1];
    }
    else
    {
      return false;
    }

    // Determine line direction. Vertical lines will have a width of 1, horizontal lines a height of 1.
    vertical = ((vcorner->y == vshort->y) && (std::abs(vcorner->x - vshort->x) == 1.0f));
    horizontal = ((vcorner->x == vshort->x) && (std::abs(vcorner->y - vshort->y) == 1.0f));
    if (vertical)
    {
      // Line should be vertical. Make sure the triangle is actually a right angle.
      if (vshort->x == vlong->x)
        std::swap(vshort, vcorner);
      else if (vcorner->x != vlong->x)
        return false;

      GL_INS_FMT("Vertical line from Y={} to {}", vcorner->y, vlong->y);
    }
    else if (horizontal)
    {
      // Line should be horizontal. Make sure the triangle is actually a right angle.
      if (vshort->y == vlong->y)
        std::swap(vshort, vcorner);
      else if (vcorner->y != vlong->y)
        return false;

      GL_INS_FMT("Horizontal line from X={} to {}", vcorner->x, vlong->x);
    }
    else
    {
      // Not a line-like triangle.
      return false;
    }

    // We could adjust the short texture coordinate to +1 from its original position, rather than leaving it the same.
    // However, since the texture is unlikely to be a higher resolution than the one-wide triangle, there would be no
    // benefit in doing so.
  }
  else
  {
    DebugAssert(m_line_detect_mode == GPULineDetectMode::AggressiveTriangles);

    // Find direction of line based on horizontal position.
    BatchVertex *va, *vb, *vc;
    if (vertices[0].x == vertices[1].x)
    {
      va = &vertices[0];
      vb = &vertices[1];
      vc = &vertices[2];
    }
    else if (vertices[1].x == vertices[2].x)
    {
      va = &vertices[1];
      vb = &vertices[2];
      vc = &vertices[0];
    }
    else if (vertices[2].x == vertices[0].x)
    {
      va = &vertices[2];
      vb = &vertices[0];
      vc = &vertices[1];
    }
    else
    {
      return false;
    }

    // Determine line direction. Vertical lines will have a width of 1, horizontal lines a height of 1.
    vertical = (std::abs(va->x - vc->x) == 1.0f);
    horizontal = (std::abs(va->y - vb->y) == 1.0f);
    if (!vertical && !horizontal)
      return false;

    // Determine which vertex is the right angle, based on the vertical position.
    const BatchVertex* vcorner;
    if (va->y == vc->y)
      vcorner = va;
    else if (vb->y == vc->y)
      vcorner = vb;
    else
      return false;

    // Find short/long edge of the triangle.
    BatchVertex* vother = ((vcorner == va) ? vb : va);
    vshort = horizontal ? vother : vc;
    vlong = vertical ? vother : vc;

    // Dark Forces draws its gun sprite vertically, but rotated compared to the sprite data in VRAM.
    // Therefore the difference in V should be ignored.
    vshort->u = vcorner->u;
    vshort->v = vcorner->v;
  }

  // Need to write the 4th vertex.
  // Synthesize the 4th vertex of the expanded quad by combining the long vertex with the short edge's
  // coordinate on the line's minor axis.
  DebugAssert(m_batch_vertex_space >= 1);
  BatchVertex* last = &(vertices[3] = *vlong);
  last->x = vertical ? vshort->x : vlong->x;
  last->y = horizontal ? vshort->y : vlong->y;

  // Generate indices.
  const u32 base_vertex = m_batch_vertex_count;
  DebugAssert(m_batch_index_space >= 6);
  *(m_batch_index_ptr++) = Truncate16(base_vertex);
  *(m_batch_index_ptr++) = Truncate16(base_vertex + 1);
  *(m_batch_index_ptr++) = Truncate16(base_vertex + 2);
  *(m_batch_index_ptr++) = Truncate16(base_vertex + (vshort - vertices));
  *(m_batch_index_ptr++) = Truncate16(base_vertex + (vlong - vertices));
  *(m_batch_index_ptr++) = Truncate16(base_vertex + 3);
  m_batch_index_count += 6;
  m_batch_index_space -= 6;

  // Upload vertices.
  DebugAssert(m_batch_vertex_space >= 4);
  std::memcpy(m_batch_vertex_ptr, vertices, sizeof(BatchVertex) * 4);
  m_batch_vertex_ptr += 4;
  m_batch_vertex_count += 4;
  m_batch_vertex_space -= 4;
  return true;
}

void GPU_HW::ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices)
{
  DebugAssert(num_vertices == 3 || num_vertices == 4);

  // U/V pairs are packed as two u16s; min/max them component-wise across the polygon.
  GSVector2i v0 = GSVector2i::load32(&vertices[0].u);
  GSVector2i v1 = GSVector2i::load32(&vertices[1].u);
  GSVector2i v2 = GSVector2i::load32(&vertices[2].u);
  GSVector2i v3;
  GSVector2i min = v0.min_u16(v1).min_u16(v2);
  GSVector2i max = v0.max_u16(v1).max_u16(v2);
  if (num_vertices == 4)
  {
    v3 = GSVector2i::load32(&vertices[3].u);
    min = min.min_u16(v3);
    max = max.max_u16(v3);
  }

  u32 min_u = min.extract16<0>();
  u32 min_v = min.extract16<1>();
  u32 max_u = max.extract16<0>();
  u32 max_v = max.extract16<1>();
  // When the range is non-degenerate, pull the max in by one texel — presumably so interpolation at the
  // far edge cannot sample past the intended area (verify against the shader's use of the UV limits).
  max_u = (min_u != max_u) ? (max_u - 1) : max_u;
  max_v = (min_v != max_v) ? (max_v - 1) : max_v;

  for (u32 i = 0; i < num_vertices; i++)
    vertices[i].SetUVLimits(min_u, max_u, min_v, max_v);

  // If the texture page has pending dirty regions, check whether this UV rect touches them.
  if (m_texpage_dirty != 0)
    CheckForTexPageOverlap(GSVector4i(min).upl32(GSVector4i(max)).u16to32());
}

void GPU_HW::SetBatchDepthBuffer(bool enabled)
{
  if (m_batch.use_depth_buffer == enabled)
    return;

  // Depth-buffer usage is baked into the pipeline selection, so a pending batch must be flushed first.
  if (m_batch_index_count > 0)
  {
    FlushRender();
    EnsureVertexBufferSpaceForCurrentCommand();
  }

  m_batch.use_depth_buffer = enabled;
}

void GPU_HW::CheckForDepthClear(const BatchVertex* vertices, u32 num_vertices)
{
  DebugAssert(num_vertices == 3 || num_vertices == 4);
  // Average the (clamped) W across the primitive; a large forward jump relative to the last primitive
  // is treated as a scene change and triggers a depth copy + clear.
  float average_z;
  if (num_vertices == 3)
    average_z = std::min((vertices[0].w + vertices[1].w + vertices[2].w) / 3.0f, 1.0f);
  else
    average_z = std::min((vertices[0].w + vertices[1].w + vertices[2].w + vertices[3].w) / 4.0f, 1.0f);

  if ((average_z - m_last_depth_z) >= g_settings.gpu_pgxp_depth_clear_threshold)
  {
    FlushRender();
    CopyAndClearDepthBuffer();
    EnsureVertexBufferSpaceForCurrentCommand();
  }

  m_last_depth_z = average_z;
}

void GPU_HW::SetBatchSpriteMode(bool enabled)
{
  if (m_batch.sprite_mode == enabled)
    return;

  // Sprite mode selects a different pipeline variant, so flush any in-flight batch before switching.
  if (m_batch_index_count > 0)
  {
    FlushRender();
    EnsureVertexBufferSpaceForCurrentCommand();
  }

  GL_INS_FMT("Sprite mode is now {}", enabled ?
"ON" : "OFF"); 2232 2233 m_batch.sprite_mode = enabled; 2234 } 2235 2236 void GPU_HW::DrawLine(const GSVector4 bounds, u32 col0, u32 col1, float depth) 2237 { 2238 DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6); 2239 2240 const float x0 = bounds.x; 2241 const float y0 = bounds.y; 2242 const float x1 = bounds.z; 2243 const float y1 = bounds.w; 2244 2245 const float dx = x1 - x0; 2246 const float dy = y1 - y0; 2247 if (dx == 0.0f && dy == 0.0f) 2248 { 2249 // Degenerate, render a point. 2250 (m_batch_vertex_ptr++)->Set(x0, y0, depth, 1.0f, col0, 0, 0, 0); 2251 (m_batch_vertex_ptr++)->Set(x0 + 1.0f, y0, depth, 1.0f, col0, 0, 0, 0); 2252 (m_batch_vertex_ptr++)->Set(x1, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0); 2253 (m_batch_vertex_ptr++)->Set(x1 + 1.0f, y1 + 1.0f, depth, 1.0f, col0, 0, 0, 0); 2254 } 2255 else 2256 { 2257 const float abs_dx = std::fabs(dx); 2258 const float abs_dy = std::fabs(dy); 2259 float fill_dx, fill_dy; 2260 float pad_x0 = 0.0f; 2261 float pad_x1 = 0.0f; 2262 float pad_y0 = 0.0f; 2263 float pad_y1 = 0.0f; 2264 2265 // Check for vertical or horizontal major lines. 2266 // When expanding to a rect, do so in the appropriate direction. 2267 // FIXME: This scheme seems to kinda work, but it seems very hard to find a method 2268 // that looks perfect on every game. 2269 // Vagrant Story speech bubbles are a very good test case here! 
2270 if (abs_dx > abs_dy) 2271 { 2272 fill_dx = 0.0f; 2273 fill_dy = 1.0f; 2274 const float dydk = dy / abs_dx; 2275 2276 if (dx > 0.0f) 2277 { 2278 // Right 2279 pad_x1 = 1.0f; 2280 pad_y1 = dydk; 2281 } 2282 else 2283 { 2284 // Left 2285 pad_x0 = 1.0f; 2286 pad_y0 = -dydk; 2287 } 2288 } 2289 else 2290 { 2291 fill_dx = 1.0f; 2292 fill_dy = 0.0f; 2293 const float dxdk = dx / abs_dy; 2294 2295 if (dy > 0.0f) 2296 { 2297 // Down 2298 pad_y1 = 1.0f; 2299 pad_x1 = dxdk; 2300 } 2301 else 2302 { 2303 // Up 2304 pad_y0 = 1.0f; 2305 pad_x0 = -dxdk; 2306 } 2307 } 2308 2309 const float ox0 = x0 + pad_x0; 2310 const float oy0 = y0 + pad_y0; 2311 const float ox1 = x1 + pad_x1; 2312 const float oy1 = y1 + pad_y1; 2313 2314 (m_batch_vertex_ptr++)->Set(ox0, oy0, depth, 1.0f, col0, 0, 0, 0); 2315 (m_batch_vertex_ptr++)->Set(ox0 + fill_dx, oy0 + fill_dy, depth, 1.0f, col0, 0, 0, 0); 2316 (m_batch_vertex_ptr++)->Set(ox1, oy1, depth, 1.0f, col1, 0, 0, 0); 2317 (m_batch_vertex_ptr++)->Set(ox1 + fill_dx, oy1 + fill_dy, depth, 1.0f, col1, 0, 0, 0); 2318 } 2319 2320 const u32 start_index = m_batch_vertex_count; 2321 m_batch_vertex_count += 4; 2322 m_batch_vertex_space -= 4; 2323 2324 *(m_batch_index_ptr++) = Truncate16(start_index + 0); 2325 *(m_batch_index_ptr++) = Truncate16(start_index + 1); 2326 *(m_batch_index_ptr++) = Truncate16(start_index + 2); 2327 *(m_batch_index_ptr++) = Truncate16(start_index + 3); 2328 *(m_batch_index_ptr++) = Truncate16(start_index + 2); 2329 *(m_batch_index_ptr++) = Truncate16(start_index + 1); 2330 m_batch_index_count += 6; 2331 m_batch_index_space -= 6; 2332 } 2333 2334 void GPU_HW::LoadVertices() 2335 { 2336 if (m_GPUSTAT.check_mask_before_draw) 2337 m_current_depth++; 2338 2339 const GPURenderCommand rc{m_render_command.bits}; 2340 const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg.bits) << 16); 2341 const float depth = GetCurrentNormalizedVertexDepth(); 2342 2343 switch (rc.primitive) 2344 { 2345 case 
GPUPrimitive::Polygon: 2346 { 2347 const bool textured = rc.texture_enable; 2348 const bool raw_texture = textured && rc.raw_texture_enable; 2349 const bool shaded = rc.shading_enable; 2350 const bool pgxp = g_settings.gpu_pgxp_enable; 2351 2352 const u32 first_color = rc.color_for_first_vertex; 2353 u32 num_vertices = rc.quad_polygon ? 4 : 3; 2354 std::array<BatchVertex, 4> vertices; 2355 std::array<GSVector2i, 4> native_vertex_positions; 2356 std::array<u16, 4> native_texcoords; 2357 bool valid_w = g_settings.gpu_pgxp_texture_correction; 2358 for (u32 i = 0; i < num_vertices; i++) 2359 { 2360 const u32 vert_color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; 2361 const u32 color = raw_texture ? UINT32_C(0x00808080) : vert_color; 2362 const u64 maddr_and_pos = m_fifo.Pop(); 2363 const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; 2364 const u16 texcoord = textured ? Truncate16(FifoPop()) : 0; 2365 const s32 native_x = native_vertex_positions[i].x = m_drawing_offset.x + vp.x; 2366 const s32 native_y = native_vertex_positions[i].y = m_drawing_offset.y + vp.y; 2367 native_texcoords[i] = texcoord; 2368 vertices[i].Set(static_cast<float>(native_x), static_cast<float>(native_y), depth, 1.0f, color, texpage, 2369 texcoord, 0xFFFF0000u); 2370 2371 if (pgxp) 2372 { 2373 valid_w &= CPU::PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, native_x, native_y, 2374 m_drawing_offset.x, m_drawing_offset.y, &vertices[i].x, &vertices[i].y, 2375 &vertices[i].w); 2376 } 2377 } 2378 if (pgxp) 2379 { 2380 if (!valid_w) 2381 { 2382 SetBatchDepthBuffer(false); 2383 if (g_settings.gpu_pgxp_disable_2d) 2384 { 2385 // NOTE: This reads uninitialized data, but it's okay, it doesn't get used. 
2386 for (size_t i = 0; i < vertices.size(); i++) 2387 { 2388 BatchVertex& v = vertices[i]; 2389 v.x = static_cast<float>(native_vertex_positions[i].x); 2390 v.y = static_cast<float>(native_vertex_positions[i].y); 2391 v.w = 1.0f; 2392 } 2393 } 2394 else 2395 { 2396 for (BatchVertex& v : vertices) 2397 v.w = 1.0f; 2398 } 2399 } 2400 else if (m_pgxp_depth_buffer) 2401 { 2402 SetBatchDepthBuffer(true); 2403 CheckForDepthClear(vertices.data(), num_vertices); 2404 } 2405 } 2406 2407 // Use PGXP to exclude primitives that are definitely 3D. 2408 const bool is_3d = (vertices[0].w != vertices[1].w || vertices[0].w != vertices[2].w); 2409 if (m_resolution_scale > 1 && !is_3d && rc.quad_polygon) 2410 HandleFlippedQuadTextureCoordinates(vertices.data()); 2411 else if (m_allow_sprite_mode) 2412 SetBatchSpriteMode((pgxp && !is_3d) || IsPossibleSpritePolygon(vertices.data())); 2413 2414 if (m_sw_renderer) 2415 { 2416 GPUBackendDrawPolygonCommand* cmd = m_sw_renderer->NewDrawPolygonCommand(num_vertices); 2417 FillDrawCommand(cmd, rc); 2418 2419 const u32 sw_num_vertices = rc.quad_polygon ? 4 : 3; 2420 for (u32 i = 0; i < sw_num_vertices; i++) 2421 { 2422 GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; 2423 vert->x = native_vertex_positions[i].x; 2424 vert->y = native_vertex_positions[i].y; 2425 vert->texcoord = native_texcoords[i]; 2426 vert->color = vertices[i].color; 2427 } 2428 2429 m_sw_renderer->PushCommand(cmd); 2430 } 2431 2432 // Cull polygons which are too large. 
2433 const GSVector2 v0f = GSVector2::load(&vertices[0].x); 2434 const GSVector2 v1f = GSVector2::load(&vertices[1].x); 2435 const GSVector2 v2f = GSVector2::load(&vertices[2].x); 2436 const GSVector2 min_pos_12 = v1f.min(v2f); 2437 const GSVector2 max_pos_12 = v1f.max(v2f); 2438 const GSVector4i draw_rect_012 = GSVector4i(GSVector4(min_pos_12.min(v0f)).upld(GSVector4(max_pos_12.max(v0f)))) 2439 .add32(GSVector4i::cxpr(0, 0, 1, 1)); 2440 const GSVector4i clamped_draw_rect_012 = draw_rect_012.rintersect(m_clamped_drawing_area); 2441 const bool first_tri_culled = (draw_rect_012.width() > MAX_PRIMITIVE_WIDTH || 2442 draw_rect_012.height() > MAX_PRIMITIVE_HEIGHT || clamped_draw_rect_012.rempty()); 2443 if (first_tri_culled) 2444 { 2445 GL_INS_FMT("Culling off-screen/too-large polygon: {},{} {},{} {},{}", native_vertex_positions[0].x, 2446 native_vertex_positions[0].y, native_vertex_positions[1].x, native_vertex_positions[1].y, 2447 native_vertex_positions[2].x, native_vertex_positions[2].y); 2448 2449 if (!rc.quad_polygon) 2450 return; 2451 } 2452 else 2453 { 2454 if (textured && m_compute_uv_range) 2455 ComputePolygonUVLimits(vertices.data(), num_vertices); 2456 2457 AddDrawnRectangle(clamped_draw_rect_012); 2458 AddDrawTriangleTicks(native_vertex_positions[0], native_vertex_positions[1], native_vertex_positions[2], 2459 rc.shading_enable, rc.texture_enable, rc.transparency_enable); 2460 2461 // Expand lines to triangles (Doom, Soul Blade, etc.) 
2462 if (!rc.quad_polygon && m_line_detect_mode >= GPULineDetectMode::BasicTriangles && !is_3d && 2463 ExpandLineTriangles(vertices.data())) 2464 { 2465 return; 2466 } 2467 2468 const u32 start_index = m_batch_vertex_count; 2469 DebugAssert(m_batch_index_space >= 3); 2470 *(m_batch_index_ptr++) = Truncate16(start_index); 2471 *(m_batch_index_ptr++) = Truncate16(start_index + 1); 2472 *(m_batch_index_ptr++) = Truncate16(start_index + 2); 2473 m_batch_index_count += 3; 2474 m_batch_index_space -= 3; 2475 } 2476 2477 // quads 2478 if (rc.quad_polygon) 2479 { 2480 const GSVector2 v3f = GSVector2::load(&vertices[3].x); 2481 const GSVector4i draw_rect_123 = GSVector4i(GSVector4(min_pos_12.min(v3f)).upld(GSVector4(max_pos_12.max(v3f)))) 2482 .add32(GSVector4i::cxpr(0, 0, 1, 1)); 2483 const GSVector4i clamped_draw_rect_123 = draw_rect_123.rintersect(m_clamped_drawing_area); 2484 2485 // Cull polygons which are too large. 2486 const bool second_tri_culled = 2487 (draw_rect_123.width() > MAX_PRIMITIVE_WIDTH || draw_rect_123.height() > MAX_PRIMITIVE_HEIGHT || 2488 clamped_draw_rect_123.rempty()); 2489 if (second_tri_culled) 2490 { 2491 GL_INS_FMT("Culling off-screen/too-large polygon (quad second half): {},{} {},{} {},{}", 2492 native_vertex_positions[2].x, native_vertex_positions[2].y, native_vertex_positions[1].x, 2493 native_vertex_positions[1].y, native_vertex_positions[0].x, native_vertex_positions[0].y); 2494 2495 if (first_tri_culled) 2496 return; 2497 } 2498 else 2499 { 2500 if (first_tri_culled && textured && m_compute_uv_range) 2501 ComputePolygonUVLimits(vertices.data(), num_vertices); 2502 2503 AddDrawnRectangle(clamped_draw_rect_123); 2504 AddDrawTriangleTicks(native_vertex_positions[2], native_vertex_positions[1], native_vertex_positions[3], 2505 rc.shading_enable, rc.texture_enable, rc.transparency_enable); 2506 2507 const u32 start_index = m_batch_vertex_count; 2508 DebugAssert(m_batch_index_space >= 3); 2509 *(m_batch_index_ptr++) = Truncate16(start_index + 
2); 2510 *(m_batch_index_ptr++) = Truncate16(start_index + 1); 2511 *(m_batch_index_ptr++) = Truncate16(start_index + 3); 2512 m_batch_index_count += 3; 2513 m_batch_index_space -= 3; 2514 } 2515 } 2516 2517 if (num_vertices == 4) 2518 { 2519 DebugAssert(m_batch_vertex_space >= 4); 2520 std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 4); 2521 m_batch_vertex_ptr += 4; 2522 m_batch_vertex_count += 4; 2523 m_batch_vertex_space -= 4; 2524 } 2525 else 2526 { 2527 DebugAssert(m_batch_vertex_space >= 3); 2528 std::memcpy(m_batch_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3); 2529 m_batch_vertex_ptr += 3; 2530 m_batch_vertex_count += 3; 2531 m_batch_vertex_space -= 3; 2532 } 2533 } 2534 break; 2535 2536 case GPUPrimitive::Rectangle: 2537 { 2538 const u32 color = (rc.texture_enable && rc.raw_texture_enable) ? UINT32_C(0x00808080) : rc.color_for_first_vertex; 2539 const GPUVertexPosition vp{FifoPop()}; 2540 const s32 pos_x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); 2541 const s32 pos_y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); 2542 2543 const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? 
Truncate16(FifoPop()) : 0); 2544 u32 orig_tex_left = ZeroExtend16(texcoord_x); 2545 u32 orig_tex_top = ZeroExtend16(texcoord_y); 2546 u32 rectangle_width; 2547 u32 rectangle_height; 2548 switch (rc.rectangle_size) 2549 { 2550 case GPUDrawRectangleSize::R1x1: 2551 rectangle_width = 1; 2552 rectangle_height = 1; 2553 break; 2554 case GPUDrawRectangleSize::R8x8: 2555 rectangle_width = 8; 2556 rectangle_height = 8; 2557 break; 2558 case GPUDrawRectangleSize::R16x16: 2559 rectangle_width = 16; 2560 rectangle_height = 16; 2561 break; 2562 default: 2563 { 2564 const u32 width_and_height = FifoPop(); 2565 rectangle_width = (width_and_height & VRAM_WIDTH_MASK); 2566 rectangle_height = ((width_and_height >> 16) & VRAM_HEIGHT_MASK); 2567 } 2568 break; 2569 } 2570 2571 const GSVector4i rect = 2572 GSVector4i(pos_x, pos_y, pos_x + static_cast<s32>(rectangle_width), pos_y + static_cast<s32>(rectangle_height)); 2573 const GSVector4i clamped_rect = m_clamped_drawing_area.rintersect(rect); 2574 if (clamped_rect.rempty()) [[unlikely]] 2575 { 2576 GL_INS_FMT("Culling off-screen rectangle {}", rect); 2577 return; 2578 } 2579 2580 // we can split the rectangle up into potentially 8 quads 2581 SetBatchDepthBuffer(false); 2582 SetBatchSpriteMode(m_allow_sprite_mode); 2583 DebugAssert(m_batch_vertex_space >= MAX_VERTICES_FOR_RECTANGLE && 2584 m_batch_index_space >= MAX_VERTICES_FOR_RECTANGLE); 2585 2586 // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat. 
2587 u32 tex_top = orig_tex_top; 2588 for (u32 y_offset = 0; y_offset < rectangle_height;) 2589 { 2590 const s32 quad_height = std::min(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top); 2591 const float quad_start_y = static_cast<float>(pos_y + static_cast<s32>(y_offset)); 2592 const float quad_end_y = quad_start_y + static_cast<float>(quad_height); 2593 const u32 tex_bottom = tex_top + quad_height; 2594 2595 u32 tex_left = orig_tex_left; 2596 for (u32 x_offset = 0; x_offset < rectangle_width;) 2597 { 2598 const s32 quad_width = std::min(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left); 2599 const float quad_start_x = static_cast<float>(pos_x + static_cast<s32>(x_offset)); 2600 const float quad_end_x = quad_start_x + static_cast<float>(quad_width); 2601 const u32 tex_right = tex_left + quad_width; 2602 const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1); 2603 2604 if (rc.texture_enable && m_texpage_dirty != 0) 2605 { 2606 CheckForTexPageOverlap(GSVector4i(static_cast<s32>(tex_left), static_cast<s32>(tex_top), 2607 static_cast<s32>(tex_right), static_cast<s32>(tex_bottom))); 2608 } 2609 2610 const u32 base_vertex = m_batch_vertex_count; 2611 (m_batch_vertex_ptr++) 2612 ->Set(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_top), 2613 uv_limits); 2614 (m_batch_vertex_ptr++) 2615 ->Set(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_top), 2616 uv_limits); 2617 (m_batch_vertex_ptr++) 2618 ->Set(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_left), Truncate16(tex_bottom), 2619 uv_limits); 2620 (m_batch_vertex_ptr++) 2621 ->Set(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, Truncate16(tex_right), Truncate16(tex_bottom), 2622 uv_limits); 2623 m_batch_vertex_count += 4; 2624 m_batch_vertex_space -= 4; 2625 2626 *(m_batch_index_ptr++) = Truncate16(base_vertex + 0); 2627 
*(m_batch_index_ptr++) = Truncate16(base_vertex + 1); 2628 *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); 2629 *(m_batch_index_ptr++) = Truncate16(base_vertex + 2); 2630 *(m_batch_index_ptr++) = Truncate16(base_vertex + 1); 2631 *(m_batch_index_ptr++) = Truncate16(base_vertex + 3); 2632 m_batch_index_count += 6; 2633 m_batch_index_space -= 6; 2634 2635 x_offset += quad_width; 2636 tex_left = 0; 2637 } 2638 2639 y_offset += quad_height; 2640 tex_top = 0; 2641 } 2642 2643 AddDrawnRectangle(clamped_rect); 2644 AddDrawRectangleTicks(clamped_rect, rc.texture_enable, rc.transparency_enable); 2645 2646 if (m_sw_renderer) 2647 { 2648 GPUBackendDrawRectangleCommand* cmd = m_sw_renderer->NewDrawRectangleCommand(); 2649 FillDrawCommand(cmd, rc); 2650 cmd->color = color; 2651 cmd->x = pos_x; 2652 cmd->y = pos_y; 2653 cmd->width = static_cast<u16>(rectangle_width); 2654 cmd->height = static_cast<u16>(rectangle_height); 2655 cmd->texcoord = (static_cast<u16>(texcoord_y) << 8) | static_cast<u16>(texcoord_x); 2656 m_sw_renderer->PushCommand(cmd); 2657 } 2658 } 2659 break; 2660 2661 case GPUPrimitive::Line: 2662 { 2663 SetBatchDepthBuffer(false); 2664 2665 if (!rc.polyline) 2666 { 2667 DebugAssert(m_batch_vertex_space >= 4 && m_batch_index_space >= 6); 2668 2669 u32 start_color, end_color; 2670 GPUVertexPosition start_pos, end_pos; 2671 if (rc.shading_enable) 2672 { 2673 start_color = rc.color_for_first_vertex; 2674 start_pos.bits = FifoPop(); 2675 end_color = FifoPop() & UINT32_C(0x00FFFFFF); 2676 end_pos.bits = FifoPop(); 2677 } 2678 else 2679 { 2680 start_color = end_color = rc.color_for_first_vertex; 2681 start_pos.bits = FifoPop(); 2682 end_pos.bits = FifoPop(); 2683 } 2684 2685 const GSVector4i vstart_pos = GSVector4i(start_pos.x + m_drawing_offset.x, start_pos.y + m_drawing_offset.y); 2686 const GSVector4i vend_pos = GSVector4i(end_pos.x + m_drawing_offset.x, end_pos.y + m_drawing_offset.y); 2687 const GSVector4i bounds = vstart_pos.xyxy(vend_pos); 2688 const 
GSVector4i rect = 2689 vstart_pos.min_i32(vend_pos).xyxy(vstart_pos.max_i32(vend_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); 2690 const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); 2691 2692 if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) 2693 { 2694 GL_INS_FMT("Culling too-large/off-screen line: {},{} - {},{}", bounds.x, bounds.y, bounds.z, bounds.w); 2695 return; 2696 } 2697 2698 AddDrawnRectangle(clamped_rect); 2699 AddDrawLineTicks(clamped_rect, rc.shading_enable); 2700 2701 // TODO: Should we do a PGXP lookup here? Most lines are 2D. 2702 DrawLine(GSVector4(bounds), start_color, end_color, depth); 2703 2704 if (m_sw_renderer) 2705 { 2706 GPUBackendDrawLineCommand* cmd = m_sw_renderer->NewDrawLineCommand(2); 2707 FillDrawCommand(cmd, rc); 2708 GSVector4i::storel(&cmd->vertices[0], bounds); 2709 cmd->vertices[0].color = start_color; 2710 GSVector4i::storeh(&cmd->vertices[1], bounds); 2711 cmd->vertices[1].color = end_color; 2712 m_sw_renderer->PushCommand(cmd); 2713 } 2714 } 2715 else 2716 { 2717 // Multiply by two because we don't use line strips. 
2718 const u32 num_vertices = GetPolyLineVertexCount(); 2719 DebugAssert(m_batch_vertex_space >= (num_vertices * 4) && m_batch_index_space >= (num_vertices * 6)); 2720 2721 const bool shaded = rc.shading_enable; 2722 2723 u32 buffer_pos = 0; 2724 const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; 2725 GSVector4i start_pos = GSVector4i(start_vp.x + m_drawing_offset.x, start_vp.y + m_drawing_offset.y); 2726 u32 start_color = rc.color_for_first_vertex; 2727 2728 GPUBackendDrawLineCommand* cmd; 2729 if (m_sw_renderer) 2730 { 2731 cmd = m_sw_renderer->NewDrawLineCommand(num_vertices); 2732 FillDrawCommand(cmd, rc); 2733 GSVector4i::storel(&cmd->vertices[0].x, start_pos); 2734 cmd->vertices[0].color = start_color; 2735 } 2736 else 2737 { 2738 cmd = nullptr; 2739 } 2740 2741 for (u32 i = 1; i < num_vertices; i++) 2742 { 2743 const u32 end_color = shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : start_color; 2744 const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; 2745 const GSVector4i end_pos = GSVector4i(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y); 2746 const GSVector4i bounds = start_pos.xyxy(end_pos); 2747 const GSVector4i rect = 2748 start_pos.min_i32(end_pos).xyxy(start_pos.max_i32(end_pos)).add32(GSVector4i::cxpr(0, 0, 1, 1)); 2749 const GSVector4i clamped_rect = rect.rintersect(m_clamped_drawing_area); 2750 if (rect.width() > MAX_PRIMITIVE_WIDTH || rect.height() > MAX_PRIMITIVE_HEIGHT || clamped_rect.rempty()) 2751 { 2752 GL_INS_FMT("Culling too-large line: {},{} - {},{}", start_pos.x, start_pos.y, end_pos.x, end_pos.y); 2753 } 2754 else 2755 { 2756 AddDrawnRectangle(clamped_rect); 2757 AddDrawLineTicks(clamped_rect, rc.shading_enable); 2758 2759 // TODO: Should we do a PGXP lookup here? Most lines are 2D. 
            // Append the clipped line segment to the hardware batch as an expanded quad.
            DrawLine(GSVector4(bounds), start_color, end_color, depth);
          }

          // The current endpoint becomes the start of the next polyline segment.
          start_pos = end_pos;
          start_color = end_color;

          // Mirror the vertex into the software-renderer command, if one is being built.
          if (cmd)
          {
            GSVector4i::storel(&cmd->vertices[i], end_pos);
            cmd->vertices[i].color = end_color;
          }
        }

        if (cmd)
          m_sw_renderer->PushCommand(cmd);
      }
    }
    break;

    default:
      UnreachableCode();
      break;
  }
}

// Uploads a texture replacement image to a staging texture and draws it over the
// given (scaled) VRAM region. Returns false if the staging texture could not be
// allocated or updated.
bool GPU_HW::BlitVRAMReplacementTexture(const TextureReplacements::ReplacementImage* tex, u32 dst_x, u32 dst_y,
                                        u32 width, u32 height)
{
  // (Re)allocate the staging texture when it is missing, too small, or the device
  // reports it prefers freshly-fetched textures over in-place updates.
  if (!m_vram_replacement_texture || m_vram_replacement_texture->GetWidth() < tex->GetWidth() ||
      m_vram_replacement_texture->GetHeight() < tex->GetHeight() || g_gpu_device->GetFeatures().prefer_unused_textures)
  {
    g_gpu_device->RecycleTexture(std::move(m_vram_replacement_texture));

    if (!(m_vram_replacement_texture =
            g_gpu_device->FetchTexture(tex->GetWidth(), tex->GetHeight(), 1, 1, 1, GPUTexture::Type::DynamicTexture,
                                       GPUTexture::Format::RGBA8, tex->GetPixels(), tex->GetPitch())))
    {
      return false;
    }
  }
  else
  {
    // Existing staging texture is large enough; just refresh its contents.
    if (!m_vram_replacement_texture->Update(0, 0, tex->GetWidth(), tex->GetHeight(), tex->GetPixels(), tex->GetPitch()))
    {
      ERROR_LOG("Update {}x{} texture failed.", width, height);
      return false;
    }
  }

  GL_SCOPE_FMT("BlitVRAMReplacementTexture() {}x{} to {},{} => {},{} ({}x{})", tex->GetWidth(), tex->GetHeight(), dst_x,
               dst_y, dst_x + width, dst_y + height, width, height);

  // Normalized source rectangle: the staging texture may be larger than the image,
  // so scale the extents down to the used portion.
  const float src_rect[4] = {
    0.0f, 0.0f, static_cast<float>(tex->GetWidth()) / static_cast<float>(m_vram_replacement_texture->GetWidth()),
    static_cast<float>(tex->GetHeight()) / static_cast<float>(m_vram_replacement_texture->GetHeight())};

  g_gpu_device->SetTextureSampler(0, m_vram_replacement_texture.get(), g_gpu_device->GetLinearSampler());
  g_gpu_device->SetPipeline(m_vram_write_replacement_pipeline.get());
  g_gpu_device->SetViewportAndScissor(dst_x, dst_y, width, height);
  g_gpu_device->PushUniformBuffer(src_rect, sizeof(src_rect));
  // Fullscreen-triangle style draw: 3 vertices, no vertex buffer.
  g_gpu_device->Draw(3, 0);

  RestoreDeviceContext();
  return true;
}

// Accumulates the VRAM area sampled by the current batch (m_current_uv_rect) and,
// when that area grows to overlap a dirty drawn/written region, flushes pending
// geometry and refreshes the VRAM read texture.
ALWAYS_INLINE_RELEASE void GPU_HW::CheckForTexPageOverlap(GSVector4i uv_rect)
{
  DebugAssert(m_texpage_dirty != 0 && m_batch.texture_mode != BatchTextureMode::Disabled);

  if (m_texture_window_active)
  {
    // Apply the texture window to the UV rect: AND with the window mask (xy),
    // OR with the window offset (zw).
    const GSVector4i twin = GSVector4i::load<false>(m_batch_ubo_data.u_texture_window);
    uv_rect = ((uv_rect & twin.xyxy()) | twin.zwzw());

    // Min could be greater than max after applying window, correct for it.
    uv_rect = uv_rect.min_i32(uv_rect.zwzw()).max_i32(uv_rect.xyxy());
  }

  // Palette modes pack multiple texels per 16-bit VRAM word, so the X extent is
  // shifted right (by 2 for 4-bit, 1 for 8-bit); direct 16-bit modes use UVs as-is.
  const GPUTextureMode tmode = m_draw_mode.mode_reg.texture_mode;
  const u32 xshift = (tmode >= GPUTextureMode::Direct16Bit) ? 0 : (2 - static_cast<u8>(tmode));
  const GSVector4i page_offset = GSVector4i::loadl(m_current_texture_page_offset).xyxy();

  uv_rect = uv_rect.blend32<5>(uv_rect.srl32(xshift));   // shift only goes on the x
  uv_rect = uv_rect.add32(page_offset);                  // page offset
  uv_rect = uv_rect.add32(GSVector4i::cxpr(0, 0, 1, 1)); // make exclusive
  uv_rect = uv_rect.rintersect(VRAM_SIZE_RECT);          // clamp to vram bounds

  const GSVector4i new_uv_rect = m_current_uv_rect.runion(uv_rect);

  // Only re-test against the dirty rects when the tracked UV area actually grows.
  if (!m_current_uv_rect.eq(new_uv_rect))
  {
    m_current_uv_rect = new_uv_rect;

    bool update_drawn = false, update_written = false;
    if (m_texpage_dirty & TEXPAGE_DIRTY_DRAWN_RECT)
    {
      DebugAssert(!m_vram_dirty_draw_rect.eq(INVALID_RECT));
      update_drawn = m_current_uv_rect.rintersects(m_vram_dirty_draw_rect);
      if (update_drawn)
      {
        GL_INS_FMT("Updating VRAM cache due to UV {} intersection with dirty DRAW {}", m_current_uv_rect,
                   m_vram_dirty_draw_rect);
      }
    }
    if (m_texpage_dirty & TEXPAGE_DIRTY_WRITTEN_RECT)
    {
      DebugAssert(!m_vram_dirty_write_rect.eq(INVALID_RECT));
      update_written = m_current_uv_rect.rintersects(m_vram_dirty_write_rect);
      if (update_written)
      {
        GL_INS_FMT("Updating VRAM cache due to UV {} intersection with dirty WRITE {}", m_current_uv_rect,
                   m_vram_dirty_write_rect);
      }
    }

    if (update_drawn || update_written)
    {
      // Draw out anything already batched first — it sampled the old texture data.
      if (m_batch_index_count > 0)
      {
        FlushRender();
        EnsureVertexBufferSpaceForCurrentCommand();
      }

      UpdateVRAMReadTexture(update_drawn, update_written);
    }
  }
}

// True when no batched geometry is pending submission.
ALWAYS_INLINE bool GPU_HW::IsFlushed() const
{
  return (m_batch_index_count == 0);
}

ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsTwoPassRendering() const
{
  // We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
  // on a per-pixel basis, and the opaque pixels shouldn't be blended at all.

  return (m_batch.texture_mode != BatchTextureMode::Disabled &&
          (m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground ||
           (!m_supports_dual_source_blend && m_batch.transparency_mode != GPUTransparencyMode::Disabled)));
}

// Returns true when the given transparency/texture-mode combination has to be
// blended in the fragment shader instead of fixed-function blending (e.g.
// subtractive blending, or no dual-source blend support with blended filtering).
ALWAYS_INLINE_RELEASE bool GPU_HW::NeedsShaderBlending(GPUTransparencyMode transparency, BatchTextureMode texture_mode,
                                                       bool check_mask) const
{
  return (m_allow_shader_blend &&
          ((check_mask && !m_write_mask_as_depth) ||
           (transparency != GPUTransparencyMode::Disabled && m_prefer_shader_blend) ||
           (transparency == GPUTransparencyMode::BackgroundMinusForeground) ||
           (!m_supports_dual_source_blend && texture_mode != BatchTextureMode::Disabled &&
            (transparency != GPUTransparencyMode::Disabled || IsBlendedTextureFiltering(m_texture_filtering) ||
             IsBlendedTextureFiltering(m_sprite_texture_filtering)))));
}

// Guarantees at least the requested vertex/index space in the mapped batch
// buffers, flushing the current batch and remapping if necessary.
void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices, u32 required_indices)
{
  if (m_batch_vertex_ptr)
  {
    if (m_batch_vertex_space >= required_vertices && m_batch_index_space >= required_indices)
      return;

    FlushRender();
  }

  MapGPUBuffer(required_vertices, required_indices);
}

// Reserves worst-case vertex/index space for the command currently being decoded,
// and restarts the per-vertex depth counter if it would overflow.
void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
{
  u32 required_vertices;
  u32 required_indices;
  switch (m_render_command.primitive)
  {
    case GPUPrimitive::Polygon:
      required_vertices = 4; // assume quad, in case of expansion
      required_indices = 6;
      break;
    case GPUPrimitive::Rectangle:
      required_vertices = MAX_VERTICES_FOR_RECTANGLE;
      // TODO: WRong — NOTE(review): split rectangles emit 6 indices per 4 vertices,
      // so reserving only MAX_VERTICES_FOR_RECTANGLE indices may under-reserve; verify.
      required_indices = MAX_VERTICES_FOR_RECTANGLE;
      break;
    case GPUPrimitive::Line:
    {
      // assume expansion
      const u32 vert_count = m_render_command.polyline ?
                               GetPolyLineVertexCount() : 2;
      // Each line segment expands to a quad: 4 vertices / 6 indices.
      required_vertices = vert_count * 4;
      required_indices = vert_count * 6;
    }
    break;

    default:
      UnreachableCode();
  }

  // can we fit these vertices in the current depth buffer range?
  if ((m_current_depth + required_vertices) > MAX_BATCH_VERTEX_COUNTER_IDS)
  {
    FlushRender();
    ResetBatchVertexDepth();
    MapGPUBuffer(required_vertices, required_indices);
    return;
  }

  EnsureVertexBufferSpace(required_vertices, required_indices);
}

// Flushes mask-bit state into the depth buffer (when mask is tracked as depth)
// and restarts the per-vertex depth counter.
void GPU_HW::ResetBatchVertexDepth()
{
  DEV_LOG("Resetting batch vertex depth");

  if (m_write_mask_as_depth)
    UpdateDepthBufferFromMaskBit();

  m_current_depth = 1;
}

// Maps the current draw counter to a normalized depth value in [0,1];
// a higher counter yields a smaller depth.
ALWAYS_INLINE float GPU_HW::GetCurrentNormalizedVertexDepth() const
{
  return 1.0f - (static_cast<float>(m_current_depth) / 65535.0f);
}

// Creates or destroys the software renderer used for readbacks based on the
// current setting, optionally seeding its VRAM and state from the hardware
// renderer when toggled on at runtime.
void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw)
{
  const bool current_enabled = (m_sw_renderer != nullptr);
  const bool new_enabled = g_settings.gpu_use_software_renderer_for_readbacks;
  if (current_enabled == new_enabled)
    return;

  if (!new_enabled)
  {
    if (m_sw_renderer)
      m_sw_renderer->Shutdown();
    m_sw_renderer.reset();
    return;
  }

  std::unique_ptr<GPU_SW_Backend> sw_renderer = std::make_unique<GPU_SW_Backend>();
  if (!sw_renderer->Initialize(true))
    return;

  // We need to fill in the SW renderer's VRAM with the current state for hot toggles.
  if (copy_vram_from_hw)
  {
    FlushRender();
    ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);

    // Sync the drawing area and CLUT.
    GPUBackendSetDrawingAreaCommand* clip_cmd = sw_renderer->NewSetDrawingAreaCommand();
    clip_cmd->new_area = m_drawing_area;
    sw_renderer->PushCommand(clip_cmd);

    if (IsCLUTValid())
    {
      GPUBackendUpdateCLUTCommand* clut_cmd = sw_renderer->NewUpdateCLUTCommand();
      FillBackendCommandParameters(clut_cmd);
      clut_cmd->reg.bits = static_cast<u16>(m_current_clut_reg_bits);
      clut_cmd->clut_is_8bit = m_current_clut_is_8bit;
      sw_renderer->PushCommand(clut_cmd);
    }
  }

  m_sw_renderer = std::move(sw_renderer);
}

// Copies the common GPUSTAT/CRTC-derived flags into a backend command header.
void GPU_HW::FillBackendCommandParameters(GPUBackendCommand* cmd) const
{
  cmd->params.bits = 0;
  cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw;
  cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing;
  cmd->params.active_line_lsb = m_crtc_state.active_line_lsb;
  cmd->params.interlaced_rendering = m_GPUSTAT.SkipDrawingToActiveField();
}

// Fills a backend draw command: common parameters plus the current render
// command bits, draw mode, palette, and texture window.
void GPU_HW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const
{
  FillBackendCommandParameters(cmd);
  cmd->rc.bits = rc.bits;
  cmd->draw_mode.bits = m_draw_mode.mode_reg.bits;
  cmd->palette.bits = m_draw_mode.palette_reg.bits;
  cmd->window = m_draw_mode.texture_window;
}

// Fills a rectangle of VRAM with a solid colour, mirroring the operation to the
// software renderer when it is active.
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{
  GL_SCOPE_FMT("FillVRAM({},{} => {},{} ({}x{}) with 0x{:08X}", x, y, x + width, y + height, width, height, color);
  DeactivateROV();

  if (m_sw_renderer)
  {
    GPUBackendFillVRAMCommand* cmd = m_sw_renderer->NewFillVRAMCommand();
    FillBackendCommandParameters(cmd);
    cmd->x = static_cast<u16>(x);
    cmd->y = static_cast<u16>(y);
    cmd->width = static_cast<u16>(width);
    cmd->height = static_cast<u16>(height);
    cmd->color = color;
    m_sw_renderer->PushCommand(cmd);
  }

  GL_INS_FMT("Dirty draw area before: {}",
             m_vram_dirty_draw_rect);

  const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height);
  AddUnclampedDrawnRectangle(bounds);

  GL_INS_FMT("Dirty draw area after: {}", m_vram_dirty_draw_rect);

  // Fills that exceed the VRAM extents wrap around; those use a dedicated
  // pipeline variant (the end coordinates below are taken modulo VRAM size).
  const bool is_oversized = (((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT));
  g_gpu_device->SetPipeline(
    m_vram_fill_pipelines[BoolToUInt8(is_oversized)][BoolToUInt8(IsInterlacedRenderingEnabled())].get());

  const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
  g_gpu_device->SetViewportAndScissor(scaled_bounds);

  // Layout must match the fill shader's uniform block.
  struct VRAMFillUBOData
  {
    u32 u_dst_x;
    u32 u_dst_y;
    u32 u_end_x;
    u32 u_end_y;
    std::array<float, 4> u_fill_color;
    u32 u_interlaced_displayed_field;
  };
  VRAMFillUBOData uniforms;
  uniforms.u_dst_x = (x % VRAM_WIDTH) * m_resolution_scale;
  uniforms.u_dst_y = (y % VRAM_HEIGHT) * m_resolution_scale;
  uniforms.u_end_x = ((x + width) % VRAM_WIDTH) * m_resolution_scale;
  uniforms.u_end_y = ((y + height) % VRAM_HEIGHT) * m_resolution_scale;
  // drop precision unless true colour is enabled
  uniforms.u_fill_color =
    GPUDevice::RGBA8ToFloat(m_true_color ? color : VRAMRGBA5551ToRGBA8888(VRAMRGBA8888ToRGBA5551(color)));
  uniforms.u_interlaced_displayed_field = GetActiveLineLSB();
  g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
  g_gpu_device->Draw(3, 0);

  RestoreDeviceContext();
}

// Reads back a rectangle of VRAM from the GPU into the CPU-side copy (g_vram).
// When the software renderer is active, a sync suffices since it already
// maintains the CPU copy.
void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{
  GL_PUSH_FMT("ReadVRAM({},{} => {},{} ({}x{})", x, y, x + width, y + height, width, height);

  if (m_sw_renderer)
  {
    m_sw_renderer->Sync(false);
    GL_POP();
    return;
  }

  // Get bounds with wrap-around handled.
  GSVector4i copy_rect = GetVRAMTransferBounds(x, y, width, height);

  // Has to be aligned to an even pixel for the download, due to 32-bit packing.
  if (copy_rect.left & 1)
    copy_rect.left--;
  if (copy_rect.right & 1)
    copy_rect.right++;

  DebugAssert((copy_rect.left % 2) == 0 && (copy_rect.width() % 2) == 0);
  // Two 16-bit VRAM pixels are packed into each readback texel, hence the /2 on x.
  const u32 encoded_left = copy_rect.left / 2;
  const u32 encoded_top = copy_rect.top;
  const u32 encoded_width = copy_rect.width() / 2;
  const u32 encoded_height = copy_rect.height();

  // Encode the 24-bit texture as 16-bit.
  const s32 uniforms[4] = {copy_rect.left, copy_rect.top, copy_rect.width(), copy_rect.height()};
  g_gpu_device->SetRenderTarget(m_vram_readback_texture.get());
  g_gpu_device->SetPipeline(m_vram_readback_pipeline.get());
  g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler());
  g_gpu_device->SetViewportAndScissor(0, 0, encoded_width, encoded_height);
  g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms));
  g_gpu_device->Draw(3, 0);
  m_vram_readback_texture->MakeReadyForSampling();
  GL_POP();

  // Stage the readback and copy it into our shadow buffer.
  if (m_vram_readback_download_texture->IsImported())
  {
    // Fast path, read directly.
    m_vram_readback_download_texture->CopyFromTexture(encoded_left, encoded_top, m_vram_readback_texture.get(), 0, 0,
                                                      encoded_width, encoded_height, 0, 0, false);
    m_vram_readback_download_texture->Flush();
  }
  else
  {
    // Copy to staging buffer, then to VRAM.
    m_vram_readback_download_texture->CopyFromTexture(0, 0, m_vram_readback_texture.get(), 0, 0, encoded_width,
                                                      encoded_height, 0, 0, true);
    m_vram_readback_download_texture->ReadTexels(0, 0, encoded_width, encoded_height,
                                                 &g_vram[copy_rect.top * VRAM_WIDTH + copy_rect.left],
                                                 VRAM_WIDTH * sizeof(u16));
  }

  RestoreDeviceContext();
}

// Uploads a rectangle of 16-bit pixel data into VRAM, honouring the set/check
// mask flags, mirroring to the software renderer, and substituting a texture
// replacement image when one matches the uploaded data.
void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool set_mask, bool check_mask)
{
  GL_SCOPE_FMT("UpdateVRAM({},{} => {},{} ({}x{})", x, y, x + width, y + height, width, height);

  if (m_sw_renderer)
  {
    const u32 num_words = width * height;
    GPUBackendUpdateVRAMCommand* cmd = m_sw_renderer->NewUpdateVRAMCommand(num_words);
    FillBackendCommandParameters(cmd);
    cmd->params.set_mask_while_drawing = set_mask;
    cmd->params.check_mask_before_draw = check_mask;
    cmd->x = static_cast<u16>(x);
    cmd->y = static_cast<u16>(y);
    cmd->width = static_cast<u16>(width);
    cmd->height = static_cast<u16>(height);
    std::memcpy(cmd->data, data, sizeof(u16) * num_words);
    m_sw_renderer->PushCommand(cmd);
  }

  const GSVector4i bounds = GetVRAMTransferBounds(x, y, width, height);
  DebugAssert(bounds.right <= static_cast<s32>(VRAM_WIDTH) && bounds.bottom <= static_cast<s32>(VRAM_HEIGHT));
  AddWrittenRectangle(bounds);

  if (check_mask)
  {
    // set new vertex counter since we want this to take into consideration previous masked pixels
    m_current_depth++;
  }
  else
  {
    // Try a texture replacement for this upload; if it blits successfully, the
    // raw data does not need to be uploaded.
    const TextureReplacements::ReplacementImage* rtex = TextureReplacements::GetVRAMReplacement(width, height, data);
    if (rtex && BlitVRAMReplacementTexture(rtex, x * m_resolution_scale, y * m_resolution_scale,
                                           width * m_resolution_scale, height * m_resolution_scale))
    {
      return;
    }
  }

  UpdateVRAMOnGPU(x, y, width, height, data, sizeof(u16) * width, set_mask, check_mask,
                  bounds);
}

// Performs the GPU-side VRAM upload: either through a temporary texture (when
// texture buffers are unsupported) or the streaming upload buffer, then draws
// the data into the VRAM render target with optional mask set/check.
void GPU_HW::UpdateVRAMOnGPU(u32 x, u32 y, u32 width, u32 height, const void* data, u32 data_pitch, bool set_mask,
                             bool check_mask, const GSVector4i bounds)
{
  DeactivateROV();

  std::unique_ptr<GPUTexture> upload_texture;
  u32 map_index;

  if (!g_gpu_device->GetFeatures().supports_texture_buffers)
  {
    // No texture buffer support: upload via a temporary R16 texture instead.
    map_index = 0;
    upload_texture = g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture,
                                                GPUTexture::Format::R16U, data, data_pitch);
    if (!upload_texture)
    {
      ERROR_LOG("Failed to get {}x{} upload texture. Things are gonna break.", width, height);
      return;
    }
  }
  else
  {
    // Stream the rows, tightly packed, into the texture buffer.
    const u32 num_pixels = width * height;
    const u32 dst_pitch = width * sizeof(u16);
    void* map = m_vram_upload_buffer->Map(num_pixels);
    map_index = m_vram_upload_buffer->GetCurrentPosition();
    StringUtil::StrideMemCpy(map, dst_pitch, data, data_pitch, dst_pitch, height);
    m_vram_upload_buffer->Unmap(num_pixels);
  }

  // Layout must match the VRAM write shader's uniform block.
  struct VRAMWriteUBOData
  {
    u32 u_dst_x;
    u32 u_dst_y;
    u32 u_end_x;
    u32 u_end_y;
    u32 u_width;
    u32 u_height;
    u32 u_buffer_base_offset;
    u32 u_mask_or_bits;
    float u_depth_value;
  };
  const VRAMWriteUBOData uniforms = {
    (x % VRAM_WIDTH), (y % VRAM_HEIGHT), ((x + width) % VRAM_WIDTH), ((y + height) % VRAM_HEIGHT), width,
    height, map_index, (set_mask) ?
                                    0x8000u : 0x00, GetCurrentNormalizedVertexDepth()};

  // the viewport should already be set to the full vram, so just adjust the scissor
  const GSVector4i scaled_bounds = bounds.mul32l(GSVector4i(m_resolution_scale));
  g_gpu_device->SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.width(), scaled_bounds.height());
  g_gpu_device->SetPipeline(m_vram_write_pipelines[BoolToUInt8(check_mask && m_write_mask_as_depth)].get());
  g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms));
  if (upload_texture)
  {
    g_gpu_device->SetTextureSampler(0, upload_texture.get(), g_gpu_device->GetNearestSampler());
    g_gpu_device->Draw(3, 0);
    g_gpu_device->RecycleTexture(std::move(upload_texture));
  }
  else
  {
    g_gpu_device->SetTextureBuffer(0, m_vram_upload_buffer.get());
    g_gpu_device->Draw(3, 0);
  }

  RestoreDeviceContext();
}

// Copies a rectangle of VRAM to another VRAM location, mirroring the copy to
// the software renderer when it is active.
void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
{
  GL_SCOPE_FMT("CopyVRAM({}x{} @ {},{} => {},{}", width, height, src_x, src_y, dst_x, dst_y);

  if (m_sw_renderer)
  {
    GPUBackendCopyVRAMCommand* cmd = m_sw_renderer->NewCopyVRAMCommand();
    FillBackendCommandParameters(cmd);
    cmd->src_x = static_cast<u16>(src_x);
    cmd->src_y = static_cast<u16>(src_y);
    cmd->dst_x = static_cast<u16>(dst_x);
    cmd->dst_y = static_cast<u16>(dst_y);
    cmd->width = static_cast<u16>(width);
    cmd->height = static_cast<u16>(height);
    m_sw_renderer->PushCommand(cmd);
  }

  // masking enabled, oversized, or overlapping
  const bool use_shader =
    (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH ||
     ((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH ||
     ((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT);
  const GSVector4i src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height);
const GSVector4i dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); 3287 const bool intersect_with_draw = m_vram_dirty_draw_rect.rintersects(src_bounds); 3288 const bool intersect_with_write = m_vram_dirty_write_rect.rintersects(src_bounds); 3289 3290 if (use_shader || IsUsingMultisampling()) 3291 { 3292 if (intersect_with_draw || intersect_with_write) 3293 UpdateVRAMReadTexture(intersect_with_draw, intersect_with_write); 3294 AddUnclampedDrawnRectangle(dst_bounds); 3295 3296 DeactivateROV(); 3297 3298 struct VRAMCopyUBOData 3299 { 3300 u32 u_src_x; 3301 u32 u_src_y; 3302 u32 u_dst_x; 3303 u32 u_dst_y; 3304 u32 u_end_x; 3305 u32 u_end_y; 3306 u32 u_width; 3307 u32 u_height; 3308 u32 u_set_mask_bit; 3309 float u_depth_value; 3310 }; 3311 const VRAMCopyUBOData uniforms = {(src_x % VRAM_WIDTH) * m_resolution_scale, 3312 (src_y % VRAM_HEIGHT) * m_resolution_scale, 3313 (dst_x % VRAM_WIDTH) * m_resolution_scale, 3314 (dst_y % VRAM_HEIGHT) * m_resolution_scale, 3315 ((dst_x + width) % VRAM_WIDTH) * m_resolution_scale, 3316 ((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale, 3317 width * m_resolution_scale, 3318 height * m_resolution_scale, 3319 m_GPUSTAT.set_mask_while_drawing ? 1u : 0u, 3320 GetCurrentNormalizedVertexDepth()}; 3321 3322 // VRAM read texture should already be bound. 
3323 const GSVector4i dst_bounds_scaled = dst_bounds.mul32l(GSVector4i(m_resolution_scale)); 3324 g_gpu_device->SetViewportAndScissor(dst_bounds_scaled); 3325 g_gpu_device->SetPipeline( 3326 m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw && m_write_mask_as_depth)].get()); 3327 g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); 3328 g_gpu_device->Draw(3, 0); 3329 RestoreDeviceContext(); 3330 3331 if (m_GPUSTAT.check_mask_before_draw && !m_pgxp_depth_buffer) 3332 m_current_depth++; 3333 3334 return; 3335 } 3336 3337 GPUTexture* src_tex = m_vram_texture.get(); 3338 const bool overlaps_with_self = src_bounds.rintersects(dst_bounds); 3339 if (!g_gpu_device->GetFeatures().texture_copy_to_self || overlaps_with_self) 3340 { 3341 src_tex = m_vram_read_texture.get(); 3342 if (intersect_with_draw || intersect_with_write) 3343 UpdateVRAMReadTexture(intersect_with_draw, intersect_with_write); 3344 } 3345 3346 if (intersect_with_draw) 3347 { 3348 AddUnclampedDrawnRectangle(dst_bounds); 3349 } 3350 else if (intersect_with_write) 3351 { 3352 AddWrittenRectangle(dst_bounds); 3353 } 3354 else 3355 { 3356 const bool use_write = 3357 (!m_vram_dirty_write_rect.eq(INVALID_RECT) && !m_vram_dirty_draw_rect.eq(INVALID_RECT) && 3358 RectDistance(m_vram_dirty_write_rect, dst_bounds) < RectDistance(m_vram_dirty_draw_rect, dst_bounds)); 3359 if (use_write) 3360 AddWrittenRectangle(dst_bounds); 3361 else 3362 AddUnclampedDrawnRectangle(dst_bounds); 3363 } 3364 3365 if (m_GPUSTAT.check_mask_before_draw) 3366 { 3367 // set new vertex counter since we want this to take into consideration previous masked pixels 3368 m_current_depth++; 3369 } 3370 3371 g_gpu_device->CopyTextureRegion(m_vram_texture.get(), dst_x * m_resolution_scale, dst_y * m_resolution_scale, 0, 0, 3372 src_tex, src_x * m_resolution_scale, src_y * m_resolution_scale, 0, 0, 3373 width * m_resolution_scale, height * m_resolution_scale); 3374 if (src_tex != m_vram_texture.get()) 3375 
m_vram_read_texture->MakeReadyForSampling(); 3376 } 3377 3378 void GPU_HW::DispatchRenderCommand() 3379 { 3380 const GPURenderCommand rc{m_render_command.bits}; 3381 3382 BatchTextureMode texture_mode = BatchTextureMode::Disabled; 3383 if (rc.IsTexturingEnabled()) 3384 { 3385 // texture page changed - check that the new page doesn't intersect the drawing area 3386 if (m_draw_mode.IsTexturePageChanged()) 3387 { 3388 m_draw_mode.ClearTexturePageChangedFlag(); 3389 3390 #if 0 3391 if (!m_vram_dirty_draw_rect.eq(INVALID_RECT) || !m_vram_dirty_write_rect.eq(INVALID_RECT)) 3392 { 3393 GL_INS_FMT("VRAM DIRTY: {} {}", m_vram_dirty_draw_rect, m_vram_dirty_write_rect); 3394 GL_INS_FMT("PAGE RECT: {}", m_draw_mode.mode_reg.GetTexturePageRectangle()); 3395 if (m_draw_mode.mode_reg.IsUsingPalette()) 3396 GL_INS_FMT("PALETTE RECT: {}", m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode)); 3397 } 3398 #endif 3399 3400 if (m_draw_mode.mode_reg.IsUsingPalette()) 3401 { 3402 const GSVector4i palette_rect = m_draw_mode.palette_reg.GetRectangle(m_draw_mode.mode_reg.texture_mode); 3403 const bool update_drawn = palette_rect.rintersects(m_vram_dirty_draw_rect); 3404 const bool update_written = palette_rect.rintersects(m_vram_dirty_write_rect); 3405 if (update_drawn || update_written) 3406 { 3407 GL_INS("Palette in VRAM dirty area, flushing cache"); 3408 if (!IsFlushed()) 3409 FlushRender(); 3410 3411 UpdateVRAMReadTexture(update_drawn, update_written); 3412 } 3413 } 3414 3415 const GSVector4i page_rect = m_draw_mode.mode_reg.GetTexturePageRectangle(); 3416 GSVector4i::storel(m_current_texture_page_offset, page_rect); 3417 3418 u8 new_texpage_dirty = m_vram_dirty_draw_rect.rintersects(page_rect) ? TEXPAGE_DIRTY_DRAWN_RECT : 0; 3419 new_texpage_dirty |= m_vram_dirty_write_rect.rintersects(page_rect) ? 
TEXPAGE_DIRTY_WRITTEN_RECT : 0; 3420 3421 if (new_texpage_dirty != 0) 3422 { 3423 GL_INS("Texpage is in dirty area, checking UV ranges"); 3424 m_texpage_dirty = new_texpage_dirty; 3425 m_compute_uv_range = true; 3426 m_current_uv_rect = INVALID_RECT; 3427 } 3428 else 3429 { 3430 m_compute_uv_range = m_clamp_uvs; 3431 if (m_texpage_dirty) 3432 GL_INS("Texpage is no longer dirty"); 3433 m_texpage_dirty = 0; 3434 } 3435 } 3436 3437 texture_mode = (m_draw_mode.mode_reg.texture_mode == GPUTextureMode::Reserved_Direct16Bit) ? 3438 BatchTextureMode::Direct16Bit : 3439 static_cast<BatchTextureMode>(m_draw_mode.mode_reg.texture_mode.GetValue()); 3440 } 3441 3442 // has any state changed which requires a new batch? 3443 // Reverse blending breaks with mixed transparent and opaque pixels, so we have to do one draw per polygon. 3444 // If we have fbfetch, we don't need to draw it in two passes. Test case: Suikoden 2 shadows. 3445 const GPUTransparencyMode transparency_mode = 3446 rc.transparency_enable ? m_draw_mode.mode_reg.transparency_mode : GPUTransparencyMode::Disabled; 3447 const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? 
m_GPUSTAT.dither_enable : false; 3448 if (texture_mode != m_batch.texture_mode || transparency_mode != m_batch.transparency_mode || 3449 (transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && !m_allow_shader_blend) || 3450 dithering_enable != m_batch.dithering) 3451 { 3452 FlushRender(); 3453 } 3454 3455 EnsureVertexBufferSpaceForCurrentCommand(); 3456 3457 if (m_batch_index_count == 0) 3458 { 3459 // transparency mode change 3460 const bool check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; 3461 if (transparency_mode != GPUTransparencyMode::Disabled && !m_rov_active && !m_prefer_shader_blend && 3462 !NeedsShaderBlending(transparency_mode, texture_mode, check_mask_before_draw)) 3463 { 3464 static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}}; 3465 3466 const float src_alpha_factor = transparent_alpha[static_cast<u32>(transparency_mode)][0]; 3467 const float dst_alpha_factor = transparent_alpha[static_cast<u32>(transparency_mode)][1]; 3468 m_batch_ubo_dirty |= (m_batch_ubo_data.u_src_alpha_factor != src_alpha_factor || 3469 m_batch_ubo_data.u_dst_alpha_factor != dst_alpha_factor); 3470 m_batch_ubo_data.u_src_alpha_factor = src_alpha_factor; 3471 m_batch_ubo_data.u_dst_alpha_factor = dst_alpha_factor; 3472 } 3473 3474 const bool set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; 3475 if (m_batch.check_mask_before_draw != check_mask_before_draw || 3476 m_batch.set_mask_while_drawing != set_mask_while_drawing) 3477 { 3478 m_batch.check_mask_before_draw = check_mask_before_draw; 3479 m_batch.set_mask_while_drawing = set_mask_while_drawing; 3480 m_batch_ubo_dirty |= (m_batch_ubo_data.u_set_mask_while_drawing != BoolToUInt32(set_mask_while_drawing)); 3481 m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(set_mask_while_drawing); 3482 } 3483 3484 m_batch.interlacing = IsInterlacedRenderingEnabled(); 3485 if (m_batch.interlacing) 3486 { 3487 const u32 displayed_field = 
GetActiveLineLSB(); 3488 m_batch_ubo_dirty |= (m_batch_ubo_data.u_interlaced_displayed_field != displayed_field); 3489 m_batch_ubo_data.u_interlaced_displayed_field = displayed_field; 3490 } 3491 3492 // update state 3493 m_batch.texture_mode = texture_mode; 3494 m_batch.transparency_mode = transparency_mode; 3495 m_batch.dithering = dithering_enable; 3496 3497 if (m_draw_mode.IsTextureWindowChanged()) 3498 { 3499 m_draw_mode.ClearTextureWindowChangedFlag(); 3500 3501 m_batch_ubo_data.u_texture_window[0] = ZeroExtend32(m_draw_mode.texture_window.and_x); 3502 m_batch_ubo_data.u_texture_window[1] = ZeroExtend32(m_draw_mode.texture_window.and_y); 3503 m_batch_ubo_data.u_texture_window[2] = ZeroExtend32(m_draw_mode.texture_window.or_x); 3504 m_batch_ubo_data.u_texture_window[3] = ZeroExtend32(m_draw_mode.texture_window.or_y); 3505 3506 m_texture_window_active = ((m_draw_mode.texture_window.and_x & m_draw_mode.texture_window.and_y) != 0xFF || 3507 ((m_draw_mode.texture_window.or_x | m_draw_mode.texture_window.or_y) != 0)); 3508 m_batch_ubo_dirty = true; 3509 } 3510 3511 if (m_drawing_area_changed) 3512 { 3513 m_drawing_area_changed = false; 3514 SetClampedDrawingArea(); 3515 SetScissor(); 3516 3517 if (m_pgxp_depth_buffer && m_last_depth_z < 1.0f) 3518 { 3519 FlushRender(); 3520 CopyAndClearDepthBuffer(); 3521 EnsureVertexBufferSpaceForCurrentCommand(); 3522 } 3523 3524 if (m_sw_renderer) 3525 { 3526 GPUBackendSetDrawingAreaCommand* cmd = m_sw_renderer->NewSetDrawingAreaCommand(); 3527 cmd->new_area = m_drawing_area; 3528 m_sw_renderer->PushCommand(cmd); 3529 } 3530 } 3531 } 3532 3533 LoadVertices(); 3534 } 3535 3536 void GPU_HW::UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) 3537 { 3538 // Not done in HW, but need to forward through to SW if using that for readbacks 3539 if (m_sw_renderer) 3540 { 3541 GPUBackendUpdateCLUTCommand* cmd = m_sw_renderer->NewUpdateCLUTCommand(); 3542 FillBackendCommandParameters(cmd); 3543 cmd->reg.bits = reg.bits; 3544 
cmd->clut_is_8bit = clut_is_8bit; 3545 m_sw_renderer->PushCommand(cmd); 3546 } 3547 } 3548 3549 void GPU_HW::FlushRender() 3550 { 3551 const u32 base_vertex = m_batch_base_vertex; 3552 const u32 base_index = m_batch_base_index; 3553 const u32 index_count = m_batch_index_count; 3554 DebugAssert((m_batch_vertex_ptr != nullptr) == (m_batch_index_ptr != nullptr)); 3555 if (m_batch_vertex_ptr) 3556 UnmapGPUBuffer(m_batch_vertex_count, index_count); 3557 if (index_count == 0) 3558 return; 3559 3560 #ifdef _DEBUG 3561 GL_SCOPE_FMT("Hardware Draw {}", ++s_draw_number); 3562 #endif 3563 3564 GL_INS_FMT("Dirty draw area: {}", m_vram_dirty_draw_rect); 3565 3566 if (m_batch_ubo_dirty) 3567 { 3568 g_gpu_device->UploadUniformBuffer(&m_batch_ubo_data, sizeof(m_batch_ubo_data)); 3569 // m_counters.num_ubo_updates++; 3570 m_batch_ubo_dirty = false; 3571 } 3572 3573 if (m_wireframe_mode != GPUWireframeMode::OnlyWireframe) 3574 { 3575 if (NeedsShaderBlending(m_batch.transparency_mode, m_batch.texture_mode, m_batch.check_mask_before_draw) || 3576 m_rov_active || (m_use_rov_for_shader_blend && m_pgxp_depth_buffer)) 3577 { 3578 DrawBatchVertices(BatchRenderMode::ShaderBlend, index_count, base_index, base_vertex); 3579 } 3580 else if (NeedsTwoPassRendering()) 3581 { 3582 DrawBatchVertices(BatchRenderMode::OnlyOpaque, index_count, base_index, base_vertex); 3583 DrawBatchVertices(BatchRenderMode::OnlyTransparent, index_count, base_index, base_vertex); 3584 } 3585 else 3586 { 3587 DrawBatchVertices(m_batch.GetRenderMode(), index_count, base_index, base_vertex); 3588 } 3589 } 3590 3591 if (m_wireframe_mode != GPUWireframeMode::Disabled) 3592 { 3593 // This'll be less than ideal, but wireframe is for debugging, so take the perf hit. 
3594 DeactivateROV(); 3595 g_gpu_device->SetPipeline(m_wireframe_pipeline.get()); 3596 g_gpu_device->DrawIndexed(index_count, base_index, base_vertex); 3597 } 3598 } 3599 3600 void GPU_HW::UpdateDisplay() 3601 { 3602 FlushRender(); 3603 DeactivateROV(); 3604 3605 GL_SCOPE("UpdateDisplay()"); 3606 3607 if (g_settings.debugging.show_vram) 3608 { 3609 if (IsUsingMultisampling()) 3610 { 3611 UpdateVRAMReadTexture(true, true); 3612 SetDisplayTexture(m_vram_read_texture.get(), nullptr, 0, 0, m_vram_read_texture->GetWidth(), 3613 m_vram_read_texture->GetHeight()); 3614 } 3615 else 3616 { 3617 SetDisplayTexture(m_vram_texture.get(), nullptr, 0, 0, m_vram_texture->GetWidth(), m_vram_texture->GetHeight()); 3618 } 3619 3620 return; 3621 } 3622 3623 const bool interlaced = IsInterlacedDisplayEnabled(); 3624 const u32 interlaced_field = GetInterlacedDisplayField(); 3625 const u32 resolution_scale = m_GPUSTAT.display_area_color_depth_24 ? 1 : m_resolution_scale; 3626 const u32 scaled_vram_offset_x = m_crtc_state.display_vram_left * resolution_scale; 3627 const u32 scaled_vram_offset_y = (m_crtc_state.display_vram_top * resolution_scale) + 3628 ((interlaced && m_GPUSTAT.vertical_resolution) ? interlaced_field : 0); 3629 const u32 scaled_display_width = m_crtc_state.display_vram_width * resolution_scale; 3630 const u32 scaled_display_height = m_crtc_state.display_vram_height * resolution_scale; 3631 const u32 read_height = interlaced ? (scaled_display_height / 2u) : scaled_display_height; 3632 const u32 line_skip = BoolToUInt32(interlaced && m_GPUSTAT.vertical_resolution); 3633 bool drew_anything = false; 3634 3635 // Don't bother grabbing depth if postfx doesn't need it. 3636 GPUTexture* depth_source = (!m_GPUSTAT.display_area_color_depth_24 && m_pgxp_depth_buffer && 3637 PostProcessing::InternalChain.NeedsDepthBuffer()) ? 3638 (m_depth_was_copied ? 
m_vram_depth_copy_texture.get() : m_vram_depth_texture.get()) : 3639 nullptr; 3640 3641 if (IsDisplayDisabled()) 3642 { 3643 ClearDisplayTexture(); 3644 return; 3645 } 3646 else if (!m_GPUSTAT.display_area_color_depth_24 && !IsUsingMultisampling() && 3647 (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture->GetWidth() && 3648 (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture->GetHeight() && 3649 !PostProcessing::InternalChain.IsActive()) 3650 { 3651 SetDisplayTexture(m_vram_texture.get(), depth_source, scaled_vram_offset_x, scaled_vram_offset_y, 3652 scaled_display_width, read_height); 3653 3654 // Fast path if no copies are needed. 3655 if (interlaced) 3656 { 3657 GL_INS("Deinterlace fast path"); 3658 drew_anything = true; 3659 Deinterlace(interlaced_field, line_skip); 3660 } 3661 else 3662 { 3663 GL_INS("Direct display"); 3664 } 3665 } 3666 else 3667 { 3668 if (!m_vram_extract_texture || m_vram_extract_texture->GetWidth() != scaled_display_width || 3669 m_vram_extract_texture->GetHeight() != read_height) 3670 { 3671 if (!g_gpu_device->ResizeTexture(&m_vram_extract_texture, scaled_display_width, read_height, 3672 GPUTexture::Type::RenderTarget, GPUTexture::Format::RGBA8)) [[unlikely]] 3673 { 3674 ClearDisplayTexture(); 3675 return; 3676 } 3677 } 3678 3679 m_vram_texture->MakeReadyForSampling(); 3680 g_gpu_device->InvalidateRenderTarget(m_vram_extract_texture.get()); 3681 3682 if (depth_source && 3683 ((m_vram_extract_depth_texture && m_vram_extract_depth_texture->GetWidth() == scaled_display_width && 3684 m_vram_extract_depth_texture->GetHeight() == scaled_display_height) || 3685 !g_gpu_device->ResizeTexture(&m_vram_extract_depth_texture, scaled_display_width, scaled_display_height, 3686 GPUTexture::Type::RenderTarget, VRAM_DS_COLOR_FORMAT))) 3687 { 3688 depth_source->MakeReadyForSampling(); 3689 g_gpu_device->InvalidateRenderTarget(m_vram_extract_depth_texture.get()); 3690 3691 GPUTexture* targets[] = {m_vram_extract_texture.get(), 
m_vram_extract_depth_texture.get()}; 3692 g_gpu_device->SetRenderTargets(targets, static_cast<u32>(std::size(targets)), nullptr); 3693 g_gpu_device->SetPipeline(m_vram_extract_pipeline[2].get()); 3694 3695 g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); 3696 g_gpu_device->SetTextureSampler(1, depth_source, g_gpu_device->GetNearestSampler()); 3697 } 3698 else 3699 { 3700 g_gpu_device->SetRenderTarget(m_vram_extract_texture.get()); 3701 g_gpu_device->SetPipeline(m_vram_extract_pipeline[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)].get()); 3702 g_gpu_device->SetTextureSampler(0, m_vram_texture.get(), g_gpu_device->GetNearestSampler()); 3703 } 3704 3705 const u32 reinterpret_start_x = m_crtc_state.regs.X * resolution_scale; 3706 const u32 skip_x = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * resolution_scale; 3707 GL_INS_FMT("VRAM extract, depth = {}, 24bpp = {}, skip_x = {}, line_skip = {}", depth_source ? "yes" : "no", 3708 m_GPUSTAT.display_area_color_depth_24.GetValue(), skip_x, line_skip); 3709 GL_INS_FMT("Source: {},{} => {},{} ({}x{})", reinterpret_start_x, scaled_vram_offset_y, 3710 reinterpret_start_x + scaled_display_width, scaled_vram_offset_y + read_height, scaled_display_width, 3711 read_height); 3712 3713 const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y, skip_x, line_skip}; 3714 g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); 3715 3716 g_gpu_device->SetViewportAndScissor(0, 0, scaled_display_width, read_height); 3717 g_gpu_device->Draw(3, 0); 3718 3719 m_vram_extract_texture->MakeReadyForSampling(); 3720 if (depth_source) 3721 { 3722 // Thanks DX11... 3723 m_vram_extract_depth_texture->MakeReadyForSampling(); 3724 g_gpu_device->SetTextureSampler(1, nullptr, nullptr); 3725 } 3726 3727 drew_anything = true; 3728 3729 SetDisplayTexture(m_vram_extract_texture.get(), depth_source ? 
m_vram_extract_depth_texture.get() : nullptr, 0, 0, 3730 scaled_display_width, read_height); 3731 if (g_settings.display_24bit_chroma_smoothing) 3732 { 3733 if (ApplyChromaSmoothing()) 3734 { 3735 if (interlaced) 3736 Deinterlace(interlaced_field, 0); 3737 } 3738 } 3739 else 3740 { 3741 if (interlaced) 3742 Deinterlace(interlaced_field, 0); 3743 } 3744 } 3745 3746 if (m_downsample_mode != GPUDownsampleMode::Disabled && !m_GPUSTAT.display_area_color_depth_24) 3747 { 3748 DebugAssert(m_display_texture); 3749 DownsampleFramebuffer(); 3750 } 3751 3752 if (drew_anything) 3753 RestoreDeviceContext(); 3754 } 3755 3756 void GPU_HW::UpdateDownsamplingLevels() 3757 { 3758 if (m_downsample_mode == GPUDownsampleMode::Adaptive) 3759 { 3760 m_downsample_scale_or_levels = 0; 3761 u32 current_width = VRAM_WIDTH * m_resolution_scale; 3762 while (current_width >= VRAM_WIDTH) 3763 { 3764 m_downsample_scale_or_levels++; 3765 current_width /= 2; 3766 } 3767 } 3768 else if (m_downsample_mode == GPUDownsampleMode::Box) 3769 { 3770 m_downsample_scale_or_levels = m_resolution_scale / GetBoxDownsampleScale(m_resolution_scale); 3771 } 3772 else 3773 { 3774 m_downsample_scale_or_levels = 0; 3775 } 3776 3777 // Toss downsampling buffer, it's likely going to change resolution. 
3778 g_gpu_device->RecycleTexture(std::move(m_downsample_texture)); 3779 } 3780 3781 void GPU_HW::OnBufferSwapped() 3782 { 3783 GL_INS("OnBufferSwapped()"); 3784 m_depth_was_copied = false; 3785 } 3786 3787 void GPU_HW::DownsampleFramebuffer() 3788 { 3789 GPUTexture* source = m_display_texture; 3790 const u32 left = m_display_texture_view_x; 3791 const u32 top = m_display_texture_view_y; 3792 const u32 width = m_display_texture_view_width; 3793 const u32 height = m_display_texture_view_height; 3794 3795 if (m_downsample_mode == GPUDownsampleMode::Adaptive) 3796 DownsampleFramebufferAdaptive(source, left, top, width, height); 3797 else 3798 DownsampleFramebufferBoxFilter(source, left, top, width, height); 3799 } 3800 3801 void GPU_HW::DownsampleFramebufferAdaptive(GPUTexture* source, u32 left, u32 top, u32 width, u32 height) 3802 { 3803 GL_PUSH_FMT("DownsampleFramebufferAdaptive ({},{} => {},{})", left, top, left + width, left + height); 3804 3805 struct SmoothingUBOData 3806 { 3807 float min_uv[2]; 3808 float max_uv[2]; 3809 float rcp_size[2]; 3810 float lod; 3811 }; 3812 3813 if (!m_downsample_texture || m_downsample_texture->GetWidth() != width || m_downsample_texture->GetHeight() != height) 3814 { 3815 g_gpu_device->RecycleTexture(std::move(m_downsample_texture)); 3816 m_downsample_texture = 3817 g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT); 3818 } 3819 std::unique_ptr<GPUTexture, GPUDevice::PooledTextureDeleter> level_texture = g_gpu_device->FetchAutoRecycleTexture( 3820 width, height, 1, m_downsample_scale_or_levels, 1, GPUTexture::Type::Texture, VRAM_RT_FORMAT); 3821 std::unique_ptr<GPUTexture, GPUDevice::PooledTextureDeleter> weight_texture = 3822 g_gpu_device->FetchAutoRecycleTexture(std::max(width >> (m_downsample_scale_or_levels - 1), 1u), 3823 std::max(height >> (m_downsample_scale_or_levels - 1), 1u), 1, 1, 1, 3824 GPUTexture::Type::RenderTarget, GPUTexture::Format::R8); 3825 if 
(!m_downsample_texture || !level_texture || !weight_texture) 3826 { 3827 ERROR_LOG("Failed to create {}x{} RTs for adaptive downsampling", width, height); 3828 return; 3829 } 3830 3831 g_gpu_device->CopyTextureRegion(level_texture.get(), 0, 0, 0, 0, source, left, top, 0, 0, width, height); 3832 g_gpu_device->SetTextureSampler(0, level_texture.get(), m_downsample_lod_sampler.get()); 3833 3834 SmoothingUBOData uniforms; 3835 3836 // create mip chain 3837 for (u32 level = 1; level < m_downsample_scale_or_levels; level++) 3838 { 3839 GL_SCOPE_FMT("Create miplevel {}", level); 3840 3841 const u32 level_width = width >> level; 3842 const u32 level_height = height >> level; 3843 const float rcp_width = 1.0f / static_cast<float>(level_texture->GetMipWidth(level)); 3844 const float rcp_height = 1.0f / static_cast<float>(level_texture->GetMipHeight(level)); 3845 uniforms.min_uv[0] = 0.0f; 3846 uniforms.min_uv[1] = 0.0f; 3847 uniforms.max_uv[0] = static_cast<float>(level_width) * rcp_width; 3848 uniforms.max_uv[1] = static_cast<float>(level_height) * rcp_height; 3849 uniforms.rcp_size[0] = rcp_width; 3850 uniforms.rcp_size[1] = rcp_height; 3851 uniforms.lod = static_cast<float>(level - 1); 3852 3853 g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get()); 3854 g_gpu_device->SetRenderTarget(m_downsample_texture.get()); 3855 g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, level_width, level_height)); 3856 g_gpu_device->SetPipeline((level == 1) ? 
m_downsample_first_pass_pipeline.get() : 3857 m_downsample_mid_pass_pipeline.get()); 3858 g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); 3859 g_gpu_device->Draw(3, 0); 3860 g_gpu_device->CopyTextureRegion(level_texture.get(), 0, 0, 0, level, m_downsample_texture.get(), 0, 0, 0, 0, 3861 level_width, level_height); 3862 } 3863 3864 // blur pass at lowest level 3865 { 3866 GL_SCOPE("Blur"); 3867 3868 const u32 last_level = m_downsample_scale_or_levels - 1; 3869 const u32 last_width = level_texture->GetMipWidth(last_level); 3870 const u32 last_height = level_texture->GetMipHeight(last_level); 3871 const float rcp_width = 1.0f / static_cast<float>(m_downsample_texture->GetWidth()); 3872 const float rcp_height = 1.0f / static_cast<float>(m_downsample_texture->GetHeight()); 3873 uniforms.min_uv[0] = 0.0f; 3874 uniforms.min_uv[1] = 0.0f; 3875 uniforms.max_uv[0] = static_cast<float>(last_width) * rcp_width; 3876 uniforms.max_uv[1] = static_cast<float>(last_height) * rcp_height; 3877 uniforms.rcp_size[0] = rcp_width; 3878 uniforms.rcp_size[1] = rcp_height; 3879 uniforms.lod = 0.0f; 3880 3881 m_downsample_texture->MakeReadyForSampling(); 3882 g_gpu_device->InvalidateRenderTarget(weight_texture.get()); 3883 g_gpu_device->SetRenderTarget(weight_texture.get()); 3884 g_gpu_device->SetTextureSampler(0, m_downsample_texture.get(), g_gpu_device->GetNearestSampler()); 3885 g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, last_width, last_height)); 3886 g_gpu_device->SetPipeline(m_downsample_blur_pass_pipeline.get()); 3887 g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); 3888 g_gpu_device->Draw(3, 0); 3889 weight_texture->MakeReadyForSampling(); 3890 } 3891 3892 // composite downsampled and upsampled images together 3893 { 3894 GL_SCOPE("Composite"); 3895 3896 uniforms.min_uv[0] = 0.0f; 3897 uniforms.min_uv[1] = 0.0f; 3898 uniforms.max_uv[0] = 1.0f; 3899 uniforms.max_uv[1] = 1.0f; 3900 3901 
g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get()); 3902 g_gpu_device->SetRenderTarget(m_downsample_texture.get()); 3903 g_gpu_device->SetTextureSampler(0, level_texture.get(), m_downsample_composite_sampler.get()); 3904 g_gpu_device->SetTextureSampler(1, weight_texture.get(), m_downsample_lod_sampler.get()); 3905 g_gpu_device->SetViewportAndScissor(GSVector4i(0, 0, width, height)); 3906 g_gpu_device->SetPipeline(m_downsample_composite_pass_pipeline.get()); 3907 g_gpu_device->PushUniformBuffer(&uniforms, sizeof(uniforms)); 3908 g_gpu_device->Draw(3, 0); 3909 m_downsample_texture->MakeReadyForSampling(); 3910 } 3911 3912 GL_POP(); 3913 3914 RestoreDeviceContext(); 3915 3916 SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, width, height); 3917 } 3918 3919 void GPU_HW::DownsampleFramebufferBoxFilter(GPUTexture* source, u32 left, u32 top, u32 width, u32 height) 3920 { 3921 GL_SCOPE_FMT("DownsampleFramebufferBoxFilter({},{} => {},{} ({}x{})", left, top, left + width, top + height, width, 3922 height); 3923 3924 const u32 ds_width = width / m_downsample_scale_or_levels; 3925 const u32 ds_height = height / m_downsample_scale_or_levels; 3926 3927 if (!m_downsample_texture || m_downsample_texture->GetWidth() != ds_width || 3928 m_downsample_texture->GetHeight() != ds_height) 3929 { 3930 g_gpu_device->RecycleTexture(std::move(m_downsample_texture)); 3931 m_downsample_texture = 3932 g_gpu_device->FetchTexture(ds_width, ds_height, 1, 1, 1, GPUTexture::Type::RenderTarget, VRAM_RT_FORMAT); 3933 } 3934 if (!m_downsample_texture) 3935 { 3936 ERROR_LOG("Failed to create {}x{} RT for box downsampling", width, height); 3937 return; 3938 } 3939 3940 source->MakeReadyForSampling(); 3941 3942 const u32 uniforms[4] = {left, top, 0u, 0u}; 3943 3944 g_gpu_device->InvalidateRenderTarget(m_downsample_texture.get()); 3945 g_gpu_device->SetRenderTarget(m_downsample_texture.get()); 3946 g_gpu_device->SetPipeline(m_downsample_first_pass_pipeline.get()); 
3947 g_gpu_device->SetTextureSampler(0, source, g_gpu_device->GetNearestSampler()); 3948 g_gpu_device->SetViewportAndScissor(0, 0, ds_width, ds_height); 3949 g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); 3950 g_gpu_device->Draw(3, 0); 3951 3952 RestoreDeviceContext(); 3953 3954 SetDisplayTexture(m_downsample_texture.get(), m_display_depth_buffer, 0, 0, ds_width, ds_height); 3955 } 3956 3957 void GPU_HW::DrawRendererStats() 3958 { 3959 if (ImGui::CollapsingHeader("Renderer Statistics", ImGuiTreeNodeFlags_DefaultOpen)) 3960 { 3961 static const ImVec4 active_color{1.0f, 1.0f, 1.0f, 1.0f}; 3962 static const ImVec4 inactive_color{0.4f, 0.4f, 0.4f, 1.0f}; 3963 3964 ImGui::Columns(2); 3965 ImGui::SetColumnWidth(0, 200.0f * ImGuiManager::GetGlobalScale()); 3966 3967 ImGui::TextUnformatted("Resolution Scale:"); 3968 ImGui::NextColumn(); 3969 ImGui::Text("%u (VRAM %ux%u)", m_resolution_scale, VRAM_WIDTH * m_resolution_scale, 3970 VRAM_HEIGHT * m_resolution_scale); 3971 ImGui::NextColumn(); 3972 3973 ImGui::TextUnformatted("Effective Display Resolution:"); 3974 ImGui::NextColumn(); 3975 ImGui::Text("%ux%u", m_crtc_state.display_vram_width * m_resolution_scale, 3976 m_crtc_state.display_vram_height * m_resolution_scale); 3977 ImGui::NextColumn(); 3978 3979 ImGui::TextUnformatted("True Color:"); 3980 ImGui::NextColumn(); 3981 ImGui::TextColored(m_true_color ? active_color : inactive_color, m_true_color ? "Enabled" : "Disabled"); 3982 ImGui::NextColumn(); 3983 3984 const bool debanding = (g_settings.gpu_true_color && g_settings.gpu_debanding); 3985 ImGui::TextUnformatted("Debanding:"); 3986 ImGui::NextColumn(); 3987 ImGui::TextColored(debanding ? active_color : inactive_color, debanding ? "Enabled" : "Disabled"); 3988 ImGui::NextColumn(); 3989 3990 const bool scaled_dithering = (m_resolution_scale > 1 && g_settings.gpu_scaled_dithering); 3991 ImGui::TextUnformatted("Scaled Dithering:"); 3992 ImGui::NextColumn(); 3993 ImGui::TextColored(scaled_dithering ? 
active_color : inactive_color, scaled_dithering ? "Enabled" : "Disabled"); 3994 ImGui::NextColumn(); 3995 3996 ImGui::TextUnformatted("Texture Filtering:"); 3997 ImGui::NextColumn(); 3998 ImGui::TextColored((m_texture_filtering != GPUTextureFilter::Nearest) ? active_color : inactive_color, "%s", 3999 Settings::GetTextureFilterDisplayName(m_texture_filtering)); 4000 ImGui::NextColumn(); 4001 4002 ImGui::TextUnformatted("PGXP:"); 4003 ImGui::NextColumn(); 4004 ImGui::TextColored(g_settings.gpu_pgxp_enable ? active_color : inactive_color, "Geom"); 4005 ImGui::SameLine(); 4006 ImGui::TextColored((g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_culling) ? active_color : inactive_color, 4007 "Cull"); 4008 ImGui::SameLine(); 4009 ImGui::TextColored( 4010 (g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_texture_correction) ? active_color : inactive_color, "Tex"); 4011 ImGui::SameLine(); 4012 ImGui::TextColored((g_settings.gpu_pgxp_enable && g_settings.gpu_pgxp_vertex_cache) ? active_color : inactive_color, 4013 "Cache"); 4014 ImGui::NextColumn(); 4015 4016 ImGui::Columns(1); 4017 } 4018 } 4019 4020 std::unique_ptr<GPU> GPU::CreateHardwareRenderer() 4021 { 4022 std::unique_ptr<GPU_HW> gpu(std::make_unique<GPU_HW>()); 4023 if (!gpu->Initialize()) 4024 return nullptr; 4025 4026 return gpu; 4027 }