opengl_texture.cpp (32569B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR PolyForm-Strict-1.0.0) 3 4 #include "opengl_texture.h" 5 #include "opengl_device.h" 6 #include "opengl_stream_buffer.h" 7 8 #include "common/align.h" 9 #include "common/assert.h" 10 #include "common/intrin.h" 11 #include "common/log.h" 12 #include "common/string_util.h" 13 14 #include <array> 15 #include <limits> 16 #include <tuple> 17 18 Log_SetChannel(OpenGLDevice); 19 20 // Looking across a range of GPUs, the optimal copy alignment for Vulkan drivers seems 21 // to be between 1 (AMD/NV) and 64 (Intel). So, we'll go with 64 here. 22 static constexpr u32 TEXTURE_UPLOAD_ALIGNMENT = 64; 23 24 // The pitch alignment must be less or equal to the upload alignment. 25 // We need 32 here for AVX2, so 64 is also fine. 26 static constexpr u32 TEXTURE_UPLOAD_PITCH_ALIGNMENT = 64; 27 28 const std::tuple<GLenum, GLenum, GLenum>& OpenGLTexture::GetPixelFormatMapping(GPUTexture::Format format, bool gles) 29 { 30 static constexpr std::array<std::tuple<GLenum, GLenum, GLenum>, static_cast<u32>(GPUTexture::Format::MaxCount)> 31 mapping = {{ 32 {}, // Unknown 33 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 34 {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 35 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 36 {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // RGBA5551 37 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8 38 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT}, // D16 39 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT}, // D24S8 40 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32F 41 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT}, // D32FS8 42 {GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16 43 {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I 44 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16U 45 {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F 46 {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I 47 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32U 48 {GL_R32F, GL_RED, GL_FLOAT}, // R32F 49 {GL_RG8, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // RG8 50 {GL_RG16F, GL_RG, GL_UNSIGNED_SHORT}, // RG16 51 {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F 52 {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F 53 {GL_RGBA16, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA16 54 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F 55 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F 56 {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2 57 }}; 58 59 // GLES doesn't have the non-normalized 16-bit formats.. use float and hope for the best, lol. 60 static constexpr std::array<std::tuple<GLenum, GLenum, GLenum>, static_cast<u32>(GPUTexture::Format::MaxCount)> 61 mapping_gles = {{ 62 {}, // Unknown 63 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 64 {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // BGRA8 65 {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 66 {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // RGBA5551 67 {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8 68 {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_SHORT}, // D16 69 {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT}, // D24S8 70 {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32F 71 {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT}, // D32FS8 72 {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16 73 {GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16I 74 {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16U 75 {GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16F 76 {GL_R32I, GL_RED_INTEGER, GL_INT}, // R32I 77 {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32U 78 {GL_R32F, GL_RED, GL_FLOAT}, // R32F 79 {GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // RG8 80 {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16 81 {GL_RG16F, GL_RG, GL_HALF_FLOAT}, // RG16F 82 {GL_RG32F, GL_RG, GL_FLOAT}, // RG32F 83 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16 84 {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // RGBA16F 85 {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // RGBA32F 86 {GL_RGB10_A2, GL_BGRA, GL_UNSIGNED_INT_2_10_10_10_REV}, // RGB10A2 87 }}; 88 89 return gles ? mapping_gles[static_cast<u32>(format)] : mapping[static_cast<u32>(format)]; 90 } 91 92 OpenGLTexture::OpenGLTexture(u32 width, u32 height, u32 layers, u32 levels, u32 samples, Type type, Format format, 93 GLuint id) 94 : GPUTexture(static_cast<u16>(width), static_cast<u16>(height), static_cast<u8>(layers), static_cast<u8>(levels), 95 static_cast<u8>(samples), type, format), 96 m_id(id) 97 { 98 } 99 100 OpenGLTexture::~OpenGLTexture() 101 { 102 if (m_id != 0) 103 { 104 OpenGLDevice::GetInstance().UnbindTexture(this); 105 glDeleteTextures(1, &m_id); 106 m_id = 0; 107 } 108 } 109 110 bool OpenGLTexture::UseTextureStorage(bool multisampled) 111 { 112 return GLAD_GL_ARB_texture_storage || (multisampled ? GLAD_GL_ES_VERSION_3_1 : GLAD_GL_ES_VERSION_3_0); 113 } 114 115 bool OpenGLTexture::UseTextureStorage() const 116 { 117 return UseTextureStorage(IsMultisampled()); 118 } 119 120 std::unique_ptr<OpenGLTexture> OpenGLTexture::Create(u32 width, u32 height, u32 layers, u32 levels, u32 samples, 121 Type type, Format format, const void* data, u32 data_pitch) 122 { 123 if (!ValidateConfig(width, height, layers, levels, samples, type, format)) 124 return nullptr; 125 126 if (layers > 1 && data) 127 { 128 ERROR_LOG("Loading texture array data not currently supported"); 129 return nullptr; 130 } 131 132 const GLenum target = 133 ((samples > 1) ? GL_TEXTURE_2D_MULTISAMPLE : ((layers > 1) ? GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D)); 134 const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(format, OpenGLDevice::IsGLES()); 135 136 OpenGLDevice::BindUpdateTextureUnit(); 137 138 glGetError(); 139 140 GLuint id; 141 glGenTextures(1, &id); 142 glBindTexture(target, id); 143 144 if (samples > 1) 145 { 146 Assert(!data); 147 if (UseTextureStorage(true)) 148 { 149 glTexStorage2DMultisample(target, samples, gl_internal_format, width, height, GL_FALSE); 150 } 151 else 152 { 153 glTexImage2DMultisample(target, samples, gl_internal_format, width, height, GL_FALSE); 154 } 155 156 glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0); 157 glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, levels); 158 } 159 else 160 { 161 const bool use_texture_storage = UseTextureStorage(false); 162 if (use_texture_storage) 163 { 164 if (layers > 1) 165 glTexStorage3D(target, levels, gl_internal_format, width, height, layers); 166 else 167 glTexStorage2D(target, levels, gl_internal_format, width, height); 168 } 169 170 if (!use_texture_storage || data) 171 { 172 const u32 pixel_size = GetPixelSize(format); 173 const u32 alignment = ((data_pitch % 4) == 0) ? 4 : (((data_pitch % 2) == 0) ? 2 : 1); 174 if (data) 175 { 176 GPUDevice::GetStatistics().buffer_streamed += data_pitch * height; 177 GPUDevice::GetStatistics().num_uploads++; 178 179 glPixelStorei(GL_UNPACK_ROW_LENGTH, data_pitch / pixel_size); 180 if (alignment != 4) 181 glPixelStorei(GL_UNPACK_ALIGNMENT, alignment); 182 } 183 184 const u8* data_ptr = static_cast<const u8*>(data); 185 u32 current_width = width; 186 u32 current_height = height; 187 for (u32 i = 0; i < levels; i++) 188 { 189 if (use_texture_storage) 190 { 191 if (layers > 1) 192 glTexSubImage3D(target, i, 0, 0, 0, current_width, current_height, layers, gl_format, gl_type, data_ptr); 193 else 194 glTexSubImage2D(target, i, 0, 0, current_width, current_height, gl_format, gl_type, data_ptr); 195 } 196 else 197 { 198 if (layers > 1) 199 glTexImage3D(target, i, gl_internal_format, current_width, current_height, layers, 0, gl_format, gl_type, 200 data_ptr); 201 else 202 glTexImage2D(target, i, gl_internal_format, current_width, current_height, 0, gl_format, gl_type, data_ptr); 203 } 204 205 if (data_ptr) 206 data_ptr += data_pitch * current_width; 207 208 current_width = (current_width > 1) ? (current_width / 2u) : current_width; 209 current_height = (current_height > 1) ? (current_height / 2u) : current_height; 210 211 // TODO: Incorrect assumption. 212 data_pitch = pixel_size * current_width; 213 } 214 215 if (data) 216 { 217 if (alignment != 4) 218 glPixelStorei(GL_UNPACK_ALIGNMENT, 4); 219 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 220 } 221 } 222 223 if (!use_texture_storage) 224 { 225 glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0); 226 glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, levels - 1); 227 } 228 } 229 230 GLenum error = glGetError(); 231 if (error != GL_NO_ERROR) 232 { 233 ERROR_LOG("Failed to create texture: 0x{:X}", error); 234 glDeleteTextures(1, &id); 235 return nullptr; 236 } 237 238 return std::unique_ptr<OpenGLTexture>(new OpenGLTexture(width, height, layers, levels, samples, type, format, id)); 239 } 240 241 void OpenGLTexture::CommitClear() 242 { 243 OpenGLDevice::GetInstance().CommitClear(this); 244 } 245 246 bool OpenGLTexture::Update(u32 x, u32 y, u32 width, u32 height, const void* data, u32 pitch, u32 layer /*= 0*/, 247 u32 level /*= 0*/) 248 { 249 // TODO: perf counters 250 251 // Worth using the PBO? Driver probably knows better... 252 const GLenum target = GetGLTarget(); 253 const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES()); 254 const u32 preferred_pitch = 255 Common::AlignUpPow2(static_cast<u32>(width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); 256 const u32 map_size = preferred_pitch * static_cast<u32>(height); 257 OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer(); 258 259 CommitClear(); 260 261 GPUDevice::GetStatistics().buffer_streamed += map_size; 262 GPUDevice::GetStatistics().num_uploads++; 263 264 OpenGLDevice::BindUpdateTextureUnit(); 265 glBindTexture(target, m_id); 266 267 if (!sb || map_size > sb->GetChunkSize()) 268 { 269 GL_INS_FMT("Not using PBO for map size {}", map_size); 270 glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / GetPixelSize()); 271 glTexSubImage2D(target, layer, x, y, width, height, gl_format, gl_type, data); 272 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 273 } 274 else 275 { 276 const auto map = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, map_size); 277 StringUtil::StrideMemCpy(map.pointer, preferred_pitch, data, pitch, width * GetPixelSize(), height); 278 sb->Unmap(map_size); 279 sb->Bind(); 280 281 glPixelStorei(GL_UNPACK_ROW_LENGTH, preferred_pitch / GetPixelSize()); 282 glTexSubImage2D(GL_TEXTURE_2D, layer, x, y, width, height, gl_format, gl_type, 283 reinterpret_cast<void*>(static_cast<uintptr_t>(map.buffer_offset))); 284 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 285 286 sb->Unbind(); 287 } 288 289 glBindTexture(target, 0); 290 return true; 291 } 292 293 bool OpenGLTexture::Map(void** map, u32* map_stride, u32 x, u32 y, u32 width, u32 height, u32 layer /*= 0*/, 294 u32 level /*= 0*/) 295 { 296 if ((x + width) > GetMipWidth(level) || (y + height) > GetMipHeight(level) || layer > m_layers || level > m_levels) 297 return false; 298 299 const u32 pitch = Common::AlignUpPow2(static_cast<u32>(width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); 300 const u32 upload_size = pitch * static_cast<u32>(height); 301 OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer(); 302 if (!sb || upload_size > sb->GetSize()) 303 return false; 304 305 const auto res = sb->Map(TEXTURE_UPLOAD_ALIGNMENT, upload_size); 306 *map = res.pointer; 307 *map_stride = pitch; 308 309 m_map_offset = res.buffer_offset; 310 m_map_x = static_cast<u16>(x); 311 m_map_y = static_cast<u16>(y); 312 m_map_width = static_cast<u16>(width); 313 m_map_height = static_cast<u16>(height); 314 m_map_layer = static_cast<u8>(layer); 315 m_map_level = static_cast<u8>(level); 316 return true; 317 } 318 319 void OpenGLTexture::Unmap() 320 { 321 CommitClear(); 322 323 const u32 pitch = Common::AlignUpPow2(static_cast<u32>(m_map_width) * GetPixelSize(), TEXTURE_UPLOAD_PITCH_ALIGNMENT); 324 const u32 upload_size = pitch * static_cast<u32>(m_map_height); 325 326 GPUDevice::GetStatistics().buffer_streamed += upload_size; 327 GPUDevice::GetStatistics().num_uploads++; 328 329 OpenGLStreamBuffer* sb = OpenGLDevice::GetTextureStreamBuffer(); 330 sb->Unmap(upload_size); 331 sb->Bind(); 332 333 OpenGLDevice::BindUpdateTextureUnit(); 334 335 const GLenum target = GetGLTarget(); 336 glBindTexture(target, m_id); 337 338 glPixelStorei(GL_UNPACK_ROW_LENGTH, pitch / GetPixelSize()); 339 340 const auto [gl_internal_format, gl_format, gl_type] = GetPixelFormatMapping(m_format, OpenGLDevice::IsGLES()); 341 if (IsTextureArray()) 342 { 343 glTexSubImage3D(target, m_map_level, m_map_x, m_map_y, m_map_layer, m_map_width, m_map_height, 1, gl_format, 344 gl_type, reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset))); 345 } 346 else 347 { 348 glTexSubImage2D(target, m_map_level, m_map_x, m_map_y, m_map_width, m_map_height, gl_format, gl_type, 349 reinterpret_cast<void*>(static_cast<uintptr_t>(m_map_offset))); 350 } 351 352 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); 353 354 glBindTexture(target, 0); 355 356 sb->Unbind(); 357 } 358 359 void OpenGLTexture::SetDebugName(std::string_view name) 360 { 361 #ifdef _DEBUG 362 if (glObjectLabel) 363 glObjectLabel(GL_TEXTURE, m_id, static_cast<GLsizei>(name.length()), static_cast<const GLchar*>(name.data())); 364 #endif 365 } 366 367 #if 0 368 // If we don't have border clamp.. too bad, just hope for the best. 369 if (!m_gl_context->IsGLES() || GLAD_GL_ES_VERSION_3_2 || GLAD_GL_NV_texture_border_clamp || 370 GLAD_GL_EXT_texture_border_clamp || GLAD_GL_OES_texture_border_clamp) 371 #endif 372 373 ////////////////////////////////////////////////////////////////////////// 374 375 OpenGLSampler::OpenGLSampler(GLuint id) : GPUSampler(), m_id(id) 376 { 377 } 378 379 OpenGLSampler::~OpenGLSampler() 380 { 381 OpenGLDevice::GetInstance().UnbindSampler(m_id); 382 } 383 384 void OpenGLSampler::SetDebugName(std::string_view name) 385 { 386 #ifdef _DEBUG 387 if (glObjectLabel) 388 glObjectLabel(GL_SAMPLER, m_id, static_cast<GLsizei>(name.length()), static_cast<const GLchar*>(name.data())); 389 #endif 390 } 391 392 std::unique_ptr<GPUSampler> OpenGLDevice::CreateSampler(const GPUSampler::Config& config) 393 { 394 static constexpr std::array<GLenum, static_cast<u8>(GPUSampler::AddressMode::MaxCount)> ta = {{ 395 GL_REPEAT, // Repeat 396 GL_CLAMP_TO_EDGE, // ClampToEdge 397 GL_CLAMP_TO_BORDER, // ClampToBorder 398 GL_MIRRORED_REPEAT, // MirrorRepeat 399 }}; 400 401 // [mipmap_on_off][mipmap][filter] 402 static constexpr GLenum filters[2][2][2] = { 403 { 404 // mipmap=off 405 {GL_NEAREST, GL_LINEAR}, // mipmap=nearest 406 {GL_NEAREST, GL_LINEAR}, // mipmap=linear 407 }, 408 { 409 // mipmap=on 410 {GL_NEAREST_MIPMAP_NEAREST, GL_LINEAR_MIPMAP_NEAREST}, // mipmap=nearest 411 {GL_NEAREST_MIPMAP_LINEAR, GL_LINEAR_MIPMAP_LINEAR}, // mipmap=linear 412 }, 413 }; 414 415 GLuint sampler; 416 glGetError(); 417 glGenSamplers(1, &sampler); 418 if (glGetError() != GL_NO_ERROR) 419 { 420 ERROR_LOG("Failed to create sampler: {:X}", sampler); 421 return {}; 422 } 423 424 glSamplerParameteri(sampler, GL_TEXTURE_WRAP_S, ta[static_cast<u8>(config.address_u.GetValue())]); 425 glSamplerParameteri(sampler, GL_TEXTURE_WRAP_T, ta[static_cast<u8>(config.address_v.GetValue())]); 426 glSamplerParameteri(sampler, GL_TEXTURE_WRAP_R, ta[static_cast<u8>(config.address_w.GetValue())]); 427 const u8 mipmap_on_off = (config.min_lod != 0 || config.max_lod != 0); 428 glSamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, 429 filters[mipmap_on_off][static_cast<u8>(config.mip_filter.GetValue())] 430 [static_cast<u8>(config.min_filter.GetValue())]); 431 glSamplerParameteri( 432 sampler, GL_TEXTURE_MAG_FILTER, 433 filters[0][static_cast<u8>(config.mip_filter.GetValue())][static_cast<u8>(config.mag_filter.GetValue())]); 434 glSamplerParameterf(sampler, GL_TEXTURE_MIN_LOD, static_cast<float>(config.min_lod)); 435 glSamplerParameterf(sampler, GL_TEXTURE_MAX_LOD, static_cast<float>(config.max_lod)); 436 glSamplerParameterfv(sampler, GL_TEXTURE_BORDER_COLOR, config.GetBorderFloatColor().data()); 437 if (config.anisotropy > 1) 438 glSamplerParameterf(sampler, GL_TEXTURE_MAX_ANISOTROPY, static_cast<float>(config.anisotropy.GetValue())); 439 440 return std::unique_ptr<GPUSampler>(new OpenGLSampler(sampler)); 441 } 442 443 ////////////////////////////////////////////////////////////////////////// 444 445 void OpenGLDevice::CommitClear(OpenGLTexture* tex) 446 { 447 switch (tex->GetState()) 448 { 449 case GPUTexture::State::Invalidated: 450 { 451 tex->SetState(GPUTexture::State::Dirty); 452 453 if (glInvalidateTexImage) 454 { 455 glInvalidateTexImage(tex->GetGLId(), 0); 456 } 457 else if (glInvalidateFramebuffer) 458 { 459 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo); 460 461 const GLenum attachment = tex->IsDepthStencil() ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0; 462 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, tex->GetGLTarget(), tex->GetGLId(), 0); 463 464 glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment); 465 466 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, GL_TEXTURE_2D, 0, 0); 467 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo); 468 } 469 } 470 break; 471 472 case GPUTexture::State::Cleared: 473 { 474 tex->SetState(GPUTexture::State::Dirty); 475 476 if (glClearTexImage) 477 { 478 const auto [gl_internal_format, gl_format, gl_type] = 479 OpenGLTexture::GetPixelFormatMapping(tex->GetFormat(), m_gl_context->IsGLES()); 480 glClearTexImage(tex->GetGLId(), 0, gl_format, gl_type, &tex->GetClearValue()); 481 } 482 else 483 { 484 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_write_fbo); 485 486 const GLenum attachment = tex->IsDepthStencil() ? GL_DEPTH_ATTACHMENT : GL_COLOR_ATTACHMENT0; 487 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, tex->GetGLTarget(), tex->GetGLId(), 0); 488 489 if (tex->IsDepthStencil()) 490 { 491 const float depth = tex->GetClearDepth(); 492 glDisable(GL_SCISSOR_TEST); 493 if (!m_last_depth_state.depth_write) 494 glDepthMask(GL_TRUE); 495 glClearBufferfv(GL_DEPTH, 0, &depth); 496 if (!m_last_depth_state.depth_write) 497 glDepthMask(GL_FALSE); 498 glEnable(GL_SCISSOR_TEST); 499 } 500 else 501 { 502 const auto color = tex->GetUNormClearColor(); 503 glDisable(GL_SCISSOR_TEST); 504 if (m_last_blend_state.write_mask != 0xf) 505 glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); 506 glClearBufferfv(GL_COLOR, 0, color.data()); 507 if (m_last_blend_state.write_mask != 0xf) 508 { 509 glColorMask(m_last_blend_state.write_r, m_last_blend_state.write_g, m_last_blend_state.write_b, 510 m_last_blend_state.write_a); 511 } 512 glEnable(GL_SCISSOR_TEST); 513 } 514 515 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, GL_TEXTURE_2D, 0, 0); 516 glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_current_fbo); 517 } 518 } 519 break; 520 521 case GPUTexture::State::Dirty: 522 break; 523 524 default: 525 UnreachableCode(); 526 break; 527 } 528 } 529 530 void OpenGLDevice::CommitRTClearInFB(OpenGLTexture* tex, u32 idx) 531 { 532 switch (tex->GetState()) 533 { 534 case GPUTexture::State::Invalidated: 535 { 536 const GLenum attachment = GL_COLOR_ATTACHMENT0 + idx; 537 glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment); 538 tex->SetState(GPUTexture::State::Dirty); 539 } 540 break; 541 542 case GPUTexture::State::Cleared: 543 { 544 const auto color = tex->GetUNormClearColor(); 545 glDisable(GL_SCISSOR_TEST); 546 if (m_last_blend_state.write_mask != 0xf) 547 glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); 548 glClearBufferfv(GL_COLOR, static_cast<GLint>(idx), color.data()); 549 if (m_last_blend_state.write_mask != 0xf) 550 { 551 glColorMask(m_last_blend_state.write_r, m_last_blend_state.write_g, m_last_blend_state.write_b, 552 m_last_blend_state.write_a); 553 } 554 glEnable(GL_SCISSOR_TEST); 555 tex->SetState(GPUTexture::State::Dirty); 556 } 557 break; 558 559 case GPUTexture::State::Dirty: 560 break; 561 562 default: 563 UnreachableCode(); 564 break; 565 } 566 } 567 568 void OpenGLDevice::CommitDSClearInFB(OpenGLTexture* tex) 569 { 570 switch (tex->GetState()) 571 { 572 case GPUTexture::State::Invalidated: 573 { 574 const GLenum attachment = GL_DEPTH_ATTACHMENT; 575 glInvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment); 576 tex->SetState(GPUTexture::State::Dirty); 577 } 578 break; 579 580 case GPUTexture::State::Cleared: 581 { 582 const float depth = tex->GetClearDepth(); 583 glDisable(GL_SCISSOR_TEST); 584 if (!m_last_depth_state.depth_write) 585 glDepthMask(GL_TRUE); 586 glClearBufferfv(GL_DEPTH, 0, &depth); 587 if (!m_last_depth_state.depth_write) 588 glDepthMask(GL_FALSE); 589 glEnable(GL_SCISSOR_TEST); 590 tex->SetState(GPUTexture::State::Dirty); 591 } 592 break; 593 594 case GPUTexture::State::Dirty: 595 break; 596 597 default: 598 UnreachableCode(); 599 break; 600 } 601 } 602 603 ////////////////////////////////////////////////////////////////////////// 604 605 OpenGLTextureBuffer::OpenGLTextureBuffer(Format format, u32 size_in_elements, 606 std::unique_ptr<OpenGLStreamBuffer> buffer, GLuint texture_id) 607 : GPUTextureBuffer(format, size_in_elements), m_buffer(std::move(buffer)), m_texture_id(texture_id) 608 { 609 } 610 611 OpenGLTextureBuffer::~OpenGLTextureBuffer() 612 { 613 OpenGLDevice& dev = OpenGLDevice::GetInstance(); 614 if (m_texture_id != 0) 615 { 616 dev.UnbindTexture(m_texture_id); 617 glDeleteTextures(1, &m_texture_id); 618 } 619 else if (dev.GetFeatures().texture_buffers_emulated_with_ssbo && m_buffer) 620 { 621 dev.UnbindSSBO(m_buffer->GetGLBufferId()); 622 } 623 } 624 625 bool OpenGLTextureBuffer::CreateBuffer() 626 { 627 const bool use_ssbo = OpenGLDevice::GetInstance().GetFeatures().texture_buffers_emulated_with_ssbo; 628 629 const GLenum target = (use_ssbo ? GL_SHADER_STORAGE_BUFFER : GL_TEXTURE_BUFFER); 630 m_buffer = OpenGLStreamBuffer::Create(target, GetSizeInBytes()); 631 if (!m_buffer) 632 return false; 633 634 if (!use_ssbo) 635 { 636 glGetError(); 637 glGenTextures(1, &m_texture_id); 638 if (const GLenum err = glGetError(); err != GL_NO_ERROR) 639 { 640 ERROR_LOG("Failed to create texture for buffer: 0x{:X}", err); 641 return false; 642 } 643 644 OpenGLDevice::BindUpdateTextureUnit(); 645 glBindTexture(GL_TEXTURE_BUFFER, m_texture_id); 646 glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, m_buffer->GetGLBufferId()); 647 } 648 649 m_buffer->Unbind(); 650 651 return true; 652 } 653 654 void* OpenGLTextureBuffer::Map(u32 required_elements) 655 { 656 const u32 esize = GetElementSize(m_format); 657 const auto map = m_buffer->Map(esize, esize * required_elements); 658 m_current_position = map.index_aligned; 659 return map.pointer; 660 } 661 662 void OpenGLTextureBuffer::Unmap(u32 used_elements) 663 { 664 const u32 size = used_elements * GetElementSize(m_format); 665 GPUDevice::GetStatistics().buffer_streamed += size; 666 GPUDevice::GetStatistics().num_uploads++; 667 m_buffer->Unmap(size); 668 } 669 670 void OpenGLTextureBuffer::SetDebugName(std::string_view name) 671 { 672 #ifdef _DEBUG 673 if (glObjectLabel) 674 { 675 glObjectLabel(GL_TEXTURE, m_buffer->GetGLBufferId(), static_cast<GLsizei>(name.length()), 676 static_cast<const GLchar*>(name.data())); 677 } 678 #endif 679 } 680 681 std::unique_ptr<GPUTextureBuffer> OpenGLDevice::CreateTextureBuffer(GPUTextureBuffer::Format format, 682 u32 size_in_elements) 683 { 684 const bool use_ssbo = OpenGLDevice::GetInstance().GetFeatures().texture_buffers_emulated_with_ssbo; 685 const u32 buffer_size = GPUTextureBuffer::GetElementSize(format) * size_in_elements; 686 687 if (use_ssbo) 688 { 689 GLint64 max_ssbo_size = 0; 690 glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size); 691 if (static_cast<GLint64>(buffer_size) > max_ssbo_size) 692 { 693 ERROR_LOG("Buffer size of {} not supported, max is {}", buffer_size, max_ssbo_size); 694 return {}; 695 } 696 } 697 698 const GLenum target = (use_ssbo ? GL_SHADER_STORAGE_BUFFER : GL_TEXTURE_BUFFER); 699 std::unique_ptr<OpenGLStreamBuffer> buffer = OpenGLStreamBuffer::Create(target, buffer_size); 700 if (!buffer) 701 return {}; 702 buffer->Unbind(); 703 704 GLuint texture_id = 0; 705 if (!use_ssbo) 706 { 707 glGetError(); 708 glGenTextures(1, &texture_id); 709 if (const GLenum err = glGetError(); err != GL_NO_ERROR) 710 { 711 ERROR_LOG("Failed to create texture for buffer: 0x{:X}", err); 712 return {}; 713 } 714 715 OpenGLDevice::BindUpdateTextureUnit(); 716 glBindTexture(GL_TEXTURE_BUFFER, texture_id); 717 glTexBuffer(GL_TEXTURE_BUFFER, GL_R16UI, buffer->GetGLBufferId()); 718 } 719 720 return std::unique_ptr<GPUTextureBuffer>( 721 new OpenGLTextureBuffer(format, size_in_elements, std::move(buffer), texture_id)); 722 } 723 724 OpenGLDownloadTexture::OpenGLDownloadTexture(u32 width, u32 height, GPUTexture::Format format, bool imported, 725 GLuint buffer_id, u8* cpu_buffer, u32 buffer_size, const u8* map_ptr, 726 u32 map_pitch) 727 : GPUDownloadTexture(width, height, format, imported), m_buffer_id(buffer_id), m_buffer_size(buffer_size), 728 m_cpu_buffer(cpu_buffer) 729 { 730 m_map_pointer = map_ptr; 731 m_current_pitch = map_pitch; 732 } 733 734 OpenGLDownloadTexture::~OpenGLDownloadTexture() 735 { 736 if (m_buffer_id != 0) 737 { 738 if (m_sync) 739 glDeleteSync(m_sync); 740 741 if (m_map_pointer) 742 { 743 glBindBuffer(GL_PIXEL_PACK_BUFFER, m_buffer_id); 744 glUnmapBuffer(GL_PIXEL_PACK_BUFFER); 745 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); 746 } 747 748 glDeleteBuffers(1, &m_buffer_id); 749 } 750 else if (m_cpu_buffer && !m_is_imported) 751 { 752 Common::AlignedFree(m_cpu_buffer); 753 } 754 } 755 756 std::unique_ptr<OpenGLDownloadTexture> OpenGLDownloadTexture::Create(u32 width, u32 height, GPUTexture::Format format, 757 void* memory, size_t memory_size, u32 memory_pitch) 758 { 759 const u32 buffer_pitch = 760 memory ? memory_pitch : 761 Common::AlignUpPow2(GPUTexture::CalcUploadPitch(format, width), TEXTURE_UPLOAD_PITCH_ALIGNMENT); 762 const u32 buffer_size = memory ? static_cast<u32>(memory_size) : (height * buffer_pitch); 763 764 const bool use_buffer_storage = (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage) && 765 !memory && OpenGLDevice::ShouldUsePBOsForDownloads(); 766 if (use_buffer_storage) 767 { 768 GLuint buffer_id; 769 glGenBuffers(1, &buffer_id); 770 glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_id); 771 772 const u32 flags = GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; 773 const u32 map_flags = GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT; 774 775 if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage) 776 glBufferStorage(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, flags); 777 else if (GLAD_GL_EXT_buffer_storage) 778 glBufferStorageEXT(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, flags); 779 780 u8* buffer_map = static_cast<u8*>(glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, buffer_size, map_flags)); 781 782 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); 783 784 if (!buffer_map) 785 { 786 ERROR_LOG("Failed to map persistent download buffer"); 787 glDeleteBuffers(1, &buffer_id); 788 return {}; 789 } 790 791 return std::unique_ptr<OpenGLDownloadTexture>(new OpenGLDownloadTexture( 792 width, height, format, false, buffer_id, nullptr, buffer_size, buffer_map, buffer_pitch)); 793 } 794 795 // Fallback to glReadPixels() + CPU buffer. 796 const bool imported = (memory != nullptr); 797 u8* cpu_buffer = 798 imported ? static_cast<u8*>(memory) : static_cast<u8*>(Common::AlignedMalloc(buffer_size, VECTOR_ALIGNMENT)); 799 if (!cpu_buffer) 800 return {}; 801 802 return std::unique_ptr<OpenGLDownloadTexture>( 803 new OpenGLDownloadTexture(width, height, format, imported, 0, cpu_buffer, buffer_size, cpu_buffer, buffer_pitch)); 804 } 805 806 void OpenGLDownloadTexture::CopyFromTexture(u32 dst_x, u32 dst_y, GPUTexture* src, u32 src_x, u32 src_y, u32 width, 807 u32 height, u32 src_layer, u32 src_level, bool use_transfer_pitch) 808 { 809 OpenGLTexture* const srcgl = static_cast<OpenGLTexture*>(src); 810 OpenGLDevice& dev = OpenGLDevice::GetInstance(); 811 812 DebugAssert(srcgl->GetFormat() == m_format); 813 DebugAssert(src_level < srcgl->GetLevels()); 814 DebugAssert((src_x + width) <= srcgl->GetMipWidth(src_level) && (src_y + height) <= srcgl->GetMipHeight(src_level)); 815 DebugAssert((dst_x + width) <= m_width && (dst_y + height) <= m_height); 816 DebugAssert((dst_x == 0 && dst_y == 0) || !use_transfer_pitch); 817 DebugAssert(!m_is_imported || !use_transfer_pitch); 818 819 dev.CommitClear(srcgl); 820 821 u32 copy_offset, copy_size, copy_rows; 822 if (!m_is_imported) 823 m_current_pitch = GetTransferPitch(use_transfer_pitch ? width : m_width, TEXTURE_UPLOAD_PITCH_ALIGNMENT); 824 GetTransferSize(dst_x, dst_y, width, height, m_current_pitch, ©_offset, ©_size, ©_rows); 825 dev.GetStatistics().num_downloads++; 826 827 GLint alignment; 828 if (m_current_pitch & 1) 829 alignment = 1; 830 else if (m_current_pitch & 2) 831 alignment = 2; 832 else 833 alignment = 4; 834 835 glPixelStorei(GL_PACK_ALIGNMENT, alignment); 836 glPixelStorei(GL_PACK_ROW_LENGTH, GPUTexture::CalcUploadRowLengthFromPitch(m_format, m_current_pitch)); 837 838 if (!m_cpu_buffer) 839 { 840 // Read to PBO. 841 glBindBuffer(GL_PIXEL_PACK_BUFFER, m_buffer_id); 842 } 843 844 const auto [gl_internal_format, gl_format, gl_type] = 845 OpenGLTexture::GetPixelFormatMapping(srcgl->GetFormat(), dev.IsGLES()); 846 if (GLAD_GL_VERSION_4_5 || GLAD_GL_ARB_get_texture_sub_image) 847 { 848 glGetTextureSubImage(srcgl->GetGLId(), src_level, src_x, src_y, 0, width, height, 1, gl_format, gl_type, 849 m_current_pitch * height, m_cpu_buffer + copy_offset); 850 } 851 else 852 { 853 glBindFramebuffer(GL_READ_FRAMEBUFFER, dev.m_read_fbo); 854 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, srcgl->GetGLId(), 0); 855 856 glReadPixels(src_x, src_y, width, height, gl_format, gl_type, m_cpu_buffer + copy_offset); 857 858 glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); 859 } 860 861 if (m_cpu_buffer) 862 { 863 // If using CPU buffers, we never need to flush. 864 m_needs_flush = false; 865 } 866 else 867 { 868 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); 869 870 // Create a sync object so we know when the GPU is done copying. 871 if (m_sync) 872 glDeleteSync(m_sync); 873 874 m_sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); 875 m_needs_flush = true; 876 } 877 878 glPixelStorei(GL_PACK_ROW_LENGTH, 0); 879 } 880 881 bool OpenGLDownloadTexture::Map(u32 x, u32 y, u32 width, u32 height) 882 { 883 // Either always mapped, or CPU buffer. 884 return true; 885 } 886 887 void OpenGLDownloadTexture::Unmap() 888 { 889 // Either always mapped, or CPU buffer. 890 } 891 892 void OpenGLDownloadTexture::Flush() 893 { 894 // If we're using CPU buffers, we did the readback synchronously... 895 if (!m_needs_flush || !m_sync) 896 return; 897 898 m_needs_flush = false; 899 900 glClientWaitSync(m_sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); 901 glDeleteSync(m_sync); 902 m_sync = {}; 903 } 904 905 void OpenGLDownloadTexture::SetDebugName(std::string_view name) 906 { 907 if (name.empty()) 908 return; 909 910 if (glObjectLabel) 911 glObjectLabel(GL_BUFFER, m_buffer_id, static_cast<GLsizei>(name.length()), name.data()); 912 } 913 914 std::unique_ptr<GPUDownloadTexture> OpenGLDevice::CreateDownloadTexture(u32 width, u32 height, 915 GPUTexture::Format format) 916 { 917 return OpenGLDownloadTexture::Create(width, height, format, nullptr, 0, 0); 918 } 919 920 std::unique_ptr<GPUDownloadTexture> OpenGLDevice::CreateDownloadTexture(u32 width, u32 height, 921 GPUTexture::Format format, void* memory, 922 size_t memory_size, u32 memory_stride) 923 { 924 // not _really_ memory importing, but PBOs are broken on Intel.... 925 return OpenGLDownloadTexture::Create(width, height, format, memory, memory_size, memory_stride); 926 }