dma.cpp (34249B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 #include "dma.h" 5 #include "bus.h" 6 #include "cdrom.h" 7 #include "cpu_core.h" 8 #include "gpu.h" 9 #include "imgui.h" 10 #include "interrupt_controller.h" 11 #include "mdec.h" 12 #include "pad.h" 13 #include "spu.h" 14 #include "system.h" 15 16 #include "util/imgui_manager.h" 17 #include "util/state_wrapper.h" 18 19 #include "common/bitfield.h" 20 #include "common/log.h" 21 #include "common/string_util.h" 22 23 #include "fmt/format.h" 24 25 #include <array> 26 #include <memory> 27 #include <vector> 28 29 Log_SetChannel(DMA); 30 31 namespace DMA { 32 namespace { 33 34 enum class SyncMode : u32 35 { 36 Manual = 0, 37 Request = 1, 38 LinkedList = 2, 39 Reserved = 3 40 }; 41 42 static constexpr PhysicalMemoryAddress BASE_ADDRESS_MASK = UINT32_C(0x00FFFFFF); 43 static constexpr PhysicalMemoryAddress TRANSFER_ADDRESS_MASK = UINT32_C(0x00FFFFFC); 44 static constexpr PhysicalMemoryAddress LINKED_LIST_TERMINATOR = UINT32_C(0x00FFFFFF); 45 46 static constexpr TickCount LINKED_LIST_HEADER_READ_TICKS = 10; 47 static constexpr TickCount LINKED_LIST_BLOCK_SETUP_TICKS = 5; 48 static constexpr TickCount SLICE_SIZE_WHEN_TRANSMITTING_PAD = 10; 49 50 struct ChannelState 51 { 52 u32 base_address = 0; 53 54 union BlockControl 55 { 56 u32 bits; 57 union 58 { 59 BitField<u32, u32, 0, 16> word_count; 60 61 u32 GetWordCount() const { return (word_count == 0) ? 0x10000 : word_count; } 62 } manual; 63 union 64 { 65 BitField<u32, u32, 0, 16> block_size; 66 BitField<u32, u32, 16, 16> block_count; 67 68 u32 GetBlockSize() const { return (block_size == 0) ? 0x10000 : block_size; } 69 u32 GetBlockCount() const { return (block_count == 0) ? 0x10000 : block_count; } 70 } request; 71 } block_control = {}; 72 73 union ChannelControl 74 { 75 u32 bits; 76 BitField<u32, bool, 0, 1> copy_to_device; 77 BitField<u32, bool, 1, 1> address_step_reverse; 78 BitField<u32, bool, 8, 1> chopping_enable; 79 BitField<u32, SyncMode, 9, 2> sync_mode; 80 BitField<u32, u32, 16, 3> chopping_dma_window_size; 81 BitField<u32, u32, 20, 3> chopping_cpu_window_size; 82 BitField<u32, bool, 24, 1> enable_busy; 83 BitField<u32, bool, 28, 1> start_trigger; 84 85 static constexpr u32 WRITE_MASK = 0b01110001'01110111'00000111'00000011; 86 } channel_control = {}; 87 88 bool request = false; 89 }; 90 91 union DPCRRegister 92 { 93 u32 bits; 94 95 BitField<u32, u8, 0, 3> MDECin_priority; 96 BitField<u32, bool, 3, 1> MDECin_master_enable; 97 BitField<u32, u8, 4, 3> MDECout_priority; 98 BitField<u32, bool, 7, 1> MDECout_master_enable; 99 BitField<u32, u8, 8, 3> GPU_priority; 100 BitField<u32, bool, 10, 1> GPU_master_enable; 101 BitField<u32, u8, 12, 3> CDROM_priority; 102 BitField<u32, bool, 15, 1> CDROM_master_enable; 103 BitField<u32, u8, 16, 3> SPU_priority; 104 BitField<u32, bool, 19, 1> SPU_master_enable; 105 BitField<u32, u8, 20, 3> PIO_priority; 106 BitField<u32, bool, 23, 1> PIO_master_enable; 107 BitField<u32, u8, 24, 3> OTC_priority; 108 BitField<u32, bool, 27, 1> OTC_master_enable; 109 BitField<u32, u8, 28, 3> priority_offset; 110 BitField<u32, bool, 31, 1> unused; 111 112 ALWAYS_INLINE u8 GetPriority(Channel channel) const { return ((bits >> (static_cast<u8>(channel) * 4)) & u32(3)); } 113 ALWAYS_INLINE bool GetMasterEnable(Channel channel) const 114 { 115 return ConvertToBoolUnchecked((bits >> (static_cast<u8>(channel) * 4 + 3)) & u32(1)); 116 } 117 }; 118 119 static constexpr u32 DICR_WRITE_MASK = 0b00000000'11111111'10000000'00111111; 120 static constexpr u32 DICR_RESET_MASK = 0b01111111'00000000'00000000'00000000; 121 union DICRRegister 122 { 123 u32 bits; 124 125 BitField<u32, bool, 15, 1> bus_error; 126 BitField<u32, bool, 16, 1> MDECin_irq_enable; 127 BitField<u32, bool, 17, 1> MDECout_irq_enable; 128 BitField<u32, bool, 18, 1> GPU_irq_enable; 129 BitField<u32, bool, 19, 1> CDROM_irq_enable; 130 BitField<u32, bool, 20, 1> SPU_irq_enable; 131 BitField<u32, bool, 21, 1> PIO_irq_enable; 132 BitField<u32, bool, 22, 1> OTC_irq_enable; 133 BitField<u32, bool, 23, 1> master_enable; 134 BitField<u32, bool, 24, 1> MDECin_irq_flag; 135 BitField<u32, bool, 25, 1> MDECout_irq_flag; 136 BitField<u32, bool, 26, 1> GPU_irq_flag; 137 BitField<u32, bool, 27, 1> CDROM_irq_flag; 138 BitField<u32, bool, 28, 1> SPU_irq_flag; 139 BitField<u32, bool, 29, 1> PIO_irq_flag; 140 BitField<u32, bool, 30, 1> OTC_irq_flag; 141 BitField<u32, bool, 31, 1> master_flag; 142 143 ALWAYS_INLINE bool GetIRQEnabled(Channel channel) const 144 { 145 return ConvertToBoolUnchecked((bits >> (static_cast<u8>(channel) + 16)) & 1u); 146 } 147 148 ALWAYS_INLINE bool GetIRQFlag(Channel channel) const 149 { 150 return ConvertToBoolUnchecked((bits >> (static_cast<u8>(channel) + 24)) & 1u); 151 } 152 153 ALWAYS_INLINE void SetIRQFlag(Channel channel) { bits |= (1u << (static_cast<u8>(channel) + 24)); } 154 155 ALWAYS_INLINE bool ShouldSetIRQFlag(Channel channel) 156 { 157 // bus errors trigger IRQ unconditionally, completion requires the master flag to be enabled 158 return ConvertToBoolUnchecked(((bits >> (static_cast<u8>(channel) + 16)) & ((bits >> 23) & 1u))); 159 } 160 161 ALWAYS_INLINE void UpdateMasterFlag() 162 { 163 master_flag = 164 (((bits & (1u << 15)) != 0u) || // bus error, or 165 (((bits & (1u << 23)) != 0u) != 0u && (bits & (0b1111111u << 24)) != 0u)); // master enable + irq on any channel 166 } 167 }; 168 } // namespace 169 170 static void ClearState(); 171 172 // is everything enabled for a channel to operate? 173 static bool CanTransferChannel(Channel channel, bool ignore_halt); 174 static bool IsTransferHalted(); 175 static void UpdateIRQ(); 176 177 static void HaltTransfer(TickCount duration); 178 static void UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late); 179 180 template<Channel channel> 181 static bool TransferChannel(); 182 183 static bool IsLinkedListTerminator(PhysicalMemoryAddress address); 184 static bool CheckForBusError(Channel channel, ChannelState& cs, PhysicalMemoryAddress address, u32 size); 185 static void CompleteTransfer(Channel channel, ChannelState& cs); 186 187 // from device -> memory 188 template<Channel channel> 189 static TickCount TransferDeviceToMemory(u32 address, u32 increment, u32 word_count); 190 191 // from memory -> device 192 template<Channel channel> 193 static TickCount TransferMemoryToDevice(u32 address, u32 increment, u32 word_count); 194 195 static TickCount GetMaxSliceTicks(TickCount max_slice_size); 196 197 // configuration 198 namespace { 199 struct DMAState 200 { 201 std::vector<u32> transfer_buffer; 202 TimingEvent unhalt_event{"DMA Transfer Unhalt", 1, 1, &DMA::UnhaltTransfer, nullptr}; 203 TickCount halt_ticks_remaining = 0; 204 205 std::array<ChannelState, NUM_CHANNELS> channels; 206 DPCRRegister DPCR = {}; 207 DICRRegister DICR = {}; 208 }; 209 } // namespace 210 211 ALIGN_TO_CACHE_LINE static DMAState s_state; 212 213 static constexpr std::array<bool (*)(), NUM_CHANNELS> s_channel_transfer_functions = {{ 214 &TransferChannel<Channel::MDECin>, 215 &TransferChannel<Channel::MDECout>, 216 &TransferChannel<Channel::GPU>, 217 &TransferChannel<Channel::CDROM>, 218 &TransferChannel<Channel::SPU>, 219 &TransferChannel<Channel::PIO>, 220 &TransferChannel<Channel::OTC>, 221 }}; 222 223 [[maybe_unused]] static constexpr std::array<const char*, NUM_CHANNELS> s_channel_names = { 224 {"MDECin", "MDECout", "GPU", "CDROM", "SPU", "PIO", "OTC"}}; 225 226 }; // namespace DMA 227 228 template<> 229 struct fmt::formatter<DMA::Channel> : fmt::formatter<fmt::string_view> 230 { 231 auto format(DMA::Channel channel, fmt::format_context& ctx) const 232 { 233 return formatter<fmt::string_view>::format(DMA::s_channel_names[static_cast<u32>(channel)], ctx); 234 } 235 }; 236 237 void DMA::Initialize() 238 { 239 s_state.unhalt_event.SetInterval(g_settings.dma_halt_ticks); 240 Reset(); 241 } 242 243 void DMA::Shutdown() 244 { 245 ClearState(); 246 s_state.unhalt_event.Deactivate(); 247 } 248 249 void DMA::Reset() 250 { 251 ClearState(); 252 s_state.unhalt_event.Deactivate(); 253 } 254 255 void DMA::ClearState() 256 { 257 for (u32 i = 0; i < NUM_CHANNELS; i++) 258 { 259 ChannelState& cs = s_state.channels[i]; 260 cs.base_address = 0; 261 cs.block_control.bits = 0; 262 cs.channel_control.bits = 0; 263 cs.request = false; 264 } 265 266 s_state.DPCR.bits = 0x07654321; 267 s_state.DICR.bits = 0; 268 269 s_state.halt_ticks_remaining = 0; 270 } 271 272 bool DMA::DoState(StateWrapper& sw) 273 { 274 sw.Do(&s_state.halt_ticks_remaining); 275 276 for (u32 i = 0; i < NUM_CHANNELS; i++) 277 { 278 ChannelState& cs = s_state.channels[i]; 279 sw.Do(&cs.base_address); 280 sw.Do(&cs.block_control.bits); 281 sw.Do(&cs.channel_control.bits); 282 sw.Do(&cs.request); 283 } 284 285 sw.Do(&s_state.DPCR.bits); 286 sw.Do(&s_state.DICR.bits); 287 288 if (sw.IsReading()) 289 { 290 if (s_state.halt_ticks_remaining > 0) 291 s_state.unhalt_event.SetIntervalAndSchedule(s_state.halt_ticks_remaining); 292 else 293 s_state.unhalt_event.Deactivate(); 294 } 295 296 return !sw.HasError(); 297 } 298 299 u32 DMA::ReadRegister(u32 offset) 300 { 301 const u32 channel_index = offset >> 4; 302 if (channel_index < 7) 303 { 304 switch (offset & UINT32_C(0x0F)) 305 { 306 case 0x00: 307 { 308 TRACE_LOG("DMA[{}] base address -> 0x{:08X}", static_cast<Channel>(channel_index), 309 s_state.channels[channel_index].base_address); 310 return s_state.channels[channel_index].base_address; 311 } 312 case 0x04: 313 { 314 TRACE_LOG("DMA[{}] block control -> 0x{:08X}", static_cast<Channel>(channel_index), 315 s_state.channels[channel_index].block_control.bits); 316 return s_state.channels[channel_index].block_control.bits; 317 } 318 case 0x08: 319 { 320 TRACE_LOG("DMA[{}] channel control -> 0x{:08X}", static_cast<Channel>(channel_index), 321 s_state.channels[channel_index].channel_control.bits); 322 return s_state.channels[channel_index].channel_control.bits; 323 } 324 default: 325 break; 326 } 327 } 328 else 329 { 330 if (offset == 0x70) 331 { 332 TRACE_LOG("DPCR -> 0x{:08X}", s_state.DPCR.bits); 333 return s_state.DPCR.bits; 334 } 335 else if (offset == 0x74) 336 { 337 TRACE_LOG("DICR -> 0x{:08X}", s_state.DICR.bits); 338 return s_state.DICR.bits; 339 } 340 } 341 342 ERROR_LOG("Unhandled register read: {:02X}", offset); 343 return UINT32_C(0xFFFFFFFF); 344 } 345 346 void DMA::WriteRegister(u32 offset, u32 value) 347 { 348 const u32 channel_index = offset >> 4; 349 if (channel_index < 7) 350 { 351 ChannelState& state = s_state.channels[channel_index]; 352 switch (offset & UINT32_C(0x0F)) 353 { 354 case 0x00: 355 { 356 state.base_address = value & BASE_ADDRESS_MASK; 357 TRACE_LOG("DMA channel {} base address <- 0x{:08X}", static_cast<Channel>(channel_index), state.base_address); 358 return; 359 } 360 case 0x04: 361 { 362 TRACE_LOG("DMA channel {} block control <- 0x{:08X}", static_cast<Channel>(channel_index), value); 363 state.block_control.bits = value; 364 return; 365 } 366 367 case 0x08: 368 { 369 // HACK: Due to running DMA in slices, we can't wait for the current halt time to finish before running the 370 // first block of a new channel. This affects games like FF8, where they kick a SPU transfer while a GPU 371 // transfer is happening, and the SPU transfer gets delayed until the GPU transfer unhalts and finishes, and 372 // breaks the interrupt. 373 const bool ignore_halt = !state.channel_control.enable_busy && (value & (1u << 24)); 374 375 state.channel_control.bits = (state.channel_control.bits & ~ChannelState::ChannelControl::WRITE_MASK) | 376 (value & ChannelState::ChannelControl::WRITE_MASK); 377 TRACE_LOG("DMA channel {} channel control <- 0x{:08X}", static_cast<Channel>(channel_index), 378 state.channel_control.bits); 379 380 // start/trigger bit must be enabled for OTC 381 if (static_cast<Channel>(channel_index) == Channel::OTC) 382 SetRequest(static_cast<Channel>(channel_index), state.channel_control.start_trigger); 383 384 if (CanTransferChannel(static_cast<Channel>(channel_index), ignore_halt)) 385 { 386 if (static_cast<Channel>(channel_index) != Channel::OTC && 387 state.channel_control.sync_mode == SyncMode::Manual && state.channel_control.chopping_enable) 388 { 389 // Figure out how roughly many CPU cycles it'll take for the transfer to complete, and delay the transfer. 390 // Needed for Lagnacure Legend, which sets DICR to enable interrupts after CHCR to kickstart the transfer. 391 // This has an artificial 500 cycle cap, setting it too high causes Namco Museum Vol. 4 and a couple of 392 // other games to crash... so clearly something is missing here. 393 const u32 block_words = (1u << state.channel_control.chopping_dma_window_size); 394 const u32 cpu_cycles_per_block = (1u << state.channel_control.chopping_cpu_window_size); 395 const u32 blocks = state.block_control.manual.word_count / block_words; 396 const TickCount delay_cycles = std::min(static_cast<TickCount>(cpu_cycles_per_block * blocks), 500); 397 if (delay_cycles > 1 && true) 398 { 399 DEV_LOG("Delaying {} transfer by {} cycles due to chopping", static_cast<Channel>(channel_index), 400 delay_cycles); 401 HaltTransfer(delay_cycles); 402 } 403 else 404 { 405 s_channel_transfer_functions[channel_index](); 406 } 407 } 408 else 409 { 410 s_channel_transfer_functions[channel_index](); 411 } 412 } 413 return; 414 } 415 416 default: 417 break; 418 } 419 } 420 else 421 { 422 switch (offset) 423 { 424 case 0x70: 425 { 426 TRACE_LOG("DPCR <- 0x{:08X}", value); 427 s_state.DPCR.bits = value; 428 429 for (u32 i = 0; i < NUM_CHANNELS; i++) 430 { 431 if (CanTransferChannel(static_cast<Channel>(i), false)) 432 { 433 if (!s_channel_transfer_functions[i]()) 434 break; 435 } 436 } 437 438 return; 439 } 440 441 case 0x74: 442 { 443 TRACE_LOG("DICR <- 0x{:08X}", value); 444 s_state.DICR.bits = (s_state.DICR.bits & ~DICR_WRITE_MASK) | (value & DICR_WRITE_MASK); 445 s_state.DICR.bits = s_state.DICR.bits & ~(value & DICR_RESET_MASK); 446 UpdateIRQ(); 447 return; 448 } 449 450 default: 451 break; 452 } 453 } 454 455 ERROR_LOG("Unhandled register write: {:02X} <- {:08X}", offset, value); 456 } 457 458 void DMA::SetRequest(Channel channel, bool request) 459 { 460 ChannelState& cs = s_state.channels[static_cast<u32>(channel)]; 461 if (cs.request == request) 462 return; 463 464 cs.request = request; 465 if (CanTransferChannel(channel, false)) 466 s_channel_transfer_functions[static_cast<u32>(channel)](); 467 } 468 469 ALWAYS_INLINE_RELEASE bool DMA::CanTransferChannel(Channel channel, bool ignore_halt) 470 { 471 if (!s_state.DPCR.GetMasterEnable(channel)) 472 return false; 473 474 const ChannelState& cs = s_state.channels[static_cast<u32>(channel)]; 475 if (!cs.channel_control.enable_busy) 476 return false; 477 478 if (cs.channel_control.sync_mode != SyncMode::Manual && (IsTransferHalted() && !ignore_halt)) 479 return false; 480 481 return cs.request; 482 } 483 484 bool DMA::IsTransferHalted() 485 { 486 return s_state.unhalt_event.IsActive(); 487 } 488 489 void DMA::UpdateIRQ() 490 { 491 [[maybe_unused]] const auto old_dicr = s_state.DICR; 492 s_state.DICR.UpdateMasterFlag(); 493 if (!old_dicr.master_flag && s_state.DICR.master_flag) 494 TRACE_LOG("Firing DMA master interrupt"); 495 InterruptController::SetLineState(InterruptController::IRQ::DMA, s_state.DICR.master_flag); 496 } 497 498 ALWAYS_INLINE_RELEASE bool DMA::IsLinkedListTerminator(PhysicalMemoryAddress address) 499 { 500 return ((address & LINKED_LIST_TERMINATOR) == LINKED_LIST_TERMINATOR); 501 } 502 503 ALWAYS_INLINE_RELEASE bool DMA::CheckForBusError(Channel channel, ChannelState& cs, PhysicalMemoryAddress address, 504 u32 size) 505 { 506 // Relying on a transfer partially happening at the end of RAM, then hitting a bus error would be pretty silly. 507 if ((address + size) >= Bus::g_ram_mapped_size) [[unlikely]] 508 { 509 DEBUG_LOG("DMA bus error on channel {} at address 0x{:08X} size {}", channel, address, size); 510 cs.channel_control.enable_busy = false; 511 s_state.DICR.bus_error = true; 512 s_state.DICR.SetIRQFlag(channel); 513 UpdateIRQ(); 514 return true; 515 } 516 517 return false; 518 } 519 520 ALWAYS_INLINE_RELEASE void DMA::CompleteTransfer(Channel channel, ChannelState& cs) 521 { 522 // start/busy bit is cleared on end of transfer 523 DEBUG_LOG("DMA transfer for channel {} complete", channel); 524 cs.channel_control.enable_busy = false; 525 if (s_state.DICR.ShouldSetIRQFlag(channel)) 526 { 527 DEBUG_LOG("Setting DMA interrupt for channel {}", channel); 528 s_state.DICR.SetIRQFlag(channel); 529 UpdateIRQ(); 530 } 531 } 532 533 TickCount DMA::GetMaxSliceTicks(TickCount max_slice_size) 534 { 535 const TickCount max = Pad::IsTransmitting() ? SLICE_SIZE_WHEN_TRANSMITTING_PAD : max_slice_size; 536 if (!TimingEvents::IsRunningEvents()) 537 return max; 538 539 const TickCount remaining_in_event_loop = 540 static_cast<TickCount>(TimingEvents::GetEventRunTickCounter() - TimingEvents::GetGlobalTickCounter()); 541 return std::max<TickCount>(max - remaining_in_event_loop, 1); 542 } 543 544 template<DMA::Channel channel> 545 bool DMA::TransferChannel() 546 { 547 ChannelState& cs = s_state.channels[static_cast<u32>(channel)]; 548 549 const bool copy_to_device = cs.channel_control.copy_to_device; 550 551 // start/trigger bit is cleared on beginning of transfer 552 cs.channel_control.start_trigger = false; 553 554 PhysicalMemoryAddress current_address = cs.base_address; 555 const PhysicalMemoryAddress increment = cs.channel_control.address_step_reverse ? static_cast<u32>(-4) : UINT32_C(4); 556 switch (cs.channel_control.sync_mode) 557 { 558 case SyncMode::Manual: 559 { 560 const u32 word_count = cs.block_control.manual.GetWordCount(); 561 DEBUG_LOG("DMA[{}]: Copying {} words {} 0x{:08X}", channel, word_count, copy_to_device ? "from" : "to", 562 current_address); 563 564 const PhysicalMemoryAddress transfer_addr = current_address & TRANSFER_ADDRESS_MASK; 565 if (CheckForBusError(channel, cs, transfer_addr, (word_count - 1) * increment)) [[unlikely]] 566 return true; 567 568 TickCount used_ticks; 569 if (copy_to_device) 570 used_ticks = TransferMemoryToDevice<channel>(transfer_addr, increment, word_count); 571 else 572 used_ticks = TransferDeviceToMemory<channel>(transfer_addr, increment, word_count); 573 574 CPU::AddPendingTicks(used_ticks); 575 CompleteTransfer(channel, cs); 576 return true; 577 } 578 579 case SyncMode::LinkedList: 580 { 581 if (!copy_to_device) 582 { 583 Panic("Linked list not implemented for DMA reads"); 584 return true; 585 } 586 587 DEBUG_LOG("DMA[{}]: Copying linked list starting at 0x{:08X} to device", channel, current_address); 588 589 // Prove to the compiler that nothing's going to modify these. 590 const u8* const ram_ptr = Bus::g_ram; 591 const u32 mask = Bus::g_ram_mask; 592 593 const TickCount slice_ticks = GetMaxSliceTicks(g_settings.dma_max_slice_ticks); 594 TickCount remaining_ticks = slice_ticks; 595 while (cs.request && remaining_ticks > 0) 596 { 597 u32 header; 598 PhysicalMemoryAddress transfer_addr = current_address & TRANSFER_ADDRESS_MASK; 599 if (CheckForBusError(channel, cs, transfer_addr, sizeof(header))) [[unlikely]] 600 { 601 cs.base_address = current_address; 602 return true; 603 } 604 605 std::memcpy(&header, &ram_ptr[transfer_addr & mask], sizeof(header)); 606 const u32 word_count = header >> 24; 607 const u32 next_address = header & 0x00FFFFFFu; 608 TRACE_LOG(" .. linked list entry at 0x{:08X} size={}({} words) next=0x{:08X}", current_address, word_count * 4, 609 word_count, next_address); 610 611 const TickCount setup_ticks = (word_count > 0) ? 612 (LINKED_LIST_HEADER_READ_TICKS + LINKED_LIST_BLOCK_SETUP_TICKS) : 613 LINKED_LIST_HEADER_READ_TICKS; 614 CPU::AddPendingTicks(setup_ticks); 615 remaining_ticks -= setup_ticks; 616 617 if (word_count > 0) 618 { 619 if (CheckForBusError(channel, cs, transfer_addr, (word_count - 1) * increment)) [[unlikely]] 620 { 621 cs.base_address = current_address; 622 return true; 623 } 624 625 const TickCount block_ticks = TransferMemoryToDevice<channel>(transfer_addr + sizeof(header), 4, word_count); 626 CPU::AddPendingTicks(block_ticks); 627 remaining_ticks -= block_ticks; 628 } 629 630 current_address = next_address; 631 if (IsLinkedListTerminator(current_address)) 632 { 633 // Terminator is 24 bits, so is MADR, so it'll always be 0xFFFFFF. 634 cs.base_address = LINKED_LIST_TERMINATOR; 635 CompleteTransfer(channel, cs); 636 return true; 637 } 638 } 639 640 cs.base_address = current_address; 641 if (cs.request) 642 { 643 // stall the transfer for a bit if we ran for too long 644 HaltTransfer(g_settings.dma_halt_ticks); 645 return false; 646 } 647 else 648 { 649 // linked list not yet complete 650 return true; 651 } 652 } 653 654 case SyncMode::Request: 655 { 656 DEBUG_LOG("DMA[{}]: Copying {} blocks of size {} ({} total words) {} 0x{:08X}", channel, 657 cs.block_control.request.GetBlockCount(), cs.block_control.request.GetBlockSize(), 658 cs.block_control.request.GetBlockCount() * cs.block_control.request.GetBlockSize(), 659 copy_to_device ? "from" : "to", current_address); 660 661 const u32 block_size = cs.block_control.request.GetBlockSize(); 662 u32 blocks_remaining = cs.block_control.request.GetBlockCount(); 663 TickCount ticks_remaining = GetMaxSliceTicks(g_settings.dma_max_slice_ticks); 664 665 if (copy_to_device) 666 { 667 do 668 { 669 const PhysicalMemoryAddress transfer_addr = current_address & TRANSFER_ADDRESS_MASK; 670 if (CheckForBusError(channel, cs, transfer_addr, (block_size - 1) * increment)) [[unlikely]] 671 { 672 cs.base_address = current_address; 673 cs.block_control.request.block_count = blocks_remaining; 674 return true; 675 } 676 677 const TickCount ticks = TransferMemoryToDevice<channel>(transfer_addr, increment, block_size); 678 CPU::AddPendingTicks(ticks); 679 680 ticks_remaining -= ticks; 681 blocks_remaining--; 682 683 current_address = (transfer_addr + (increment * block_size)); 684 } while (cs.request && blocks_remaining > 0 && ticks_remaining > 0); 685 } 686 else 687 { 688 do 689 { 690 const PhysicalMemoryAddress transfer_addr = current_address & TRANSFER_ADDRESS_MASK; 691 if (CheckForBusError(channel, cs, transfer_addr, (block_size - 1) * increment)) [[unlikely]] 692 { 693 cs.base_address = current_address; 694 cs.block_control.request.block_count = blocks_remaining; 695 return true; 696 } 697 698 const TickCount ticks = TransferDeviceToMemory<channel>(transfer_addr, increment, block_size); 699 CPU::AddPendingTicks(ticks); 700 701 ticks_remaining -= ticks; 702 blocks_remaining--; 703 704 current_address = (transfer_addr + (increment * block_size)); 705 } while (cs.request && blocks_remaining > 0 && ticks_remaining > 0); 706 } 707 708 cs.base_address = current_address; 709 cs.block_control.request.block_count = blocks_remaining; 710 711 // finish transfer later if the request was cleared 712 if (blocks_remaining > 0) 713 { 714 if (cs.request) 715 { 716 // we got halted 717 if (!s_state.unhalt_event.IsActive()) 718 HaltTransfer(g_settings.dma_halt_ticks); 719 720 return false; 721 } 722 723 return true; 724 } 725 726 CompleteTransfer(channel, cs); 727 return true; 728 } 729 730 default: 731 Panic("Unimplemented sync mode"); 732 } 733 734 UnreachableCode(); 735 } 736 737 void DMA::HaltTransfer(TickCount duration) 738 { 739 s_state.halt_ticks_remaining += duration; 740 DEBUG_LOG("Halting DMA for {} ticks", s_state.halt_ticks_remaining); 741 if (s_state.unhalt_event.IsActive()) 742 return; 743 744 DebugAssert(!s_state.unhalt_event.IsActive()); 745 s_state.unhalt_event.SetIntervalAndSchedule(s_state.halt_ticks_remaining); 746 } 747 748 void DMA::UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late) 749 { 750 DEBUG_LOG("Resuming DMA after {} ticks, {} ticks late", ticks, -(s_state.halt_ticks_remaining - ticks)); 751 s_state.halt_ticks_remaining -= ticks; 752 s_state.unhalt_event.Deactivate(); 753 754 // TODO: Use channel priority. But doing it in ascending order is probably good enough. 755 // Main thing is that OTC happens after GPU, because otherwise it'll wipe out the LL. 756 for (u32 i = 0; i < NUM_CHANNELS; i++) 757 { 758 if (CanTransferChannel(static_cast<Channel>(i), false)) 759 { 760 if (!s_channel_transfer_functions[i]()) 761 return; 762 } 763 } 764 765 // We didn't run too long, so reset timer. 766 s_state.halt_ticks_remaining = 0; 767 } 768 769 template<DMA::Channel channel> 770 TickCount DMA::TransferMemoryToDevice(u32 address, u32 increment, u32 word_count) 771 { 772 const u32 mask = Bus::g_ram_mask; 773 #ifdef _DEBUG 774 if ((address & mask) != address) 775 DEBUG_LOG("DMA TO {} from masked RAM address 0x{:08X} => 0x{:08X}", channel, address, (address & mask)); 776 #endif 777 778 address &= mask; 779 780 const u32* src_pointer = reinterpret_cast<u32*>(Bus::g_ram + address); 781 if constexpr (channel != Channel::GPU) 782 { 783 if (static_cast<s32>(increment) < 0 || ((address + (increment * word_count)) & mask) <= address) [[unlikely]] 784 { 785 // Use temp buffer if it's wrapping around 786 if (s_state.transfer_buffer.size() < word_count) 787 s_state.transfer_buffer.resize(word_count); 788 src_pointer = s_state.transfer_buffer.data(); 789 790 u8* ram_pointer = Bus::g_ram; 791 for (u32 i = 0; i < word_count; i++) 792 { 793 std::memcpy(&s_state.transfer_buffer[i], &ram_pointer[address], sizeof(u32)); 794 address = (address + increment) & mask; 795 } 796 } 797 } 798 799 switch (channel) 800 { 801 case Channel::GPU: 802 { 803 if (g_gpu->BeginDMAWrite()) [[likely]] 804 { 805 u8* ram_pointer = Bus::g_ram; 806 for (u32 i = 0; i < word_count; i++) 807 { 808 u32 value; 809 std::memcpy(&value, &ram_pointer[address], sizeof(u32)); 810 g_gpu->DMAWrite(address, value); 811 address = (address + increment) & mask; 812 } 813 g_gpu->EndDMAWrite(); 814 } 815 } 816 break; 817 818 case Channel::SPU: 819 SPU::DMAWrite(src_pointer, word_count); 820 break; 821 822 case Channel::MDECin: 823 MDEC::DMAWrite(src_pointer, word_count); 824 break; 825 826 case Channel::CDROM: 827 case Channel::MDECout: 828 case Channel::PIO: 829 default: 830 ERROR_LOG("Unhandled DMA channel {} for device write", static_cast<u32>(channel)); 831 break; 832 } 833 834 return Bus::GetDMARAMTickCount(word_count); 835 } 836 837 template<DMA::Channel channel> 838 TickCount DMA::TransferDeviceToMemory(u32 address, u32 increment, u32 word_count) 839 { 840 const u32 mask = Bus::g_ram_mask; 841 #ifdef _DEBUG 842 if ((address & mask) != address) 843 DEBUG_LOG("DMA FROM {} to masked RAM address 0x{:08X} => 0x{:08X}", channel, address, (address & mask)); 844 #endif 845 846 // TODO: This might not be correct for OTC. 847 address &= mask; 848 849 if constexpr (channel == Channel::OTC) 850 { 851 // clear ordering table 852 u8* ram_pointer = Bus::g_ram; 853 const u32 word_count_less_1 = word_count - 1; 854 for (u32 i = 0; i < word_count_less_1; i++) 855 { 856 u32 next = ((address - 4) & mask); 857 std::memcpy(&ram_pointer[address], &next, sizeof(next)); 858 address = next; 859 } 860 861 const u32 terminator = UINT32_C(0xFFFFFF); 862 std::memcpy(&ram_pointer[address], &terminator, sizeof(terminator)); 863 return Bus::GetDMARAMTickCount(word_count); 864 } 865 866 u32* dest_pointer = reinterpret_cast<u32*>(&Bus::g_ram[address]); 867 if (static_cast<s32>(increment) < 0 || ((address + (increment * word_count)) & mask) <= address) [[unlikely]] 868 { 869 // Use temp buffer if it's wrapping around 870 if (s_state.transfer_buffer.size() < word_count) 871 s_state.transfer_buffer.resize(word_count); 872 dest_pointer = s_state.transfer_buffer.data(); 873 } 874 875 // Read from device. 876 switch (channel) 877 { 878 case Channel::GPU: 879 g_gpu->DMARead(dest_pointer, word_count); 880 break; 881 882 case Channel::CDROM: 883 CDROM::DMARead(dest_pointer, word_count); 884 break; 885 886 case Channel::SPU: 887 SPU::DMARead(dest_pointer, word_count); 888 break; 889 890 case Channel::MDECout: 891 MDEC::DMARead(dest_pointer, word_count); 892 break; 893 894 default: 895 ERROR_LOG("Unhandled DMA channel {} for device read", static_cast<u32>(channel)); 896 std::fill_n(dest_pointer, word_count, UINT32_C(0xFFFFFFFF)); 897 break; 898 } 899 900 if (dest_pointer == s_state.transfer_buffer.data()) [[unlikely]] 901 { 902 u8* ram_pointer = Bus::g_ram; 903 for (u32 i = 0; i < word_count; i++) 904 { 905 std::memcpy(&ram_pointer[address], &s_state.transfer_buffer[i], sizeof(u32)); 906 address = (address + increment) & mask; 907 } 908 } 909 910 return Bus::GetDMARAMTickCount(word_count); 911 } 912 913 void DMA::DrawDebugStateWindow() 914 { 915 static constexpr u32 NUM_COLUMNS = 10; 916 static constexpr std::array<const char*, NUM_COLUMNS> column_names = { 917 {"#", "Req", "Direction", "Chopping", "Mode", "Busy", "Enable", "Priority", "IRQ", "Flag"}}; 918 static constexpr std::array<const char*, 4> sync_mode_names = {{"Manual", "Request", "LinkedList", "Reserved"}}; 919 920 const float framebuffer_scale = ImGuiManager::GetGlobalScale(); 921 922 ImGui::SetNextWindowSize(ImVec2(850.0f * framebuffer_scale, 250.0f * framebuffer_scale), ImGuiCond_FirstUseEver); 923 if (!ImGui::Begin("DMA State", nullptr)) 924 { 925 ImGui::End(); 926 return; 927 } 928 929 ImGui::Columns(NUM_COLUMNS); 930 ImGui::SetColumnWidth(0, 100.0f * framebuffer_scale); 931 ImGui::SetColumnWidth(1, 50.0f * framebuffer_scale); 932 ImGui::SetColumnWidth(2, 100.0f * framebuffer_scale); 933 ImGui::SetColumnWidth(3, 150.0f * framebuffer_scale); 934 ImGui::SetColumnWidth(4, 80.0f * framebuffer_scale); 935 ImGui::SetColumnWidth(5, 80.0f * framebuffer_scale); 936 ImGui::SetColumnWidth(6, 80.0f * framebuffer_scale); 937 ImGui::SetColumnWidth(7, 80.0f * framebuffer_scale); 938 ImGui::SetColumnWidth(8, 80.0f * framebuffer_scale); 939 ImGui::SetColumnWidth(9, 80.0f * framebuffer_scale); 940 941 for (const char* title : column_names) 942 { 943 ImGui::TextUnformatted(title); 944 ImGui::NextColumn(); 945 } 946 947 const ImVec4 active(1.0f, 1.0f, 1.0f, 1.0f); 948 const ImVec4 inactive(0.5f, 0.5f, 0.5f, 1.0f); 949 950 for (u32 i = 0; i < NUM_CHANNELS; i++) 951 { 952 const ChannelState& cs = s_state.channels[i]; 953 954 ImGui::TextColored(cs.channel_control.enable_busy ? active : inactive, "%u[%s]", i, s_channel_names[i]); 955 ImGui::NextColumn(); 956 ImGui::TextColored(cs.request ? active : inactive, cs.request ? "Yes" : "No"); 957 ImGui::NextColumn(); 958 ImGui::Text("%s%s", cs.channel_control.copy_to_device ? "FromRAM" : "ToRAM", 959 cs.channel_control.address_step_reverse ? " Addr+" : " Addr-"); 960 ImGui::NextColumn(); 961 ImGui::TextColored(cs.channel_control.chopping_enable ? active : inactive, "%s/%u/%u", 962 cs.channel_control.chopping_enable ? "Yes" : "No", 963 cs.channel_control.chopping_cpu_window_size.GetValue(), 964 cs.channel_control.chopping_dma_window_size.GetValue()); 965 ImGui::NextColumn(); 966 ImGui::Text("%s", sync_mode_names[static_cast<u8>(cs.channel_control.sync_mode.GetValue())]); 967 ImGui::NextColumn(); 968 ImGui::TextColored(cs.channel_control.enable_busy ? active : inactive, "%s%s", 969 cs.channel_control.enable_busy ? "Busy" : "Idle", 970 cs.channel_control.start_trigger ? " (Trigger)" : ""); 971 ImGui::NextColumn(); 972 ImGui::TextColored(s_state.DPCR.GetMasterEnable(static_cast<Channel>(i)) ? active : inactive, 973 s_state.DPCR.GetMasterEnable(static_cast<Channel>(i)) ? "Enabled" : "Disabled"); 974 ImGui::NextColumn(); 975 ImGui::TextColored(s_state.DPCR.GetMasterEnable(static_cast<Channel>(i)) ? active : inactive, "%u", 976 s_state.DPCR.GetPriority(static_cast<Channel>(i))); 977 ImGui::NextColumn(); 978 ImGui::TextColored(s_state.DICR.GetIRQEnabled(static_cast<Channel>(i)) ? active : inactive, 979 s_state.DICR.GetIRQEnabled(static_cast<Channel>(i)) ? "Enabled" : "Disabled"); 980 ImGui::NextColumn(); 981 ImGui::TextColored(s_state.DICR.GetIRQFlag(static_cast<Channel>(i)) ? active : inactive, 982 s_state.DICR.GetIRQFlag(static_cast<Channel>(i)) ? "IRQ" : ""); 983 ImGui::NextColumn(); 984 } 985 986 ImGui::Columns(1); 987 ImGui::End(); 988 } 989 990 // Instantiate channel functions. 991 template TickCount DMA::TransferDeviceToMemory<DMA::Channel::MDECin>(u32 address, u32 increment, u32 word_count); 992 template TickCount DMA::TransferMemoryToDevice<DMA::Channel::MDECin>(u32 address, u32 increment, u32 word_count); 993 template bool DMA::TransferChannel<DMA::Channel::MDECin>(); 994 template TickCount DMA::TransferDeviceToMemory<DMA::Channel::MDECout>(u32 address, u32 increment, u32 word_count); 995 template TickCount DMA::TransferMemoryToDevice<DMA::Channel::MDECout>(u32 address, u32 increment, u32 word_count); 996 template bool DMA::TransferChannel<DMA::Channel::MDECout>(); 997 template TickCount DMA::TransferDeviceToMemory<DMA::Channel::GPU>(u32 address, u32 increment, u32 word_count); 998 template TickCount DMA::TransferMemoryToDevice<DMA::Channel::GPU>(u32 address, u32 increment, u32 word_count); 999 template bool DMA::TransferChannel<DMA::Channel::GPU>(); 1000 template TickCount DMA::TransferDeviceToMemory<DMA::Channel::CDROM>(u32 address, u32 increment, u32 word_count); 1001 template TickCount DMA::TransferMemoryToDevice<DMA::Channel::CDROM>(u32 address, u32 increment, u32 word_count); 1002 template bool DMA::TransferChannel<DMA::Channel::CDROM>(); 1003 template TickCount DMA::TransferDeviceToMemory<DMA::Channel::SPU>(u32 address, u32 increment, u32 word_count); 1004 template TickCount DMA::TransferMemoryToDevice<DMA::Channel::SPU>(u32 address, u32 increment, u32 word_count); 1005 template bool DMA::TransferChannel<DMA::Channel::SPU>(); 1006 template TickCount DMA::TransferDeviceToMemory<DMA::Channel::PIO>(u32 address, u32 increment, u32 word_count); 1007 template TickCount DMA::TransferMemoryToDevice<DMA::Channel::PIO>(u32 address, u32 increment, u32 word_count); 1008 template bool DMA::TransferChannel<DMA::Channel::PIO>(); 1009 template TickCount DMA::TransferDeviceToMemory<DMA::Channel::OTC>(u32 address, u32 increment, u32 word_count); 1010 template TickCount DMA::TransferMemoryToDevice<DMA::Channel::OTC>(u32 address, u32 increment, u32 word_count); 1011 template bool DMA::TransferChannel<DMA::Channel::OTC>();