duckstation

duckstation, archived from the revision immediately before upstream relicensed the project as proprietary software; this version is the libre one
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

dma.cpp (34249B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #include "dma.h"
      5 #include "bus.h"
      6 #include "cdrom.h"
      7 #include "cpu_core.h"
      8 #include "gpu.h"
      9 #include "imgui.h"
     10 #include "interrupt_controller.h"
     11 #include "mdec.h"
     12 #include "pad.h"
     13 #include "spu.h"
     14 #include "system.h"
     15 
     16 #include "util/imgui_manager.h"
     17 #include "util/state_wrapper.h"
     18 
     19 #include "common/bitfield.h"
     20 #include "common/log.h"
     21 #include "common/string_util.h"
     22 
     23 #include "fmt/format.h"
     24 
     25 #include <array>
     26 #include <memory>
     27 #include <vector>
     28 
     29 Log_SetChannel(DMA);
     30 
     31 namespace DMA {
     32 namespace {
     33 
// Transfer synchronisation mode, taken from CHCR bits 9-10.
enum class SyncMode : u32
{
  Manual = 0,     // whole block transferred at once, kicked by the start/trigger bit
  Request = 1,    // block-at-a-time, gated by the device's request (DREQ) line
  LinkedList = 2, // chained packets (GPU command lists), terminated by 0xFFFFFF
  Reserved = 3
};

// MADR holds a 24-bit address; transfers operate on word-aligned addresses.
static constexpr PhysicalMemoryAddress BASE_ADDRESS_MASK = UINT32_C(0x00FFFFFF);
static constexpr PhysicalMemoryAddress TRANSFER_ADDRESS_MASK = UINT32_C(0x00FFFFFC);
// Linked-list end marker: all 24 address bits set.
static constexpr PhysicalMemoryAddress LINKED_LIST_TERMINATOR = UINT32_C(0x00FFFFFF);

// Fixed tick costs charged while walking linked lists.
static constexpr TickCount LINKED_LIST_HEADER_READ_TICKS = 10;
static constexpr TickCount LINKED_LIST_BLOCK_SETUP_TICKS = 5;
// Shorter slice used while the pad is transmitting, so DMA doesn't starve it.
static constexpr TickCount SLICE_SIZE_WHEN_TRANSMITTING_PAD = 10;
     49 
// Per-channel register state (MADR/BCR/CHCR) plus the device request line.
struct ChannelState
{
  // MADR: current RAM address (24 bits, masked on write).
  u32 base_address = 0;

  // BCR: interpreted differently depending on the sync mode.
  union BlockControl
  {
    u32 bits;
    union
    {
      BitField<u32, u32, 0, 16> word_count;

      // A zero word count means the maximum of 0x10000 words.
      u32 GetWordCount() const { return (word_count == 0) ? 0x10000 : word_count; }
    } manual;
    union
    {
      BitField<u32, u32, 0, 16> block_size;
      BitField<u32, u32, 16, 16> block_count;

      // Zero in either field means the maximum of 0x10000.
      u32 GetBlockSize() const { return (block_size == 0) ? 0x10000 : block_size; }
      u32 GetBlockCount() const { return (block_count == 0) ? 0x10000 : block_count; }
    } request;
  } block_control = {};

  // CHCR: direction, address step, chopping and sync configuration.
  union ChannelControl
  {
    u32 bits;
    BitField<u32, bool, 0, 1> copy_to_device;       // true = RAM -> device, false = device -> RAM
    BitField<u32, bool, 1, 1> address_step_reverse; // step by -4 instead of +4
    BitField<u32, bool, 8, 1> chopping_enable;
    BitField<u32, SyncMode, 9, 2> sync_mode;
    BitField<u32, u32, 16, 3> chopping_dma_window_size; // log2 of words per DMA burst
    BitField<u32, u32, 20, 3> chopping_cpu_window_size; // log2 of CPU cycles between bursts
    BitField<u32, bool, 24, 1> enable_busy;
    BitField<u32, bool, 28, 1> start_trigger;

    // Bits the CPU can actually change through register writes.
    static constexpr u32 WRITE_MASK = 0b01110001'01110111'00000111'00000011;
  } channel_control = {};

  // Device-driven request (DREQ) line, toggled via SetRequest().
  bool request = false;
};
     90 
     91 union DPCRRegister
     92 {
     93   u32 bits;
     94 
     95   BitField<u32, u8, 0, 3> MDECin_priority;
     96   BitField<u32, bool, 3, 1> MDECin_master_enable;
     97   BitField<u32, u8, 4, 3> MDECout_priority;
     98   BitField<u32, bool, 7, 1> MDECout_master_enable;
     99   BitField<u32, u8, 8, 3> GPU_priority;
    100   BitField<u32, bool, 10, 1> GPU_master_enable;
    101   BitField<u32, u8, 12, 3> CDROM_priority;
    102   BitField<u32, bool, 15, 1> CDROM_master_enable;
    103   BitField<u32, u8, 16, 3> SPU_priority;
    104   BitField<u32, bool, 19, 1> SPU_master_enable;
    105   BitField<u32, u8, 20, 3> PIO_priority;
    106   BitField<u32, bool, 23, 1> PIO_master_enable;
    107   BitField<u32, u8, 24, 3> OTC_priority;
    108   BitField<u32, bool, 27, 1> OTC_master_enable;
    109   BitField<u32, u8, 28, 3> priority_offset;
    110   BitField<u32, bool, 31, 1> unused;
    111 
    112   ALWAYS_INLINE u8 GetPriority(Channel channel) const { return ((bits >> (static_cast<u8>(channel) * 4)) & u32(3)); }
    113   ALWAYS_INLINE bool GetMasterEnable(Channel channel) const
    114   {
    115     return ConvertToBoolUnchecked((bits >> (static_cast<u8>(channel) * 4 + 3)) & u32(1));
    116   }
    117 };
    118 
    119 static constexpr u32 DICR_WRITE_MASK = 0b00000000'11111111'10000000'00111111;
    120 static constexpr u32 DICR_RESET_MASK = 0b01111111'00000000'00000000'00000000;
    121 union DICRRegister
    122 {
    123   u32 bits;
    124 
    125   BitField<u32, bool, 15, 1> bus_error;
    126   BitField<u32, bool, 16, 1> MDECin_irq_enable;
    127   BitField<u32, bool, 17, 1> MDECout_irq_enable;
    128   BitField<u32, bool, 18, 1> GPU_irq_enable;
    129   BitField<u32, bool, 19, 1> CDROM_irq_enable;
    130   BitField<u32, bool, 20, 1> SPU_irq_enable;
    131   BitField<u32, bool, 21, 1> PIO_irq_enable;
    132   BitField<u32, bool, 22, 1> OTC_irq_enable;
    133   BitField<u32, bool, 23, 1> master_enable;
    134   BitField<u32, bool, 24, 1> MDECin_irq_flag;
    135   BitField<u32, bool, 25, 1> MDECout_irq_flag;
    136   BitField<u32, bool, 26, 1> GPU_irq_flag;
    137   BitField<u32, bool, 27, 1> CDROM_irq_flag;
    138   BitField<u32, bool, 28, 1> SPU_irq_flag;
    139   BitField<u32, bool, 29, 1> PIO_irq_flag;
    140   BitField<u32, bool, 30, 1> OTC_irq_flag;
    141   BitField<u32, bool, 31, 1> master_flag;
    142 
    143   ALWAYS_INLINE bool GetIRQEnabled(Channel channel) const
    144   {
    145     return ConvertToBoolUnchecked((bits >> (static_cast<u8>(channel) + 16)) & 1u);
    146   }
    147 
    148   ALWAYS_INLINE bool GetIRQFlag(Channel channel) const
    149   {
    150     return ConvertToBoolUnchecked((bits >> (static_cast<u8>(channel) + 24)) & 1u);
    151   }
    152 
    153   ALWAYS_INLINE void SetIRQFlag(Channel channel) { bits |= (1u << (static_cast<u8>(channel) + 24)); }
    154 
    155   ALWAYS_INLINE bool ShouldSetIRQFlag(Channel channel)
    156   {
    157     // bus errors trigger IRQ unconditionally, completion requires the master flag to be enabled
    158     return ConvertToBoolUnchecked(((bits >> (static_cast<u8>(channel) + 16)) & ((bits >> 23) & 1u)));
    159   }
    160 
    161   ALWAYS_INLINE void UpdateMasterFlag()
    162   {
    163     master_flag =
    164       (((bits & (1u << 15)) != 0u) ||                                             // bus error, or
    165        (((bits & (1u << 23)) != 0u) != 0u && (bits & (0b1111111u << 24)) != 0u)); // master enable + irq on any channel
    166   }
    167 };
    168 } // namespace
    169 
// Resets all channels and the DPCR/DICR registers to power-on values.
static void ClearState();

// is everything enabled for a channel to operate?
static bool CanTransferChannel(Channel channel, bool ignore_halt);
static bool IsTransferHalted();
static void UpdateIRQ();

// Pauses DMA for the given number of ticks and schedules the unhalt event.
static void HaltTransfer(TickCount duration);
// TimingEvent callback that resumes any runnable channels.
static void UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late);

// Runs as much of a channel's transfer as fits in one slice; returns false if
// the transfer halted mid-way and will be resumed later.
template<Channel channel>
static bool TransferChannel();

static bool IsLinkedListTerminator(PhysicalMemoryAddress address);
static bool CheckForBusError(Channel channel, ChannelState& cs, PhysicalMemoryAddress address, u32 size);
static void CompleteTransfer(Channel channel, ChannelState& cs);

// from device -> memory
template<Channel channel>
static TickCount TransferDeviceToMemory(u32 address, u32 increment, u32 word_count);

// from memory -> device
template<Channel channel>
static TickCount TransferMemoryToDevice(u32 address, u32 increment, u32 word_count);

// Largest number of ticks one transfer slice may consume before yielding.
static TickCount GetMaxSliceTicks(TickCount max_slice_size);
    196 
    197 // configuration
namespace {
// All mutable DMA controller state, bundled together for save states.
struct DMAState
{
  std::vector<u32> transfer_buffer; // scratch buffer for wrap-around RAM reads
  TimingEvent unhalt_event{"DMA Transfer Unhalt", 1, 1, &DMA::UnhaltTransfer, nullptr};
  TickCount halt_ticks_remaining = 0; // outstanding halt time consumed by unhalt_event

  std::array<ChannelState, NUM_CHANNELS> channels;
  DPCRRegister DPCR = {}; // priority/master-enable register
  DICRRegister DICR = {}; // interrupt control/status register
};
} // namespace
    210 
// Single global controller instance; cache-line aligned to keep hot state together.
ALIGN_TO_CACHE_LINE static DMAState s_state;

// Per-channel instantiations of TransferChannel<>, indexed by Channel value.
static constexpr std::array<bool (*)(), NUM_CHANNELS> s_channel_transfer_functions = {{
  &TransferChannel<Channel::MDECin>,
  &TransferChannel<Channel::MDECout>,
  &TransferChannel<Channel::GPU>,
  &TransferChannel<Channel::CDROM>,
  &TransferChannel<Channel::SPU>,
  &TransferChannel<Channel::PIO>,
  &TransferChannel<Channel::OTC>,
}};

// Channel names for logging, in Channel enum order.
[[maybe_unused]] static constexpr std::array<const char*, NUM_CHANNELS> s_channel_names = {
  {"MDECin", "MDECout", "GPU", "CDROM", "SPU", "PIO", "OTC"}};
    225 
    226 }; // namespace DMA
    227 
// Lets DMA::Channel values be formatted by name in the log macros above.
template<>
struct fmt::formatter<DMA::Channel> : fmt::formatter<fmt::string_view>
{
  auto format(DMA::Channel channel, fmt::format_context& ctx) const
  {
    return formatter<fmt::string_view>::format(DMA::s_channel_names[static_cast<u32>(channel)], ctx);
  }
};
    236 
// One-time setup: program the unhalt event interval from settings, then reset.
void DMA::Initialize()
{
  s_state.unhalt_event.SetInterval(g_settings.dma_halt_ticks);
  Reset();
}
    242 
// Teardown: clear all register state and cancel any pending unhalt event.
void DMA::Shutdown()
{
  ClearState();
  s_state.unhalt_event.Deactivate();
}
    248 
// Console reset: restore power-on register values and stop any pending unhalt.
void DMA::Reset()
{
  ClearState();
  s_state.unhalt_event.Deactivate();
}
    254 
    255 void DMA::ClearState()
    256 {
    257   for (u32 i = 0; i < NUM_CHANNELS; i++)
    258   {
    259     ChannelState& cs = s_state.channels[i];
    260     cs.base_address = 0;
    261     cs.block_control.bits = 0;
    262     cs.channel_control.bits = 0;
    263     cs.request = false;
    264   }
    265 
    266   s_state.DPCR.bits = 0x07654321;
    267   s_state.DICR.bits = 0;
    268 
    269   s_state.halt_ticks_remaining = 0;
    270 }
    271 
    272 bool DMA::DoState(StateWrapper& sw)
    273 {
    274   sw.Do(&s_state.halt_ticks_remaining);
    275 
    276   for (u32 i = 0; i < NUM_CHANNELS; i++)
    277   {
    278     ChannelState& cs = s_state.channels[i];
    279     sw.Do(&cs.base_address);
    280     sw.Do(&cs.block_control.bits);
    281     sw.Do(&cs.channel_control.bits);
    282     sw.Do(&cs.request);
    283   }
    284 
    285   sw.Do(&s_state.DPCR.bits);
    286   sw.Do(&s_state.DICR.bits);
    287 
    288   if (sw.IsReading())
    289   {
    290     if (s_state.halt_ticks_remaining > 0)
    291       s_state.unhalt_event.SetIntervalAndSchedule(s_state.halt_ticks_remaining);
    292     else
    293       s_state.unhalt_event.Deactivate();
    294   }
    295 
    296   return !sw.HasError();
    297 }
    298 
    299 u32 DMA::ReadRegister(u32 offset)
    300 {
    301   const u32 channel_index = offset >> 4;
    302   if (channel_index < 7)
    303   {
    304     switch (offset & UINT32_C(0x0F))
    305     {
    306       case 0x00:
    307       {
    308         TRACE_LOG("DMA[{}] base address -> 0x{:08X}", static_cast<Channel>(channel_index),
    309                   s_state.channels[channel_index].base_address);
    310         return s_state.channels[channel_index].base_address;
    311       }
    312       case 0x04:
    313       {
    314         TRACE_LOG("DMA[{}] block control -> 0x{:08X}", static_cast<Channel>(channel_index),
    315                   s_state.channels[channel_index].block_control.bits);
    316         return s_state.channels[channel_index].block_control.bits;
    317       }
    318       case 0x08:
    319       {
    320         TRACE_LOG("DMA[{}] channel control -> 0x{:08X}", static_cast<Channel>(channel_index),
    321                   s_state.channels[channel_index].channel_control.bits);
    322         return s_state.channels[channel_index].channel_control.bits;
    323       }
    324       default:
    325         break;
    326     }
    327   }
    328   else
    329   {
    330     if (offset == 0x70)
    331     {
    332       TRACE_LOG("DPCR -> 0x{:08X}", s_state.DPCR.bits);
    333       return s_state.DPCR.bits;
    334     }
    335     else if (offset == 0x74)
    336     {
    337       TRACE_LOG("DICR -> 0x{:08X}", s_state.DICR.bits);
    338       return s_state.DICR.bits;
    339     }
    340   }
    341 
    342   ERROR_LOG("Unhandled register read: {:02X}", offset);
    343   return UINT32_C(0xFFFFFFFF);
    344 }
    345 
    346 void DMA::WriteRegister(u32 offset, u32 value)
    347 {
    348   const u32 channel_index = offset >> 4;
    349   if (channel_index < 7)
    350   {
    351     ChannelState& state = s_state.channels[channel_index];
    352     switch (offset & UINT32_C(0x0F))
    353     {
    354       case 0x00:
    355       {
    356         state.base_address = value & BASE_ADDRESS_MASK;
    357         TRACE_LOG("DMA channel {} base address <- 0x{:08X}", static_cast<Channel>(channel_index), state.base_address);
    358         return;
    359       }
    360       case 0x04:
    361       {
    362         TRACE_LOG("DMA channel {} block control <- 0x{:08X}", static_cast<Channel>(channel_index), value);
    363         state.block_control.bits = value;
    364         return;
    365       }
    366 
    367       case 0x08:
    368       {
    369         // HACK: Due to running DMA in slices, we can't wait for the current halt time to finish before running the
    370         // first block of a new channel. This affects games like FF8, where they kick a SPU transfer while a GPU
    371         // transfer is happening, and the SPU transfer gets delayed until the GPU transfer unhalts and finishes, and
    372         // breaks the interrupt.
    373         const bool ignore_halt = !state.channel_control.enable_busy && (value & (1u << 24));
    374 
    375         state.channel_control.bits = (state.channel_control.bits & ~ChannelState::ChannelControl::WRITE_MASK) |
    376                                      (value & ChannelState::ChannelControl::WRITE_MASK);
    377         TRACE_LOG("DMA channel {} channel control <- 0x{:08X}", static_cast<Channel>(channel_index),
    378                   state.channel_control.bits);
    379 
    380         // start/trigger bit must be enabled for OTC
    381         if (static_cast<Channel>(channel_index) == Channel::OTC)
    382           SetRequest(static_cast<Channel>(channel_index), state.channel_control.start_trigger);
    383 
    384         if (CanTransferChannel(static_cast<Channel>(channel_index), ignore_halt))
    385         {
    386           if (static_cast<Channel>(channel_index) != Channel::OTC &&
    387               state.channel_control.sync_mode == SyncMode::Manual && state.channel_control.chopping_enable)
    388           {
    389             // Figure out how roughly many CPU cycles it'll take for the transfer to complete, and delay the transfer.
    390             // Needed for Lagnacure Legend, which sets DICR to enable interrupts after CHCR to kickstart the transfer.
    391             // This has an artificial 500 cycle cap, setting it too high causes Namco Museum Vol. 4 and a couple of
    392             // other games to crash... so clearly something is missing here.
    393             const u32 block_words = (1u << state.channel_control.chopping_dma_window_size);
    394             const u32 cpu_cycles_per_block = (1u << state.channel_control.chopping_cpu_window_size);
    395             const u32 blocks = state.block_control.manual.word_count / block_words;
    396             const TickCount delay_cycles = std::min(static_cast<TickCount>(cpu_cycles_per_block * blocks), 500);
    397             if (delay_cycles > 1 && true)
    398             {
    399               DEV_LOG("Delaying {} transfer by {} cycles due to chopping", static_cast<Channel>(channel_index),
    400                       delay_cycles);
    401               HaltTransfer(delay_cycles);
    402             }
    403             else
    404             {
    405               s_channel_transfer_functions[channel_index]();
    406             }
    407           }
    408           else
    409           {
    410             s_channel_transfer_functions[channel_index]();
    411           }
    412         }
    413         return;
    414       }
    415 
    416       default:
    417         break;
    418     }
    419   }
    420   else
    421   {
    422     switch (offset)
    423     {
    424       case 0x70:
    425       {
    426         TRACE_LOG("DPCR <- 0x{:08X}", value);
    427         s_state.DPCR.bits = value;
    428 
    429         for (u32 i = 0; i < NUM_CHANNELS; i++)
    430         {
    431           if (CanTransferChannel(static_cast<Channel>(i), false))
    432           {
    433             if (!s_channel_transfer_functions[i]())
    434               break;
    435           }
    436         }
    437 
    438         return;
    439       }
    440 
    441       case 0x74:
    442       {
    443         TRACE_LOG("DICR <- 0x{:08X}", value);
    444         s_state.DICR.bits = (s_state.DICR.bits & ~DICR_WRITE_MASK) | (value & DICR_WRITE_MASK);
    445         s_state.DICR.bits = s_state.DICR.bits & ~(value & DICR_RESET_MASK);
    446         UpdateIRQ();
    447         return;
    448       }
    449 
    450       default:
    451         break;
    452     }
    453   }
    454 
    455   ERROR_LOG("Unhandled register write: {:02X} <- {:08X}", offset, value);
    456 }
    457 
    458 void DMA::SetRequest(Channel channel, bool request)
    459 {
    460   ChannelState& cs = s_state.channels[static_cast<u32>(channel)];
    461   if (cs.request == request)
    462     return;
    463 
    464   cs.request = request;
    465   if (CanTransferChannel(channel, false))
    466     s_channel_transfer_functions[static_cast<u32>(channel)]();
    467 }
    468 
    469 ALWAYS_INLINE_RELEASE bool DMA::CanTransferChannel(Channel channel, bool ignore_halt)
    470 {
    471   if (!s_state.DPCR.GetMasterEnable(channel))
    472     return false;
    473 
    474   const ChannelState& cs = s_state.channels[static_cast<u32>(channel)];
    475   if (!cs.channel_control.enable_busy)
    476     return false;
    477 
    478   if (cs.channel_control.sync_mode != SyncMode::Manual && (IsTransferHalted() && !ignore_halt))
    479     return false;
    480 
    481   return cs.request;
    482 }
    483 
// The controller is considered halted while the unhalt event is pending.
bool DMA::IsTransferHalted()
{
  return s_state.unhalt_event.IsActive();
}
    488 
// Recomputes the DICR master flag and forwards it to the interrupt controller
// as a level-triggered line.
void DMA::UpdateIRQ()
{
  [[maybe_unused]] const auto old_dicr = s_state.DICR;
  s_state.DICR.UpdateMasterFlag();
  if (!old_dicr.master_flag && s_state.DICR.master_flag)
    TRACE_LOG("Firing DMA master interrupt");
  InterruptController::SetLineState(InterruptController::IRQ::DMA, s_state.DICR.master_flag);
}
    497 
// A linked-list pointer terminates the chain when all 24 address bits are set.
ALWAYS_INLINE_RELEASE bool DMA::IsLinkedListTerminator(PhysicalMemoryAddress address)
{
  return ((address & LINKED_LIST_TERMINATOR) == LINKED_LIST_TERMINATOR);
}
    502 
    503 ALWAYS_INLINE_RELEASE bool DMA::CheckForBusError(Channel channel, ChannelState& cs, PhysicalMemoryAddress address,
    504                                                  u32 size)
    505 {
    506   // Relying on a transfer partially happening at the end of RAM, then hitting a bus error would be pretty silly.
    507   if ((address + size) >= Bus::g_ram_mapped_size) [[unlikely]]
    508   {
    509     DEBUG_LOG("DMA bus error on channel {} at address 0x{:08X} size {}", channel, address, size);
    510     cs.channel_control.enable_busy = false;
    511     s_state.DICR.bus_error = true;
    512     s_state.DICR.SetIRQFlag(channel);
    513     UpdateIRQ();
    514     return true;
    515   }
    516 
    517   return false;
    518 }
    519 
// Finishes a channel's transfer: clears the busy bit and, if DICR enables it,
// latches the channel's IRQ flag and re-evaluates the interrupt line.
ALWAYS_INLINE_RELEASE void DMA::CompleteTransfer(Channel channel, ChannelState& cs)
{
  // start/busy bit is cleared on end of transfer
  DEBUG_LOG("DMA transfer for channel {} complete", channel);
  cs.channel_control.enable_busy = false;
  if (s_state.DICR.ShouldSetIRQFlag(channel))
  {
    DEBUG_LOG("Setting DMA interrupt for channel {}", channel);
    s_state.DICR.SetIRQFlag(channel);
    UpdateIRQ();
  }
}
    532 
    533 TickCount DMA::GetMaxSliceTicks(TickCount max_slice_size)
    534 {
    535   const TickCount max = Pad::IsTransmitting() ? SLICE_SIZE_WHEN_TRANSMITTING_PAD : max_slice_size;
    536   if (!TimingEvents::IsRunningEvents())
    537     return max;
    538 
    539   const TickCount remaining_in_event_loop =
    540     static_cast<TickCount>(TimingEvents::GetEventRunTickCounter() - TimingEvents::GetGlobalTickCounter());
    541   return std::max<TickCount>(max - remaining_in_event_loop, 1);
    542 }
    543 
// Executes as much of the channel's transfer as fits in one slice. Returns
// false when the transfer halted part-way (the unhalt event will resume it),
// true when it completed, hit a bus error, or is waiting on the device.
template<DMA::Channel channel>
bool DMA::TransferChannel()
{
  ChannelState& cs = s_state.channels[static_cast<u32>(channel)];

  const bool copy_to_device = cs.channel_control.copy_to_device;

  // start/trigger bit is cleared on beginning of transfer
  cs.channel_control.start_trigger = false;

  PhysicalMemoryAddress current_address = cs.base_address;
  const PhysicalMemoryAddress increment = cs.channel_control.address_step_reverse ? static_cast<u32>(-4) : UINT32_C(4);
  switch (cs.channel_control.sync_mode)
  {
    case SyncMode::Manual:
    {
      // Single burst of word_count words; always completes in one call.
      const u32 word_count = cs.block_control.manual.GetWordCount();
      DEBUG_LOG("DMA[{}]: Copying {} words {} 0x{:08X}", channel, word_count, copy_to_device ? "from" : "to",
                current_address);

      const PhysicalMemoryAddress transfer_addr = current_address & TRANSFER_ADDRESS_MASK;
      if (CheckForBusError(channel, cs, transfer_addr, (word_count - 1) * increment)) [[unlikely]]
        return true;

      TickCount used_ticks;
      if (copy_to_device)
        used_ticks = TransferMemoryToDevice<channel>(transfer_addr, increment, word_count);
      else
        used_ticks = TransferDeviceToMemory<channel>(transfer_addr, increment, word_count);

      CPU::AddPendingTicks(used_ticks);
      CompleteTransfer(channel, cs);
      return true;
    }

    case SyncMode::LinkedList:
    {
      if (!copy_to_device)
      {
        Panic("Linked list not implemented for DMA reads");
        return true;
      }

      DEBUG_LOG("DMA[{}]: Copying linked list starting at 0x{:08X} to device", channel, current_address);

      // Prove to the compiler that nothing's going to modify these.
      const u8* const ram_ptr = Bus::g_ram;
      const u32 mask = Bus::g_ram_mask;

      const TickCount slice_ticks = GetMaxSliceTicks(g_settings.dma_max_slice_ticks);
      TickCount remaining_ticks = slice_ticks;
      while (cs.request && remaining_ticks > 0)
      {
        // Packet header: top byte is the word count, low 24 bits the next pointer.
        u32 header;
        PhysicalMemoryAddress transfer_addr = current_address & TRANSFER_ADDRESS_MASK;
        if (CheckForBusError(channel, cs, transfer_addr, sizeof(header))) [[unlikely]]
        {
          cs.base_address = current_address;
          return true;
        }

        std::memcpy(&header, &ram_ptr[transfer_addr & mask], sizeof(header));
        const u32 word_count = header >> 24;
        const u32 next_address = header & 0x00FFFFFFu;
        TRACE_LOG(" .. linked list entry at 0x{:08X} size={}({} words) next=0x{:08X}", current_address, word_count * 4,
                  word_count, next_address);

        const TickCount setup_ticks = (word_count > 0) ?
                                        (LINKED_LIST_HEADER_READ_TICKS + LINKED_LIST_BLOCK_SETUP_TICKS) :
                                        LINKED_LIST_HEADER_READ_TICKS;
        CPU::AddPendingTicks(setup_ticks);
        remaining_ticks -= setup_ticks;

        if (word_count > 0)
        {
          if (CheckForBusError(channel, cs, transfer_addr, (word_count - 1) * increment)) [[unlikely]]
          {
            cs.base_address = current_address;
            return true;
          }

          // Payload starts immediately after the header word.
          const TickCount block_ticks = TransferMemoryToDevice<channel>(transfer_addr + sizeof(header), 4, word_count);
          CPU::AddPendingTicks(block_ticks);
          remaining_ticks -= block_ticks;
        }

        current_address = next_address;
        if (IsLinkedListTerminator(current_address))
        {
          // Terminator is 24 bits, so is MADR, so it'll always be 0xFFFFFF.
          cs.base_address = LINKED_LIST_TERMINATOR;
          CompleteTransfer(channel, cs);
          return true;
        }
      }

      cs.base_address = current_address;
      if (cs.request)
      {
        // stall the transfer for a bit if we ran for too long
        HaltTransfer(g_settings.dma_halt_ticks);
        return false;
      }
      else
      {
        // device dropped its request; the list is not yet complete and will
        // continue when the request is raised again
        return true;
      }
    }

    case SyncMode::Request:
    {
      DEBUG_LOG("DMA[{}]: Copying {} blocks of size {} ({} total words) {} 0x{:08X}", channel,
                cs.block_control.request.GetBlockCount(), cs.block_control.request.GetBlockSize(),
                cs.block_control.request.GetBlockCount() * cs.block_control.request.GetBlockSize(),
                copy_to_device ? "from" : "to", current_address);

      const u32 block_size = cs.block_control.request.GetBlockSize();
      u32 blocks_remaining = cs.block_control.request.GetBlockCount();
      TickCount ticks_remaining = GetMaxSliceTicks(g_settings.dma_max_slice_ticks);

      if (copy_to_device)
      {
        do
        {
          const PhysicalMemoryAddress transfer_addr = current_address & TRANSFER_ADDRESS_MASK;
          if (CheckForBusError(channel, cs, transfer_addr, (block_size - 1) * increment)) [[unlikely]]
          {
            // Persist progress so MADR/BCR reflect where the error occurred.
            cs.base_address = current_address;
            cs.block_control.request.block_count = blocks_remaining;
            return true;
          }

          const TickCount ticks = TransferMemoryToDevice<channel>(transfer_addr, increment, block_size);
          CPU::AddPendingTicks(ticks);

          ticks_remaining -= ticks;
          blocks_remaining--;

          current_address = (transfer_addr + (increment * block_size));
        } while (cs.request && blocks_remaining > 0 && ticks_remaining > 0);
      }
      else
      {
        do
        {
          const PhysicalMemoryAddress transfer_addr = current_address & TRANSFER_ADDRESS_MASK;
          if (CheckForBusError(channel, cs, transfer_addr, (block_size - 1) * increment)) [[unlikely]]
          {
            // Persist progress so MADR/BCR reflect where the error occurred.
            cs.base_address = current_address;
            cs.block_control.request.block_count = blocks_remaining;
            return true;
          }

          const TickCount ticks = TransferDeviceToMemory<channel>(transfer_addr, increment, block_size);
          CPU::AddPendingTicks(ticks);

          ticks_remaining -= ticks;
          blocks_remaining--;

          current_address = (transfer_addr + (increment * block_size));
        } while (cs.request && blocks_remaining > 0 && ticks_remaining > 0);
      }

      cs.base_address = current_address;
      cs.block_control.request.block_count = blocks_remaining;

      // finish transfer later if the request was cleared
      if (blocks_remaining > 0)
      {
        if (cs.request)
        {
          // we got halted
          if (!s_state.unhalt_event.IsActive())
            HaltTransfer(g_settings.dma_halt_ticks);

          return false;
        }

        return true;
      }

      CompleteTransfer(channel, cs);
      return true;
    }

    default:
      Panic("Unimplemented sync mode");
  }

  UnreachableCode();
}
    736 
    737 void DMA::HaltTransfer(TickCount duration)
    738 {
    739   s_state.halt_ticks_remaining += duration;
    740   DEBUG_LOG("Halting DMA for {} ticks", s_state.halt_ticks_remaining);
    741   if (s_state.unhalt_event.IsActive())
    742     return;
    743 
    744   DebugAssert(!s_state.unhalt_event.IsActive());
    745   s_state.unhalt_event.SetIntervalAndSchedule(s_state.halt_ticks_remaining);
    746 }
    747 
// TimingEvent callback: ends the halt period and resumes any channels that can
// run. If a resumed transfer halts again (returns false), the remaining halt
// time is preserved; otherwise the counter is reset to zero.
void DMA::UnhaltTransfer(void*, TickCount ticks, TickCount ticks_late)
{
  DEBUG_LOG("Resuming DMA after {} ticks, {} ticks late", ticks, -(s_state.halt_ticks_remaining - ticks));
  s_state.halt_ticks_remaining -= ticks;
  s_state.unhalt_event.Deactivate();

  // TODO: Use channel priority. But doing it in ascending order is probably good enough.
  // Main thing is that OTC happens after GPU, because otherwise it'll wipe out the LL.
  for (u32 i = 0; i < NUM_CHANNELS; i++)
  {
    if (CanTransferChannel(static_cast<Channel>(i), false))
    {
      if (!s_channel_transfer_functions[i]())
        return;
    }
  }

  // We didn't run too long, so reset timer.
  s_state.halt_ticks_remaining = 0;
}
    768 
// Copies word_count words from RAM (starting at address, stepping by
// increment) to the channel's device, returning the RAM access tick cost.
// For non-GPU channels the words are first gathered into a linear scratch
// buffer when the walk is backwards or wraps around the RAM mask, so the
// device receives one contiguous span.
template<DMA::Channel channel>
TickCount DMA::TransferMemoryToDevice(u32 address, u32 increment, u32 word_count)
{
  const u32 mask = Bus::g_ram_mask;
#ifdef _DEBUG
  if ((address & mask) != address)
    DEBUG_LOG("DMA TO {} from masked RAM address 0x{:08X} => 0x{:08X}", channel, address, (address & mask));
#endif

  address &= mask;

  const u32* src_pointer = reinterpret_cast<u32*>(Bus::g_ram + address);
  if constexpr (channel != Channel::GPU)
  {
    if (static_cast<s32>(increment) < 0 || ((address + (increment * word_count)) & mask) <= address) [[unlikely]]
    {
      // Use temp buffer if it's wrapping around
      if (s_state.transfer_buffer.size() < word_count)
        s_state.transfer_buffer.resize(word_count);
      src_pointer = s_state.transfer_buffer.data();

      u8* ram_pointer = Bus::g_ram;
      for (u32 i = 0; i < word_count; i++)
      {
        std::memcpy(&s_state.transfer_buffer[i], &ram_pointer[address], sizeof(u32));
        address = (address + increment) & mask;
      }
    }
  }

  switch (channel)
  {
    case Channel::GPU:
    {
      // The GPU path streams word-by-word (DMAWrite takes the source address),
      // so no gather buffer is needed.
      if (g_gpu->BeginDMAWrite()) [[likely]]
      {
        u8* ram_pointer = Bus::g_ram;
        for (u32 i = 0; i < word_count; i++)
        {
          u32 value;
          std::memcpy(&value, &ram_pointer[address], sizeof(u32));
          g_gpu->DMAWrite(address, value);
          address = (address + increment) & mask;
        }
        g_gpu->EndDMAWrite();
      }
    }
    break;

    case Channel::SPU:
      SPU::DMAWrite(src_pointer, word_count);
      break;

    case Channel::MDECin:
      MDEC::DMAWrite(src_pointer, word_count);
      break;

    case Channel::CDROM:
    case Channel::MDECout:
    case Channel::PIO:
    default:
      ERROR_LOG("Unhandled DMA channel {} for device write", static_cast<u32>(channel));
      break;
  }

  return Bus::GetDMARAMTickCount(word_count);
}
    836 
    837 template<DMA::Channel channel>
    838 TickCount DMA::TransferDeviceToMemory(u32 address, u32 increment, u32 word_count)
    839 {
    840   const u32 mask = Bus::g_ram_mask;
    841 #ifdef _DEBUG
    842   if ((address & mask) != address)
    843     DEBUG_LOG("DMA FROM {} to masked RAM address 0x{:08X} => 0x{:08X}", channel, address, (address & mask));
    844 #endif
    845 
    846   // TODO: This might not be correct for OTC.
    847   address &= mask;
    848 
    849   if constexpr (channel == Channel::OTC)
    850   {
    851     // clear ordering table
    852     u8* ram_pointer = Bus::g_ram;
    853     const u32 word_count_less_1 = word_count - 1;
    854     for (u32 i = 0; i < word_count_less_1; i++)
    855     {
    856       u32 next = ((address - 4) & mask);
    857       std::memcpy(&ram_pointer[address], &next, sizeof(next));
    858       address = next;
    859     }
    860 
    861     const u32 terminator = UINT32_C(0xFFFFFF);
    862     std::memcpy(&ram_pointer[address], &terminator, sizeof(terminator));
    863     return Bus::GetDMARAMTickCount(word_count);
    864   }
    865 
    866   u32* dest_pointer = reinterpret_cast<u32*>(&Bus::g_ram[address]);
    867   if (static_cast<s32>(increment) < 0 || ((address + (increment * word_count)) & mask) <= address) [[unlikely]]
    868   {
    869     // Use temp buffer if it's wrapping around
    870     if (s_state.transfer_buffer.size() < word_count)
    871       s_state.transfer_buffer.resize(word_count);
    872     dest_pointer = s_state.transfer_buffer.data();
    873   }
    874 
    875   // Read from device.
    876   switch (channel)
    877   {
    878     case Channel::GPU:
    879       g_gpu->DMARead(dest_pointer, word_count);
    880       break;
    881 
    882     case Channel::CDROM:
    883       CDROM::DMARead(dest_pointer, word_count);
    884       break;
    885 
    886     case Channel::SPU:
    887       SPU::DMARead(dest_pointer, word_count);
    888       break;
    889 
    890     case Channel::MDECout:
    891       MDEC::DMARead(dest_pointer, word_count);
    892       break;
    893 
    894     default:
    895       ERROR_LOG("Unhandled DMA channel {} for device read", static_cast<u32>(channel));
    896       std::fill_n(dest_pointer, word_count, UINT32_C(0xFFFFFFFF));
    897       break;
    898   }
    899 
    900   if (dest_pointer == s_state.transfer_buffer.data()) [[unlikely]]
    901   {
    902     u8* ram_pointer = Bus::g_ram;
    903     for (u32 i = 0; i < word_count; i++)
    904     {
    905       std::memcpy(&ram_pointer[address], &s_state.transfer_buffer[i], sizeof(u32));
    906       address = (address + increment) & mask;
    907     }
    908   }
    909 
    910   return Bus::GetDMARAMTickCount(word_count);
    911 }
    912 
    913 void DMA::DrawDebugStateWindow()
    914 {
    915   static constexpr u32 NUM_COLUMNS = 10;
    916   static constexpr std::array<const char*, NUM_COLUMNS> column_names = {
    917     {"#", "Req", "Direction", "Chopping", "Mode", "Busy", "Enable", "Priority", "IRQ", "Flag"}};
    918   static constexpr std::array<const char*, 4> sync_mode_names = {{"Manual", "Request", "LinkedList", "Reserved"}};
    919 
    920   const float framebuffer_scale = ImGuiManager::GetGlobalScale();
    921 
    922   ImGui::SetNextWindowSize(ImVec2(850.0f * framebuffer_scale, 250.0f * framebuffer_scale), ImGuiCond_FirstUseEver);
    923   if (!ImGui::Begin("DMA State", nullptr))
    924   {
    925     ImGui::End();
    926     return;
    927   }
    928 
    929   ImGui::Columns(NUM_COLUMNS);
    930   ImGui::SetColumnWidth(0, 100.0f * framebuffer_scale);
    931   ImGui::SetColumnWidth(1, 50.0f * framebuffer_scale);
    932   ImGui::SetColumnWidth(2, 100.0f * framebuffer_scale);
    933   ImGui::SetColumnWidth(3, 150.0f * framebuffer_scale);
    934   ImGui::SetColumnWidth(4, 80.0f * framebuffer_scale);
    935   ImGui::SetColumnWidth(5, 80.0f * framebuffer_scale);
    936   ImGui::SetColumnWidth(6, 80.0f * framebuffer_scale);
    937   ImGui::SetColumnWidth(7, 80.0f * framebuffer_scale);
    938   ImGui::SetColumnWidth(8, 80.0f * framebuffer_scale);
    939   ImGui::SetColumnWidth(9, 80.0f * framebuffer_scale);
    940 
    941   for (const char* title : column_names)
    942   {
    943     ImGui::TextUnformatted(title);
    944     ImGui::NextColumn();
    945   }
    946 
    947   const ImVec4 active(1.0f, 1.0f, 1.0f, 1.0f);
    948   const ImVec4 inactive(0.5f, 0.5f, 0.5f, 1.0f);
    949 
    950   for (u32 i = 0; i < NUM_CHANNELS; i++)
    951   {
    952     const ChannelState& cs = s_state.channels[i];
    953 
    954     ImGui::TextColored(cs.channel_control.enable_busy ? active : inactive, "%u[%s]", i, s_channel_names[i]);
    955     ImGui::NextColumn();
    956     ImGui::TextColored(cs.request ? active : inactive, cs.request ? "Yes" : "No");
    957     ImGui::NextColumn();
    958     ImGui::Text("%s%s", cs.channel_control.copy_to_device ? "FromRAM" : "ToRAM",
    959                 cs.channel_control.address_step_reverse ? " Addr+" : " Addr-");
    960     ImGui::NextColumn();
    961     ImGui::TextColored(cs.channel_control.chopping_enable ? active : inactive, "%s/%u/%u",
    962                        cs.channel_control.chopping_enable ? "Yes" : "No",
    963                        cs.channel_control.chopping_cpu_window_size.GetValue(),
    964                        cs.channel_control.chopping_dma_window_size.GetValue());
    965     ImGui::NextColumn();
    966     ImGui::Text("%s", sync_mode_names[static_cast<u8>(cs.channel_control.sync_mode.GetValue())]);
    967     ImGui::NextColumn();
    968     ImGui::TextColored(cs.channel_control.enable_busy ? active : inactive, "%s%s",
    969                        cs.channel_control.enable_busy ? "Busy" : "Idle",
    970                        cs.channel_control.start_trigger ? " (Trigger)" : "");
    971     ImGui::NextColumn();
    972     ImGui::TextColored(s_state.DPCR.GetMasterEnable(static_cast<Channel>(i)) ? active : inactive,
    973                        s_state.DPCR.GetMasterEnable(static_cast<Channel>(i)) ? "Enabled" : "Disabled");
    974     ImGui::NextColumn();
    975     ImGui::TextColored(s_state.DPCR.GetMasterEnable(static_cast<Channel>(i)) ? active : inactive, "%u",
    976                        s_state.DPCR.GetPriority(static_cast<Channel>(i)));
    977     ImGui::NextColumn();
    978     ImGui::TextColored(s_state.DICR.GetIRQEnabled(static_cast<Channel>(i)) ? active : inactive,
    979                        s_state.DICR.GetIRQEnabled(static_cast<Channel>(i)) ? "Enabled" : "Disabled");
    980     ImGui::NextColumn();
    981     ImGui::TextColored(s_state.DICR.GetIRQFlag(static_cast<Channel>(i)) ? active : inactive,
    982                        s_state.DICR.GetIRQFlag(static_cast<Channel>(i)) ? "IRQ" : "");
    983     ImGui::NextColumn();
    984   }
    985 
    986   ImGui::Columns(1);
    987   ImGui::End();
    988 }
    989 
// Instantiate channel functions. The transfer templates above are only defined
// in this translation unit, so every per-channel specialization used via the
// s_channel_transfer_functions table must be explicitly instantiated here.
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::MDECin>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::MDECin>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::MDECin>();
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::MDECout>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::MDECout>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::MDECout>();
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::GPU>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::GPU>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::GPU>();
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::CDROM>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::CDROM>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::CDROM>();
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::SPU>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::SPU>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::SPU>();
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::PIO>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::PIO>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::PIO>();
template TickCount DMA::TransferDeviceToMemory<DMA::Channel::OTC>(u32 address, u32 increment, u32 word_count);
template TickCount DMA::TransferMemoryToDevice<DMA::Channel::OTC>(u32 address, u32 increment, u32 word_count);
template bool DMA::TransferChannel<DMA::Channel::OTC>();