diff --git a/src/core/mdec.cpp b/src/core/mdec.cpp index dc50c1c97..9be656c9c 100644 --- a/src/core/mdec.cpp +++ b/src/core/mdec.cpp @@ -28,10 +28,14 @@ bool MDEC::DoState(StateWrapper& sw) sw.Do(&m_data_in_fifo); sw.Do(&m_data_out_fifo); sw.Do(&m_command); - sw.Do(&m_command_parameter_count); + sw.Do(&m_remaining_words); sw.Do(&m_iq_uv); sw.Do(&m_iq_y); sw.Do(&m_scale_table); + sw.Do(&m_blocks); + sw.Do(&m_current_block); + sw.Do(&m_current_coefficient); + sw.Do(&m_current_q_scale); return !sw.HasError(); } @@ -115,16 +119,9 @@ void MDEC::UpdateStatusRegister() m_status.data_out_fifo_empty = m_data_out_fifo.IsEmpty(); m_status.data_in_fifo_full = m_data_in_fifo.IsFull(); - m_status.command_busy = !m_data_in_fifo.IsEmpty(); - if (!m_data_in_fifo.IsEmpty()) - { - const CommandWord cw{m_data_in_fifo.Peek(0)}; - m_status.parameter_words_remaining = Truncate16(m_command_parameter_count - m_data_in_fifo.GetSize()); - } - else - { - m_status.parameter_words_remaining = 0; - } + m_status.command_busy = m_command != Command::None; + m_status.parameter_words_remaining = Truncate16(m_remaining_words); + m_status.current_block = (m_current_block + 4) % NUM_BLOCKS; } u32 MDEC::ReadDataRegister() @@ -144,7 +141,7 @@ void MDEC::WriteCommandRegister(u32 value) { Log_DebugPrintf("MDEC command/data register <- 0x%08X", value); - if (m_data_in_fifo.IsEmpty()) + if (m_command == Command::None) { // first word const CommandWord cw{value}; @@ -156,15 +153,15 @@ void MDEC::WriteCommandRegister(u32 value) switch (cw.command) { case Command::DecodeMacroblock: - m_command_parameter_count = ZeroExtend32(cw.parameter_word_count.GetValue()); + m_remaining_words = ZeroExtend32(cw.parameter_word_count.GetValue()); break; case Command::SetIqTab: - m_command_parameter_count = 16 + (((value & 1) != 0) ? 16 : 0); + m_remaining_words = 16 + (((value & 1) != 0) ? 16 : 0); break; case Command::SetScale: - m_command_parameter_count = 32; + m_remaining_words = 32; break; default: @@ -174,76 +171,81 @@ void MDEC::WriteCommandRegister(u32 value) Log_DebugPrintf("MDEC command: 0x%08X (%u, %u words in parameter, %u expected)", cw.bits, ZeroExtend32(static_cast(cw.command.GetValue())), - ZeroExtend32(cw.parameter_word_count.GetValue()), m_command_parameter_count); + ZeroExtend32(cw.parameter_word_count.GetValue()), m_remaining_words); } - - m_data_in_fifo.Push(value); - - if (m_data_in_fifo.GetSize() <= m_command_parameter_count) + else { - UpdateStatusRegister(); - return; + DebugAssert(m_remaining_words > 0); + m_data_in_fifo.Push(Truncate16(value)); + m_data_in_fifo.Push(Truncate16(value >> 16)); + m_remaining_words--; } - // pop command - m_data_in_fifo.RemoveOne(); switch (m_command) { case Command::DecodeMacroblock: - HandleDecodeMacroblockCommand(); - break; + { + if (!HandleDecodeMacroblockCommand()) + return; + } + break; case Command::SetIqTab: - HandleSetQuantTableCommand(); - break; + { + if (!HandleSetQuantTableCommand()) + return; + } + break; case Command::SetScale: - HandleSetScaleCommand(); - break; + { + if (!HandleSetScaleCommand()) + return; + } + break; } m_data_in_fifo.Clear(); m_command = Command::None; - m_command_parameter_count = 0; + m_current_block = 0; + m_current_coefficient = 64; + m_current_q_scale = 0; UpdateStatusRegister(); } -void MDEC::HandleDecodeMacroblockCommand() +bool MDEC::HandleDecodeMacroblockCommand() { - // TODO: Remove this copy and strict aliasing violation.. - std::vector temp(m_data_in_fifo.GetSize() * 2); - m_data_in_fifo.PopRange(reinterpret_cast(temp.data()), m_data_in_fifo.GetSize()); - - const u16* src = temp.data(); - const u16* src_end = src + temp.size(); - if (m_status.data_output_depth <= DataOutputDepth_8Bit) { - while (src != src_end) + while (!m_data_in_fifo.IsEmpty()) { - src = DecodeMonoMacroblock(src, src_end); - Log_DevPrintf("Decoded mono macroblock"); + if (!DecodeMonoMacroblock()) + break; } + + return m_remaining_words == 0; } else { - while (src != src_end) + while (!m_data_in_fifo.IsEmpty()) { - u32 old_offs = static_cast(src - temp.data()); - src = DecodeColoredMacroblock(src, src_end); - Log_DevPrintf("Decoded colour macroblock, ptr was %u, now %u", old_offs, static_cast(src - temp.data())); + if (!DecodeColoredMacroblock()) + break; } + + return m_remaining_words == 0; } } -const u16* MDEC::DecodeMonoMacroblock(const u16* src, const u16* src_end) +bool MDEC::DecodeMonoMacroblock() { - std::array Yblk; - if (!rl_decode_block(Yblk.data(), src, src_end, m_iq_y.data())) - return src_end; + if (!rl_decode_block(m_blocks[0].data(), m_iq_y.data())) + return false; + + IDCT(m_blocks[0].data()); std::array out_r; - y_to_mono(Yblk, out_r); + y_to_mono(m_blocks[0], out_r); switch (m_status.data_output_depth) { @@ -283,30 +285,28 @@ const u16* MDEC::DecodeMonoMacroblock(const u16* src, const u16* src_end) break; } - return src; + return true; } -const u16* MDEC::DecodeColoredMacroblock(const u16* src, const u16* src_end) +bool MDEC::DecodeColoredMacroblock() { - std::array Crblk; - std::array Cbblk; - std::array, 4> Yblk; std::array out_rgb; - if (!rl_decode_block(Crblk.data(), src, src_end, m_iq_uv.data()) || - !rl_decode_block(Cbblk.data(), src, src_end, m_iq_uv.data()) || - !rl_decode_block(Yblk[0].data(), src, src_end, m_iq_y.data()) || - !rl_decode_block(Yblk[1].data(), src, src_end, m_iq_y.data()) || - !rl_decode_block(Yblk[2].data(), src, src_end, m_iq_y.data()) || - !rl_decode_block(Yblk[3].data(), src, src_end, m_iq_y.data())) + for (; m_current_block < NUM_BLOCKS; m_current_block++) { - return src_end; + if (!rl_decode_block(m_blocks[m_current_block].data(), (m_current_block >= 2) ? m_iq_y.data() : m_iq_uv.data())) + return false; + + IDCT(m_blocks[m_current_block].data()); } - yuv_to_rgb(0, 0, Crblk, Cbblk, Yblk[0], out_rgb); - yuv_to_rgb(8, 0, Crblk, Cbblk, Yblk[1], out_rgb); - yuv_to_rgb(0, 8, Crblk, Cbblk, Yblk[2], out_rgb); - yuv_to_rgb(8, 8, Crblk, Cbblk, Yblk[3], out_rgb); + // done decoding + m_current_block = 0; + + yuv_to_rgb(0, 0, m_blocks[0], m_blocks[1], m_blocks[2], out_rgb); + yuv_to_rgb(8, 0, m_blocks[0], m_blocks[1], m_blocks[3], out_rgb); + yuv_to_rgb(0, 8, m_blocks[0], m_blocks[1], m_blocks[4], out_rgb); + yuv_to_rgb(8, 8, m_blocks[0], m_blocks[1], m_blocks[5], out_rgb); switch (m_status.data_output_depth) { @@ -375,7 +375,7 @@ const u16* MDEC::DecodeColoredMacroblock(const u16* src, const u16* src_end) break; } - return src; + return true; } static constexpr std::array zigzag = {{0, 1, 5, 6, 14, 15, 27, 28, 2, 4, 7, 13, 16, 26, 29, 42, @@ -387,64 +387,68 @@ static constexpr std::array zagzig = {{0, 1, 8, 16, 9, 2, 3, 10, 1 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63}}; -bool MDEC::rl_decode_block(s16* blk, const u16*& src, const u16* src_end, const u8* qt) +bool MDEC::rl_decode_block(s16* blk, const u8* qt) { - std::fill_n(blk, 64, s16(0)); - // skip padding - u16 n; - for (;;) + if (m_current_coefficient == 64) { - if (src == src_end) - return false; + std::fill_n(blk, 64, s16(0)); - n = *(src++); - if (n == 0xFE00) - continue; - else - break; + // skip padding at start + u16 n; + for (;;) + { + if (m_data_in_fifo.IsEmpty()) + return false; + + n = m_data_in_fifo.Pop(); + if (n == 0xFE00) + continue; + else + break; + } + + m_current_coefficient = 0; + m_current_q_scale = (n >> 10) & 0x3F; + s32 val = + SignExtendN<10, s32>(static_cast(n & 0x3FF)) * static_cast(ZeroExtend32(qt[m_current_coefficient])); + + if (m_current_q_scale == 0) + val = SignExtendN<10, s32>(static_cast(n & 0x3FF)) * 2; + + val = std::clamp(val, -0x400, 0x3FF); + if (m_current_q_scale > 0) + blk[zagzig[m_current_coefficient]] = static_cast(val); + else if (m_current_q_scale == 0) + blk[m_current_coefficient] = static_cast(val); } - u32 k = 0; - u16 q_scale = (n >> 10) & 0x3F; - s32 val = SignExtendN<10, s32>(static_cast(n & 0x3FF)) * static_cast(ZeroExtend32(qt[k])); - - for (;;) + while (!m_data_in_fifo.IsEmpty()) { - if (q_scale == 0) + u16 n = m_data_in_fifo.Pop(); + m_current_coefficient += ((n >> 10) & 0x3F) + 1; + if (m_current_coefficient >= 64) + { + m_current_coefficient = 64; + return true; + } + + s32 val = (SignExtendN<10, s32>(static_cast(n & 0x3FF)) * + static_cast(ZeroExtend32(qt[m_current_coefficient])) * static_cast(m_current_q_scale) + + 4) / + 8; + + if (m_current_q_scale == 0) val = SignExtendN<10, s32>(static_cast(n & 0x3FF)) * 2; val = std::clamp(val, -0x400, 0x3FF); // val = val * static_cast(ZeroExtend32(scalezag[i])); - if (q_scale > 0) - blk[zagzig[k]] = static_cast(val); - else if (q_scale == 0) - blk[k] = static_cast(val); - - if (src == src_end) - break; - - n = *(src++); - k += ((n >> 10) & 0x3F) + 1; - if (k >= 64) - break; - - val = (SignExtendN<10, s32>(static_cast(n & 0x3FF)) * static_cast(ZeroExtend32(qt[k])) * - static_cast(q_scale) + - 4) / - 8; + if (m_current_q_scale > 0) + blk[zagzig[m_current_coefficient]] = static_cast(val); + else if (m_current_q_scale == 0) + blk[m_current_coefficient] = static_cast(val); } -#undef READ_SRC - - // insufficient coefficients - if (k < 64) - { - Log_DebugPrintf("Only %u of 64 coefficients in block, skipping", k); - return false; - } - - IDCT(blk); - return true; + return false; } void MDEC::IDCT(s16* blk) @@ -517,10 +521,16 @@ void MDEC::y_to_mono(const std::array& Yblk, std::array& r_out) } } -void MDEC::HandleSetQuantTableCommand() +bool MDEC::HandleSetQuantTableCommand() { + if (m_remaining_words > 0) + { + UpdateStatusRegister(); + return false; + } + // TODO: Remove extra copies.. - std::array packed_data; + std::array packed_data; m_data_in_fifo.PopRange(packed_data.data(), static_cast(packed_data.size())); std::memcpy(m_iq_y.data(), packed_data.data(), m_iq_y.size()); @@ -529,12 +539,21 @@ void MDEC::HandleSetQuantTableCommand() m_data_in_fifo.PopRange(packed_data.data(), static_cast(packed_data.size())); std::memcpy(m_iq_uv.data(), packed_data.data(), m_iq_uv.size()); } + + return true; } -void MDEC::HandleSetScaleCommand() +bool MDEC::HandleSetScaleCommand() { + if (m_remaining_words > 0) + { + UpdateStatusRegister(); + return false; + } + // TODO: Remove extra copies.. - std::array packed_data; + std::array packed_data; m_data_in_fifo.PopRange(packed_data.data(), static_cast(packed_data.size())); std::memcpy(m_scale_table.data(), packed_data.data(), m_scale_table.size() * sizeof(s16)); + return true; } diff --git a/src/core/mdec.h b/src/core/mdec.h index 77a5d8aa1..684232a84 100644 --- a/src/core/mdec.h +++ b/src/core/mdec.h @@ -29,6 +29,7 @@ public: private: static constexpr u32 DATA_IN_FIFO_SIZE = 1048576; static constexpr u32 DATA_OUT_FIFO_SIZE = 1048576; + static constexpr u32 NUM_BLOCKS = 6; enum DataOutputDepth : u8 { @@ -87,15 +88,15 @@ private: u32 ReadDataRegister(); void WriteCommandRegister(u32 value); - void HandleDecodeMacroblockCommand(); - void HandleSetQuantTableCommand(); - void HandleSetScaleCommand(); + bool HandleDecodeMacroblockCommand(); + bool HandleSetQuantTableCommand(); + bool HandleSetScaleCommand(); - const u16* DecodeColoredMacroblock(const u16* src, const u16* src_end); - const u16* DecodeMonoMacroblock(const u16* src, const u16* src_end); + bool DecodeColoredMacroblock(); + bool DecodeMonoMacroblock(); // from nocash spec - bool rl_decode_block(s16* blk, const u16*& src, const u16* src_end, const u8* qt); + bool rl_decode_block(s16* blk, const u8* qt); void IDCT(s16* blk); void yuv_to_rgb(u32 xx, u32 yy, const std::array& Crblk, const std::array& Cbblk, const std::array& Yblk, std::array& rgb_out); @@ -106,13 +107,20 @@ private: StatusRegister m_status = {}; - InlineFIFOQueue m_data_in_fifo; + // Even though the DMA is in words, we access the FIFO as halfwords. + InlineFIFOQueue m_data_in_fifo; InlineFIFOQueue m_data_out_fifo; Command m_command = Command::None; - u32 m_command_parameter_count = 0; + u32 m_remaining_words = 0; std::array m_iq_uv{}; std::array m_iq_y{}; std::array m_scale_table{}; + + // blocks, for colour: 0 - Crblk, 1 - Cbblk, 2-5 - Y 1-4 + std::array, NUM_BLOCKS> m_blocks; + u32 m_current_block = 0; // block (0-5) + u32 m_current_coefficient = 64; // k (in block) + u16 m_current_q_scale = 0; };