diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 086a5a76a..5da789715 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -30,6 +30,8 @@ add_library(core
dma.h
gpu.cpp
gpu.h
+ gpu_backend.cpp
+ gpu_backend.h
gpu_commands.cpp
gpu_hw.cpp
gpu_hw.h
@@ -41,6 +43,8 @@ add_library(core
gpu_hw_vulkan.h
gpu_sw.cpp
gpu_sw.h
+ gpu_sw_backend.cpp
+ gpu_sw_backend.h
gpu_types.h
gte.cpp
gte.h
diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj
index bd9dce6df..a0fc019ce 100644
--- a/src/core/core.vcxproj
+++ b/src/core/core.vcxproj
@@ -115,11 +115,13 @@
+
+
@@ -185,10 +187,12 @@
true
+
+
diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters
index bd9fa7850..d2c2ce6d5 100644
--- a/src/core/core.vcxproj.filters
+++ b/src/core/core.vcxproj.filters
@@ -51,6 +51,8 @@
+
+
@@ -105,5 +107,7 @@
+
+
diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp
index 05cb6ae99..c4f5e920c 100644
--- a/src/core/gpu.cpp
+++ b/src/core/gpu.cpp
@@ -129,10 +129,10 @@ bool GPU::DoState(StateWrapper& sw)
sw.Do(&m_draw_mode.texture_page_y);
sw.Do(&m_draw_mode.texture_palette_x);
sw.Do(&m_draw_mode.texture_palette_y);
- sw.Do(&m_draw_mode.texture_window_and_x);
- sw.Do(&m_draw_mode.texture_window_and_y);
- sw.Do(&m_draw_mode.texture_window_or_x);
- sw.Do(&m_draw_mode.texture_window_or_y);
+ sw.Do(&m_draw_mode.texture_window.and_x);
+ sw.Do(&m_draw_mode.texture_window.and_y);
+ sw.Do(&m_draw_mode.texture_window.or_x);
+ sw.Do(&m_draw_mode.texture_window.or_y);
sw.Do(&m_draw_mode.texture_x_flip);
sw.Do(&m_draw_mode.texture_y_flip);
@@ -1358,10 +1358,10 @@ void GPU::SetTextureWindow(u32 value)
const u8 offset_y = Truncate8((value >> 15) & UINT32_C(0x1F));
Log_DebugPrintf("Set texture window %02X %02X %02X %02X", mask_x, mask_y, offset_x, offset_y);
- m_draw_mode.texture_window_and_x = ~(mask_x * 8);
- m_draw_mode.texture_window_and_y = ~(mask_y * 8);
- m_draw_mode.texture_window_or_x = (offset_x & mask_x) * 8u;
- m_draw_mode.texture_window_or_y = (offset_y & mask_y) * 8u;
+ m_draw_mode.texture_window.and_x = ~(mask_x * 8);
+ m_draw_mode.texture_window.and_y = ~(mask_y * 8);
+ m_draw_mode.texture_window.or_x = (offset_x & mask_x) * 8u;
+ m_draw_mode.texture_window.or_y = (offset_y & mask_y) * 8u;
m_draw_mode.texture_window_value = value;
m_draw_mode.texture_window_changed = true;
}
diff --git a/src/core/gpu.h b/src/core/gpu.h
index 979318326..459b2fbff 100644
--- a/src/core/gpu.h
+++ b/src/core/gpu.h
@@ -159,9 +159,6 @@ protected:
ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; }
// Helper/format conversion functions.
- static constexpr u8 Convert5To8(u8 x5) { return (x5 << 3) | (x5 & 7); }
- static constexpr u8 Convert8To5(u8 x8) { return (x8 >> 3); }
-
static constexpr u32 RGBA5551ToRGBA8888(u16 color)
{
u8 r = Truncate8(color & 31);
@@ -197,68 +194,10 @@ protected:
{
return std::make_tuple(static_cast(rgb24), static_cast(rgb24 >> 8), static_cast(rgb24 >> 16));
}
- static constexpr u32 PackColorRGB24(u8 r, u8 g, u8 b)
- {
- return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16);
- }
static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer,
bool remove_alpha);
- union VRAMPixel
- {
- u16 bits;
-
- BitField r;
- BitField g;
- BitField b;
- BitField c;
-
- u8 GetR8() const { return Convert5To8(r); }
- u8 GetG8() const { return Convert5To8(g); }
- u8 GetB8() const { return Convert5To8(b); }
-
- void Set(u8 r_, u8 g_, u8 b_, bool c_ = false)
- {
- bits = (ZeroExtend16(r_)) | (ZeroExtend16(g_) << 5) | (ZeroExtend16(b_) << 10) | (static_cast(c_) << 15);
- }
-
- void ClampAndSet(u8 r_, u8 g_, u8 b_, bool c_ = false)
- {
- Set(std::min(r_, 0x1F), std::min(g_, 0x1F), std::min(b_, 0x1F), c_);
- }
-
- void SetRGB24(u32 rgb24, bool c_ = false)
- {
- bits = Truncate16(((rgb24 >> 3) & 0x1F) | (((rgb24 >> 11) & 0x1F) << 5) | (((rgb24 >> 19) & 0x1F) << 10)) |
- (static_cast(c_) << 15);
- }
-
- void SetRGB24(u8 r8, u8 g8, u8 b8, bool c_ = false)
- {
- bits = (ZeroExtend16(r8 >> 3)) | (ZeroExtend16(g8 >> 3) << 5) | (ZeroExtend16(b8 >> 3) << 10) |
- (static_cast(c_) << 15);
- }
-
- void SetRGB24Dithered(u32 x, u32 y, u8 r8, u8 g8, u8 b8, bool c_ = false)
- {
- const s32 offset = DITHER_MATRIX[y & 3][x & 3];
- r8 = static_cast(std::clamp(static_cast(ZeroExtend32(r8)) + offset, 0, 255));
- g8 = static_cast(std::clamp(static_cast(ZeroExtend32(g8)) + offset, 0, 255));
- b8 = static_cast(std::clamp(static_cast(ZeroExtend32(b8)) + offset, 0, 255));
- SetRGB24(r8, g8, b8, c_);
- }
-
- u32 ToRGB24() const
- {
- const u32 r_ = ZeroExtend32(r.GetValue());
- const u32 g_ = ZeroExtend32(g.GetValue());
- const u32 b_ = ZeroExtend32(b.GetValue());
-
- return ((r_ << 3) | (r_ & 7)) | (((g_ << 3) | (g_ & 7)) << 8) | (((b_ << 3) | (b_ & 7)) << 16);
- }
- };
-
void SoftReset();
// Sets dots per scanline
@@ -464,10 +403,7 @@ protected:
u32 texture_page_y;
u32 texture_palette_x;
u32 texture_palette_y;
- u8 texture_window_and_x;
- u8 texture_window_and_y;
- u8 texture_window_or_x;
- u8 texture_window_or_y;
+ GPUTextureWindow texture_window;
bool texture_x_flip;
bool texture_y_flip;
bool texture_page_changed;
diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp
new file mode 100644
index 000000000..7bb67df8a
--- /dev/null
+++ b/src/core/gpu_backend.cpp
@@ -0,0 +1,327 @@
+#include "gpu_backend.h"
+#include "common/log.h"
+#include "common/state_wrapper.h"
+#include "settings.h"
+Log_SetChannel(GPUBackend);
+
+std::unique_ptr g_gpu_backend;
+
+GPUBackend::GPUBackend() = default;
+
+GPUBackend::~GPUBackend() = default;
+
+bool GPUBackend::Initialize()
+{
+ if (g_settings.gpu_use_thread)
+ StartGPUThread();
+
+ return true;
+}
+
+void GPUBackend::Reset()
+{
+ Sync();
+ m_drawing_area = {};
+}
+
+void GPUBackend::UpdateSettings()
+{
+ Sync();
+
+ if (m_use_gpu_thread != g_settings.gpu_use_thread)
+ {
+ if (!g_settings.gpu_use_thread)
+ StopGPUThread();
+ else
+ StartGPUThread();
+ }
+}
+
+void GPUBackend::Shutdown()
+{
+ StopGPUThread();
+}
+
+GPUBackendFillVRAMCommand* GPUBackend::NewFillVRAMCommand()
+{
+ GPUBackendFillVRAMCommand* cmd =
+ static_cast(AllocateCommand(sizeof(GPUBackendFillVRAMCommand)));
+ cmd->type = GPUBackendCommandType::FillVRAM;
+ cmd->size = cmd->Size();
+ return cmd;
+}
+
+GPUBackendUpdateVRAMCommand* GPUBackend::NewUpdateVRAMCommand(u32 num_words)
+{
+ const u32 size = sizeof(GPUBackendUpdateVRAMCommand) + (num_words * sizeof(u16));
+ GPUBackendUpdateVRAMCommand* cmd = static_cast(AllocateCommand(size));
+ cmd->type = GPUBackendCommandType::UpdateVRAM;
+ cmd->size = size;
+ return cmd;
+}
+
+GPUBackendCopyVRAMCommand* GPUBackend::NewCopyVRAMCommand()
+{
+ GPUBackendCopyVRAMCommand* cmd =
+ static_cast(AllocateCommand(sizeof(GPUBackendCopyVRAMCommand)));
+ cmd->type = GPUBackendCommandType::CopyVRAM;
+ cmd->size = cmd->Size();
+ return cmd;
+}
+
+GPUBackendSetDrawingAreaCommand* GPUBackend::NewSetDrawingAreaCommand()
+{
+ GPUBackendSetDrawingAreaCommand* cmd =
+ static_cast(AllocateCommand(sizeof(GPUBackendSetDrawingAreaCommand)));
+ cmd->type = GPUBackendCommandType::SetDrawingArea;
+ cmd->size = cmd->Size();
+ return cmd;
+}
+
+GPUBackendDrawPolygonCommand* GPUBackend::NewDrawPolygonCommand(u32 num_vertices)
+{
+ const u32 size = sizeof(GPUBackendDrawPolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPolygonCommand::Vertex));
+ GPUBackendDrawPolygonCommand* cmd = static_cast(AllocateCommand(size));
+ cmd->type = GPUBackendCommandType::DrawPolygon;
+ cmd->size = size;
+ cmd->num_vertices = Truncate16(num_vertices);
+ return cmd;
+}
+
+GPUBackendDrawRectangleCommand* GPUBackend::NewDrawRectangleCommand()
+{
+ GPUBackendDrawRectangleCommand* cmd =
+ static_cast(AllocateCommand(sizeof(GPUBackendDrawRectangleCommand)));
+ cmd->type = GPUBackendCommandType::DrawRectangle;
+ cmd->size = cmd->Size();
+ return cmd;
+}
+
+GPUBackendDrawLineCommand* GPUBackend::NewDrawLineCommand(u32 num_vertices)
+{
+ const u32 size = sizeof(GPUBackendDrawLineCommand) + (num_vertices * sizeof(GPUBackendDrawLineCommand::Vertex));
+ GPUBackendDrawLineCommand* cmd = static_cast(AllocateCommand(size));
+ cmd->type = GPUBackendCommandType::DrawLine;
+ cmd->size = size;
+ cmd->num_vertices = Truncate16(num_vertices);
+ return cmd;
+}
+
+void* GPUBackend::AllocateCommand(u32 size)
+{
+ for (;;)
+ {
+ u32 read_ptr = m_command_fifo_read_ptr.load();
+ u32 write_ptr = m_command_fifo_write_ptr.load();
+ if (read_ptr > write_ptr)
+ {
+ u32 available_size = read_ptr - write_ptr;
+ while (available_size < (size + sizeof(GPUBackendCommandType)))
+ {
+ WakeGPUThread();
+ read_ptr = m_command_fifo_read_ptr.load();
+ available_size = (read_ptr > write_ptr) ? (read_ptr - write_ptr) : (COMMAND_QUEUE_SIZE - write_ptr);
+ }
+ }
+ else
+ {
+ const u32 available_size = COMMAND_QUEUE_SIZE - write_ptr;
+ if ((size + sizeof(GPUBackendCommand)) > available_size)
+ {
+ // allocate a dummy command to wrap the buffer around
+ GPUBackendCommand* dummy_cmd = reinterpret_cast(&m_command_fifo_data[write_ptr]);
+ dummy_cmd->type = GPUBackendCommandType::Wraparound;
+ dummy_cmd->size = available_size;
+ dummy_cmd->params.bits = 0;
+ m_command_fifo_write_ptr.store(0);
+ continue;
+ }
+ }
+
+ return &m_command_fifo_data[write_ptr];
+ }
+}
+
+u32 GPUBackend::GetPendingCommandSize() const
+{
+ const u32 read_ptr = m_command_fifo_read_ptr.load();
+ const u32 write_ptr = m_command_fifo_write_ptr.load();
+ return (write_ptr >= read_ptr) ? (write_ptr - read_ptr) : (COMMAND_QUEUE_SIZE - read_ptr + write_ptr);
+}
+
+void GPUBackend::PushCommand(GPUBackendCommand* cmd)
+{
+ if (!m_use_gpu_thread)
+ {
+ // single-thread mode
+ if (cmd->type != GPUBackendCommandType::Sync)
+ HandleCommand(cmd);
+ }
+ else
+ {
+ const u32 new_write_ptr = m_command_fifo_write_ptr.fetch_add(cmd->size) + cmd->size;
+ DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE);
+ if (GetPendingCommandSize() >= THRESHOLD_TO_WAKE_GPU)
+ WakeGPUThread();
+ }
+}
+
+void GPUBackend::WakeGPUThread()
+{
+ std::unique_lock lock(m_sync_mutex);
+ if (!m_gpu_thread_sleeping.load())
+ return;
+
+ m_wake_gpu_thread_cv.notify_one();
+}
+
+void GPUBackend::StartGPUThread()
+{
+ m_gpu_loop_done.store(false);
+ m_use_gpu_thread = true;
+ m_gpu_thread = std::thread(&GPUBackend::RunGPULoop, this);
+ Log_InfoPrint("GPU thread started.");
+}
+
+void GPUBackend::StopGPUThread()
+{
+ if (!m_use_gpu_thread)
+ return;
+
+ m_gpu_loop_done.store(true);
+ WakeGPUThread();
+ m_gpu_thread.join();
+ m_use_gpu_thread = false;
+ Log_InfoPrint("GPU thread stopped.");
+}
+
+void GPUBackend::Sync()
+{
+ if (!m_use_gpu_thread)
+ return;
+
+ GPUBackendSyncCommand* cmd = static_cast(AllocateCommand(sizeof(GPUBackendSyncCommand)));
+ cmd->type = GPUBackendCommandType::Sync;
+ cmd->size = sizeof(GPUBackendSyncCommand);
+ PushCommand(cmd);
+ WakeGPUThread();
+
+ m_sync_event.Wait();
+ m_sync_event.Reset();
+}
+
+void GPUBackend::RunGPULoop()
+{
+ for (;;)
+ {
+ u32 write_ptr = m_command_fifo_write_ptr.load();
+ u32 read_ptr = m_command_fifo_read_ptr.load();
+ if (read_ptr == write_ptr)
+ {
+ std::unique_lock lock(m_sync_mutex);
+ m_gpu_thread_sleeping.store(true);
+ m_wake_gpu_thread_cv.wait(lock, [this]() { return m_gpu_loop_done.load() || GetPendingCommandSize() > 0; });
+ m_gpu_thread_sleeping.store(false);
+
+ if (m_gpu_loop_done.load())
+ break;
+ else
+ continue;
+ }
+
+ if (write_ptr < read_ptr)
+ write_ptr = COMMAND_QUEUE_SIZE;
+
+ while (read_ptr < write_ptr)
+ {
+ const GPUBackendCommand* cmd = reinterpret_cast(&m_command_fifo_data[read_ptr]);
+ read_ptr += cmd->size;
+
+ switch (cmd->type)
+ {
+ case GPUBackendCommandType::Wraparound:
+ {
+ DebugAssert(read_ptr == COMMAND_QUEUE_SIZE);
+ write_ptr = m_command_fifo_write_ptr.load();
+ read_ptr = 0;
+ }
+ break;
+
+ case GPUBackendCommandType::Sync:
+ {
+ DebugAssert(read_ptr == write_ptr);
+ m_sync_event.Signal();
+ }
+ break;
+
+ default:
+ HandleCommand(cmd);
+ break;
+ }
+ }
+
+ m_command_fifo_read_ptr.store(read_ptr);
+ }
+}
+
+void GPUBackend::HandleCommand(const GPUBackendCommand* cmd)
+{
+ switch (cmd->type)
+ {
+ case GPUBackendCommandType::FillVRAM:
+ {
+ FlushRender();
+ const GPUBackendFillVRAMCommand* ccmd = static_cast(cmd);
+ FillVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height),
+ ccmd->color, ccmd->params);
+ }
+ break;
+
+ case GPUBackendCommandType::UpdateVRAM:
+ {
+ FlushRender();
+ const GPUBackendUpdateVRAMCommand* ccmd = static_cast(cmd);
+ UpdateVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height),
+ ccmd->data, ccmd->params);
+ }
+ break;
+
+ case GPUBackendCommandType::CopyVRAM:
+ {
+ FlushRender();
+ const GPUBackendCopyVRAMCommand* ccmd = static_cast(cmd);
+ CopyVRAM(ZeroExtend32(ccmd->src_x), ZeroExtend32(ccmd->src_y), ZeroExtend32(ccmd->dst_x),
+ ZeroExtend32(ccmd->dst_y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->params);
+ }
+ break;
+
+ case GPUBackendCommandType::SetDrawingArea:
+ {
+ FlushRender();
+ m_drawing_area = static_cast(cmd)->new_area;
+ DrawingAreaChanged();
+ }
+ break;
+
+ case GPUBackendCommandType::DrawPolygon:
+ {
+ DrawPolygon(static_cast(cmd));
+ }
+ break;
+
+ case GPUBackendCommandType::DrawRectangle:
+ {
+ DrawRectangle(static_cast(cmd));
+ }
+ break;
+
+ case GPUBackendCommandType::DrawLine:
+ {
+ DrawLine(static_cast(cmd));
+ }
+ break;
+
+ default:
+ break;
+ }
+}
diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h
new file mode 100644
index 000000000..2590b76b9
--- /dev/null
+++ b/src/core/gpu_backend.h
@@ -0,0 +1,91 @@
+#pragma once
+#include "common/event.h"
+#include "common/heap_array.h"
+#include "gpu_types.h"
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4324) // warning C4324: 'GPUBackend': structure was padded due to alignment specifier
+#endif
+
+class GPUBackend
+{
+public:
+ GPUBackend();
+ virtual ~GPUBackend();
+
+ ALWAYS_INLINE u16* GetVRAM() const { return m_vram_ptr; }
+
+ virtual bool Initialize();
+ virtual void UpdateSettings();
+ virtual void Reset();
+ virtual void Shutdown();
+
+ GPUBackendFillVRAMCommand* NewFillVRAMCommand();
+ GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words);
+ GPUBackendCopyVRAMCommand* NewCopyVRAMCommand();
+ GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand();
+ GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices);
+ GPUBackendDrawRectangleCommand* NewDrawRectangleCommand();
+ GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices);
+
+ void PushCommand(GPUBackendCommand* cmd);
+ void Sync();
+
+ /// Processes all pending GPU commands.
+ void RunGPULoop();
+
+protected:
+ void* AllocateCommand(u32 size);
+ u32 GetPendingCommandSize() const;
+ void WakeGPUThread();
+ void StartGPUThread();
+ void StopGPUThread();
+
+ virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) = 0;
+ virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data,
+ GPUBackendCommandParameters params) = 0;
+ virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
+ GPUBackendCommandParameters params) = 0;
+ virtual void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) = 0;
+ virtual void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) = 0;
+ virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0;
+ virtual void FlushRender() = 0;
+ virtual void DrawingAreaChanged() = 0;
+
+ void HandleCommand(const GPUBackendCommand* cmd);
+
+ u16* m_vram_ptr = nullptr;
+
+ Common::Rectangle m_drawing_area{};
+
+ Common::Event m_sync_event;
+ std::atomic_bool m_gpu_thread_sleeping{false};
+ std::atomic_bool m_gpu_loop_done{false};
+ std::thread m_gpu_thread;
+ bool m_use_gpu_thread = false;
+
+ std::mutex m_sync_mutex;
+ std::condition_variable m_sync_cpu_thread_cv;
+ std::condition_variable m_wake_gpu_thread_cv;
+ bool m_sync_done = false;
+
+ enum : u32
+ {
+ COMMAND_QUEUE_SIZE = 4 * 1024 * 1024,
+ THRESHOLD_TO_WAKE_GPU = 256
+ };
+
+ HeapArray m_command_fifo_data;
+ alignas(64) std::atomic m_command_fifo_read_ptr{0};
+ alignas(64) std::atomic m_command_fifo_write_ptr{0};
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp
index 0145a8da0..63b007cf6 100644
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@@ -1004,10 +1004,10 @@ void GPU_HW::DispatchRenderCommand()
{
m_draw_mode.ClearTextureWindowChangedFlag();
- m_batch_ubo_data.u_texture_window_and[0] = ZeroExtend32(m_draw_mode.texture_window_and_x);
- m_batch_ubo_data.u_texture_window_and[1] = ZeroExtend32(m_draw_mode.texture_window_and_y);
- m_batch_ubo_data.u_texture_window_or[0] = ZeroExtend32(m_draw_mode.texture_window_or_x);
- m_batch_ubo_data.u_texture_window_or[1] = ZeroExtend32(m_draw_mode.texture_window_or_y);
+ m_batch_ubo_data.u_texture_window_and[0] = ZeroExtend32(m_draw_mode.texture_window.and_x);
+ m_batch_ubo_data.u_texture_window_and[1] = ZeroExtend32(m_draw_mode.texture_window.and_y);
+ m_batch_ubo_data.u_texture_window_or[0] = ZeroExtend32(m_draw_mode.texture_window.or_x);
+ m_batch_ubo_data.u_texture_window_or[1] = ZeroExtend32(m_draw_mode.texture_window.or_y);
m_batch_ubo_dirty = true;
}
diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp
index 3d383f053..5a69c1cb8 100644
--- a/src/core/gpu_sw.cpp
+++ b/src/core/gpu_sw.cpp
@@ -19,14 +19,23 @@ Log_SetChannel(GPU_SW);
#endif
#endif
+template
+ALWAYS_INLINE static constexpr std::tuple MinMax(T v1, T v2)
+{
+ if (v1 > v2)
+ return std::tie(v2, v1);
+ else
+ return std::tie(v1, v2);
+}
+
GPU_SW::GPU_SW()
{
- m_vram.fill(0);
- m_vram_ptr = m_vram.data();
+ m_vram_ptr = m_backend.GetVRAM();
}
GPU_SW::~GPU_SW()
{
+ m_backend.Shutdown();
if (m_host_display)
m_host_display->ClearDisplayTexture();
}
@@ -38,7 +47,7 @@ bool GPU_SW::IsHardwareRenderer() const
bool GPU_SW::Initialize(HostDisplay* host_display)
{
- if (!GPU::Initialize(host_display))
+ if (!GPU::Initialize(host_display) || !m_backend.Initialize())
return false;
static constexpr auto formats_for_16bit = make_array(HostDisplayPixelFormat::RGB565, HostDisplayPixelFormat::RGBA5551,
@@ -70,7 +79,13 @@ void GPU_SW::Reset()
{
GPU::Reset();
- m_vram.fill(0);
+ m_backend.Reset();
+}
+
+void GPU_SW::UpdateSettings()
+{
+ GPU::UpdateSettings();
+ m_backend.UpdateSettings();
}
template
@@ -248,7 +263,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field
const u32 rows = height >> interlaced_shift;
dst_stride <<= interlaced_shift;
- const u16* src_ptr = &m_vram[src_y * VRAM_WIDTH + src_x];
+ const u16* src_ptr = &m_vram_ptr[src_y * VRAM_WIDTH + src_x];
const u32 src_step = VRAM_WIDTH << interleaved_shift;
for (u32 row = 0; row < rows; row++)
{
@@ -265,7 +280,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field
const u32 end_x = src_x + width;
for (u32 row = 0; row < rows; row++)
{
- const u16* src_row_ptr = &m_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
+ const u16* src_row_ptr = &m_vram_ptr[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
OutputPixelType* dst_row_ptr = reinterpret_cast(dst_ptr);
for (u32 col = src_x; col < end_x; col++)
{
@@ -340,7 +355,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh
if ((src_x + width) <= VRAM_WIDTH && (src_y + (rows << interleaved_shift)) <= VRAM_HEIGHT)
{
- const u8* src_ptr = reinterpret_cast(&m_vram[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3);
+ const u8* src_ptr = reinterpret_cast(&m_vram_ptr[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3);
const u32 src_stride = (VRAM_WIDTH << interleaved_shift) * sizeof(u16);
for (u32 row = 0; row < rows; row++)
{
@@ -400,7 +415,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh
{
for (u32 row = 0; row < rows; row++)
{
- const u16* src_row_ptr = &m_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
+ const u16* src_row_ptr = &m_vram_ptr[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
OutputPixelType* dst_row_ptr = reinterpret_cast(dst_ptr);
for (u32 col = 0; col < width; col++)
@@ -475,6 +490,8 @@ void GPU_SW::ClearDisplay()
void GPU_SW::UpdateDisplay()
{
// fill display texture
+ m_backend.Sync();
+
if (!g_settings.debugging.show_vram)
{
if (IsDisplayDisabled())
@@ -530,8 +547,34 @@ void GPU_SW::UpdateDisplay()
}
}
+void GPU_SW::FillBackendCommandParameters(GPUBackendCommand* cmd)
+{
+ cmd->params.bits = 0;
+ cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw;
+ cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing;
+ cmd->params.active_line_lsb = m_crtc_state.active_line_lsb;
+ cmd->params.interlaced_rendering = IsInterlacedRenderingEnabled();
+}
+
+void GPU_SW::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc)
+{
+ FillBackendCommandParameters(cmd);
+ cmd->rc.bits = rc.bits;
+ cmd->draw_mode.bits = m_draw_mode.mode_reg.bits;
+ cmd->palette.bits = m_draw_mode.palette_reg;
+ cmd->window = m_draw_mode.texture_window;
+}
+
void GPU_SW::DispatchRenderCommand()
{
+ if (m_drawing_area_changed)
+ {
+ GPUBackendSetDrawingAreaCommand* cmd = m_backend.NewSetDrawingAreaCommand();
+ cmd->new_area = m_drawing_area;
+ m_backend.PushCommand(cmd);
+ m_drawing_area_changed = false;
+ }
+
const GPURenderCommand rc{m_render_command.bits};
const bool dithering_enable = rc.IsDitheringEnabled() && m_GPUSTAT.dither_enable;
@@ -539,80 +582,119 @@ void GPU_SW::DispatchRenderCommand()
{
case GPUPrimitive::Polygon:
{
+ const u32 num_vertices = rc.quad_polygon ? 4 : 3;
+ GPUBackendDrawPolygonCommand* cmd = m_backend.NewDrawPolygonCommand(num_vertices);
+ FillDrawCommand(cmd, rc);
+
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable;
-
- const u32 num_vertices = rc.quad_polygon ? 4 : 3;
- std::array vertices;
for (u32 i = 0; i < num_vertices; i++)
{
- SWVertex& vert = vertices[i];
- const u32 color_rgb = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
- vert.r = Truncate8(color_rgb);
- vert.g = Truncate8(color_rgb >> 8);
- vert.b = Truncate8(color_rgb >> 16);
-
- const GPUVertexPosition vp{FifoPop()};
- vert.x = m_drawing_offset.x + vp.x;
- vert.y = m_drawing_offset.y + vp.y;
-
- if (textured)
- {
- std::tie(vert.u, vert.v) = UnpackTexcoord(Truncate16(FifoPop()));
- }
- else
- {
- vert.u = 0;
- vert.v = 0;
- }
+ GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i];
+ vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
+ const u64 maddr_and_pos = m_fifo.Pop();
+ const GPUVertexPosition vp{Truncate32(maddr_and_pos)};
+ vert->x = m_drawing_offset.x + vp.x;
+ vert->y = m_drawing_offset.y + vp.y;
+ vert->texcoord = textured ? Truncate16(FifoPop()) : 0;
}
if (!IsDrawingAreaIsValid())
return;
- const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction(
- rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable);
+ // Cull polygons which are too large.
+ const auto [min_x_12, max_x_12] = MinMax(cmd->vertices[1].x, cmd->vertices[2].x);
+ const auto [min_y_12, max_y_12] = MinMax(cmd->vertices[1].y, cmd->vertices[2].y);
+ const s32 min_x = std::min(min_x_12, cmd->vertices[0].x);
+ const s32 max_x = std::max(max_x_12, cmd->vertices[0].x);
+ const s32 min_y = std::min(min_y_12, cmd->vertices[0].y);
+ const s32 max_y = std::max(max_y_12, cmd->vertices[0].y);
- (this->*DrawFunction)(&vertices[0], &vertices[1], &vertices[2]);
- if (num_vertices > 3)
- (this->*DrawFunction)(&vertices[2], &vertices[1], &vertices[3]);
+ if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
+ {
+ Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", cmd->vertices[0].x, cmd->vertices[0].y,
+ cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y);
+ }
+ else
+ {
+ AddDrawTriangleTicks(cmd->vertices[0].x, cmd->vertices[0].y, cmd->vertices[1].x, cmd->vertices[1].y,
+ cmd->vertices[2].x, cmd->vertices[2].y, rc.shading_enable, rc.texture_enable,
+ rc.transparency_enable);
+ }
+
+ // quads
+ if (rc.quad_polygon)
+ {
+ const s32 min_x_123 = std::min(min_x_12, cmd->vertices[3].x);
+ const s32 max_x_123 = std::max(max_x_12, cmd->vertices[3].x);
+ const s32 min_y_123 = std::min(min_y_12, cmd->vertices[3].y);
+ const s32 max_y_123 = std::max(max_y_12, cmd->vertices[3].y);
+
+ // Cull polygons which are too large.
+ if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT)
+ {
+ Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d", cmd->vertices[2].x,
+ cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x,
+ cmd->vertices[0].y);
+ }
+ else
+ {
+ AddDrawTriangleTicks(cmd->vertices[2].x, cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y,
+ cmd->vertices[3].x, cmd->vertices[3].y, rc.shading_enable, rc.texture_enable,
+ rc.transparency_enable);
+ }
+ }
+
+ m_backend.PushCommand(cmd);
}
break;
case GPUPrimitive::Rectangle:
{
- const auto [r, g, b] = UnpackColorRGB24(rc.color_for_first_vertex);
- const GPUVertexPosition vp{FifoPop()};
- const u32 texcoord_and_palette = rc.texture_enable ? FifoPop() : 0;
- const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(texcoord_and_palette));
+ GPUBackendDrawRectangleCommand* cmd = m_backend.NewDrawRectangleCommand();
+ FillDrawCommand(cmd, rc);
+ cmd->color = rc.color_for_first_vertex;
+
+ const GPUVertexPosition vp{FifoPop()};
+ cmd->x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x);
+ cmd->y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y);
+
+ if (rc.texture_enable)
+ {
+ const u32 texcoord_and_palette = FifoPop();
+ cmd->palette.bits = Truncate16(texcoord_and_palette >> 16);
+ cmd->texcoord = Truncate16(texcoord_and_palette);
+ }
+ else
+ {
+ cmd->palette.bits = 0;
+ cmd->texcoord = 0;
+ }
- u32 width;
- u32 height;
switch (rc.rectangle_size)
{
case GPUDrawRectangleSize::R1x1:
- width = 1;
- height = 1;
+ cmd->width = 1;
+ cmd->height = 1;
break;
case GPUDrawRectangleSize::R8x8:
- width = 8;
- height = 8;
+ cmd->width = 8;
+ cmd->height = 8;
break;
case GPUDrawRectangleSize::R16x16:
- width = 16;
- height = 16;
+ cmd->width = 16;
+ cmd->height = 16;
break;
default:
{
const u32 width_and_height = FifoPop();
- width = static_cast(width_and_height & VRAM_WIDTH_MASK);
- height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK);
+ cmd->width = static_cast(width_and_height & VRAM_WIDTH_MASK);
+ cmd->height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK);
- if (width >= MAX_PRIMITIVE_WIDTH || height >= MAX_PRIMITIVE_HEIGHT)
+ if (cmd->width >= MAX_PRIMITIVE_WIDTH || cmd->height >= MAX_PRIMITIVE_HEIGHT)
{
- Log_DebugPrintf("Culling too-large rectangle: %d,%d %dx%d", vp.x.GetValue(), vp.y.GetValue(), width,
- height);
+ Log_DebugPrintf("Culling too-large rectangle: %d,%d %dx%d", cmd->x, cmd->y, cmd->width, cmd->height);
return;
}
}
@@ -622,51 +704,123 @@ void GPU_SW::DispatchRenderCommand()
if (!IsDrawingAreaIsValid())
return;
- const DrawRectangleFunction DrawFunction =
- GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable);
+ const u32 clip_left = static_cast(std::clamp(cmd->x, m_drawing_area.left, m_drawing_area.right));
+ const u32 clip_right =
+ static_cast(std::clamp(cmd->x + cmd->width, m_drawing_area.left, m_drawing_area.right)) + 1u;
+ const u32 clip_top = static_cast(std::clamp(cmd->y, m_drawing_area.top, m_drawing_area.bottom));
+ const u32 clip_bottom =
+ static_cast(std::clamp(cmd->y + cmd->height, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
- (this->*DrawFunction)(vp.x, vp.y, width, height, r, g, b, texcoord_x, texcoord_y);
+ // cmd->bounds.Set(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom));
+ AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.transparency_enable);
+
+ m_backend.PushCommand(cmd);
}
break;
case GPUPrimitive::Line:
{
- const u32 first_color = rc.color_for_first_vertex;
- const bool shaded = rc.shading_enable;
-
- const DrawLineFunction DrawFunction = GetDrawLineFunction(shaded, rc.transparency_enable, dithering_enable);
-
- std::array vertices = {};
- u32 buffer_pos = 0;
-
- // first vertex
- SWVertex* p0 = &vertices[0];
- SWVertex* p1 = &vertices[1];
- p0->SetPosition(GPUVertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : Truncate32(FifoPop())},
- m_drawing_offset.x, m_drawing_offset.y);
- p0->SetColorRGB24(first_color);
-
- // remaining vertices in line strip
- const u32 num_vertices = rc.polyline ? GetPolyLineVertexCount() : 2;
- for (u32 i = 1; i < num_vertices; i++)
+ if (!rc.polyline)
{
- if (rc.polyline)
+ GPUBackendDrawLineCommand* cmd = m_backend.NewDrawLineCommand(2);
+ FillDrawCommand(cmd, rc);
+ cmd->palette.bits = 0;
+
+ if (rc.shading_enable)
{
- p1->SetColorRGB24(shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color);
- p1->SetPosition(GPUVertexPosition{m_blit_buffer[buffer_pos++]}, m_drawing_offset.x, m_drawing_offset.y);
+ cmd->vertices[0].color = rc.color_for_first_vertex;
+ const GPUVertexPosition start_pos{FifoPop()};
+ cmd->vertices[0].x = m_drawing_offset.x + start_pos.x;
+ cmd->vertices[0].y = m_drawing_offset.y + start_pos.y;
+
+ cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF);
+ const GPUVertexPosition end_pos{FifoPop()};
+ cmd->vertices[1].x = m_drawing_offset.x + end_pos.x;
+ cmd->vertices[1].y = m_drawing_offset.y + end_pos.y;
}
else
{
- p1->SetColorRGB24(shaded ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color);
- p1->SetPosition(GPUVertexPosition{Truncate32(FifoPop())}, m_drawing_offset.x, m_drawing_offset.y);
+ cmd->vertices[0].color = rc.color_for_first_vertex;
+ cmd->vertices[1].color = rc.color_for_first_vertex;
+
+ const GPUVertexPosition start_pos{FifoPop()};
+ cmd->vertices[0].x = m_drawing_offset.x + start_pos.x;
+ cmd->vertices[0].y = m_drawing_offset.y + start_pos.y;
+
+ const GPUVertexPosition end_pos{FifoPop()};
+ cmd->vertices[1].x = m_drawing_offset.x + end_pos.x;
+ cmd->vertices[1].y = m_drawing_offset.y + end_pos.y;
}
- // down here because of the FIFO pops
- if (IsDrawingAreaIsValid())
- (this->*DrawFunction)(p0, p1);
+ if (!IsDrawingAreaIsValid())
+ return;
- // swap p0/p1 so that the last vertex is used as the first for the next line
- std::swap(p0, p1);
+ const auto [min_x, max_x] = MinMax(cmd->vertices[0].x, cmd->vertices[1].x);
+ const auto [min_y, max_y] = MinMax(cmd->vertices[0].y, cmd->vertices[1].y);
+ if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
+ {
+ Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", cmd->vertices[0].y, cmd->vertices[0].y,
+ cmd->vertices[1].x, cmd->vertices[1].y);
+ return;
+ }
+
+ const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.left));
+ const u32 clip_right = static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
+ const u32 clip_top = static_cast(std::clamp(min_y, m_drawing_area.top, m_drawing_area.bottom));
+ const u32 clip_bottom =
+ static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
+ // cmd->bounds.Set(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right),
+ // Truncate16(clip_bottom));
+ AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable);
+
+ m_backend.PushCommand(cmd);
+ }
+ else
+ {
+ const u32 num_vertices = GetPolyLineVertexCount();
+
+ GPUBackendDrawLineCommand* cmd = m_backend.NewDrawLineCommand(num_vertices);
+ FillDrawCommand(cmd, m_render_command);
+
+ u32 buffer_pos = 0;
+ const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]};
+ cmd->vertices[0].x = start_vp.x + m_drawing_offset.x;
+ cmd->vertices[0].y = start_vp.y + m_drawing_offset.y;
+ cmd->vertices[0].color = m_render_command.color_for_first_vertex;
+ // cmd->bounds.SetInvalid();
+
+ const bool shaded = m_render_command.shading_enable;
+ for (u32 i = 1; i < num_vertices; i++)
+ {
+ cmd->vertices[i].color =
+ shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex;
+ const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]};
+ cmd->vertices[i].x = m_drawing_offset.x + vp.x;
+ cmd->vertices[i].y = m_drawing_offset.y + vp.y;
+
+ const auto [min_x, max_x] = MinMax(cmd->vertices[i - 1].x, cmd->vertices[i].y);
+ const auto [min_y, max_y] = MinMax(cmd->vertices[i - 1].x, cmd->vertices[i].y);
+ if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
+ {
+ Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", cmd->vertices[i - 1].x, cmd->vertices[i - 1].y,
+ cmd->vertices[i].x, cmd->vertices[i].y);
+ }
+ else
+ {
+ const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.left));
+ const u32 clip_right =
+ static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
+ const u32 clip_top = static_cast(std::clamp(min_y, m_drawing_area.top, m_drawing_area.bottom));
+ const u32 clip_bottom =
+ static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
+
+ // cmd->bounds.Include(Truncate16(clip_left), Truncate16(clip_right), Truncate16(clip_top),
+ // Truncate16(clip_bottom));
+ AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, m_render_command.shading_enable);
+ }
+ }
+
+ m_backend.PushCommand(cmd);
}
}
break;
@@ -677,731 +831,50 @@ void GPU_SW::DispatchRenderCommand()
}
}
-constexpr GPU_SW::DitherLUT GPU_SW::ComputeDitherLUT()
+void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{
- DitherLUT lut = {};
- for (u32 i = 0; i < DITHER_MATRIX_SIZE; i++)
- {
- for (u32 j = 0; j < DITHER_MATRIX_SIZE; j++)
- {
- for (s32 value = 0; value < DITHER_LUT_SIZE; value++)
- {
- const s32 dithered_value = (value + DITHER_MATRIX[i][j]) >> 3;
- lut[i][j][value] = static_cast((dithered_value < 0) ? 0 : ((dithered_value > 31) ? 31 : dithered_value));
- }
- }
- }
- return lut;
+ m_backend.Sync();
}
-static constexpr GPU_SW::DitherLUT s_dither_lut = GPU_SW::ComputeDitherLUT();
-
-template
-void ALWAYS_INLINE_RELEASE GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x,
- u8 texcoord_y)
+void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{
- VRAMPixel color;
- bool transparent;
- if constexpr (texture_enable)
- {
- // Apply texture window
- // TODO: Precompute the second half
- texcoord_x = (texcoord_x & m_draw_mode.texture_window_and_x) | m_draw_mode.texture_window_or_x;
- texcoord_y = (texcoord_y & m_draw_mode.texture_window_and_y) | m_draw_mode.texture_window_or_y;
-
- VRAMPixel texture_color;
- switch (m_draw_mode.mode_reg.texture_mode)
- {
- case GPUTextureMode::Palette4Bit:
- {
- const u16 palette_value = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH,
- (m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
- const u16 palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu;
- texture_color.bits = GetPixel((m_draw_mode.texture_palette_x + ZeroExtend32(palette_index)) % VRAM_WIDTH,
- m_draw_mode.texture_palette_y);
- }
- break;
-
- case GPUTextureMode::Palette8Bit:
- {
- const u16 palette_value = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH,
- (m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
- const u16 palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu;
- texture_color.bits = GetPixel((m_draw_mode.texture_palette_x + ZeroExtend32(palette_index)) % VRAM_WIDTH,
- m_draw_mode.texture_palette_y);
- }
- break;
-
- default:
- {
- texture_color.bits = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x)) % VRAM_WIDTH,
- (m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
- }
- break;
- }
-
- if (texture_color.bits == 0)
- return;
-
- transparent = texture_color.c;
-
- if constexpr (raw_texture_enable)
- {
- color.bits = texture_color.bits;
- }
- else
- {
- const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u;
- const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u;
-
- color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.r) * u16(color_r)) >> 4]) << 0) |
- (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.g) * u16(color_g)) >> 4]) << 5) |
- (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.b) * u16(color_b)) >> 4]) << 10) |
- (texture_color.bits & 0x8000u);
- }
- }
- else
- {
- transparent = true;
-
- const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u;
- const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u;
-
- color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_r]) << 0) |
- (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_g]) << 5) |
- (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_b]) << 10);
- }
-
- const VRAMPixel bg_color{GetPixel(static_cast(x), static_cast(y))};
- if constexpr (transparency_enable)
- {
- if (transparent)
- {
-#define BLEND_AVERAGE(bg, fg) Truncate8(std::min((ZeroExtend32(bg) / 2) + (ZeroExtend32(fg) / 2), 0x1F))
-#define BLEND_ADD(bg, fg) Truncate8(std::min(ZeroExtend32(bg) + ZeroExtend32(fg), 0x1F))
-#define BLEND_SUBTRACT(bg, fg) Truncate8((bg > fg) ? ((bg) - (fg)) : 0)
-#define BLEND_QUARTER(bg, fg) Truncate8(std::min(ZeroExtend32(bg) + ZeroExtend32(fg / 4), 0x1F))
-
-#define BLEND_RGB(func) \
- color.Set(func(bg_color.r.GetValue(), color.r.GetValue()), func(bg_color.g.GetValue(), color.g.GetValue()), \
- func(bg_color.b.GetValue(), color.b.GetValue()), color.c.GetValue())
-
- switch (m_draw_mode.mode_reg.transparency_mode)
- {
- case GPUTransparencyMode::HalfBackgroundPlusHalfForeground:
- BLEND_RGB(BLEND_AVERAGE);
- break;
- case GPUTransparencyMode::BackgroundPlusForeground:
- BLEND_RGB(BLEND_ADD);
- break;
- case GPUTransparencyMode::BackgroundMinusForeground:
- BLEND_RGB(BLEND_SUBTRACT);
- break;
- case GPUTransparencyMode::BackgroundPlusQuarterForeground:
- BLEND_RGB(BLEND_QUARTER);
- break;
- default:
- break;
- }
-
-#undef BLEND_RGB
-
-#undef BLEND_QUARTER
-#undef BLEND_SUBTRACT
-#undef BLEND_ADD
-#undef BLEND_AVERAGE
- }
- }
- else
- {
- UNREFERENCED_VARIABLE(transparent);
- }
-
- const u16 mask_and = m_GPUSTAT.GetMaskAND();
- if ((bg_color.bits & mask_and) != 0)
- return;
-
- SetPixel(static_cast(x), static_cast(y), color.bits | m_GPUSTAT.GetMaskOR());
+ GPUBackendFillVRAMCommand* cmd = m_backend.NewFillVRAMCommand();
+ FillBackendCommandParameters(cmd);
+ cmd->x = static_cast(x);
+ cmd->y = static_cast(y);
+ cmd->width = static_cast(width);
+ cmd->height = static_cast(height);
+ cmd->color = color;
+ m_backend.PushCommand(cmd);
}
-template
-void GPU_SW::DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x,
- u8 origin_texcoord_y)
+void GPU_SW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
{
- const s32 start_x = TruncateGPUVertexPosition(m_drawing_offset.x + origin_x);
- const s32 start_y = TruncateGPUVertexPosition(m_drawing_offset.y + origin_y);
+ const u32 num_words = width * height;
+ GPUBackendUpdateVRAMCommand* cmd = m_backend.NewUpdateVRAMCommand(num_words);
+ FillBackendCommandParameters(cmd);
+ cmd->x = static_cast(x);
+ cmd->y = static_cast(y);
+ cmd->width = static_cast(width);
+ cmd->height = static_cast(height);
+ std::memcpy(cmd->data, data, sizeof(u16) * num_words);
+ m_backend.PushCommand(cmd);
+}
- {
- const u32 clip_left = static_cast(std::clamp(start_x, m_drawing_area.left, m_drawing_area.right));
- const u32 clip_right =
- static_cast(std::clamp(start_x + static_cast(width), m_drawing_area.left, m_drawing_area.right)) +
- 1u;
- const u32 clip_top = static_cast(std::clamp(start_y, m_drawing_area.top, m_drawing_area.bottom));
- const u32 clip_bottom =
- static_cast(std::clamp(start_y + static_cast(height), m_drawing_area.top, m_drawing_area.bottom)) +
- 1u;
- AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, texture_enable, transparency_enable);
- }
-
- for (u32 offset_y = 0; offset_y < height; offset_y++)
- {
- const s32 y = start_y + static_cast(offset_y);
- if (y < static_cast(m_drawing_area.top) || y > static_cast(m_drawing_area.bottom) ||
- (IsInterlacedRenderingEnabled() && GetActiveLineLSB() == (static_cast(y) & 1u)))
- {
- continue;
- }
-
- const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y);
-
- for (u32 offset_x = 0; offset_x < width; offset_x++)
- {
- const s32 x = start_x + static_cast(offset_x);
- if (x < static_cast(m_drawing_area.left) || x > static_cast(m_drawing_area.right))
- continue;
-
- const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x);
-
- ShadePixel(
- static_cast(x), static_cast(y), r, g, b, texcoord_x, texcoord_y);
- }
- }
+void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
+{
+ GPUBackendCopyVRAMCommand* cmd = m_backend.NewCopyVRAMCommand();
+ FillBackendCommandParameters(cmd);
+ cmd->src_x = static_cast(src_x);
+ cmd->src_y = static_cast(src_y);
+ cmd->dst_x = static_cast(dst_x);
+ cmd->dst_y = static_cast(dst_y);
+ cmd->width = static_cast(width);
+ cmd->height = static_cast(height);
+ m_backend.PushCommand(cmd);
}
std::unique_ptr GPU::CreateSoftwareRenderer()
{
return std::make_unique();
}
-
-//////////////////////////////////////////////////////////////////////////
-// Polygon and line rasterization ported from Mednafen
-//////////////////////////////////////////////////////////////////////////
-
-#define COORD_FBS 12
-#define COORD_MF_INT(n) ((n) << COORD_FBS)
-#define COORD_POST_PADDING 12
-
-static ALWAYS_INLINE_RELEASE s64 MakePolyXFP(s32 x)
-{
- return ((u64)x << 32) + ((1ULL << 32) - (1 << 11));
-}
-
-static ALWAYS_INLINE_RELEASE s64 MakePolyXFPStep(s32 dx, s32 dy)
-{
- s64 ret;
- s64 dx_ex = (u64)dx << 32;
-
- if (dx_ex < 0)
- dx_ex -= dy - 1;
-
- if (dx_ex > 0)
- dx_ex += dy - 1;
-
- ret = dx_ex / dy;
-
- return (ret);
-}
-
-static ALWAYS_INLINE_RELEASE s32 GetPolyXFP_Int(s64 xfp)
-{
- return (xfp >> 32);
-}
-
-template
-bool ALWAYS_INLINE_RELEASE GPU_SW::CalcIDeltas(i_deltas& idl, const SWVertex* A, const SWVertex* B, const SWVertex* C)
-{
-#define CALCIS(x, y) (((B->x - A->x) * (C->y - B->y)) - ((C->x - B->x) * (B->y - A->y)))
-
- s32 denom = CALCIS(x, y);
-
- if (!denom)
- return false;
-
- if constexpr (shading_enable)
- {
- idl.dr_dx = (u32)(CALCIS(r, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
- idl.dr_dy = (u32)(CALCIS(x, r) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
-
- idl.dg_dx = (u32)(CALCIS(g, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
- idl.dg_dy = (u32)(CALCIS(x, g) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
-
- idl.db_dx = (u32)(CALCIS(b, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
- idl.db_dy = (u32)(CALCIS(x, b) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
- }
-
- if constexpr (texture_enable)
- {
- idl.du_dx = (u32)(CALCIS(u, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
- idl.du_dy = (u32)(CALCIS(x, u) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
-
- idl.dv_dx = (u32)(CALCIS(v, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
- idl.dv_dy = (u32)(CALCIS(x, v) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
- }
-
- return true;
-
-#undef CALCIS
-}
-
-template
-void ALWAYS_INLINE_RELEASE GPU_SW::AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count /*= 1*/)
-{
- if constexpr (shading_enable)
- {
- ig.r += idl.dr_dx * count;
- ig.g += idl.dg_dx * count;
- ig.b += idl.db_dx * count;
- }
-
- if constexpr (texture_enable)
- {
- ig.u += idl.du_dx * count;
- ig.v += idl.dv_dx * count;
- }
-}
-
-template
-void ALWAYS_INLINE_RELEASE GPU_SW::AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count /*= 1*/)
-{
- if constexpr (shading_enable)
- {
- ig.r += idl.dr_dy * count;
- ig.g += idl.dg_dy * count;
- ig.b += idl.db_dy * count;
- }
-
- if constexpr (texture_enable)
- {
- ig.u += idl.du_dy * count;
- ig.v += idl.dv_dy * count;
- }
-}
-
-template
-void GPU_SW::DrawSpan(s32 y, s32 x_start, s32 x_bound, i_group ig, const i_deltas& idl)
-{
- if (IsInterlacedRenderingEnabled() && GetActiveLineLSB() == (static_cast(y) & 1u))
- return;
-
- s32 x_ig_adjust = x_start;
- s32 w = x_bound - x_start;
- s32 x = TruncateGPUVertexPosition(x_start);
-
- if (x < static_cast(m_drawing_area.left))
- {
- s32 delta = static_cast(m_drawing_area.left) - x;
- x_ig_adjust += delta;
- x += delta;
- w -= delta;
- }
-
- if ((x + w) > (static_cast(m_drawing_area.right) + 1))
- w = static_cast(m_drawing_area.right) + 1 - x;
-
- if (w <= 0)
- return;
-
- AddIDeltas_DX(ig, idl, x_ig_adjust);
- AddIDeltas_DY(ig, idl, y);
-
- do
- {
- const u32 r = ig.r >> (COORD_FBS + COORD_POST_PADDING);
- const u32 g = ig.g >> (COORD_FBS + COORD_POST_PADDING);
- const u32 b = ig.b >> (COORD_FBS + COORD_POST_PADDING);
- const u32 u = ig.u >> (COORD_FBS + COORD_POST_PADDING);
- const u32 v = ig.v >> (COORD_FBS + COORD_POST_PADDING);
-
- ShadePixel(
- static_cast(x), static_cast(y), Truncate8(r), Truncate8(g), Truncate8(b), Truncate8(u), Truncate8(v));
-
- x++;
- AddIDeltas_DX(ig, idl);
- } while (--w > 0);
-}
-
-template
-void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2)
-{
- u32 core_vertex;
- {
- u32 cvtemp = 0;
-
- if (v1->x <= v0->x)
- {
- if (v2->x <= v1->x)
- cvtemp = (1 << 2);
- else
- cvtemp = (1 << 1);
- }
- else if (v2->x < v0->x)
- cvtemp = (1 << 2);
- else
- cvtemp = (1 << 0);
-
- if (v2->y < v1->y)
- {
- std::swap(v2, v1);
- cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1);
- }
-
- if (v1->y < v0->y)
- {
- std::swap(v1, v0);
- cvtemp = ((cvtemp >> 1) & 0x1) | ((cvtemp << 1) & 0x2) | (cvtemp & 0x4);
- }
-
- if (v2->y < v1->y)
- {
- std::swap(v2, v1);
- cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1);
- }
-
- core_vertex = cvtemp >> 1;
- }
-
- if (v0->y == v2->y)
- return;
-
- if (static_cast(std::abs(v2->x - v0->x)) >= MAX_PRIMITIVE_WIDTH ||
- static_cast(std::abs(v2->x - v1->x)) >= MAX_PRIMITIVE_WIDTH ||
- static_cast(std::abs(v1->x - v0->x)) >= MAX_PRIMITIVE_WIDTH ||
- static_cast(v2->y - v0->y) >= MAX_PRIMITIVE_HEIGHT)
- {
- return;
- }
-
- AddDrawTriangleTicks(v0->x, v0->y, v1->x, v1->y, v2->x, v2->y, shading_enable, texture_enable, transparency_enable);
-
- s64 base_coord = MakePolyXFP(v0->x);
- s64 base_step = MakePolyXFPStep((v2->x - v0->x), (v2->y - v0->y));
- s64 bound_coord_us;
- s64 bound_coord_ls;
- bool right_facing;
-
- if (v1->y == v0->y)
- {
- bound_coord_us = 0;
- right_facing = (bool)(v1->x > v0->x);
- }
- else
- {
- bound_coord_us = MakePolyXFPStep((v1->x - v0->x), (v1->y - v0->y));
- right_facing = (bool)(bound_coord_us > base_step);
- }
-
- if (v2->y == v1->y)
- bound_coord_ls = 0;
- else
- bound_coord_ls = MakePolyXFPStep((v2->x - v1->x), (v2->y - v1->y));
-
- i_deltas idl;
- if (!CalcIDeltas(idl, v0, v1, v2))
- return;
-
- const SWVertex* vertices[3] = {v0, v1, v2};
-
- i_group ig;
- if constexpr (texture_enable)
- {
- ig.u = (COORD_MF_INT(vertices[core_vertex]->u) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
- ig.v = (COORD_MF_INT(vertices[core_vertex]->v) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
- }
-
- ig.r = (COORD_MF_INT(vertices[core_vertex]->r) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
- ig.g = (COORD_MF_INT(vertices[core_vertex]->g) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
- ig.b = (COORD_MF_INT(vertices[core_vertex]->b) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
-
- AddIDeltas_DX(ig, idl, -vertices[core_vertex]->x);
- AddIDeltas_DY(ig, idl, -vertices[core_vertex]->y);
-
- struct TriangleHalf
- {
- u64 x_coord[2];
- u64 x_step[2];
-
- s32 y_coord;
- s32 y_bound;
-
- bool dec_mode;
- } tripart[2];
-
- u32 vo = 0;
- u32 vp = 0;
- if (core_vertex != 0)
- vo = 1;
- if (core_vertex == 2)
- vp = 3;
-
- {
- TriangleHalf* tp = &tripart[vo];
- tp->y_coord = vertices[0 ^ vo]->y;
- tp->y_bound = vertices[1 ^ vo]->y;
- tp->x_coord[right_facing] = MakePolyXFP(vertices[0 ^ vo]->x);
- tp->x_step[right_facing] = bound_coord_us;
- tp->x_coord[!right_facing] = base_coord + ((vertices[vo]->y - vertices[0]->y) * base_step);
- tp->x_step[!right_facing] = base_step;
- tp->dec_mode = vo;
- }
-
- {
- TriangleHalf* tp = &tripart[vo ^ 1];
- tp->y_coord = vertices[1 ^ vp]->y;
- tp->y_bound = vertices[2 ^ vp]->y;
- tp->x_coord[right_facing] = MakePolyXFP(vertices[1 ^ vp]->x);
- tp->x_step[right_facing] = bound_coord_ls;
- tp->x_coord[!right_facing] =
- base_coord + ((vertices[1 ^ vp]->y - vertices[0]->y) *
- base_step); // base_coord + ((vertices[1].y - vertices[0].y) * base_step);
- tp->x_step[!right_facing] = base_step;
- tp->dec_mode = vp;
- }
-
- for (u32 i = 0; i < 2; i++)
- {
- s32 yi = tripart[i].y_coord;
- s32 yb = tripart[i].y_bound;
-
- u64 lc = tripart[i].x_coord[0];
- u64 ls = tripart[i].x_step[0];
-
- u64 rc = tripart[i].x_coord[1];
- u64 rs = tripart[i].x_step[1];
-
- if (tripart[i].dec_mode)
- {
- while (yi > yb)
- {
- yi--;
- lc -= ls;
- rc -= rs;
-
- s32 y = TruncateGPUVertexPosition(yi);
-
- if (y < static_cast(m_drawing_area.top))
- break;
-
- if (y > static_cast(m_drawing_area.bottom))
- continue;
-
- DrawSpan(
- yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
- }
- }
- else
- {
- while (yi < yb)
- {
- s32 y = TruncateGPUVertexPosition(yi);
-
- if (y > static_cast(m_drawing_area.bottom))
- break;
-
- if (y >= static_cast(m_drawing_area.top))
- {
-
- DrawSpan(
- yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
- }
-
- yi++;
- lc += ls;
- rc += rs;
- }
- }
- }
-}
-
-GPU_SW::DrawTriangleFunction GPU_SW::GetDrawTriangleFunction(bool shading_enable, bool texture_enable,
- bool raw_texture_enable, bool transparency_enable,
- bool dithering_enable)
-{
-#define F(SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING) \
- &GPU_SW::DrawTriangle
-
- static constexpr DrawTriangleFunction funcs[2][2][2][2][2] = {
- {{{{F(false, false, false, false, false), F(false, false, false, false, true)},
- {F(false, false, false, true, false), F(false, false, false, true, true)}},
- {{F(false, false, true, false, false), F(false, false, true, false, true)},
- {F(false, false, true, true, false), F(false, false, true, true, true)}}},
- {{{F(false, true, false, false, false), F(false, true, false, false, true)},
- {F(false, true, false, true, false), F(false, true, false, true, true)}},
- {{F(false, true, true, false, false), F(false, true, true, false, true)},
- {F(false, true, true, true, false), F(false, true, true, true, true)}}}},
- {{{{F(true, false, false, false, false), F(true, false, false, false, true)},
- {F(true, false, false, true, false), F(true, false, false, true, true)}},
- {{F(true, false, true, false, false), F(true, false, true, false, true)},
- {F(true, false, true, true, false), F(true, false, true, true, true)}}},
- {{{F(true, true, false, false, false), F(true, true, false, false, true)},
- {F(true, true, false, true, false), F(true, true, false, true, true)}},
- {{F(true, true, true, false, false), F(true, true, true, false, true)},
- {F(true, true, true, true, false), F(true, true, true, true, true)}}}}};
-
-#undef F
-
- return funcs[u8(shading_enable)][u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)]
- [u8(dithering_enable)];
-}
-
-enum
-{
- Line_XY_FractBits = 32
-};
-enum
-{
- Line_RGB_FractBits = 12
-};
-
-struct line_fxp_coord
-{
- u64 x, y;
- u32 r, g, b;
-};
-
-struct line_fxp_step
-{
- s64 dx_dk, dy_dk;
- s32 dr_dk, dg_dk, db_dk;
-};
-
-static ALWAYS_INLINE_RELEASE s64 LineDivide(s64 delta, s32 dk)
-{
- delta = (u64)delta << Line_XY_FractBits;
-
- if (delta < 0)
- delta -= dk - 1;
- if (delta > 0)
- delta += dk - 1;
-
- return (delta / dk);
-}
-
-template
-void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1)
-{
- const s32 i_dx = std::abs(p1->x - p0->x);
- const s32 i_dy = std::abs(p1->y - p0->y);
- const s32 k = (i_dx > i_dy) ? i_dx : i_dy;
- if (i_dx >= MAX_PRIMITIVE_WIDTH || i_dy >= MAX_PRIMITIVE_HEIGHT)
- return;
-
- {
- // TODO: Move to base class
- const u32 clip_left =
- static_cast(std::clamp(std::min(p0->x, p1->x), m_drawing_area.left, m_drawing_area.left));
- const u32 clip_right =
- static_cast(std::clamp(std::max(p0->x, p1->x), m_drawing_area.left, m_drawing_area.right)) + 1u;
- const u32 clip_top =
- static_cast(std::clamp(std::min(p0->y, p1->y), m_drawing_area.top, m_drawing_area.bottom));
- const u32 clip_bottom =
- static_cast(std::clamp(std::max(p0->y, p1->y), m_drawing_area.top, m_drawing_area.bottom)) + 1u;
-
- AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, shading_enable);
- }
-
- if (p0->x >= p1->x && k > 0)
- std::swap(p0, p1);
-
- line_fxp_step step;
- if (k == 0)
- {
- step.dx_dk = 0;
- step.dy_dk = 0;
-
- if constexpr (shading_enable)
- {
- step.dr_dk = 0;
- step.dg_dk = 0;
- step.db_dk = 0;
- }
- }
- else
- {
- step.dx_dk = LineDivide(p1->x - p0->x, k);
- step.dy_dk = LineDivide(p1->y - p0->y, k);
-
- if constexpr (shading_enable)
- {
- step.dr_dk = (s32)((u32)(p1->r - p0->r) << Line_RGB_FractBits) / k;
- step.dg_dk = (s32)((u32)(p1->g - p0->g) << Line_RGB_FractBits) / k;
- step.db_dk = (s32)((u32)(p1->b - p0->b) << Line_RGB_FractBits) / k;
- }
- }
-
- line_fxp_coord cur_point;
- cur_point.x = ((u64)p0->x << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1));
- cur_point.y = ((u64)p0->y << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1));
-
- cur_point.x -= 1024;
-
- if (step.dy_dk < 0)
- cur_point.y -= 1024;
-
- if constexpr (shading_enable)
- {
- cur_point.r = (p0->r << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
- cur_point.g = (p0->g << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
- cur_point.b = (p0->b << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
- }
-
- for (s32 i = 0; i <= k; i++)
- {
- // Sign extension is not necessary here for x and y, due to the maximum values that ClipX1 and ClipY1 can contain.
- const s32 x = (cur_point.x >> Line_XY_FractBits) & 2047;
- const s32 y = (cur_point.y >> Line_XY_FractBits) & 2047;
-
- if (!IsInterlacedRenderingEnabled() || GetActiveLineLSB() != (static_cast(y) & 1u))
- {
- const u8 r = shading_enable ? static_cast(cur_point.r >> Line_RGB_FractBits) : p0->r;
- const u8 g = shading_enable ? static_cast(cur_point.g >> Line_RGB_FractBits) : p0->g;
- const u8 b = shading_enable ? static_cast(cur_point.b >> Line_RGB_FractBits) : p0->b;
-
- if (x >= static_cast(m_drawing_area.left) && x <= static_cast(m_drawing_area.right) &&
- y >= static_cast(m_drawing_area.top) && y <= static_cast(m_drawing_area.bottom))
- {
- ShadePixel(static_cast(x), static_cast(y), r, g,
- b, 0, 0);
- }
- }
-
- cur_point.x += step.dx_dk;
- cur_point.y += step.dy_dk;
-
- if constexpr (shading_enable)
- {
- cur_point.r += step.dr_dk;
- cur_point.g += step.dg_dk;
- cur_point.b += step.db_dk;
- }
- }
-}
-
-GPU_SW::DrawLineFunction GPU_SW::GetDrawLineFunction(bool shading_enable, bool transparency_enable,
- bool dithering_enable)
-{
-#define F(SHADING, TRANSPARENCY, DITHERING) &GPU_SW::DrawLine
-
- static constexpr DrawLineFunction funcs[2][2][2] = {
- {{F(false, false, false), F(false, false, true)}, {F(false, true, false), F(false, true, true)}},
- {{F(true, false, false), F(true, false, true)}, {F(true, true, false), F(true, true, true)}}};
-
-#undef F
-
- return funcs[u8(shading_enable)][u8(transparency_enable)][u8(dithering_enable)];
-}
-
-GPU_SW::DrawRectangleFunction GPU_SW::GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable,
- bool transparency_enable)
-{
-#define F(TEXTURE, RAW_TEXTURE, TRANSPARENCY) &GPU_SW::DrawRectangle
-
- static constexpr DrawRectangleFunction funcs[2][2][2] = {
- {{F(false, false, false), F(false, false, true)}, {F(false, true, false), F(false, true, true)}},
- {{F(true, false, false), F(true, false, true)}, {F(true, true, false), F(true, true, true)}}};
-
-#undef F
-
- return funcs[u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)];
-}
diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h
index 0b9326db4..f9b910154 100644
--- a/src/core/gpu_sw.h
+++ b/src/core/gpu_sw.h
@@ -1,6 +1,7 @@
#pragma once
#include "common/heap_array.h"
#include "gpu.h"
+#include "gpu_sw_backend.h"
#include "host_display.h"
#include
#include
@@ -18,37 +19,14 @@ public:
bool Initialize(HostDisplay* host_display) override;
void Reset() override;
-
- ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return m_vram[VRAM_WIDTH * y + x]; }
- ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const { return &m_vram[VRAM_WIDTH * y + x]; }
- ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y) { return &m_vram[VRAM_WIDTH * y + x]; }
- ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value) { m_vram[VRAM_WIDTH * y + x] = value; }
-
- // this is actually (31 * 255) >> 4) == 494, but to simplify addressing we use the next power of two (512)
- static constexpr u32 DITHER_LUT_SIZE = 512;
- using DitherLUT = std::array, DITHER_MATRIX_SIZE>, DITHER_MATRIX_SIZE>;
- static constexpr DitherLUT ComputeDitherLUT();
+ void UpdateSettings() override;
protected:
- struct SWVertex
- {
- s32 x, y;
- u8 r, g, b;
- u8 u, v;
+ void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override;
+ void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
+ void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
+ void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
- ALWAYS_INLINE void SetPosition(GPUVertexPosition p, s32 offset_x, s32 offset_y)
- {
- x = TruncateGPUVertexPosition(offset_x + p.x);
- y = TruncateGPUVertexPosition(offset_y + p.y);
- }
-
- ALWAYS_INLINE void SetColorRGB24(u32 color) { std::tie(r, g, b) = UnpackColorRGB24(color); }
- ALWAYS_INLINE void SetTexcoord(u16 value) { std::tie(u, v) = UnpackTexcoord(value); }
- };
-
- //////////////////////////////////////////////////////////////////////////
- // Scanout
- //////////////////////////////////////////////////////////////////////////
template
void CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field, bool interlaced, bool interleaved);
void CopyOut15Bit(HostDisplayPixelFormat display_format, u32 src_x, u32 src_y, u32 width, u32 height, u32 field,
@@ -63,71 +41,14 @@ protected:
void ClearDisplay() override;
void UpdateDisplay() override;
- //////////////////////////////////////////////////////////////////////////
- // Rasterization
- //////////////////////////////////////////////////////////////////////////
-
void DispatchRenderCommand() override;
- template
- void ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y);
+ void FillBackendCommandParameters(GPUBackendCommand* cmd);
+ void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc);
- template
- void DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x,
- u8 origin_texcoord_y);
-
- using DrawRectangleFunction = void (GPU_SW::*)(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b,
- u8 origin_texcoord_x, u8 origin_texcoord_y);
- DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable,
- bool transparency_enable);
-
- //////////////////////////////////////////////////////////////////////////
- // Polygon and line rasterization ported from Mednafen
- //////////////////////////////////////////////////////////////////////////
- struct i_deltas
- {
- u32 du_dx, dv_dx;
- u32 dr_dx, dg_dx, db_dx;
-
- u32 du_dy, dv_dy;
- u32 dr_dy, dg_dy, db_dy;
- };
-
- struct i_group
- {
- u32 u, v;
- u32 r, g, b;
- };
-
- template
- bool CalcIDeltas(i_deltas& idl, const SWVertex* A, const SWVertex* B, const SWVertex* C);
-
- template
- void AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count = 1);
-
- template
- void AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count = 1);
-
- template
- void DrawSpan(s32 y, s32 x_start, s32 x_bound, i_group ig, const i_deltas& idl);
-
- template
- void DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);
-
- using DrawTriangleFunction = void (GPU_SW::*)(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);
- DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable,
- bool transparency_enable, bool dithering_enable);
-
- template
- void DrawLine(const SWVertex* p0, const SWVertex* p1);
-
- using DrawLineFunction = void (GPU_SW::*)(const SWVertex* p0, const SWVertex* p1);
- DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable);
-
- std::array m_vram;
HeapArray m_display_texture_buffer;
HostDisplayPixelFormat m_16bit_display_format = HostDisplayPixelFormat::RGB565;
HostDisplayPixelFormat m_24bit_display_format = HostDisplayPixelFormat::RGBA8;
+
+ GPU_SW_Backend m_backend;
};
diff --git a/src/core/gpu_sw_backend.cpp b/src/core/gpu_sw_backend.cpp
new file mode 100644
index 000000000..fc4d2d2f6
--- /dev/null
+++ b/src/core/gpu_sw_backend.cpp
@@ -0,0 +1,928 @@
+#include "gpu_sw_backend.h"
+#include "common/assert.h"
+#include "common/log.h"
+#include "gpu_sw_backend.h"
+#include "host_display.h"
+#include "system.h"
+#include
+Log_SetChannel(GPU_SW_Backend);
+
+GPU_SW_Backend::GPU_SW_Backend() : GPUBackend()
+{
+ m_vram.fill(0);
+ m_vram_ptr = m_vram.data();
+}
+
+GPU_SW_Backend::~GPU_SW_Backend() = default;
+
+bool GPU_SW_Backend::Initialize()
+{
+ return GPUBackend::Initialize();
+}
+
+void GPU_SW_Backend::Reset()
+{
+ GPUBackend::Reset();
+
+ m_vram.fill(0);
+}
+
+void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
+{
+ const GPURenderCommand rc{cmd->rc.bits};
+ const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable;
+
+ const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction(
+ rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable);
+
+ (this->*DrawFunction)(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]);
+ if (rc.quad_polygon)
+ (this->*DrawFunction)(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]);
+}
+
+void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)
+{
+ const GPURenderCommand rc{cmd->rc.bits};
+ const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable;
+
+ const DrawRectangleFunction DrawFunction =
+ GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable);
+
+ (this->*DrawFunction)(cmd);
+}
+
+void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd)
+{
+ const DrawLineFunction DrawFunction =
+ GetDrawLineFunction(cmd->rc.shading_enable, cmd->rc.transparency_enable, cmd->IsDitheringEnabled());
+
+ for (u16 i = 1; i < cmd->num_vertices; i++)
+ (this->*DrawFunction)(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]);
+}
+
+constexpr GPU_SW_Backend::DitherLUT GPU_SW_Backend::ComputeDitherLUT()
+{
+ DitherLUT lut = {};
+ for (u32 i = 0; i < DITHER_MATRIX_SIZE; i++)
+ {
+ for (u32 j = 0; j < DITHER_MATRIX_SIZE; j++)
+ {
+ for (s32 value = 0; value < DITHER_LUT_SIZE; value++)
+ {
+ const s32 dithered_value = (value + DITHER_MATRIX[i][j]) >> 3;
+ lut[i][j][value] = static_cast((dithered_value < 0) ? 0 : ((dithered_value > 31) ? 31 : dithered_value));
+ }
+ }
+ }
+ return lut;
+}
+
+static constexpr GPU_SW_Backend::DitherLUT s_dither_lut = GPU_SW_Backend::ComputeDitherLUT();
+
+template
+void ALWAYS_INLINE_RELEASE GPU_SW_Backend::ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r,
+ u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y)
+{
+ VRAMPixel color;
+ bool transparent;
+ if constexpr (texture_enable)
+ {
+ // Apply texture window
+ // TODO: Precompute the second half
+ texcoord_x = (texcoord_x & cmd->window.and_x) | cmd->window.or_x;
+ texcoord_y = (texcoord_y & cmd->window.and_y) | cmd->window.or_y;
+
+ VRAMPixel texture_color;
+ switch (cmd->draw_mode.texture_mode)
+ {
+ case GPUTextureMode::Palette4Bit:
+ {
+ const u16 palette_value =
+ GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH,
+ (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
+ const u16 palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu;
+
+ const u32 px = (cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH;
+ const u32 py = cmd->palette.GetYBase();
+ texture_color.bits =
+ GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase());
+ }
+ break;
+
+ case GPUTextureMode::Palette8Bit:
+ {
+ const u16 palette_value =
+ GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH,
+ (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
+ const u16 palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu;
+ texture_color.bits =
+ GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase());
+ }
+ break;
+
+ default:
+ {
+ texture_color.bits = GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x)) % VRAM_WIDTH,
+ (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
+ }
+ break;
+ }
+
+ if (texture_color.bits == 0)
+ return;
+
+ transparent = texture_color.c;
+
+ if constexpr (raw_texture_enable)
+ {
+ color.bits = texture_color.bits;
+ }
+ else
+ {
+ const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u;
+ const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u;
+
+ color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.r) * u16(color_r)) >> 4]) << 0) |
+ (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.g) * u16(color_g)) >> 4]) << 5) |
+ (ZeroExtend16(s_dither_lut[dither_y][dither_x][(u16(texture_color.b) * u16(color_b)) >> 4]) << 10) |
+ (texture_color.bits & 0x8000u);
+ }
+ }
+ else
+ {
+ transparent = true;
+
+ const u32 dither_y = (dithering_enable) ? (y & 3u) : 2u;
+ const u32 dither_x = (dithering_enable) ? (x & 3u) : 3u;
+
+ color.bits = (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_r]) << 0) |
+ (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_g]) << 5) |
+ (ZeroExtend16(s_dither_lut[dither_y][dither_x][color_b]) << 10);
+ }
+
+ const VRAMPixel bg_color{GetPixel(static_cast(x), static_cast(y))};
+ if constexpr (transparency_enable)
+ {
+ if (transparent)
+ {
+#define BLEND_AVERAGE(bg, fg) Truncate8(std::min((ZeroExtend32(bg) / 2) + (ZeroExtend32(fg) / 2), 0x1F))
+#define BLEND_ADD(bg, fg) Truncate8(std::min(ZeroExtend32(bg) + ZeroExtend32(fg), 0x1F))
+#define BLEND_SUBTRACT(bg, fg) Truncate8((bg > fg) ? ((bg) - (fg)) : 0)
+#define BLEND_QUARTER(bg, fg) Truncate8(std::min(ZeroExtend32(bg) + ZeroExtend32(fg / 4), 0x1F))
+
+#define BLEND_RGB(func) \
+ color.Set(func(bg_color.r.GetValue(), color.r.GetValue()), func(bg_color.g.GetValue(), color.g.GetValue()), \
+ func(bg_color.b.GetValue(), color.b.GetValue()), color.c.GetValue())
+
+ switch (cmd->draw_mode.transparency_mode)
+ {
+ case GPUTransparencyMode::HalfBackgroundPlusHalfForeground:
+ BLEND_RGB(BLEND_AVERAGE);
+ break;
+ case GPUTransparencyMode::BackgroundPlusForeground:
+ BLEND_RGB(BLEND_ADD);
+ break;
+ case GPUTransparencyMode::BackgroundMinusForeground:
+ BLEND_RGB(BLEND_SUBTRACT);
+ break;
+ case GPUTransparencyMode::BackgroundPlusQuarterForeground:
+ BLEND_RGB(BLEND_QUARTER);
+ break;
+ default:
+ break;
+ }
+
+#undef BLEND_RGB
+
+#undef BLEND_QUARTER
+#undef BLEND_SUBTRACT
+#undef BLEND_ADD
+#undef BLEND_AVERAGE
+ }
+ }
+ else
+ {
+ UNREFERENCED_VARIABLE(transparent);
+ }
+
+ const u16 mask_and = cmd->params.GetMaskAND();
+ if ((bg_color.bits & mask_and) != 0)
+ return;
+
+ SetPixel(static_cast(x), static_cast(y), color.bits | cmd->params.GetMaskOR());
+}
+
+template
+void GPU_SW_Backend::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)
+{
+ const s32 origin_x = cmd->x;
+ const s32 origin_y = cmd->y;
+ const auto [r, g, b] = UnpackColorRGB24(cmd->color);
+ const auto [origin_texcoord_x, origin_texcoord_y] = UnpackTexcoord(cmd->texcoord);
+
+ for (u32 offset_y = 0; offset_y < cmd->height; offset_y++)
+ {
+ const s32 y = origin_y + static_cast(offset_y);
+ if (y < static_cast(m_drawing_area.top) || y > static_cast(m_drawing_area.bottom) ||
+ (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast(y)) & 1u)))
+ {
+ continue;
+ }
+
+ const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y);
+
+ for (u32 offset_x = 0; offset_x < cmd->width; offset_x++)
+ {
+ const s32 x = origin_x + static_cast(offset_x);
+ if (x < static_cast(m_drawing_area.left) || x > static_cast(m_drawing_area.right))
+ continue;
+
+ const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x);
+
+ ShadePixel(
+ cmd, static_cast(x), static_cast(y), r, g, b, texcoord_x, texcoord_y);
+ }
+ }
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Polygon and line rasterization ported from Mednafen
+//////////////////////////////////////////////////////////////////////////
+
+#define COORD_FBS 12
+#define COORD_MF_INT(n) ((n) << COORD_FBS)
+#define COORD_POST_PADDING 12
+
+static ALWAYS_INLINE_RELEASE s64 MakePolyXFP(s32 x)
+{
+ return ((u64)x << 32) + ((1ULL << 32) - (1 << 11));
+}
+
+static ALWAYS_INLINE_RELEASE s64 MakePolyXFPStep(s32 dx, s32 dy)
+{
+ s64 ret;
+ s64 dx_ex = (u64)dx << 32;
+
+ if (dx_ex < 0)
+ dx_ex -= dy - 1;
+
+ if (dx_ex > 0)
+ dx_ex += dy - 1;
+
+ ret = dx_ex / dy;
+
+ return (ret);
+}
+
+static ALWAYS_INLINE_RELEASE s32 GetPolyXFP_Int(s64 xfp)
+{
+ return (xfp >> 32);
+}
+
+template
+bool ALWAYS_INLINE_RELEASE GPU_SW_Backend::CalcIDeltas(i_deltas& idl, const GPUBackendDrawPolygonCommand::Vertex* A,
+ const GPUBackendDrawPolygonCommand::Vertex* B,
+ const GPUBackendDrawPolygonCommand::Vertex* C)
+{
+#define CALCIS(x, y) (((B->x - A->x) * (C->y - B->y)) - ((C->x - B->x) * (B->y - A->y)))
+
+ s32 denom = CALCIS(x, y);
+
+ if (!denom)
+ return false;
+
+ if constexpr (shading_enable)
+ {
+ idl.dr_dx = (u32)(CALCIS(r, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+ idl.dr_dy = (u32)(CALCIS(x, r) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+
+ idl.dg_dx = (u32)(CALCIS(g, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+ idl.dg_dy = (u32)(CALCIS(x, g) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+
+ idl.db_dx = (u32)(CALCIS(b, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+ idl.db_dy = (u32)(CALCIS(x, b) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+ }
+
+ if constexpr (texture_enable)
+ {
+ idl.du_dx = (u32)(CALCIS(u, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+ idl.du_dy = (u32)(CALCIS(x, u) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+
+ idl.dv_dx = (u32)(CALCIS(v, y) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+ idl.dv_dy = (u32)(CALCIS(x, v) * (1 << COORD_FBS) / denom) << COORD_POST_PADDING;
+ }
+
+ return true;
+
+#undef CALCIS
+}
+
+template
+void ALWAYS_INLINE_RELEASE GPU_SW_Backend::AddIDeltas_DX(i_group& ig, const i_deltas& idl, u32 count /*= 1*/)
+{
+ if constexpr (shading_enable)
+ {
+ ig.r += idl.dr_dx * count;
+ ig.g += idl.dg_dx * count;
+ ig.b += idl.db_dx * count;
+ }
+
+ if constexpr (texture_enable)
+ {
+ ig.u += idl.du_dx * count;
+ ig.v += idl.dv_dx * count;
+ }
+}
+
+template
+void ALWAYS_INLINE_RELEASE GPU_SW_Backend::AddIDeltas_DY(i_group& ig, const i_deltas& idl, u32 count /*= 1*/)
+{
+ if constexpr (shading_enable)
+ {
+ ig.r += idl.dr_dy * count;
+ ig.g += idl.dg_dy * count;
+ ig.b += idl.db_dy * count;
+ }
+
+ if constexpr (texture_enable)
+ {
+ ig.u += idl.du_dy * count;
+ ig.v += idl.dv_dy * count;
+ }
+}
+
+template
+void GPU_SW_Backend::DrawSpan(const GPUBackendDrawPolygonCommand* cmd, s32 y, s32 x_start, s32 x_bound, i_group ig,
+ const i_deltas& idl)
+{
+ if (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast(y)) & 1u))
+ return;
+
+ s32 x_ig_adjust = x_start;
+ s32 w = x_bound - x_start;
+ s32 x = TruncateGPUVertexPosition(x_start);
+
+ if (x < static_cast(m_drawing_area.left))
+ {
+ s32 delta = static_cast(m_drawing_area.left) - x;
+ x_ig_adjust += delta;
+ x += delta;
+ w -= delta;
+ }
+
+ if ((x + w) > (static_cast(m_drawing_area.right) + 1))
+ w = static_cast(m_drawing_area.right) + 1 - x;
+
+ if (w <= 0)
+ return;
+
+ AddIDeltas_DX(ig, idl, x_ig_adjust);
+ AddIDeltas_DY(ig, idl, y);
+
+ do
+ {
+ const u32 r = ig.r >> (COORD_FBS + COORD_POST_PADDING);
+ const u32 g = ig.g >> (COORD_FBS + COORD_POST_PADDING);
+ const u32 b = ig.b >> (COORD_FBS + COORD_POST_PADDING);
+ const u32 u = ig.u >> (COORD_FBS + COORD_POST_PADDING);
+ const u32 v = ig.v >> (COORD_FBS + COORD_POST_PADDING);
+
+ ShadePixel(
+ cmd, static_cast(x), static_cast(y), Truncate8(r), Truncate8(g), Truncate8(b), Truncate8(u),
+ Truncate8(v));
+
+ x++;
+ AddIDeltas_DX(ig, idl);
+ } while (--w > 0);
+}
+
+template
+void GPU_SW_Backend::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd,
+ const GPUBackendDrawPolygonCommand::Vertex* v0,
+ const GPUBackendDrawPolygonCommand::Vertex* v1,
+ const GPUBackendDrawPolygonCommand::Vertex* v2)
+{
+ u32 core_vertex;
+ {
+ u32 cvtemp = 0;
+
+ if (v1->x <= v0->x)
+ {
+ if (v2->x <= v1->x)
+ cvtemp = (1 << 2);
+ else
+ cvtemp = (1 << 1);
+ }
+ else if (v2->x < v0->x)
+ cvtemp = (1 << 2);
+ else
+ cvtemp = (1 << 0);
+
+ if (v2->y < v1->y)
+ {
+ std::swap(v2, v1);
+ cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1);
+ }
+
+ if (v1->y < v0->y)
+ {
+ std::swap(v1, v0);
+ cvtemp = ((cvtemp >> 1) & 0x1) | ((cvtemp << 1) & 0x2) | (cvtemp & 0x4);
+ }
+
+ if (v2->y < v1->y)
+ {
+ std::swap(v2, v1);
+ cvtemp = ((cvtemp >> 1) & 0x2) | ((cvtemp << 1) & 0x4) | (cvtemp & 0x1);
+ }
+
+ core_vertex = cvtemp >> 1;
+ }
+
+ if (v0->y == v2->y)
+ return;
+
+ if (static_cast(std::abs(v2->x - v0->x)) >= MAX_PRIMITIVE_WIDTH ||
+ static_cast(std::abs(v2->x - v1->x)) >= MAX_PRIMITIVE_WIDTH ||
+ static_cast(std::abs(v1->x - v0->x)) >= MAX_PRIMITIVE_WIDTH ||
+ static_cast(v2->y - v0->y) >= MAX_PRIMITIVE_HEIGHT)
+ {
+ return;
+ }
+
+ s64 base_coord = MakePolyXFP(v0->x);
+ s64 base_step = MakePolyXFPStep((v2->x - v0->x), (v2->y - v0->y));
+ s64 bound_coord_us;
+ s64 bound_coord_ls;
+ bool right_facing;
+
+ if (v1->y == v0->y)
+ {
+ bound_coord_us = 0;
+ right_facing = (bool)(v1->x > v0->x);
+ }
+ else
+ {
+ bound_coord_us = MakePolyXFPStep((v1->x - v0->x), (v1->y - v0->y));
+ right_facing = (bool)(bound_coord_us > base_step);
+ }
+
+ if (v2->y == v1->y)
+ bound_coord_ls = 0;
+ else
+ bound_coord_ls = MakePolyXFPStep((v2->x - v1->x), (v2->y - v1->y));
+
+ i_deltas idl;
+ if (!CalcIDeltas(idl, v0, v1, v2))
+ return;
+
+ const GPUBackendDrawPolygonCommand::Vertex* vertices[3] = {v0, v1, v2};
+
+ i_group ig;
+ if constexpr (texture_enable)
+ {
+ ig.u = (COORD_MF_INT(vertices[core_vertex]->u) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
+ ig.v = (COORD_MF_INT(vertices[core_vertex]->v) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
+ }
+
+ ig.r = (COORD_MF_INT(vertices[core_vertex]->r) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
+ ig.g = (COORD_MF_INT(vertices[core_vertex]->g) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
+ ig.b = (COORD_MF_INT(vertices[core_vertex]->b) + (1 << (COORD_FBS - 1))) << COORD_POST_PADDING;
+
+ AddIDeltas_DX(ig, idl, -vertices[core_vertex]->x);
+ AddIDeltas_DY(ig, idl, -vertices[core_vertex]->y);
+
+ struct TriangleHalf
+ {
+ u64 x_coord[2];
+ u64 x_step[2];
+
+ s32 y_coord;
+ s32 y_bound;
+
+ bool dec_mode;
+ } tripart[2];
+
+ u32 vo = 0;
+ u32 vp = 0;
+ if (core_vertex != 0)
+ vo = 1;
+ if (core_vertex == 2)
+ vp = 3;
+
+ {
+ TriangleHalf* tp = &tripart[vo];
+ tp->y_coord = vertices[0 ^ vo]->y;
+ tp->y_bound = vertices[1 ^ vo]->y;
+ tp->x_coord[right_facing] = MakePolyXFP(vertices[0 ^ vo]->x);
+ tp->x_step[right_facing] = bound_coord_us;
+ tp->x_coord[!right_facing] = base_coord + ((vertices[vo]->y - vertices[0]->y) * base_step);
+ tp->x_step[!right_facing] = base_step;
+ tp->dec_mode = vo;
+ }
+
+ {
+ TriangleHalf* tp = &tripart[vo ^ 1];
+ tp->y_coord = vertices[1 ^ vp]->y;
+ tp->y_bound = vertices[2 ^ vp]->y;
+ tp->x_coord[right_facing] = MakePolyXFP(vertices[1 ^ vp]->x);
+ tp->x_step[right_facing] = bound_coord_ls;
+ tp->x_coord[!right_facing] =
+ base_coord + ((vertices[1 ^ vp]->y - vertices[0]->y) *
+ base_step); // base_coord + ((vertices[1].y - vertices[0].y) * base_step);
+ tp->x_step[!right_facing] = base_step;
+ tp->dec_mode = vp;
+ }
+
+ for (u32 i = 0; i < 2; i++)
+ {
+ s32 yi = tripart[i].y_coord;
+ s32 yb = tripart[i].y_bound;
+
+ u64 lc = tripart[i].x_coord[0];
+ u64 ls = tripart[i].x_step[0];
+
+ u64 rc = tripart[i].x_coord[1];
+ u64 rs = tripart[i].x_step[1];
+
+ if (tripart[i].dec_mode)
+ {
+ while (yi > yb)
+ {
+ yi--;
+ lc -= ls;
+ rc -= rs;
+
+ s32 y = TruncateGPUVertexPosition(yi);
+
+ if (y < static_cast(m_drawing_area.top))
+ break;
+
+ if (y > static_cast(m_drawing_area.bottom))
+ continue;
+
+ DrawSpan(
+ cmd, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
+ }
+ }
+ else
+ {
+ while (yi < yb)
+ {
+ s32 y = TruncateGPUVertexPosition(yi);
+
+ if (y > static_cast(m_drawing_area.bottom))
+ break;
+
+ if (y >= static_cast(m_drawing_area.top))
+ {
+
+ DrawSpan(
+ cmd, yi, GetPolyXFP_Int(lc), GetPolyXFP_Int(rc), ig, idl);
+ }
+
+ yi++;
+ lc += ls;
+ rc += rs;
+ }
+ }
+ }
+}
+
+GPU_SW_Backend::DrawTriangleFunction GPU_SW_Backend::GetDrawTriangleFunction(bool shading_enable, bool texture_enable,
+ bool raw_texture_enable,
+ bool transparency_enable,
+ bool dithering_enable)
+{
+#define F(SHADING, TEXTURE, RAW_TEXTURE, TRANSPARENCY, DITHERING) \
+ &GPU_SW_Backend::DrawTriangle
+
+ static constexpr DrawTriangleFunction funcs[2][2][2][2][2] = {
+ {{{{F(false, false, false, false, false), F(false, false, false, false, true)},
+ {F(false, false, false, true, false), F(false, false, false, true, true)}},
+ {{F(false, false, true, false, false), F(false, false, true, false, true)},
+ {F(false, false, true, true, false), F(false, false, true, true, true)}}},
+ {{{F(false, true, false, false, false), F(false, true, false, false, true)},
+ {F(false, true, false, true, false), F(false, true, false, true, true)}},
+ {{F(false, true, true, false, false), F(false, true, true, false, true)},
+ {F(false, true, true, true, false), F(false, true, true, true, true)}}}},
+ {{{{F(true, false, false, false, false), F(true, false, false, false, true)},
+ {F(true, false, false, true, false), F(true, false, false, true, true)}},
+ {{F(true, false, true, false, false), F(true, false, true, false, true)},
+ {F(true, false, true, true, false), F(true, false, true, true, true)}}},
+ {{{F(true, true, false, false, false), F(true, true, false, false, true)},
+ {F(true, true, false, true, false), F(true, true, false, true, true)}},
+ {{F(true, true, true, false, false), F(true, true, true, false, true)},
+ {F(true, true, true, true, false), F(true, true, true, true, true)}}}}};
+
+#undef F
+
+ return funcs[u8(shading_enable)][u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)]
+ [u8(dithering_enable)];
+}
+
+enum
+{
+ Line_XY_FractBits = 32
+};
+enum
+{
+ Line_RGB_FractBits = 12
+};
+
+struct line_fxp_coord
+{
+ u64 x, y;
+ u32 r, g, b;
+};
+
+struct line_fxp_step
+{
+ s64 dx_dk, dy_dk;
+ s32 dr_dk, dg_dk, db_dk;
+};
+
+static ALWAYS_INLINE_RELEASE s64 LineDivide(s64 delta, s32 dk)
+{
+ delta = (u64)delta << Line_XY_FractBits;
+
+ if (delta < 0)
+ delta -= dk - 1;
+ if (delta > 0)
+ delta += dk - 1;
+
+ return (delta / dk);
+}
+
+template
+void GPU_SW_Backend::DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0,
+ const GPUBackendDrawLineCommand::Vertex* p1)
+{
+ const s32 i_dx = std::abs(p1->x - p0->x);
+ const s32 i_dy = std::abs(p1->y - p0->y);
+ const s32 k = (i_dx > i_dy) ? i_dx : i_dy;
+ if (i_dx >= MAX_PRIMITIVE_WIDTH || i_dy >= MAX_PRIMITIVE_HEIGHT)
+ return;
+
+ if (p0->x >= p1->x && k > 0)
+ std::swap(p0, p1);
+
+ line_fxp_step step;
+ if (k == 0)
+ {
+ step.dx_dk = 0;
+ step.dy_dk = 0;
+
+ if constexpr (shading_enable)
+ {
+ step.dr_dk = 0;
+ step.dg_dk = 0;
+ step.db_dk = 0;
+ }
+ }
+ else
+ {
+ step.dx_dk = LineDivide(p1->x - p0->x, k);
+ step.dy_dk = LineDivide(p1->y - p0->y, k);
+
+ if constexpr (shading_enable)
+ {
+ step.dr_dk = (s32)((u32)(p1->r - p0->r) << Line_RGB_FractBits) / k;
+ step.dg_dk = (s32)((u32)(p1->g - p0->g) << Line_RGB_FractBits) / k;
+ step.db_dk = (s32)((u32)(p1->b - p0->b) << Line_RGB_FractBits) / k;
+ }
+ }
+
+ line_fxp_coord cur_point;
+ cur_point.x = ((u64)p0->x << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1));
+ cur_point.y = ((u64)p0->y << Line_XY_FractBits) | (1ULL << (Line_XY_FractBits - 1));
+
+ cur_point.x -= 1024;
+
+ if (step.dy_dk < 0)
+ cur_point.y -= 1024;
+
+ if constexpr (shading_enable)
+ {
+ cur_point.r = (p0->r << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
+ cur_point.g = (p0->g << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
+ cur_point.b = (p0->b << Line_RGB_FractBits) | (1 << (Line_RGB_FractBits - 1));
+ }
+
+ for (s32 i = 0; i <= k; i++)
+ {
+ // Sign extension is not necessary here for x and y, due to the maximum values that ClipX1 and ClipY1 can contain.
+ const s32 x = (cur_point.x >> Line_XY_FractBits) & 2047;
+ const s32 y = (cur_point.y >> Line_XY_FractBits) & 2047;
+
+ if ((!cmd->params.interlaced_rendering || cmd->params.active_line_lsb != (Truncate8(static_cast(y)) & 1u)) &&
+ x >= static_cast(m_drawing_area.left) && x <= static_cast