PGXP: Add initial implementation

2025-06-18 08:45:46 -04:00 · 2020-08-02 00:25:07 +10:00
parent 013497cf20
commit 0c1b637549
30 changed files with 1699 additions and 377 deletions
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@ -2,17 +2,25 @@
 #include "common/assert.h"
 #include "common/log.h"
 #include "common/state_wrapper.h"
+#include "cpu_core.h"
+#include "pgxp.h"
 #include "settings.h"
 #include "system.h"
 #include <imgui.h>
 #include <sstream>
 Log_SetChannel(GPU_HW);

-GPU_HW::GPU_HW() : GPU() { m_vram_ptr = m_vram_shadow.data(); }
+GPU_HW::GPU_HW() : GPU() // Default-constructs the GPU base, then sets up the VRAM pointer.
+{
+  m_vram_ptr = m_vram_shadow.data(); // m_vram_ptr now refers to the storage owned by m_vram_shadow
+}

 GPU_HW::~GPU_HW() = default; // Compiler-generated destructor; no explicit teardown is performed here.

-bool GPU_HW::IsHardwareRenderer() const { return true; }
+bool GPU_HW::IsHardwareRenderer() const // Reports whether this GPU backend is a hardware renderer.
+{
+  return true; // unconditionally true for GPU_HW
+}

 bool GPU_HW::Initialize(HostDisplay* host_display)
 {
@ -110,35 +118,39 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)

  // It might be faster to do more direct checking here, but the code below handles primitives in any order and
  // orientation, and is far more SIMD-friendly if needed.
-  const s32 abx = vertices[1].x - vertices[0].x;
-  const s32 aby = vertices[1].y - vertices[0].y;
-  const s32 bcx = vertices[2].x - vertices[1].x;
-  const s32 bcy = vertices[2].y - vertices[1].y;
-  const s32 cax = vertices[0].x - vertices[2].x;
-  const s32 cay = vertices[0].y - vertices[2].y;
+  const float abx = vertices[1].x - vertices[0].x;
+  const float aby = vertices[1].y - vertices[0].y;
+  const float bcx = vertices[2].x - vertices[1].x;
+  const float bcy = vertices[2].y - vertices[1].y;
+  const float cax = vertices[0].x - vertices[2].x;
+  const float cay = vertices[0].y - vertices[2].y;

  // Compute static derivatives, just assume W is uniform across the primitive and that the plane equation remains the
  // same across the quad. (which it is, there is no Z.. yet).
-  const s32 dudx = -aby * vertices[2].u - bcy * vertices[0].u - cay * vertices[1].u;
-  const s32 dvdx = -aby * vertices[2].v - bcy * vertices[0].v - cay * vertices[1].v;
-  const s32 dudy = +abx * vertices[2].u + bcx * vertices[0].u + cax * vertices[1].u;
-  const s32 dvdy = +abx * vertices[2].v + bcx * vertices[0].v + cax * vertices[1].v;
-  const s32 area = bcx * cay - bcy * cax;
+  const float dudx = -aby * static_cast<float>(vertices[2].u) - bcy * static_cast<float>(vertices[0].u) -
+                     cay * static_cast<float>(vertices[1].u);
+  const float dvdx = -aby * static_cast<float>(vertices[2].v) - bcy * static_cast<float>(vertices[0].v) -
+                     cay * static_cast<float>(vertices[1].v);
+  const float dudy = +abx * static_cast<float>(vertices[2].u) + bcx * static_cast<float>(vertices[0].u) +
+                     cax * static_cast<float>(vertices[1].u);
+  const float dvdy = +abx * static_cast<float>(vertices[2].v) + bcx * static_cast<float>(vertices[0].v) +
+                     cax * static_cast<float>(vertices[1].v);
+  const float area = bcx * cay - bcy * cax;

  // Detect and reject any triangles with 0 size texture area
  const s32 texArea = (vertices[1].u - vertices[0].u) * (vertices[2].v - vertices[0].v) -
                      (vertices[2].u - vertices[0].u) * (vertices[1].v - vertices[0].v);

  // Shouldn't matter as degenerate primitives will be culled anyways.
-  if (area == 0 && texArea == 0)
+  if (area == 0.0f && texArea == 0)
    return;

  // Use floats here as it'll be faster than integer divides.
-  const float rcp_area = 1.0f / static_cast<float>(area);
-  const float dudx_area = static_cast<float>(dudx) * rcp_area;
-  const float dudy_area = static_cast<float>(dudy) * rcp_area;
-  const float dvdx_area = static_cast<float>(dvdx) * rcp_area;
-  const float dvdy_area = static_cast<float>(dvdy) * rcp_area;
+  const float rcp_area = 1.0f / area;
+  const float dudx_area = dudx * rcp_area;
+  const float dudy_area = dudy * rcp_area;
+  const float dvdx_area = dvdx * rcp_area;
+  const float dvdy_area = dvdy * rcp_area;
  const bool neg_dudx = dudx_area < 0.0f;
  const bool neg_dudy = dudy_area < 0.0f;
  const bool neg_dvdx = dvdx_area < 0.0f;
@ -179,22 +191,22 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices)

 // The PlayStation GPU draws lines from start to end, inclusive. Or, more specifically, inclusive of the greatest delta
 // in the x or y direction.
-void GPU_HW::FixLineVertexCoordinates(BatchVertex& start, BatchVertex& end, s32 dx, s32 dy)
+void GPU_HW::FixLineVertexCoordinates(s32& start_x, s32& start_y, s32& end_x, s32& end_y, s32 dx, s32 dy)
 {
  // Adjusts the line's endpoints in place. Along each axis whose extent is the larger one (both axes on a tie), the
  // endpoint with the greater coordinate is moved one unit further out; when the two coordinates are equal, the end
  // point is the one that moves.
  // dx/dy are the x/y extents of the line; the call sites visible in this file pass (max - min) for each axis.
  // NOTE(review): presumably this compensates for a host rasterizer that omits a line's final pixel - confirm.
  // deliberately not else if to catch the equal case
  if (dx >= dy)
  {
-    if (start.x > end.x)
-      start.x++;
+    if (start_x > end_x)
+      start_x++; // start has the greater x, so it is the one pushed outwards
    else
-      end.x++;
+      end_x++; // end has the greater (or an equal) x
  }
  if (dx <= dy)
  {
-    if (start.y > end.y)
-      start.y++;
+    if (start_y > end_y)
+      start_y++; // start has the greater y, so it is the one pushed outwards
    else
-      end.y++;
+      end_y++; // end has the greater (or an equal) y
  }
 }

@ -202,6 +214,7 @@ void GPU_HW::LoadVertices()
 {
  const RenderCommand rc{m_render_command.bits};
  const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16);
+  const float depth = GetCurrentNormalizedVertexDepth();

  if (m_GPUSTAT.check_mask_before_draw)
    m_current_depth++;
@ -215,17 +228,36 @@ void GPU_HW::LoadVertices()
      const u32 first_color = rc.color_for_first_vertex;
      const bool shaded = rc.shading_enable;
      const bool textured = rc.texture_enable;
+      const bool pgxp = g_settings.gpu_pgxp_enable;

      const u32 num_vertices = rc.quad_polygon ? 4 : 3;
      std::array<BatchVertex, 4> vertices;
+      std::array<std::array<s32, 2>, 4> native_vertex_positions;
+      bool valid_w = g_settings.gpu_pgxp_texture_correction;
      for (u32 i = 0; i < num_vertices; i++)
      {
-        const u32 color = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color;
-        const VertexPosition vp{m_fifo.Pop()};
-        const u16 packed_texcoord = textured ? Truncate16(m_fifo.Pop()) : 0;
+        const u32 color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
+        const u64 maddr_and_pos = m_fifo.Pop();
+        const VertexPosition vp{Truncate32(maddr_and_pos)};
+        const u16 texcoord = textured ? Truncate16(FifoPop()) : 0;
+        const s32 native_x = m_drawing_offset.x + vp.x;
+        const s32 native_y = m_drawing_offset.y + vp.y;
+        native_vertex_positions[i][0] = native_x;
+        native_vertex_positions[i][1] = native_y;
+        vertices[i].Set(static_cast<float>(native_x), static_cast<float>(native_y), depth, 1.0f, color, texpage,
+                        texcoord);

-        vertices[i].Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, m_current_depth, color, texpage,
-                        packed_texcoord);
+        if (pgxp)
+        {
+          valid_w &=
+            PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, native_x, native_y, m_drawing_offset.x,
+                                 m_drawing_offset.y, &vertices[i].x, &vertices[i].y, &vertices[i].w);
+        }
+      }
+      if (!valid_w)
+      {
+        for (BatchVertex& v : vertices)
+          v.w = 1.0f;
      }

      if (rc.quad_polygon && m_resolution_scale > 1)
@ -235,19 +267,20 @@ void GPU_HW::LoadVertices()
        return;

      // Cull polygons which are too large.
-      const s32 min_x_12 = std::min(vertices[1].x, vertices[2].x);
-      const s32 max_x_12 = std::max(vertices[1].x, vertices[2].x);
-      const s32 min_y_12 = std::min(vertices[1].y, vertices[2].y);
-      const s32 max_y_12 = std::max(vertices[1].y, vertices[2].y);
-      const s32 min_x = std::min(min_x_12, vertices[0].x);
-      const s32 max_x = std::max(max_x_12, vertices[0].x);
-      const s32 min_y = std::min(min_y_12, vertices[0].y);
-      const s32 max_y = std::max(max_y_12, vertices[0].y);
+      const s32 min_x_12 = std::min(native_vertex_positions[1][0], native_vertex_positions[2][0]);
+      const s32 max_x_12 = std::max(native_vertex_positions[1][0], native_vertex_positions[2][0]);
+      const s32 min_y_12 = std::min(native_vertex_positions[1][1], native_vertex_positions[2][1]);
+      const s32 max_y_12 = std::max(native_vertex_positions[1][1], native_vertex_positions[2][1]);
+      const s32 min_x = std::min(min_x_12, native_vertex_positions[0][0]);
+      const s32 max_x = std::max(max_x_12, native_vertex_positions[0][0]);
+      const s32 min_y = std::min(min_y_12, native_vertex_positions[0][1]);
+      const s32 max_y = std::max(max_y_12, native_vertex_positions[0][1]);

      if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
      {
-        Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", vertices[0].x, vertices[0].y, vertices[1].x,
-                        vertices[1].y, vertices[2].x, vertices[2].y);
+        Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", native_vertex_positions[0][0],
+                        native_vertex_positions[0][1], native_vertex_positions[1][0], native_vertex_positions[1][1],
+                        native_vertex_positions[2][0], native_vertex_positions[2][1]);
      }
      else
      {
@ -268,16 +301,17 @@ void GPU_HW::LoadVertices()
      // quads
      if (rc.quad_polygon)
      {
-        const s32 min_x_123 = std::min(min_x_12, vertices[3].x);
-        const s32 max_x_123 = std::max(max_x_12, vertices[3].x);
-        const s32 min_y_123 = std::min(min_y_12, vertices[3].y);
-        const s32 max_y_123 = std::max(max_y_12, vertices[3].y);
+        const s32 min_x_123 = std::min(min_x_12, native_vertex_positions[3][0]);
+        const s32 max_x_123 = std::max(max_x_12, native_vertex_positions[3][0]);
+        const s32 min_y_123 = std::min(min_y_12, native_vertex_positions[3][1]);
+        const s32 max_y_123 = std::max(max_y_12, native_vertex_positions[3][1]);

        // Cull polygons which are too large.
        if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT)
        {
-          Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d", vertices[2].x,
-                          vertices[2].y, vertices[1].x, vertices[1].y, vertices[0].x, vertices[0].y);
+          Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d",
+                          native_vertex_positions[2][0], native_vertex_positions[2][1], native_vertex_positions[1][0],
+                          native_vertex_positions[1][1], native_vertex_positions[0][0], native_vertex_positions[0][1]);
        }
        else
        {
@ -303,11 +337,11 @@ void GPU_HW::LoadVertices()
    case Primitive::Rectangle:
    {
      const u32 color = rc.color_for_first_vertex;
-      const VertexPosition vp{m_fifo.Pop()};
+      const VertexPosition vp{FifoPop()};
      const s32 pos_x = TruncateVertexPosition(m_drawing_offset.x + vp.x);
      const s32 pos_y = TruncateVertexPosition(m_drawing_offset.y + vp.y);

-      const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(m_fifo.Pop()) : 0);
+      const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(FifoPop()) : 0);
      u16 orig_tex_left = ZeroExtend16(texcoord_x);
      u16 orig_tex_top = ZeroExtend16(texcoord_y);
      s32 rectangle_width;
@ -328,7 +362,7 @@ void GPU_HW::LoadVertices()
          break;
        default:
        {
-          const u32 width_and_height = m_fifo.Pop();
+          const u32 width_and_height = FifoPop();
          rectangle_width = static_cast<s32>(width_and_height & VRAM_WIDTH_MASK);
          rectangle_height = static_cast<s32>((width_and_height >> 16) & VRAM_HEIGHT_MASK);

@ -353,25 +387,25 @@ void GPU_HW::LoadVertices()
      for (s32 y_offset = 0; y_offset < rectangle_height;)
      {
        const s32 quad_height = std::min<s32>(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top);
-        const s32 quad_start_y = pos_y + y_offset;
-        const s32 quad_end_y = quad_start_y + quad_height;
+        const float quad_start_y = static_cast<float>(pos_y + y_offset);
+        const float quad_end_y = quad_start_y + static_cast<float>(quad_height);
        const u16 tex_bottom = tex_top + static_cast<u16>(quad_height);

        u16 tex_left = orig_tex_left;
        for (s32 x_offset = 0; x_offset < rectangle_width;)
        {
          const s32 quad_width = std::min<s32>(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left);
-          const s32 quad_start_x = pos_x + x_offset;
-          const s32 quad_end_x = quad_start_x + quad_width;
+          const float quad_start_x = static_cast<float>(pos_x + x_offset);
+          const float quad_end_x = quad_start_x + static_cast<float>(quad_width);
          const u16 tex_right = tex_left + static_cast<u16>(quad_width);

-          AddNewVertex(quad_start_x, quad_start_y, m_current_depth, color, texpage, tex_left, tex_top);
-          AddNewVertex(quad_end_x, quad_start_y, m_current_depth, color, texpage, tex_right, tex_top);
-          AddNewVertex(quad_start_x, quad_end_y, m_current_depth, color, texpage, tex_left, tex_bottom);
+          AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top);
+          AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top);
+          AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom);

-          AddNewVertex(quad_start_x, quad_end_y, m_current_depth, color, texpage, tex_left, tex_bottom);
-          AddNewVertex(quad_end_x, quad_start_y, m_current_depth, color, texpage, tex_right, tex_top);
-          AddNewVertex(quad_end_x, quad_end_y, m_current_depth, color, texpage, tex_right, tex_bottom);
+          AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom);
+          AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top);
+          AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom);

          x_offset += quad_width;
          tex_left = 0;
@ -404,41 +438,41 @@ void GPU_HW::LoadVertices()
        if (rc.shading_enable)
        {
          color0 = rc.color_for_first_vertex;
-          pos0.bits = m_fifo.Pop();
-          color1 = m_fifo.Pop() & UINT32_C(0x00FFFFFF);
-          pos1.bits = m_fifo.Pop();
+          pos0.bits = FifoPop();
+          color1 = FifoPop() & UINT32_C(0x00FFFFFF);
+          pos1.bits = FifoPop();
        }
        else
        {
          color0 = color1 = rc.color_for_first_vertex;
-          pos0.bits = m_fifo.Pop();
-          pos1.bits = m_fifo.Pop();
+          pos0.bits = FifoPop();
+          pos1.bits = FifoPop();
        }

        if (!IsDrawingAreaIsValid())
          return;

-        BatchVertex start, end;
-        start.Set(m_drawing_offset.x + pos0.x, m_drawing_offset.y + pos0.y, m_current_depth, color0, 0, 0);
-        end.Set(m_drawing_offset.x + pos1.x, m_drawing_offset.y + pos1.y, m_current_depth, color1, 0, 0);
+        s32 start_x = pos0.x + m_drawing_offset.x;
+        s32 start_y = pos0.y + m_drawing_offset.y;
+        s32 end_x = pos1.x + m_drawing_offset.x;
+        s32 end_y = pos1.y + m_drawing_offset.y;

-        const s32 min_x = std::min(start.x, end.x);
-        const s32 max_x = std::max(start.x, end.x);
-        const s32 min_y = std::min(start.y, end.y);
-        const s32 max_y = std::max(start.y, end.y);
+        const s32 min_x = std::min(start_x, end_x);
+        const s32 max_x = std::max(start_x, end_x);
+        const s32 min_y = std::min(start_y, end_y);
+        const s32 max_y = std::max(start_y, end_y);
        const s32 dx = max_x - min_x;
        const s32 dy = max_y - min_y;
-
        if (dx >= MAX_PRIMITIVE_WIDTH || dy >= MAX_PRIMITIVE_HEIGHT)
        {
-          Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start.x, start.y, end.x, end.y);
+          Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start_x, start_y, end_x, end_y);
          return;
        }

-        FixLineVertexCoordinates(start, end, dx, dy);
-
-        AddVertex(start);
-        AddVertex(end);
+        FixLineVertexCoordinates(start_x, start_y, end_x, end_y, dx, dy);
+        AddNewVertex(static_cast<float>(start_x), static_cast<float>(start_y), depth, 1.0f, color0, 0,
+                     static_cast<u16>(0));
+        AddNewVertex(static_cast<float>(end_x), static_cast<float>(end_y), depth, 1.0f, color1, 0, static_cast<u16>(0));

        // Clamp the line's left edge into [left, right]. Using `left` for both bounds made the result always equal
        // m_drawing_area.left, so min_x was ignored.
        const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.right));
        const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
@ -461,37 +495,38 @@ void GPU_HW::LoadVertices()
        const u32 first_color = rc.color_for_first_vertex;
        const bool shaded = rc.shading_enable;

-        BatchVertex last_vertex;
+        s32 last_x, last_y;
+        u32 last_color;
        u32 buffer_pos = 0;
        for (u32 i = 0; i < num_vertices; i++)
        {
          const u32 color = (shaded && i > 0) ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
          const VertexPosition vp{m_blit_buffer[buffer_pos++]};
-
-          BatchVertex vertex;
-          vertex.Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, m_current_depth, color, 0, 0);
+          const s32 x = m_drawing_offset.x + vp.x;
+          const s32 y = m_drawing_offset.y + vp.y;

          if (i > 0)
          {
-            const s32 min_x = std::min(last_vertex.x, vertex.x);
-            const s32 max_x = std::max(last_vertex.x, vertex.x);
-            const s32 min_y = std::min(last_vertex.y, vertex.y);
-            const s32 max_y = std::max(last_vertex.y, vertex.y);
+            const s32 min_x = std::min(last_x, x);
+            const s32 max_x = std::max(last_x, x);
+            const s32 min_y = std::min(last_y, y);
+            const s32 max_y = std::max(last_y, y);
            const s32 dx = max_x - min_x;
            const s32 dy = max_y - min_y;

            if (dx >= MAX_PRIMITIVE_WIDTH || dy >= MAX_PRIMITIVE_HEIGHT)
            {
-              Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", last_vertex.x, last_vertex.y, vertex.x,
-                              vertex.y);
+              Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", last_x, last_y, x, y);
            }
            else
            {
-              BatchVertex start(last_vertex);
-              BatchVertex end(vertex);
-              FixLineVertexCoordinates(start, end, dx, dy);
-              AddVertex(start);
-              AddVertex(end);
+              s32 start_x = last_x, start_y = last_y;
+              s32 end_x = x, end_y = y;
+              FixLineVertexCoordinates(start_x, start_y, end_x, end_y, dx, dy);
+              AddNewVertex(static_cast<float>(start_x), static_cast<float>(start_y), depth, 1.0f, last_color, 0,
+                           static_cast<u16>(0));
+              AddNewVertex(static_cast<float>(end_x), static_cast<float>(end_y), depth, 1.0f, color, 0,
+                           static_cast<u16>(0));

              // Clamp the segment's left edge into [left, right]. Using `left` for both bounds made the result always
              // equal m_drawing_area.left, so min_x was ignored.
              const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.right));
              const u32 clip_right =
@ -505,7 +540,9 @@ void GPU_HW::LoadVertices()
            }
          }

-          std::memcpy(&last_vertex, &vertex, sizeof(BatchVertex));
+          last_x = x;
+          last_y = y;
+          last_color = color;
        }
      }
    }