GPU: Add base functionality for GL streaming buffers

This commit is contained in:
Connor McLaughlin
2019-11-02 22:21:56 +10:00
parent c52c0608ae
commit 407fee9ec3
8 changed files with 189 additions and 59 deletions

View File

@ -49,9 +49,9 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
case Primitive::Polygon:
{
// if we're drawing quads, we need to create a degenerate triangle to restart the triangle strip
bool restart_strip = (rc.quad_polygon && !m_batch.vertices.empty());
bool restart_strip = (rc.quad_polygon && !IsFlushed());
if (restart_strip)
m_batch.vertices.push_back(m_batch.vertices.back());
AddDuplicateVertex();
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
@ -60,28 +60,15 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
u32 buffer_pos = 1;
for (u32 i = 0; i < num_vertices; i++)
{
HWVertex hw_vert;
hw_vert.color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
const u32 color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
const VertexPosition vp{command_ptr[buffer_pos++]};
hw_vert.x = vp.x;
hw_vert.y = vp.y;
hw_vert.texpage = texpage;
const u16 packed_texcoord = textured ? Truncate16(command_ptr[buffer_pos++]) : 0;
if (textured)
{
const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(command_ptr[buffer_pos++]));
hw_vert.texcoord = HWVertex::PackTexcoord(texcoord_x, texcoord_y);
}
else
{
hw_vert.texcoord = 0;
}
(m_batch_current_vertex_ptr++)->Set(vp.x, vp.y, color, texpage, packed_texcoord);
m_batch.vertices.push_back(hw_vert);
if (restart_strip)
{
m_batch.vertices.push_back(m_batch.vertices.back());
AddDuplicateVertex();
restart_strip = false;
}
}
@ -91,9 +78,9 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
case Primitive::Rectangle:
{
// if we're drawing quads, we need to create a degenerate triangle to restart the triangle strip
const bool restart_strip = !m_batch.vertices.empty();
const bool restart_strip = !IsFlushed();
if (restart_strip)
m_batch.vertices.push_back(m_batch.vertices.back());
AddDuplicateVertex();
u32 buffer_pos = 1;
const u32 color = rc.color_for_first_vertex;
@ -132,16 +119,13 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
const u16 tex_right = tex_left + static_cast<u16>(rectangle_width);
const u16 tex_bottom = tex_top + static_cast<u16>(rectangle_height);
m_batch.vertices.push_back(
HWVertex{pos_left, pos_top, color, texpage, HWVertex::PackTexcoord(tex_left, tex_top)});
(m_batch_current_vertex_ptr++)->Set(pos_left, pos_top, color, texpage, tex_left, tex_top);
if (restart_strip)
m_batch.vertices.push_back(m_batch.vertices.back());
m_batch.vertices.push_back(
HWVertex{pos_right, pos_top, color, texpage, HWVertex::PackTexcoord(tex_right, tex_top)});
m_batch.vertices.push_back(
HWVertex{pos_left, pos_bottom, color, texpage, HWVertex::PackTexcoord(tex_left, tex_bottom)});
m_batch.vertices.push_back(
HWVertex{pos_right, pos_bottom, color, texpage, HWVertex::PackTexcoord(tex_right, tex_bottom)});
AddDuplicateVertex();
(m_batch_current_vertex_ptr++)->Set(pos_right, pos_top, color, texpage, tex_right, tex_top);
(m_batch_current_vertex_ptr++)->Set(pos_left, pos_bottom, color, texpage, tex_left, tex_bottom);
(m_batch_current_vertex_ptr++)->Set(pos_right, pos_bottom, color, texpage, tex_right, tex_bottom);
}
break;
@ -155,7 +139,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
{
const u32 color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color;
const VertexPosition vp{command_ptr[buffer_pos++]};
m_batch.vertices.push_back(HWVertex{vp.x.GetValue(), vp.y.GetValue(), color});
(m_batch_current_vertex_ptr++)->Set(vp.x, vp.y, color, 0, 0);
}
}
break;
@ -166,6 +150,12 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command
}
}
void GPU_HW::AddDuplicateVertex()
{
std::memcpy(m_batch_current_vertex_ptr, m_batch_current_vertex_ptr - 1, sizeof(HWVertex));
m_batch_current_vertex_ptr++;
}
void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
{
*left = m_drawing_area.left * m_resolution_scale;
@ -567,8 +557,6 @@ GPU_HW::HWPrimitive GPU_HW::GetPrimitiveForCommand(RenderCommand rc)
return HWPrimitive::Triangles;
}
void GPU_HW::InvalidateVRAMReadCache() {}
void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr)
{
TextureMode texture_mode;
@ -612,10 +600,10 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
rc.transparency_enable ? m_render_state.transparency_mode : TransparencyMode::Disabled;
const HWPrimitive rc_primitive = GetPrimitiveForCommand(rc);
const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false;
const u32 max_added_vertices = num_vertices + 2;
if (!IsFlushed())
{
const u32 max_added_vertices = num_vertices + 2;
const bool buffer_overflow = (m_batch.vertices.size() + max_added_vertices) >= MAX_BATCH_VERTEX_COUNT;
const bool buffer_overflow = GetBatchVertexSpace() < max_added_vertices;
if (buffer_overflow || rc_primitive == HWPrimitive::LineStrip || m_batch.texture_mode != texture_mode ||
m_batch.transparency_mode != transparency_mode || m_batch.primitive != rc_primitive ||
dithering_enable != m_batch.dithering || m_render_state.IsTexturePageChanged() ||
@ -625,6 +613,10 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32
}
}
// map buffer if it's not already done
if (!m_batch_current_vertex_ptr)
MapBatchVertexPointer(max_added_vertices);
// update state
m_batch.primitive = rc_primitive;
m_batch.texture_mode = texture_mode;

View File

@ -38,13 +38,24 @@ protected:
s32 y;
u32 color;
u32 texpage;
u32 texcoord;
u32 texcoord; // 16-bit texcoords are needed for 256 extent rectangles
// 16-bit texcoords are needed for 256 extent rectangles
static u32 PackTexcoord(u16 x, u16 y) { return ZeroExtend32(x) | (ZeroExtend32(y) << 16); }
// Fills this vertex from a packed texture coordinate. The low 8 bits of packed_texcoord are forwarded
// as the X coordinate and the remaining upper bits as the Y coordinate.
ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_, u32 texpage_, u16 packed_texcoord)
{
  const u16 texcoord_x = packed_texcoord & 0xFF;
  const u16 texcoord_y = packed_texcoord >> 8;
  Set(x_, y_, color_, texpage_, texcoord_x, texcoord_y);
}
// Fills every field of this vertex. The two 16-bit texture coordinates are combined into the single
// 32-bit texcoord field: X occupies the low 16 bits, Y the high 16 bits.
ALWAYS_INLINE void Set(s32 x_, s32 y_, u32 color_, u32 texpage_, u16 texcoord_x, u16 texcoord_y)
{
  const auto texcoord_low = ZeroExtend32(texcoord_x);
  const auto texcoord_high = ZeroExtend32(texcoord_y) << 16;
  texcoord = texcoord_low | texcoord_high;
  texpage = texpage_;
  color = color_;
  y = y_;
  x = x_;
}
};
struct HWRenderBatch
struct HWBatchConfig
{
u32 texture_page_x;
u32 texture_page_y;
@ -56,8 +67,6 @@ protected:
std::array<u8, 4> texture_window_values;
bool dithering;
std::vector<HWVertex> vertices;
// We need two-pass rendering when using BG-FG blending and texturing, as the transparency can be enabled
// on a per-pixel basis, and the opaque pixels shouldn't be blended at all.
bool NeedsTwoPassRendering() const
@ -75,6 +84,7 @@ protected:
};
static constexpr u32 VERTEX_BUFFER_SIZE = 1 * 1024 * 1024;
static constexpr u32 MIN_BATCH_VERTEX_COUNT = 6;
static constexpr u32 MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(HWVertex);
static constexpr u32 TEXTURE_TILE_SIZE = 256;
static constexpr u32 TEXTURE_TILE_X_COUNT = VRAM_WIDTH / TEXTURE_TILE_SIZE;
@ -89,9 +99,14 @@ protected:
static_cast<float>(rgba >> 24) * (1.0f / 255.0f));
}
virtual void InvalidateVRAMReadCache();
virtual void InvalidateVRAMReadCache() = 0;
bool IsFlushed() const { return m_batch.vertices.empty(); }
virtual void MapBatchVertexPointer(u32 required_vertices) = 0;
// Number of HWVertex slots between the write cursor and the end pointer of the current batch range.
u32 GetBatchVertexSpace() const { return static_cast<u32>(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr); }
// Number of HWVertex slots between the start pointer of the current batch range and the write cursor.
u32 GetBatchVertexCount() const { return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); }
// True when the write cursor equals the start pointer, i.e. no vertices are pending. This also holds
// when both pointers are nullptr (their initial values).
bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; }
void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) override;
@ -108,7 +123,13 @@ protected:
std::string GenerateFillFragmentShader();
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
HWRenderBatch m_batch = {};
HWBatchConfig m_batch = {};
HWVertex* m_batch_start_vertex_ptr = nullptr;
HWVertex* m_batch_end_vertex_ptr = nullptr;
HWVertex* m_batch_current_vertex_ptr = nullptr;
u32 m_batch_base_vertex = 0;
u32 m_resolution_scale = 1;
u32 m_max_resolution_scale = 1;
bool m_true_color = false;
@ -119,4 +140,5 @@ private:
void GenerateShaderHeader(std::stringstream& ss);
void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr);
void AddDuplicateVertex();
};

View File

@ -62,7 +62,6 @@ void GPU_HW_OpenGL::RestoreGraphicsAPIState()
glLineWidth(static_cast<float>(m_resolution_scale));
UpdateDrawingArea();
glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
glBindVertexArray(m_vao_id);
}
@ -132,6 +131,19 @@ void GPU_HW_OpenGL::InvalidateVRAMReadCache()
m_vram_read_texture_dirty = true;
}
// Maps a region of the vertex stream buffer and points the batch start/current/end vertex pointers at it.
// required_vertices is the number of HWVertex slots requested; it is passed to the stream buffer as a
// byte count. The first Map() argument is sizeof(HWVertex) -- presumably the alignment; confirm against
// GL::StreamBuffer. Asserts that no batch mapping is currently active.
void GPU_HW_OpenGL::MapBatchVertexPointer(u32 required_vertices)
{
  Assert(!m_batch_start_vertex_ptr);

  const auto required_bytes = required_vertices * sizeof(HWVertex);
  const GL::StreamBuffer::MappingResult mapping = m_vertex_stream_buffer->Map(sizeof(HWVertex), required_bytes);

  HWVertex* const first_vertex = static_cast<HWVertex*>(mapping.pointer);
  m_batch_base_vertex = mapping.index_aligned;
  m_batch_start_vertex_ptr = first_vertex;
  m_batch_current_vertex_ptr = first_vertex;
  m_batch_end_vertex_ptr = first_vertex + mapping.space_aligned;
}
std::tuple<s32, s32> GPU_HW_OpenGL::ConvertToFramebufferCoordinates(s32 x, s32 y)
{
return std::make_tuple(x, static_cast<s32>(static_cast<s32>(VRAM_HEIGHT) - y));
@ -217,9 +229,11 @@ void GPU_HW_OpenGL::DestroyFramebuffer()
void GPU_HW_OpenGL::CreateVertexBuffer()
{
glGenBuffers(1, &m_vertex_buffer);
glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
glBufferData(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, nullptr, GL_STREAM_DRAW);
m_vertex_stream_buffer = GL::StreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE);
if (!m_vertex_stream_buffer)
Panic("Failed to create vertex streaming buffer");
m_vertex_stream_buffer->Bind();
glGenVertexArrays(1, &m_vao_id);
glBindVertexArray(m_vao_id);
@ -638,35 +652,36 @@ void GPU_HW_OpenGL::UpdateVRAMReadTexture()
// Submits the pending vertex batch to OpenGL and resets the batch so a new one can be started.
// Does nothing when there are no pending vertices.
// NOTE(review): this listing appears to show both the vector-based (m_batch.vertices) statements and
// their pointer-based (vertex_count / m_vertex_stream_buffer) counterparts side by side.
void GPU_HW_OpenGL::FlushRender()
{
if (m_batch.vertices.empty())
const u32 vertex_count = GetBatchVertexCount();
if (vertex_count == 0)
return;
// Refresh the VRAM read texture first if it has been marked dirty.
if (m_vram_read_texture_dirty)
UpdateVRAMReadTexture();
m_stats.num_batches++;
m_stats.num_vertices += static_cast<u32>(m_batch.vertices.size());
m_stats.num_vertices += vertex_count;
Assert((m_batch.vertices.size() * sizeof(HWVertex)) <= VERTEX_BUFFER_SIZE);
glBufferSubData(GL_ARRAY_BUFFER, 0, static_cast<GLsizei>(sizeof(HWVertex) * m_batch.vertices.size()),
m_batch.vertices.data());
// Unmap with the number of bytes actually written, then bind the stream buffer for the draw calls.
m_vertex_stream_buffer->Unmap(vertex_count * sizeof(HWVertex));
m_vertex_stream_buffer->Bind();
// Clear the batch pointers; with start == current the batch reports as flushed and
// DispatchRenderCommand will map a fresh range on the next command.
m_batch_start_vertex_ptr = nullptr;
m_batch_end_vertex_ptr = nullptr;
m_batch_current_vertex_ptr = nullptr;
// Lookup table indexed by the numeric value of m_batch.primitive.
static constexpr std::array<GLenum, 4> gl_primitives = {{GL_LINES, GL_LINE_STRIP, GL_TRIANGLES, GL_TRIANGLE_STRIP}};
// NOTE(review): every draw below starts at vertex 0, while MapBatchVertexPointer records the mapping's
// index_aligned in m_batch_base_vertex. Confirm whether the first-vertex argument should be
// m_batch_base_vertex once the stream buffer can hand out ranges that do not begin at offset 0.
if (m_batch.NeedsTwoPassRendering())
{
// Two passes over the same vertices: first with the OnlyTransparent draw state, then OnlyOpaque.
SetDrawState(HWBatchRenderMode::OnlyTransparent);
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, static_cast<GLsizei>(m_batch.vertices.size()));
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
SetDrawState(HWBatchRenderMode::OnlyOpaque);
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, static_cast<GLsizei>(m_batch.vertices.size()));
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
}
else
{
// Single pass using the render mode selected by the batch configuration.
SetDrawState(m_batch.GetRenderMode());
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, static_cast<GLsizei>(m_batch.vertices.size()));
glDrawArrays(gl_primitives[static_cast<u8>(m_batch.primitive)], 0, vertex_count);
}
m_batch.vertices.clear();
}
std::unique_ptr<GPU> GPU::CreateHardwareOpenGLRenderer()

View File

@ -1,5 +1,6 @@
#pragma once
#include "common/gl_program.h"
#include "common/gl_stream_buffer.h"
#include "common/gl_texture.h"
#include "glad.h"
#include "gpu_hw.h"
@ -31,6 +32,7 @@ protected:
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void FlushRender() override;
void InvalidateVRAMReadCache() override;
void MapBatchVertexPointer(u32 required_vertices) override;
private:
struct GLStats
@ -62,7 +64,7 @@ private:
std::unique_ptr<GL::Texture> m_vram_downsample_texture;
std::unique_ptr<GL::Texture> m_display_texture;
GLuint m_vertex_buffer = 0;
std::unique_ptr<GL::StreamBuffer> m_vertex_stream_buffer;
GLuint m_vao_id = 0;
GLuint m_attributeless_vao_id = 0;