GPU/HW: Add JINC2 and xBRZ texture filtering options

Shaders ported from beetle-psx.
This commit is contained in:
Connor McLaughlin
2020-09-11 22:20:19 +10:00
parent 83f64dbc87
commit 6f250a4ff7
19 changed files with 651 additions and 103 deletions

View File

@ -6,10 +6,10 @@
Log_SetChannel(GPU_HW_ShaderGen);
GPU_HW_ShaderGen::GPU_HW_ShaderGen(HostDisplay::RenderAPI render_api, u32 resolution_scale, bool true_color,
bool scaled_dithering, bool texture_filtering, bool uv_limits,
bool scaled_dithering, GPUTextureFilter texture_filtering, bool uv_limits,
bool supports_dual_source_blend)
: m_render_api(render_api), m_resolution_scale(resolution_scale), m_true_color(true_color),
m_scaled_dithering(scaled_dithering), m_texture_filering(texture_filtering), m_uv_limits(uv_limits),
m_scaled_dithering(scaled_dithering), m_texture_filter(texture_filtering), m_uv_limits(uv_limits),
m_glsl(render_api != HostDisplay::RenderAPI::D3D11), m_supports_dual_source_blend(supports_dual_source_blend),
m_use_glsl_interface_blocks(false)
{
@ -148,6 +148,8 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss)
ss << "#define CONSTANT const\n";
ss << "#define VECTOR_EQ(a, b) ((a) == (b))\n";
ss << "#define VECTOR_NEQ(a, b) ((a) != (b))\n";
ss << "#define VECTOR_COMP_EQ(a, b) equal((a), (b))\n";
ss << "#define VECTOR_COMP_NEQ(a, b) notEqual((a), (b))\n";
ss << "#define SAMPLE_TEXTURE(name, coords) texture(name, coords)\n";
ss << "#define LOAD_TEXTURE(name, coords, mip) texelFetch(name, coords, mip)\n";
ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) texelFetchOffset(name, coords, mip, offset)\n";
@ -160,6 +162,8 @@ void GPU_HW_ShaderGen::WriteHeader(std::stringstream& ss)
ss << "#define CONSTANT static const\n";
ss << "#define VECTOR_EQ(a, b) (all((a) == (b)))\n";
ss << "#define VECTOR_NEQ(a, b) (any((a) != (b)))\n";
ss << "#define VECTOR_COMP_EQ(a, b) ((a) == (b))\n";
ss << "#define VECTOR_COMP_NEQ(a, b) ((a) != (b))\n";
ss << "#define SAMPLE_TEXTURE(name, coords) name.Sample(name##_ss, coords)\n";
ss << "#define LOAD_TEXTURE(name, coords, mip) name.Load(int3(coords, mip))\n";
ss << "#define LOAD_TEXTURE_OFFSET(name, coords, mip, offset) name.Load(int3(coords, mip), offset)\n";
@ -578,6 +582,476 @@ std::string GPU_HW_ShaderGen::GenerateBatchVertexShader(bool textured)
return ss.str();
}
// Appends the shader source for the selected texture filter to `ss`.
//
// Each supported filter (Bilinear, JINC2, xBRZ) emits a shader function with the same signature:
//
//   void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
//                               out float4 texcol, out float ialpha)
//
// so the batch fragment shader can call it regardless of which filter is active. Every texel
// fetch goes through SampleFromVRAM() with coordinates clamped to `uv_limits`. `ialpha` is the
// filtered opacity, where a texel equal to TRANSPARENT_PIXEL_COLOR counts as 0 and anything else
// as 1, and `texcol.rgb` is divided by `ialpha` when it is non-zero.
//
// Any other filter value writes nothing to `ss`.
void GPU_HW_ShaderGen::WriteBatchTextureFilter(std::stringstream& ss, GPUTextureFilter texture_filter)
{
  // JINC2 and xBRZ shaders originally from beetle-psx, modified to support filtering mask channel.
  if (texture_filter == GPUTextureFilter::Bilinear)
  {
    ss << R"(
void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
                            out float4 texcol, out float ialpha)
{
  // Compute the coordinates of the four texels we will be interpolating between.
  // Clamp this to the triangle texture coordinates.
  float2 texel_top_left = frac(coords) - float2(0.5, 0.5);
  float2 texel_offset = sign(texel_top_left);
  float4 fcoords = max(coords.xyxy + float4(0.0, 0.0, texel_offset.x, texel_offset.y),
                       float4(0.0, 0.0, 0.0, 0.0));

  // Load four texels.
  float4 s00 = SampleFromVRAM(texpage, clamp(fcoords.xy, uv_limits.xy, uv_limits.zw));
  float4 s10 = SampleFromVRAM(texpage, clamp(fcoords.zy, uv_limits.xy, uv_limits.zw));
  float4 s01 = SampleFromVRAM(texpage, clamp(fcoords.xw, uv_limits.xy, uv_limits.zw));
  float4 s11 = SampleFromVRAM(texpage, clamp(fcoords.zw, uv_limits.xy, uv_limits.zw));

  // Compute alpha from how many texels aren't pixel color 0000h.
  float a00 = float(VECTOR_NEQ(s00, TRANSPARENT_PIXEL_COLOR));
  float a10 = float(VECTOR_NEQ(s10, TRANSPARENT_PIXEL_COLOR));
  float a01 = float(VECTOR_NEQ(s01, TRANSPARENT_PIXEL_COLOR));
  float a11 = float(VECTOR_NEQ(s11, TRANSPARENT_PIXEL_COLOR));

  // Bilinearly interpolate.
  float2 weights = abs(texel_top_left);
  texcol = lerp(lerp(s00, s10, weights.x), lerp(s01, s11, weights.x), weights.y);
  ialpha = lerp(lerp(a00, a10, weights.x), lerp(a01, a11, weights.x), weights.y);

  // Compensate for partially transparent sampling.
  if (ialpha > 0.0)
    texcol.rgb /= float3(ialpha, ialpha, ialpha);
}
)";
  }
  else if (texture_filter == GPUTextureFilter::JINC2)
  {
    ss << R"(
CONSTANT float JINC2_WINDOW_SINC = 0.44;
CONSTANT float JINC2_SINC = 0.82;
CONSTANT float JINC2_AR_STRENGTH = 0.8;

CONSTANT float halfpi = 1.5707963267948966192313216916398;
CONSTANT float pi = 3.1415926535897932384626433832795;
CONSTANT float wa = 1.382300768;
CONSTANT float wb = 2.576105976;

// Calculates the distance between two points
float d(float2 pt1, float2 pt2)
{
  float2 v = pt2 - pt1;
  return sqrt(dot(v,v));
}

float min4(float a, float b, float c, float d)
{
  return min(a, min(b, min(c, d)));
}

float4 min4(float4 a, float4 b, float4 c, float4 d)
{
  return min(a, min(b, min(c, d)));
}

float max4(float a, float b, float c, float d)
{
  return max(a, max(b, max(c, d)));
}

float4 max4(float4 a, float4 b, float4 c, float4 d)
{
  return max(a, max(b, max(c, d)));
}

float4 resampler(float4 x)
{
  float4 res;

  // res = (x==float4(0.0, 0.0, 0.0, 0.0)) ? float4(wa*wb) : sin(x*wa)*sin(x*wb)/(x*x);
  // Need to use mix(.., equal(..)) since we want zero check to be component wise
  res = lerp(sin(x*wa)*sin(x*wb)/(x*x), float4(wa*wb, wa*wb, wa*wb, wa*wb), VECTOR_COMP_EQ(x,float4(0.0, 0.0, 0.0, 0.0)));

  return res;
}

void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
                            out float4 texcol, out float ialpha)
{
  float4 weights[4];

  float2 dx = float2(1.0, 0.0);
  float2 dy = float2(0.0, 1.0);

  float2 pc = coords.xy;

  float2 tc = (floor(pc-float2(0.5,0.5))+float2(0.5,0.5));

  // One weight per tap of the 4x4 neighbourhood, from the distance between the sample
  // position and each tap centre.
  weights[0] = resampler(float4(d(pc, tc -dx -dy), d(pc, tc -dy), d(pc, tc +dx -dy), d(pc, tc+2.0*dx -dy)));
  weights[1] = resampler(float4(d(pc, tc -dx ), d(pc, tc ), d(pc, tc +dx ), d(pc, tc+2.0*dx )));
  weights[2] = resampler(float4(d(pc, tc -dx +dy), d(pc, tc +dy), d(pc, tc +dx +dy), d(pc, tc+2.0*dx +dy)));
  weights[3] = resampler(float4(d(pc, tc -dx+2.0*dy), d(pc, tc +2.0*dy), d(pc, tc +dx+2.0*dy), d(pc, tc+2.0*dx+2.0*dy)));

#define sample_texel(coords) SampleFromVRAM(texpage, clamp((coords), uv_limits.xy, uv_limits.zw))

  // Fetch the 4x4 taps (cXY = column X, row Y) along with their opacity flags (aXY).
  float4 c00 = sample_texel(tc -dx -dy);
  float a00 = float(VECTOR_NEQ(c00, TRANSPARENT_PIXEL_COLOR));
  float4 c10 = sample_texel(tc -dy);
  float a10 = float(VECTOR_NEQ(c10, TRANSPARENT_PIXEL_COLOR));
  float4 c20 = sample_texel(tc +dx -dy);
  float a20 = float(VECTOR_NEQ(c20, TRANSPARENT_PIXEL_COLOR));
  float4 c30 = sample_texel(tc+2.0*dx -dy);
  float a30 = float(VECTOR_NEQ(c30, TRANSPARENT_PIXEL_COLOR));
  float4 c01 = sample_texel(tc -dx );
  float a01 = float(VECTOR_NEQ(c01, TRANSPARENT_PIXEL_COLOR));
  float4 c11 = sample_texel(tc );
  float a11 = float(VECTOR_NEQ(c11, TRANSPARENT_PIXEL_COLOR));
  float4 c21 = sample_texel(tc +dx );
  float a21 = float(VECTOR_NEQ(c21, TRANSPARENT_PIXEL_COLOR));
  float4 c31 = sample_texel(tc+2.0*dx );
  float a31 = float(VECTOR_NEQ(c31, TRANSPARENT_PIXEL_COLOR));
  float4 c02 = sample_texel(tc -dx +dy);
  float a02 = float(VECTOR_NEQ(c02, TRANSPARENT_PIXEL_COLOR));
  float4 c12 = sample_texel(tc +dy);
  float a12 = float(VECTOR_NEQ(c12, TRANSPARENT_PIXEL_COLOR));
  float4 c22 = sample_texel(tc +dx +dy);
  float a22 = float(VECTOR_NEQ(c22, TRANSPARENT_PIXEL_COLOR));
  float4 c32 = sample_texel(tc+2.0*dx +dy);
  float a32 = float(VECTOR_NEQ(c32, TRANSPARENT_PIXEL_COLOR));
  float4 c03 = sample_texel(tc -dx+2.0*dy);
  float a03 = float(VECTOR_NEQ(c03, TRANSPARENT_PIXEL_COLOR));
  float4 c13 = sample_texel(tc +2.0*dy);
  float a13 = float(VECTOR_NEQ(c13, TRANSPARENT_PIXEL_COLOR));
  float4 c23 = sample_texel(tc +dx+2.0*dy);
  float a23 = float(VECTOR_NEQ(c23, TRANSPARENT_PIXEL_COLOR));
  float4 c33 = sample_texel(tc+2.0*dx+2.0*dy);
  float a33 = float(VECTOR_NEQ(c33, TRANSPARENT_PIXEL_COLOR));

#undef sample_texel

  // Get min/max samples
  float4 min_sample = min4(c11, c21, c12, c22);
  float min_sample_alpha = min4(a11, a21, a12, a22);
  float4 max_sample = max4(c11, c21, c12, c22);
  float max_sample_alpha = max4(a11, a21, a12, a22);

  float4 color;
  color = float4(dot(weights[0], float4(c00.x, c10.x, c20.x, c30.x)), dot(weights[0], float4(c00.y, c10.y, c20.y, c30.y)), dot(weights[0], float4(c00.z, c10.z, c20.z, c30.z)), dot(weights[0], float4(c00.w, c10.w, c20.w, c30.w)));
  color+= float4(dot(weights[1], float4(c01.x, c11.x, c21.x, c31.x)), dot(weights[1], float4(c01.y, c11.y, c21.y, c31.y)), dot(weights[1], float4(c01.z, c11.z, c21.z, c31.z)), dot(weights[1], float4(c01.w, c11.w, c21.w, c31.w)));
  color+= float4(dot(weights[2], float4(c02.x, c12.x, c22.x, c32.x)), dot(weights[2], float4(c02.y, c12.y, c22.y, c32.y)), dot(weights[2], float4(c02.z, c12.z, c22.z, c32.z)), dot(weights[2], float4(c02.w, c12.w, c22.w, c32.w)));
  color+= float4(dot(weights[3], float4(c03.x, c13.x, c23.x, c33.x)), dot(weights[3], float4(c03.y, c13.y, c23.y, c33.y)), dot(weights[3], float4(c03.z, c13.z, c23.z, c33.z)), dot(weights[3], float4(c03.w, c13.w, c23.w, c33.w)));
  color = color/(dot(weights[0], float4(1,1,1,1)) + dot(weights[1], float4(1,1,1,1)) + dot(weights[2], float4(1,1,1,1)) + dot(weights[3], float4(1,1,1,1)));

  float alpha;
  alpha = dot(weights[0], float4(a00, a10, a20, a30));
  alpha+= dot(weights[1], float4(a01, a11, a21, a31));
  alpha+= dot(weights[2], float4(a02, a12, a22, a32));
  alpha+= dot(weights[3], float4(a03, a13, a23, a33));
  //alpha = alpha/(weights[0].w + weights[1].w + weights[2].w + weights[3].w);
  alpha = alpha/(dot(weights[0], float4(1,1,1,1)) + dot(weights[1], float4(1,1,1,1)) + dot(weights[2], float4(1,1,1,1)) + dot(weights[3], float4(1,1,1,1)));

  // Anti-ringing
  float4 aux = color;
  float aux_alpha = alpha;
  color = clamp(color, min_sample, max_sample);
  alpha = clamp(alpha, min_sample_alpha, max_sample_alpha);
  color = lerp(aux, color, JINC2_AR_STRENGTH);
  alpha = lerp(aux_alpha, alpha, JINC2_AR_STRENGTH);

  // final sum and weight normalization
  ialpha = alpha;
  texcol = color;

  // Compensate for partially transparent sampling.
  if (ialpha > 0.0)
    texcol.rgb /= float3(ialpha, ialpha, ialpha);
}
)";
  }
  else if (texture_filter == GPUTextureFilter::xBRZ)
  {
    ss << R"(
CONSTANT int BLEND_NONE = 0;
CONSTANT int BLEND_NORMAL = 1;
CONSTANT int BLEND_DOMINANT = 2;
CONSTANT float LUMINANCE_WEIGHT = 1.0;
CONSTANT float EQUAL_COLOR_TOLERANCE = 0.1176470588235294;
CONSTANT float STEEP_DIRECTION_THRESHOLD = 2.2;
CONSTANT float DOMINANT_DIRECTION_THRESHOLD = 3.6;
CONSTANT float4 w = float4(0.2627, 0.6780, 0.0593, 0.5);

float DistYCbCr(float4 pixA, float4 pixB)
{
  const float scaleB = 0.5 / (1.0 - w.b);
  const float scaleR = 0.5 / (1.0 - w.r);
  float4 diff = pixA - pixB;
  float Y = dot(diff, w);
  float Cb = scaleB * (diff.b - Y);
  float Cr = scaleR * (diff.r - Y);

  return sqrt(((LUMINANCE_WEIGHT * Y) * (LUMINANCE_WEIGHT * Y)) + (Cb * Cb) + (Cr * Cr));
}

bool IsPixEqual(const float4 pixA, const float4 pixB)
{
  return (DistYCbCr(pixA, pixB) < EQUAL_COLOR_TOLERANCE);
}

float get_left_ratio(float2 center, float2 origin, float2 direction, float2 scale)
{
  float2 P0 = center - origin;
  float2 proj = direction * (dot(P0, direction) / dot(direction, direction));
  float2 distv = P0 - proj;
  float2 orth = float2(-direction.y, direction.x);
  float side = sign(dot(P0, orth));
  float v = side * length(distv * scale);

  // return step(0, v);
  return smoothstep(-sqrt(2.0)/2.0, sqrt(2.0)/2.0, v);
}

// NOTE(review): the first macro argument is unused; taps are offset from the enclosing
// function's `coords` rather than from the value passed as `coord`. Confirm this is intended.
#define P(coord, xoffs, yoffs) SampleFromVRAM(texpage, clamp(coords + float2((xoffs), (yoffs)), uv_limits.xy, uv_limits.zw))

void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
                            out float4 texcol, out float ialpha)
{
  //---------------------------------------
  // Input Pixel Mapping:  -|x|x|x|-
  //                       x|A|B|C|x
  //                       x|D|E|F|x
  //                       x|G|H|I|x
  //                       -|x|x|x|-

  float2 scale = float2(8.0, 8.0);
  float2 pos = frac(coords.xy) - float2(0.5, 0.5);
  float2 coord = coords.xy - pos;

  // For every tap, keep the sampled .w in <tap>w and replace the tap's .w with its own
  // opacity flag (1.0 unless the texel equals TRANSPARENT_PIXEL_COLOR).
  float4 A = P(coord, -1,-1);
  float Aw = A.w;
  A.w = float(VECTOR_NEQ(A, TRANSPARENT_PIXEL_COLOR));
  float4 B = P(coord, 0,-1);
  float Bw = B.w;
  B.w = float(VECTOR_NEQ(B, TRANSPARENT_PIXEL_COLOR));
  float4 C = P(coord, 1,-1);
  float Cw = C.w;
  C.w = float(VECTOR_NEQ(C, TRANSPARENT_PIXEL_COLOR));
  float4 D = P(coord, -1, 0);
  float Dw = D.w;
  D.w = float(VECTOR_NEQ(D, TRANSPARENT_PIXEL_COLOR));
  float4 E = P(coord, 0, 0);
  float Ew = E.w;
  E.w = float(VECTOR_NEQ(E, TRANSPARENT_PIXEL_COLOR));
  float4 F = P(coord, 1, 0);
  float Fw = F.w;
  F.w = float(VECTOR_NEQ(F, TRANSPARENT_PIXEL_COLOR));
  float4 G = P(coord, -1, 1);
  float Gw = G.w;
  G.w = float(VECTOR_NEQ(G, TRANSPARENT_PIXEL_COLOR));
  float4 H = P(coord, 0, 1);
  float Hw = H.w;
  H.w = float(VECTOR_NEQ(H, TRANSPARENT_PIXEL_COLOR));
  float4 I = P(coord, 1, 1);
  float Iw = I.w;
  // The opacity flag of I must come from I itself, like every other tap.
  I.w = float(VECTOR_NEQ(I, TRANSPARENT_PIXEL_COLOR));

  // blendResult Mapping: x|y|
  //                      w|z|
  int4 blendResult = int4(BLEND_NONE,BLEND_NONE,BLEND_NONE,BLEND_NONE);

  // Preprocess corners
  // Pixel Tap Mapping: -|-|-|-|-
  //                    -|-|B|C|-
  //                    -|D|E|F|x
  //                    -|G|H|I|x
  //                    -|-|x|x|-
  if (!((VECTOR_EQ(E,F) && VECTOR_EQ(H,I)) || (VECTOR_EQ(E,H) && VECTOR_EQ(F,I))))
  {
    float dist_H_F = DistYCbCr(G, E) + DistYCbCr(E, C) + DistYCbCr(P(coord, 0,2), I) + DistYCbCr(I, P(coord, 2,0)) + (4.0 * DistYCbCr(H, F));
    float dist_E_I = DistYCbCr(D, H) + DistYCbCr(H, P(coord, 1,2)) + DistYCbCr(B, F) + DistYCbCr(F, P(coord, 2,1)) + (4.0 * DistYCbCr(E, I));
    bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_H_F) < dist_E_I;
    blendResult.z = ((dist_H_F < dist_E_I) && VECTOR_NEQ(E,F) && VECTOR_NEQ(E,H)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
  }

  // Pixel Tap Mapping: -|-|-|-|-
  //                    -|A|B|-|-
  //                    x|D|E|F|-
  //                    x|G|H|I|-
  //                    -|x|x|-|-
  if (!((VECTOR_EQ(D,E) && VECTOR_EQ(G,H)) || (VECTOR_EQ(D,G) && VECTOR_EQ(E,H))))
  {
    float dist_G_E = DistYCbCr(P(coord, -2,1) , D) + DistYCbCr(D, B) + DistYCbCr(P(coord, -1,2), H) + DistYCbCr(H, F) + (4.0 * DistYCbCr(G, E));
    float dist_D_H = DistYCbCr(P(coord, -2,0) , G) + DistYCbCr(G, P(coord, 0,2)) + DistYCbCr(A, E) + DistYCbCr(E, I) + (4.0 * DistYCbCr(D, H));
    bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_D_H) < dist_G_E;
    blendResult.w = ((dist_G_E > dist_D_H) && VECTOR_NEQ(E,D) && VECTOR_NEQ(E,H)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
  }

  // Pixel Tap Mapping: -|-|x|x|-
  //                    -|A|B|C|x
  //                    -|D|E|F|x
  //                    -|-|H|I|-
  //                    -|-|-|-|-
  if (!((VECTOR_EQ(B,C) && VECTOR_EQ(E,F)) || (VECTOR_EQ(B,E) && VECTOR_EQ(C,F))))
  {
    float dist_E_C = DistYCbCr(D, B) + DistYCbCr(B, P(coord, 1,-2)) + DistYCbCr(H, F) + DistYCbCr(F, P(coord, 2,-1)) + (4.0 * DistYCbCr(E, C));
    float dist_B_F = DistYCbCr(A, E) + DistYCbCr(E, I) + DistYCbCr(P(coord, 0,-2), C) + DistYCbCr(C, P(coord, 2,0)) + (4.0 * DistYCbCr(B, F));
    bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_B_F) < dist_E_C;
    blendResult.y = ((dist_E_C > dist_B_F) && VECTOR_NEQ(E,B) && VECTOR_NEQ(E,F)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
  }

  // Pixel Tap Mapping: -|x|x|-|-
  //                    x|A|B|C|-
  //                    x|D|E|F|-
  //                    -|G|H|-|-
  //                    -|-|-|-|-
  if (!((VECTOR_EQ(A,B) && VECTOR_EQ(D,E)) || (VECTOR_EQ(A,D) && VECTOR_EQ(B,E))))
  {
    float dist_D_B = DistYCbCr(P(coord, -2,0), A) + DistYCbCr(A, P(coord, 0,-2)) + DistYCbCr(G, E) + DistYCbCr(E, C) + (4.0 * DistYCbCr(D, B));
    float dist_A_E = DistYCbCr(P(coord, -2,-1), D) + DistYCbCr(D, H) + DistYCbCr(P(coord, -1,-2), B) + DistYCbCr(B, F) + (4.0 * DistYCbCr(A, E));
    bool dominantGradient = (DOMINANT_DIRECTION_THRESHOLD * dist_D_B) < dist_A_E;
    blendResult.x = ((dist_D_B < dist_A_E) && VECTOR_NEQ(E,D) && VECTOR_NEQ(E,B)) ? ((dominantGradient) ? BLEND_DOMINANT : BLEND_NORMAL) : BLEND_NONE;
  }

  float4 res = E;
  float resW = Ew;

  // Pixel Tap Mapping: -|-|-|-|-
  //                    -|-|B|C|-
  //                    -|D|E|F|x
  //                    -|G|H|I|x
  //                    -|-|x|x|-
  if(blendResult.z != BLEND_NONE)
  {
    float dist_F_G = DistYCbCr(F, G);
    float dist_H_C = DistYCbCr(H, C);
    bool doLineBlend = (blendResult.z == BLEND_DOMINANT ||
                        !((blendResult.y != BLEND_NONE && !IsPixEqual(E, G)) || (blendResult.w != BLEND_NONE && !IsPixEqual(E, C)) ||
                          (IsPixEqual(G, H) && IsPixEqual(H, I) && IsPixEqual(I, F) && IsPixEqual(F, C) && !IsPixEqual(E, I))));

    float2 origin = float2(0.0, 1.0 / sqrt(2.0));
    float2 direction = float2(1.0, -1.0);
    if(doLineBlend)
    {
      bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_F_G <= dist_H_C) && VECTOR_NEQ(E,G) && VECTOR_NEQ(D,G);
      bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_H_C <= dist_F_G) && VECTOR_NEQ(E,C) && VECTOR_NEQ(B,C);
      origin = haveShallowLine? float2(0.0, 0.25) : float2(0.0, 0.5);
      direction.x += haveShallowLine? 1.0: 0.0;
      direction.y -= haveSteepLine? 1.0: 0.0;
    }

    float4 blendPix = lerp(H,F, step(DistYCbCr(E, F), DistYCbCr(E, H)));
    float blendW = lerp(Hw,Fw, step(DistYCbCr(E, F), DistYCbCr(E, H)));
    res = lerp(res, blendPix, get_left_ratio(pos, origin, direction, scale));
    resW = lerp(resW, blendW, get_left_ratio(pos, origin, direction, scale));
  }

  // Pixel Tap Mapping: -|-|-|-|-
  //                    -|A|B|-|-
  //                    x|D|E|F|-
  //                    x|G|H|I|-
  //                    -|x|x|-|-
  if(blendResult.w != BLEND_NONE)
  {
    float dist_H_A = DistYCbCr(H, A);
    float dist_D_I = DistYCbCr(D, I);
    bool doLineBlend = (blendResult.w == BLEND_DOMINANT ||
                        !((blendResult.z != BLEND_NONE && !IsPixEqual(E, A)) || (blendResult.x != BLEND_NONE && !IsPixEqual(E, I)) ||
                          (IsPixEqual(A, D) && IsPixEqual(D, G) && IsPixEqual(G, H) && IsPixEqual(H, I) && !IsPixEqual(E, G))));

    float2 origin = float2(-1.0 / sqrt(2.0), 0.0);
    float2 direction = float2(1.0, 1.0);
    if(doLineBlend)
    {
      bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_H_A <= dist_D_I) && VECTOR_NEQ(E,A) && VECTOR_NEQ(B,A);
      bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_D_I <= dist_H_A) && VECTOR_NEQ(E,I) && VECTOR_NEQ(F,I);
      origin = haveShallowLine? float2(-0.25, 0.0) : float2(-0.5, 0.0);
      direction.y += haveShallowLine? 1.0: 0.0;
      direction.x += haveSteepLine? 1.0: 0.0;
    }

    float4 blendPix = lerp(H,D, step(DistYCbCr(E, D), DistYCbCr(E, H)));
    float blendW = lerp(Hw,Dw, step(DistYCbCr(E, D), DistYCbCr(E, H)));
    res = lerp(res, blendPix, get_left_ratio(pos, origin, direction, scale));
    resW = lerp(resW, blendW, get_left_ratio(pos, origin, direction, scale));
  }

  // Pixel Tap Mapping: -|-|x|x|-
  //                    -|A|B|C|x
  //                    -|D|E|F|x
  //                    -|-|H|I|-
  //                    -|-|-|-|-
  if(blendResult.y != BLEND_NONE)
  {
    float dist_B_I = DistYCbCr(B, I);
    float dist_F_A = DistYCbCr(F, A);
    bool doLineBlend = (blendResult.y == BLEND_DOMINANT ||
                        !((blendResult.x != BLEND_NONE && !IsPixEqual(E, I)) || (blendResult.z != BLEND_NONE && !IsPixEqual(E, A)) ||
                          (IsPixEqual(I, F) && IsPixEqual(F, C) && IsPixEqual(C, B) && IsPixEqual(B, A) && !IsPixEqual(E, C))));

    float2 origin = float2(1.0 / sqrt(2.0), 0.0);
    float2 direction = float2(-1.0, -1.0);

    if(doLineBlend)
    {
      bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_B_I <= dist_F_A) && VECTOR_NEQ(E,I) && VECTOR_NEQ(H,I);
      bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_F_A <= dist_B_I) && VECTOR_NEQ(E,A) && VECTOR_NEQ(D,A);
      origin = haveShallowLine? float2(0.25, 0.0) : float2(0.5, 0.0);
      direction.y -= haveShallowLine? 1.0: 0.0;
      direction.x -= haveSteepLine? 1.0: 0.0;
    }

    float4 blendPix = lerp(F,B, step(DistYCbCr(E, B), DistYCbCr(E, F)));
    float blendW = lerp(Fw,Bw, step(DistYCbCr(E, B), DistYCbCr(E, F)));
    res = lerp(res, blendPix, get_left_ratio(pos, origin, direction, scale));
    resW = lerp(resW, blendW, get_left_ratio(pos, origin, direction, scale));
  }

  // Pixel Tap Mapping: -|x|x|-|-
  //                    x|A|B|C|-
  //                    x|D|E|F|-
  //                    -|G|H|-|-
  //                    -|-|-|-|-
  if(blendResult.x != BLEND_NONE)
  {
    float dist_D_C = DistYCbCr(D, C);
    float dist_B_G = DistYCbCr(B, G);
    bool doLineBlend = (blendResult.x == BLEND_DOMINANT ||
                        !((blendResult.w != BLEND_NONE && !IsPixEqual(E, C)) || (blendResult.y != BLEND_NONE && !IsPixEqual(E, G)) ||
                          (IsPixEqual(C, B) && IsPixEqual(B, A) && IsPixEqual(A, D) && IsPixEqual(D, G) && !IsPixEqual(E, A))));

    float2 origin = float2(0.0, -1.0 / sqrt(2.0));
    float2 direction = float2(-1.0, 1.0);
    if(doLineBlend)
    {
      bool haveShallowLine = (STEEP_DIRECTION_THRESHOLD * dist_D_C <= dist_B_G) && VECTOR_NEQ(E,C) && VECTOR_NEQ(F,C);
      bool haveSteepLine = (STEEP_DIRECTION_THRESHOLD * dist_B_G <= dist_D_C) && VECTOR_NEQ(E,G) && VECTOR_NEQ(H,G);
      origin = haveShallowLine? float2(0.0, -0.25) : float2(0.0, -0.5);
      direction.x -= haveShallowLine? 1.0: 0.0;
      direction.y += haveSteepLine? 1.0: 0.0;
    }

    float4 blendPix = lerp(D,B, step(DistYCbCr(E, B), DistYCbCr(E, D)));
    float blendW = lerp(Dw,Bw, step(DistYCbCr(E, B), DistYCbCr(E, D)));
    res = lerp(res, blendPix, get_left_ratio(pos, origin, direction, scale));
    resW = lerp(resW, blendW, get_left_ratio(pos, origin, direction, scale));
  }

  // res.w carries the blended opacity flag; resW carries the blended original .w channel.
  ialpha = res.w;
  texcol = float4(res.xyz, resW);

  // Compensate for partially transparent sampling.
  if (ialpha > 0.0)
    texcol.rgb /= float3(ialpha, ialpha, ialpha);
}

#undef P

)";
  }
}
std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency,
GPU::TextureMode texture_mode, bool dithering,
bool interlacing)
@ -588,7 +1062,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
const bool use_dual_source =
m_supports_dual_source_blend && ((transparency != GPU_HW::BatchRenderMode::TransparencyDisabled &&
transparency != GPU_HW::BatchRenderMode::OnlyOpaque) ||
m_texture_filering);
m_texture_filter != GPUTextureFilter::Nearest);
std::stringstream ss;
WriteHeader(ss);
@ -606,7 +1080,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DefineMacro(ss, "DITHERING_SCALED", m_scaled_dithering);
DefineMacro(ss, "INTERLACING", interlacing);
DefineMacro(ss, "TRUE_COLOR", m_true_color);
DefineMacro(ss, "TEXTURE_FILTERING", m_texture_filering);
DefineMacro(ss, "TEXTURE_FILTERING", m_texture_filter != GPUTextureFilter::Nearest);
DefineMacro(ss, "UV_LIMITS", m_uv_limits);
DefineMacro(ss, "USE_DUAL_SOURCE", use_dual_source);
@ -708,43 +1182,14 @@ float4 SampleFromVRAM(uint4 texpage, float2 coords)
#endif
}
void BilinearSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
out float4 texcol, out float ialpha)
{
// Compute the coordinates of the four texels we will be interpolating between.
// Clamp this to the triangle texture coordinates.
float2 texel_top_left = frac(coords) - float2(0.5, 0.5);
float2 texel_offset = sign(texel_top_left);
float4 fcoords = max(coords.xyxy + float4(0.0, 0.0, texel_offset.x, texel_offset.y),
float4(0.0, 0.0, 0.0, 0.0));
// Load four texels.
float4 s00 = SampleFromVRAM(texpage, clamp(fcoords.xy, uv_limits.xy, uv_limits.zw));
float4 s10 = SampleFromVRAM(texpage, clamp(fcoords.zy, uv_limits.xy, uv_limits.zw));
float4 s01 = SampleFromVRAM(texpage, clamp(fcoords.xw, uv_limits.xy, uv_limits.zw));
float4 s11 = SampleFromVRAM(texpage, clamp(fcoords.zw, uv_limits.xy, uv_limits.zw));
// Compute alpha from how many texels aren't pixel color 0000h.
float a00 = float(VECTOR_NEQ(s00, TRANSPARENT_PIXEL_COLOR));
float a10 = float(VECTOR_NEQ(s10, TRANSPARENT_PIXEL_COLOR));
float a01 = float(VECTOR_NEQ(s01, TRANSPARENT_PIXEL_COLOR));
float a11 = float(VECTOR_NEQ(s11, TRANSPARENT_PIXEL_COLOR));
// Bilinearly interpolate.
float2 weights = abs(texel_top_left);
texcol = lerp(lerp(s00, s10, weights.x), lerp(s01, s11, weights.x), weights.y);
ialpha = lerp(lerp(a00, a10, weights.x), lerp(a01, a11, weights.x), weights.y);
// Compensate for partially transparent sampling.
if (ialpha > 0.0)
texcol.rgb /= float3(ialpha, ialpha, ialpha);
}
#endif
)";
if (textured)
{
if (m_texture_filter != GPUTextureFilter::Nearest)
WriteBatchTextureFilter(ss, m_texture_filter);
if (m_uv_limits)
{
DeclareFragmentEntryPoint(ss, 1, 1,
@ -794,7 +1239,7 @@ void BilinearSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
float4 texcol;
#if TEXTURE_FILTERING
BilinearSampleFromVRAM(v_texpage, coords, uv_limits, texcol, ialpha);
FilteredSampleFromVRAM(v_texpage, coords, uv_limits, texcol, ialpha);
if (ialpha < 0.5)
discard;
#else
@ -809,7 +1254,7 @@ void BilinearSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
ialpha = 1.0;
#endif
semitransparent = (texcol.a != 0.0);
semitransparent = (texcol.a >= 0.5);
// If not using true color, truncate the framebuffer colors to 5-bit.
#if !TRUE_COLOR