Add a new port of crt-royale.fx (#3260)

- A new port of crt-royale. More faithful to original. It uses the same mask textures.
- The only thing not ported is the original geometry pass. It was replaced by geom curvature code.
- It's configured for 1080p displays. 4k displays need to adjust param mask_triad_size_desired from 3.0 to 4.0.

OBS: It's up to you decide if the two versions should be maintained.
This commit is contained in:
Hyllian
2024-07-26 01:33:01 -03:00
committed by GitHub
parent e455a5e371
commit bf1b023f12
34 changed files with 8404 additions and 2 deletions

View File

@ -0,0 +1,97 @@
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
// PASS SETTINGS:
// gamma-management.h needs to know what kind of pipeline we're using and
// what pass this is in that pipeline. This will become obsolete if/when we
// can #define things like this in the .cgp preset file.
//#define GAMMA_ENCODE_EVERY_FBO
//#define FIRST_PASS
//#define LAST_PASS
//#define SIMULATE_CRT_ON_LCD
//#define SIMULATE_GBA_ON_LCD
//#define SIMULATE_LCD_ON_CRT
//#define SIMULATE_GBA_ON_CRT
////////////////////////////////// INCLUDES //////////////////////////////////
// #included by vertex shader:
#include "../include/gamma-management.fxh"
#include "../include/blur-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
struct out_vertex_p4
{
float2 blur_dxdy : TEXCOORD1;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
void VS_Blur9Fast_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p4 OUT)
{
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
/* float2 texture_size = 1.0/NormalizedNativePixelSize;
float2 output_size = (ViewportSize*BufferToViewportRatio);
float2 video_size = 1.0/NormalizedNativePixelSize;
*/
// float2 texture_size = float2(320.0, 240.0);
float2 texture_size = HALATION_BLUR_texture_size;
float2 output_size = VIEWPORT_SIZE;
// float2 output_size = VIEWPORT_SIZE*NormalizedNativePixelSize/float2(320.0, 240.0);
// float2 output_size = float2(320.0, 240.0);
// float2 output_size = 1.0/NormalizedNativePixelSize;
// Get the uv sample distance between output pixels. Blurs are not generic
// Gaussian resizers, and correct blurs require:
// 1.) IN.output_size == IN.video_size * 2^m, where m is an integer <= 0.
// 2.) mipmap_inputN = "true" for this pass in .cgp preset if m != 0
// 3.) filter_linearN = "true" except for 1x scale nearest neighbor blurs
// Gaussian resizers would upsize using the distance between input texels
// (not output pixels), but we avoid this and consistently blur at the
// destination size. Otherwise, combining statically calculated weights
// with bilinear sample exploitation would result in terrible artifacts.
const float2 dxdy_scale = video_size/output_size;
const float2 dxdy = dxdy_scale/texture_size;
// This blur is horizontal-only, so zero out the vertical offset:
OUT.blur_dxdy = float2(dxdy.x, 0.0);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
float4 PS_Blur9Fast_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p4 VAR) : SV_Target
{
float3 color = tex2Dblur9fast(BLUR9FAST_VERTICAL, vTexCoord, VAR.blur_dxdy);
// Encode and output the blurred image:
return encode_output(float4(color, 1.0));
}

View File

@ -0,0 +1,95 @@
///////////////////////////////// MIT LICENSE ////////////////////////////////
// Copyright (C) 2014 TroggleMonkey
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
// PASS SETTINGS:
// gamma-management.h needs to know what kind of pipeline we're using and
// what pass this is in that pipeline. This will become obsolete if/when we
// can #define things like this in the .cgp preset file.
//#define GAMMA_ENCODE_EVERY_FBO
//#define FIRST_PASS
//#define LAST_PASS
//#define SIMULATE_CRT_ON_LCD
//#define SIMULATE_GBA_ON_LCD
//#define SIMULATE_LCD_ON_CRT
//#define SIMULATE_GBA_ON_CRT
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/gamma-management.fxh"
#include "../include/blur-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
struct out_vertex_p3
{
float2 blur_dxdy : TEXCOORD1;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
void VS_Blur9Fast_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p3 OUT)
{
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
/*
float2 texture_size = 1.0/NormalizedNativePixelSize;
float2 output_size = (ViewportSize*BufferToViewportRatio);
float2 video_size = 1.0/NormalizedNativePixelSize;
*/
// float2 texture_size = float2(320.0, 240.0);
float2 texture_size = BLUR9FAST_VERTICAL_texture_size;
float2 output_size = VIEWPORT_SIZE;
// float2 output_size = VIEWPORT_SIZE/4.0;
// float2 output_size = VIEWPORT_SIZE*NormalizedNativePixelSize/float2(320.0, 240.0);
// float2 output_size = 1.0/NormalizedNativePixelSize;
// Get the uv sample distance between output pixels. Blurs are not generic
// Gaussian resizers, and correct blurs require:
// 1.) IN.output_size == IN.video_size * 2^m, where m is an integer <= 0.
// 2.) mipmap_inputN = "true" for this pass in .cgp preset if m != 0
// 3.) filter_linearN = "true" except for 1x scale nearest neighbor blurs
// Gaussian resizers would upsize using the distance between input texels
// (not output pixels), but we avoid this and consistently blur at the
// destination size. Otherwise, combining statically calculated weights
// with bilinear sample exploitation would result in terrible artifacts.
const float2 dxdy_scale = video_size/output_size;
const float2 dxdy = dxdy_scale/texture_size;
// This blur is vertical-only, so zero out the horizontal offset:
OUT.blur_dxdy = float2(0.0, dxdy.y);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
float4 PS_Blur9Fast_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p3 VAR) : SV_Target
{
float3 color = tex2Dblur9fast(BLOOM_APPROX, vTexCoord, VAR.blur_dxdy);
// Encode and output the blurred image:
return encode_output(float4(color, 1.0));
}

View File

@ -0,0 +1,363 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
////////////////////////////////// INCLUDES //////////////////////////////////
#define ORIG_LINEARIZEDvideo_size VERTICAL_SCANLINES_texture_size
#define ORIG_LINEARIZEDtexture_size VERTICAL_SCANLINES_video_size
#define bloom_approx_scale_x (4.0/3.0)
static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
#include "../include/gamma-management.fxh"
#include "../include/blur-functions.fxh"
#include "../include/scanline-functions.fxh"
#include "../include/bloom-functions.fxh"
/////////////////////////////////// HELPERS //////////////////////////////////
float3 tex2Dresize_gaussian4x4(const sampler2D tex, const float2 tex_uv,
const float2 dxdy, const float2 texture_size, const float2 texture_size_inv,
const float2 tex_uv_to_pixel_scale, const float sigma)
{
// Requires: 1.) All requirements of gamma-management.h must be satisfied!
// 2.) filter_linearN must == "true" in your .cgp preset.
// 3.) mipmap_inputN must == "true" in your .cgp preset if
// IN.output_size << SRC.video_size.
// 4.) dxdy should contain the uv pixel spacing:
// dxdy = max(float2(1.0),
// SRC.video_size/IN.output_size)/SRC.texture_size;
// 5.) texture_size == SRC.texture_size
// 6.) texture_size_inv == float2(1.0)/SRC.texture_size
// 7.) tex_uv_to_pixel_scale == IN.output_size *
// SRC.texture_size / SRC.video_size;
// 8.) sigma is the desired Gaussian standard deviation, in
// terms of output pixels. It should be < ~0.66171875 to
// ensure the first unused sample (outside the 4x4 box) has
// a weight < 1.0/256.0.
// Returns: A true 4x4 Gaussian resize of the input.
// Description:
// Given correct inputs, this Gaussian resizer samples 4 pixel locations
// along each downsized dimension and/or 4 texel locations along each
// upsized dimension. It computes dynamic weights based on the pixel-space
// distance of each sample from the destination pixel. It is arbitrarily
// resizable and higher quality than tex2Dblur3x3_resize, but it's slower.
// TODO: Move this to a more suitable file once there are others like it.
const float denom_inv = 0.5/(sigma*sigma);
// We're taking 4x4 samples, and we're snapping to texels for upsizing.
// Find texture coords for sample 5 (second row, second column):
const float2 curr_texel = tex_uv * texture_size;
const float2 prev_texel =
floor(curr_texel - under_half.xx) + 0.5.xx;
const float2 prev_texel_uv = prev_texel * texture_size_inv;
const float2 snap = float2(dxdy <= texture_size_inv);
const float2 sample5_downsize_uv = tex_uv - 0.5 * dxdy;
const float2 sample5_uv = lerp(sample5_downsize_uv, prev_texel_uv, snap);
// Compute texture coords for other samples:
const float2 dx = float2(dxdy.x, 0.0);
const float2 sample0_uv = sample5_uv - dxdy;
const float2 sample10_uv = sample5_uv + dxdy;
const float2 sample15_uv = sample5_uv + 2.0 * dxdy;
const float2 sample1_uv = sample0_uv + dx;
const float2 sample2_uv = sample0_uv + 2.0 * dx;
const float2 sample3_uv = sample0_uv + 3.0 * dx;
const float2 sample4_uv = sample5_uv - dx;
const float2 sample6_uv = sample5_uv + dx;
const float2 sample7_uv = sample5_uv + 2.0 * dx;
const float2 sample8_uv = sample10_uv - 2.0 * dx;
const float2 sample9_uv = sample10_uv - dx;
const float2 sample11_uv = sample10_uv + dx;
const float2 sample12_uv = sample15_uv - 3.0 * dx;
const float2 sample13_uv = sample15_uv - 2.0 * dx;
const float2 sample14_uv = sample15_uv - dx;
// Load each sample:
const float3 sample0 = tex2D_linearize(tex, sample0_uv).rgb;
const float3 sample1 = tex2D_linearize(tex, sample1_uv).rgb;
const float3 sample2 = tex2D_linearize(tex, sample2_uv).rgb;
const float3 sample3 = tex2D_linearize(tex, sample3_uv).rgb;
const float3 sample4 = tex2D_linearize(tex, sample4_uv).rgb;
const float3 sample5 = tex2D_linearize(tex, sample5_uv).rgb;
const float3 sample6 = tex2D_linearize(tex, sample6_uv).rgb;
const float3 sample7 = tex2D_linearize(tex, sample7_uv).rgb;
const float3 sample8 = tex2D_linearize(tex, sample8_uv).rgb;
const float3 sample9 = tex2D_linearize(tex, sample9_uv).rgb;
const float3 sample10 = tex2D_linearize(tex, sample10_uv).rgb;
const float3 sample11 = tex2D_linearize(tex, sample11_uv).rgb;
const float3 sample12 = tex2D_linearize(tex, sample12_uv).rgb;
const float3 sample13 = tex2D_linearize(tex, sample13_uv).rgb;
const float3 sample14 = tex2D_linearize(tex, sample14_uv).rgb;
const float3 sample15 = tex2D_linearize(tex, sample15_uv).rgb;
// Compute destination pixel offsets for each sample:
const float2 dest_pixel = tex_uv * tex_uv_to_pixel_scale;
const float2 sample0_offset = sample0_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample1_offset = sample1_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample2_offset = sample2_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample3_offset = sample3_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample4_offset = sample4_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample5_offset = sample5_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample6_offset = sample6_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample7_offset = sample7_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample8_offset = sample8_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample9_offset = sample9_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample10_offset = sample10_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample11_offset = sample11_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample12_offset = sample12_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample13_offset = sample13_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample14_offset = sample14_uv * tex_uv_to_pixel_scale - dest_pixel;
const float2 sample15_offset = sample15_uv * tex_uv_to_pixel_scale - dest_pixel;
// Compute Gaussian sample weights:
const float w0 = exp(-LENGTH_SQ(sample0_offset) * denom_inv);
const float w1 = exp(-LENGTH_SQ(sample1_offset) * denom_inv);
const float w2 = exp(-LENGTH_SQ(sample2_offset) * denom_inv);
const float w3 = exp(-LENGTH_SQ(sample3_offset) * denom_inv);
const float w4 = exp(-LENGTH_SQ(sample4_offset) * denom_inv);
const float w5 = exp(-LENGTH_SQ(sample5_offset) * denom_inv);
const float w6 = exp(-LENGTH_SQ(sample6_offset) * denom_inv);
const float w7 = exp(-LENGTH_SQ(sample7_offset) * denom_inv);
const float w8 = exp(-LENGTH_SQ(sample8_offset) * denom_inv);
const float w9 = exp(-LENGTH_SQ(sample9_offset) * denom_inv);
const float w10 = exp(-LENGTH_SQ(sample10_offset) * denom_inv);
const float w11 = exp(-LENGTH_SQ(sample11_offset) * denom_inv);
const float w12 = exp(-LENGTH_SQ(sample12_offset) * denom_inv);
const float w13 = exp(-LENGTH_SQ(sample13_offset) * denom_inv);
const float w14 = exp(-LENGTH_SQ(sample14_offset) * denom_inv);
const float w15 = exp(-LENGTH_SQ(sample15_offset) * denom_inv);
const float weight_sum_inv = 1.0/(
w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7 +
w8 +w9 + w10 + w11 + w12 + w13 + w14 + w15);
// Weight and sum the samples:
const float3 sum = w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 +
w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 +
w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 +
w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15;
return sum * weight_sum_inv;
}
///////////////////////////////// STRUCTURES /////////////////////////////////
struct out_vertex_p2
{
float2 tex_uv : TEXCOORD1;
float2 blur_dxdy : TEXCOORD2;
float2 uv_scanline_step : TEXCOORD3;
float estimated_viewport_size_x : TEXCOORD4;
float2 texture_size_inv : TEXCOORD5;
float2 tex_uv_to_pixel_scale : TEXCOORD6;
float2 output_size : TEXCOORD7;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
void VS_Bloom_Approx(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p2 OUT)
{
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
float2 texture_size = BLOOM_APPROX_texture_size;
float2 output_size = VIEWPORT_SIZE;
OUT.output_size = output_size;
// This vertex shader copies blurs/vertex-shader-blur-one-pass-resize.h,
// except we're using a different source image.
const float2 video_uv = texcoord * texture_size/video_size;
OUT.tex_uv = video_uv * ORIG_LINEARIZEDvideo_size /
ORIG_LINEARIZEDtexture_size;
// The last pass (vertical scanlines) had a viewport y scale, so we can
// use it to calculate a better runtime sigma:
// OUT.estimated_viewport_size_x = video_size.y * geom_aspect_ratio_x/geom_aspect_ratio_y;
OUT.estimated_viewport_size_x = video_size.y * texture_size.x/texture_size.y;
// Get the uv sample distance between output pixels. We're using a resize
// blur, so arbitrary upsizing will be acceptable if filter_linearN =
// "true," and arbitrary downsizing will be acceptable if mipmap_inputN =
// "true" too. The blur will be much more accurate if a true 4x4 Gaussian
// resize is used instead of tex2Dblur3x3_resize (which samples between
// texels even for upsizing).
const float2 dxdy_min_scale = ORIG_LINEARIZEDvideo_size/output_size;
const float2 texture_size_inv = 1.0.xx/ORIG_LINEARIZEDtexture_size;
if(bloom_approx_filter > 1.5) // 4x4 true Gaussian resize
{
// For upsizing, we'll snap to texels and sample the nearest 4.
const float2 dxdy_scale = max(dxdy_min_scale, 1.0.xx);
OUT.blur_dxdy = dxdy_scale * texture_size_inv;
}
else
{
const float2 dxdy_scale = dxdy_min_scale;
OUT.blur_dxdy = dxdy_scale * texture_size_inv;
}
// tex2Dresize_gaussian4x4 needs to know a bit more than the other filters:
OUT.tex_uv_to_pixel_scale = output_size *
ORIG_LINEARIZEDtexture_size / ORIG_LINEARIZEDvideo_size;
OUT.texture_size_inv = texture_size_inv;
// Detecting interlacing again here lets us apply convergence offsets in
// this pass. il_step_multiple contains the (texel, scanline) step
// multiple: 1 for progressive, 2 for interlaced.
const float2 orig_video_size = ORIG_LINEARIZEDvideo_size;
const float y_step = 1.0 + float(is_interlaced(orig_video_size.y));
const float2 il_step_multiple = float2(1.0, y_step);
// Get the uv distance between (texels, same-field scanlines):
OUT.uv_scanline_step = il_step_multiple / ORIG_LINEARIZEDtexture_size;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
float4 PS_Bloom_Approx(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p2 VAR) : SV_Target
{
// Would a viewport-relative size work better for this pass? (No.)
// PROS:
// 1.) Instead of writing an absolute size to user-cgp-constants.h, we'd
// write a viewport scale. That number could be used to directly scale
// the viewport-resolution bloom sigma and/or triad size to a smaller
// scale. This way, we could calculate an optimal dynamic sigma no
// matter how the dot pitch is specified.
// CONS:
// 1.) Texel smearing would be much worse at small viewport sizes, but
// performance would be much worse at large viewport sizes, so there
// would be no easy way to calculate a decent scale.
// 2.) Worse, we could no longer get away with using a constant-size blur!
// Instead, we'd have to face all the same difficulties as the real
// phosphor bloom, which requires static #ifdefs to decide the blur
// size based on the expected triad size...a dynamic value.
// 3.) Like the phosphor bloom, we'd have less control over making the blur
// size correct for an optical blur. That said, we likely overblur (to
// maintain brightness) more than the eye would do by itself: 20/20
// human vision distinguishes ~1 arc minute, or 1/60 of a degree. The
// highest viewing angle recommendation I know of is THX's 40.04 degree
// recommendation, at which 20/20 vision can distinguish about 2402.4
// lines. Assuming the "TV lines" definition, that means 1201.2
// distinct light lines and 1201.2 distinct dark lines can be told
// apart, i.e. 1201.2 pairs of lines. This would correspond to 1201.2
// pairs of alternating lit/unlit phosphors, so 2402.4 phosphors total
// (if they're alternately lit). That's a max of 800.8 triads. Using
// a more popular 30 degree viewing angle recommendation, 20/20 vision
// can distinguish 1800 lines, or 600 triads of alternately lit
// phosphors. In contrast, we currently blur phosphors all the way
// down to 341.3 triads to ensure full brightness.
// 4.) Realistically speaking, we're usually just going to use bilinear
// filtering in this pass anyway, but it only works well to limit
// bandwidth if it's done at a small constant scale.
// Get the constants we need to sample:
float2 output_size = VAR.output_size;
//const sampler2D Source = ORIG_LINEARIZED;
const float2 tex_uv = VAR.tex_uv;
const float2 blur_dxdy = VAR.blur_dxdy;
const float2 texture_size = ORIG_LINEARIZEDtexture_size;
const float2 texture_size_inv = VAR.texture_size_inv;
const float2 tex_uv_to_pixel_scale = VAR.tex_uv_to_pixel_scale;
float2 tex_uv_r, tex_uv_g, tex_uv_b;
if(beam_misconvergence)
{
const float2 uv_scanline_step = VAR.uv_scanline_step;
const float2 convergence_offsets_r = get_convergence_offsets_r_vector();
const float2 convergence_offsets_g = get_convergence_offsets_g_vector();
const float2 convergence_offsets_b = get_convergence_offsets_b_vector();
tex_uv_r = tex_uv - convergence_offsets_r * uv_scanline_step;
tex_uv_g = tex_uv - convergence_offsets_g * uv_scanline_step;
tex_uv_b = tex_uv - convergence_offsets_b * uv_scanline_step;
}
// Get the blur sigma:
const float bloom_approx_sigma = get_bloom_approx_sigma(output_size.x,
VAR.estimated_viewport_size_x);
// Sample the resized and blurred texture, and apply convergence offsets if
// necessary. Applying convergence offsets here triples our samples from
// 16/9/1 to 48/27/3, but faster and easier than sampling BLOOM_APPROX and
// HALATION_BLUR 3 times at full resolution every time they're used.
float3 color_r, color_g, color_b, color;
if(bloom_approx_filter > 1.5)
{
// Use a 4x4 Gaussian resize. This is slower but technically correct.
if(beam_misconvergence)
{
color_r = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_r,
blur_dxdy, texture_size, texture_size_inv,
tex_uv_to_pixel_scale, bloom_approx_sigma);
color_g = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_g,
blur_dxdy, texture_size, texture_size_inv,
tex_uv_to_pixel_scale, bloom_approx_sigma);
color_b = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv_b,
blur_dxdy, texture_size, texture_size_inv,
tex_uv_to_pixel_scale, bloom_approx_sigma);
}
else
{
color = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, tex_uv,
blur_dxdy, texture_size, texture_size_inv,
tex_uv_to_pixel_scale, bloom_approx_sigma);
}
}
else if(bloom_approx_filter > 0.5)
{
// Use a 3x3 resize blur. This is the softest option, because we're
// blurring already blurry bilinear samples. It doesn't play quite as
// nicely with convergence offsets, but it has its charms.
if(beam_misconvergence)
{
color_r = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_r,
blur_dxdy, bloom_approx_sigma);
color_g = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_g,
blur_dxdy, bloom_approx_sigma);
color_b = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv_b,
blur_dxdy, bloom_approx_sigma);
}
else
{
color = tex2Dblur3x3resize(ORIG_LINEARIZED, tex_uv, blur_dxdy);
}
}
else
{
// Use bilinear sampling. This approximates a 4x4 Gaussian resize MUCH
// better than tex2Dblur3x3_resize for the very small sigmas we're
// likely to use at small output resolutions. (This estimate becomes
// too sharp above ~400x300, but the blurs break down above that
// resolution too, unless min_allowed_viewport_triads is high enough to
// keep bloom_approx_scale_x/min_allowed_viewport_triads < ~1.1658025.)
if(beam_misconvergence)
{
color_r = tex2D_linearize(ORIG_LINEARIZED, tex_uv_r).rgb;
color_g = tex2D_linearize(ORIG_LINEARIZED, tex_uv_g).rgb;
color_b = tex2D_linearize(ORIG_LINEARIZED, tex_uv_b).rgb;
}
else
{
color = tex2D_linearize(ORIG_LINEARIZED, tex_uv).rgb;
}
}
// Pack the colors from the red/green/blue beams into a single vector:
if(beam_misconvergence)
{
color = float3(color_r.r, color_g.g, color_b.b);
}
// Encode and output the blurred image:
return encode_output(float4(color, 1.0));
}

View File

@ -0,0 +1,129 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/gamma-management.fxh"
#include "../include/bloom-functions.fxh"
#include "../include/phosphor-mask-resizing.fxh"
#include "../include/scanline-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
struct out_vertex_p10
{
float2 video_uv : TEXCOORD1;
float2 bloom_dxdy : TEXCOORD2;
float bloom_sigma_runtime : TEXCOORD3;
float2 sinangle : TEXCOORD4;
float2 cosangle : TEXCOORD5;
float3 stretch : TEXCOORD6;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
void VS_Bloom_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p10 OUT)
{
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
float2 texture_size = BLOOM_HORIZONTAL_texture_size;
float2 output_size = VIEWPORT_SIZE;
// Screen centering
texcoord = texcoord - float2(centerx,centery)/100.0;
float2 tex_uv = texcoord;
// Our various input textures use different coords:
const float2 video_uv = tex_uv * texture_size/video_size;
OUT.video_uv = video_uv;
// We're horizontally blurring the bloom input (vertically blurred
// brightpass). Get the uv distance between output pixels / input texels
// in the horizontal direction (this pass must NOT resize):
OUT.bloom_dxdy = float2(1.0/texture_size.x, 0.0);
// Calculate a runtime bloom_sigma in case it's needed:
const float mask_tile_size_x = get_resized_mask_tile_size(
output_size, output_size * mask_resize_viewport_scale, false).x;
OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh);
// Precalculate a bunch of useful values we'll need in the fragment
// shader.
OUT.sinangle = sin(float2(geom_x_tilt, geom_y_tilt));
OUT.cosangle = cos(float2(geom_x_tilt, geom_y_tilt));
OUT.stretch = maxscale(OUT.sinangle, OUT.cosangle);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
float4 PS_Bloom_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p10 VAR) : SV_Target
{
VAR.video_uv = (geom_curvature == true) ? transform(VAR.video_uv, VAR.sinangle, VAR.cosangle, VAR.stretch) : VAR.video_uv;
float cval = corner((VAR.video_uv-0.5.xx) * BufferToViewportRatio + 0.5.xx);
// Blur the vertically blurred brightpass horizontally by 9/17/25/43x:
const float bloom_sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
const float3 blurred_brightpass = tex2DblurNfast(BLOOM_VERTICAL,
VAR.video_uv, VAR.bloom_dxdy, bloom_sigma);
// Sample the masked scanlines. Alpha contains the auto-dim factor:
const float3 intensity_dim =
tex2D_linearize(MASKED_SCANLINES, VAR.video_uv).rgb;
const float auto_dim_factor = levels_autodim_temp;
const float undim_factor = 1.0/auto_dim_factor;
// Calculate the mask dimpass, add it to the blurred brightpass, and
// undim (from scanline auto-dim) and amplify (from mask dim) the result:
const float mask_amplify = get_mask_amplify();
const float3 brightpass = tex2D_linearize(BRIGHTPASS,
VAR.video_uv).rgb;
const float3 dimpass = intensity_dim - brightpass;
const float3 phosphor_bloom = (dimpass + blurred_brightpass) *
mask_amplify * undim_factor * levels_contrast;
// Sample the halation texture, and let some light bleed into refractive
// diffusion. Conceptually this occurs before the phosphor bloom, but
// adding it in earlier passes causes black crush in the diffusion colors.
const float3 diffusion_color = levels_contrast * tex2D_linearize(
HALATION_BLUR, VAR.video_uv).rgb;
float3 final_bloom = lerp(phosphor_bloom,
diffusion_color, diffusion_weight);
final_bloom = (geom_curvature == true) ? final_bloom * cval.xxx : final_bloom;
final_bloom = pow(final_bloom.rgb, 1.0/get_output_gamma());
// Encode and output the bloomed image:
return encode_output(float4(final_bloom, 1.0));
}

View File

@ -0,0 +1,83 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/gamma-management.fxh"
#include "../include/bloom-functions.fxh"
#include "../include/phosphor-mask-resizing.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
struct out_vertex_p9
{
float2 tex_uv : TEXCOORD1;
float2 bloom_dxdy : TEXCOORD2;
float bloom_sigma_runtime : TEXCOORD3;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
void VS_Bloom_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p9 OUT)
{
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
float2 texture_size = BLOOM_VERTICAL_texture_size;
float2 output_size = VIEWPORT_SIZE;
OUT.tex_uv = texcoord;
// Get the uv sample distance between output pixels. Calculate dxdy like
// blurs/vertex-shader-blur-fast-vertical.h.
const float2 dxdy_scale = video_size/output_size;
const float2 dxdy = dxdy_scale/texture_size;
// This blur is vertical-only, so zero out the vertical offset:
OUT.bloom_dxdy = float2(0.0, dxdy.y);
// Calculate a runtime bloom_sigma in case it's needed:
const float mask_tile_size_x = get_resized_mask_tile_size(
output_size, output_size * mask_resize_viewport_scale, false).x;
OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
float4 PS_Bloom_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p9 VAR) : SV_Target
{
// Blur the brightpass horizontally with a 9/17/25/43x blur:
const float bloom_sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
const float3 color = tex2DblurNfast(BRIGHTPASS, VAR.tex_uv,
VAR.bloom_dxdy, bloom_sigma);
// Encode and output the blurred image:
return encode_output(float4(color, 1.0));
}

View File

@ -0,0 +1,130 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/gamma-management.fxh"
#include "../include/blur-functions.fxh"
#include "../include/phosphor-mask-resizing.fxh"
#include "../include/scanline-functions.fxh"
#include "../include/bloom-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
struct out_vertex_p8
{
float2 video_uv : TEXCOORD1;
float2 scanline_tex_uv : TEXCOORD2;
float2 blur3x3_tex_uv : TEXCOORD3;
float bloom_sigma_runtime : TEXCOORD4;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
void VS_Brightpass(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p8 OUT)
{
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
float2 tex_uv = texcoord;
float2 texture_size = BRIGHTPASS_texture_size;
float2 output_size = VIEWPORT_SIZE;
// Our various input textures use different coords:
const float2 video_uv = tex_uv * texture_size/video_size;
OUT.video_uv = video_uv;
OUT.scanline_tex_uv = video_uv * MASKED_SCANLINES_video_size /
MASKED_SCANLINES_texture_size;
OUT.blur3x3_tex_uv = video_uv * BLOOM_APPROX_video_size / BLOOM_APPROX_texture_size;
// Calculate a runtime bloom_sigma in case it's needed:
const float mask_tile_size_x = get_resized_mask_tile_size(
output_size, output_size * mask_resize_viewport_scale, false).x;
OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
mask_tile_size_x / mask_triads_per_tile, bloom_diff_thresh);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
float4 PS_Brightpass(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p8 VAR) : SV_Target
{
// Sample the masked scanlines:
const float3 intensity_dim =
tex2D_linearize(MASKED_SCANLINES, VAR.scanline_tex_uv).rgb;
// Get the full intensity, including auto-undimming, and mask compensation:
const float auto_dim_factor = levels_autodim_temp;
const float undim_factor = 1.0/auto_dim_factor;
const float mask_amplify = get_mask_amplify();
const float3 intensity = intensity_dim * undim_factor * mask_amplify *
levels_contrast;
// Sample BLOOM_APPROX to estimate what a straight blur of masked scanlines
// would look like, so we can estimate how much energy we'll receive from
// blooming neighbors:
const float3 phosphor_blur_approx = levels_contrast * tex2D_linearize(
BLOOM_APPROX, VAR.blur3x3_tex_uv).rgb;
// Compute the blur weight for the center texel and the maximum energy we
// expect to receive from neighbors:
const float bloom_sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
const float center_weight = get_center_weight(bloom_sigma);
const float3 max_area_contribution_approx =
max(0.0.xxx, phosphor_blur_approx - center_weight * intensity);
// Assume neighbors will blur 100% of their intensity (blur_ratio = 1.0),
// because it actually gets better results (on top of being very simple),
// but adjust all intensities for the user's desired underestimate factor:
const float3 area_contrib_underestimate =
bloom_underestimate_levels * max_area_contribution_approx;
const float3 intensity_underestimate =
bloom_underestimate_levels * intensity;
// Calculate the blur_ratio, the ratio of intensity we want to blur:
#ifdef BRIGHTPASS_AREA_BASED
// This area-based version changes blur_ratio more smoothly and blurs
// more, clipping less but offering less phosphor differentiation:
const float3 phosphor_blur_underestimate = bloom_underestimate_levels *
phosphor_blur_approx;
const float3 soft_intensity = max(intensity_underestimate,
phosphor_blur_underestimate * mask_amplify);
const float3 blur_ratio_temp =
((1.0.xxx - area_contrib_underestimate) /
soft_intensity - 1.0.xxx) / (center_weight - 1.0);
#else
const float3 blur_ratio_temp =
((1.0.xxx - area_contrib_underestimate) /
intensity_underestimate - 1.0.xxx) / (center_weight - 1.0);
#endif
const float3 blur_ratio = clamp(blur_ratio_temp, 0.0, 1.0);
// Calculate the brightpass based on the auto-dimmed, unamplified, masked
// scanlines, encode if necessary, and return!
const float3 brightpass = intensity_dim *
lerp(blur_ratio, 1.0.xxx, bloom_excess);
return encode_output(float4(brightpass, 1.0));
}

View File

@ -0,0 +1,109 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
// PASS SETTINGS:
// gamma-management.h needs to know what kind of pipeline we're using and
// what pass this is in that pipeline. This will become obsolete if/when we
// can #define things like this in the .cgp preset file.
#define FIRST_PASS
#define SIMULATE_CRT_ON_LCD
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/bind-shader-params.fxh"
#include "../include/gamma-management.fxh"
#include "../include/scanline-functions.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
struct out_vertex
{
float2 tex_uv : TEXCOORD1;
float2 uv_step : TEXCOORD2;
float interlaced : TEXCOORD3;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
void VS_Linearize(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex OUT)
{
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
OUT.tex_uv = texcoord;
// OUT.tex_uv = (floor(texcoord / NormalizedNativePixelSize)+float2(0.5,0.5)) * NormalizedNativePixelSize;
// Save the uv distance between texels:
OUT.uv_step = NormalizedNativePixelSize;
// Detect interlacing: 1.0 = true, 0.0 = false.
OUT.interlaced = is_interlaced(1.0/NormalizedNativePixelSize.y);
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
#define input_texture sBackBuffer
float4 PS_Linearize(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex VAR) : SV_Target
{
// Linearize the input based on CRT gamma and bob interlaced fields.
// Bobbing ensures we can immediately blur without getting artifacts.
// Note: TFF/BFF won't matter for sources that double-weave or similar.
// VAR.tex_uv = (floor(VAR.tex_uv / NormalizedNativePixelSize)+float2(0.5,0.5)) * NormalizedNativePixelSize;
if(interlace_detect)
{
// Sample the current line and an average of the previous/next line;
// tex2D_linearize will decode CRT gamma. Don't bother branching:
const float2 tex_uv = VAR.tex_uv;
const float2 v_step = float2(0.0, VAR.uv_step.y);
const float3 curr_line = tex2D_linearize_first(
input_texture, tex_uv).rgb;
const float3 last_line = tex2D_linearize_first(
input_texture, tex_uv - v_step).rgb;
const float3 next_line = tex2D_linearize_first(
input_texture, tex_uv + v_step).rgb;
const float3 interpolated_line = 0.5 * (last_line + next_line);
// If we're interlacing, determine which field curr_line is in:
const float modulus = VAR.interlaced + 1.0;
const float field_offset =
fmod(FrameCount + float(interlace_bff), modulus);
const float curr_line_texel = tex_uv.y / NormalizedNativePixelSize.y;
// Use under_half to fix a rounding bug around exact texel locations.
const float line_num_last = floor(curr_line_texel - under_half);
const float wrong_field = fmod(line_num_last + field_offset, modulus);
// Select the correct color, and output the result:
const float3 color = lerp(curr_line, interpolated_line, wrong_field);
return encode_output(float4(color, 1.0));
}
else
{
return encode_output(tex2D_linearize_first(input_texture, VAR.tex_uv));
}
}

View File

@ -0,0 +1,130 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/phosphor-mask-resizing.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
struct out_vertex_p6
{
float2 src_tex_uv_wrap : TEXCOORD1;
float2 tile_uv_wrap : TEXCOORD2;
float2 resize_magnification_scale : TEXCOORD3;
float2 src_dxdy : TEXCOORD4;
float2 tile_size_uv : TEXCOORD5;
float2 input_tiles_per_texture : TEXCOORD6;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
void VS_Mask_Resize_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p6 OUT)
{
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
float2 tex_uv = texcoord;
float2 texture_size = MASK_RESIZE_texture_size;
float2 output_size = 0.0625*(VIEWPORT_SIZE);
// First estimate the viewport size (the user will get the wrong number of
// triads if it's wrong and mask_specify_num_triads is 1.0/true).
const float2 estimated_viewport_size =
output_size / mask_resize_viewport_scale;
// Find the final size of our resized phosphor mask tiles. We probably
// estimated the viewport size and MASK_RESIZE output size differently last
// pass, so do not swear they were the same. ;)
const float2 mask_resize_tile_size = get_resized_mask_tile_size(
estimated_viewport_size, output_size, false);
// We'll render resized tiles until filling the output FBO or meeting a
// limit, so compute [wrapped] tile uv coords based on the output uv coords
// and the number of tiles that will fit in the FBO.
const float2 output_tiles_this_pass = output_size / mask_resize_tile_size;
const float2 output_video_uv = tex_uv * texture_size / video_size;
const float2 tile_uv_wrap = output_video_uv * output_tiles_this_pass;
// Get the texel size of an input tile and related values:
const float2 input_tile_size = float2(min(
mask_resize_src_lut_size.x, video_size.x), mask_resize_tile_size.y);
const float2 tile_size_uv = input_tile_size / texture_size;
const float2 input_tiles_per_texture = texture_size / input_tile_size;
// Derive [wrapped] texture uv coords from [wrapped] tile uv coords and
// the tile size in uv coords, and save frac() for the fragment shader.
const float2 src_tex_uv_wrap = tile_uv_wrap * tile_size_uv;
// Output the values we need, including the magnification scale and step:
OUT.tile_uv_wrap = tile_uv_wrap;
OUT.src_tex_uv_wrap = src_tex_uv_wrap;
OUT.resize_magnification_scale = mask_resize_tile_size / input_tile_size;
OUT.src_dxdy = float2(1.0/texture_size.x, 0.0);
OUT.tile_size_uv = tile_size_uv;
OUT.input_tiles_per_texture = input_tiles_per_texture;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
float4 PS_Mask_Resize_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p6 VAR) : SV_Target
{
// The input contains one mask tile horizontally and a number vertically.
// Resize the tile horizontally to its final screen size and repeat it
// until drawing at least mask_resize_num_tiles, leaving it unchanged
// vertically. Lanczos-resizing the phosphor mask achieves much sharper
// results than mipmapping, outputting >= mask_resize_num_tiles makes for
// easier tiled sampling later.
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
// Discard unneeded fragments in case our profile allows real branches.
float2 texture_size = MASK_RESIZE_texture_size;
const float2 tile_uv_wrap = VAR.tile_uv_wrap;
if(get_mask_sample_mode() < 0.5 &&
max(tile_uv_wrap.x, tile_uv_wrap.y) <= mask_resize_num_tiles)
{
const float src_dx = VAR.src_dxdy.x;
const float2 src_tex_uv = frac(VAR.src_tex_uv_wrap);
const float3 pixel_color = downsample_horizontal_sinc_tiled(MASK_RESIZE_VERTICAL,
src_tex_uv, texture_size, VAR.src_dxdy.x,
VAR.resize_magnification_scale.x, VAR.tile_size_uv.x);
// The input LUT was linear RGB, and so is our output:
return float4(pixel_color, 1.0);
}
else
{
discard;
}
#else
discard;
return 1.0.xxxx;
#endif
}

View File

@ -0,0 +1,164 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/phosphor-mask-resizing.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
struct out_vertex_p5
{
float2 src_tex_uv_wrap : TEXCOORD1;
float2 resize_magnification_scale : TEXCOORD2;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
void VS_Mask_Resize_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p5 OUT)
{
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
float2 tex_uv = texcoord;
float2 texture_size = MASK_RESIZE_VERT_texture_size;
float2 output_size = float2(64.0, 0.0625*((VIEWPORT_SIZE).y));
// First estimate the viewport size (the user will get the wrong number of
// triads if it's wrong and mask_specify_num_triads is 1.0/true).
const float viewport_y = output_size.y / mask_resize_viewport_scale.y;
// Now get aspect_ratio from texture_size.
// const float aspect_ratio = geom_aspect_ratio_x / geom_aspect_ratio_y;
const float aspect_ratio = texture_size.x / texture_size.y;
const float2 estimated_viewport_size =
float2(viewport_y * aspect_ratio, viewport_y);
// Estimate the output size of MASK_RESIZE (the next pass). The estimated
// x component shouldn't matter, because we're not using the x result, and
// we're not swearing it's correct (if we did, the x result would influence
// the y result to maintain the tile aspect ratio).
const float2 estimated_mask_resize_output_size =
float2(output_size.y * aspect_ratio, output_size.y);
// Find the final intended [y] size of our resized phosphor mask tiles,
// then the tile size for the current pass (resize y only):
const float2 mask_resize_tile_size = get_resized_mask_tile_size(
estimated_viewport_size, estimated_mask_resize_output_size, false);
const float2 pass_output_tile_size = float2(min(
mask_resize_src_lut_size.x, output_size.x), mask_resize_tile_size.y);
// We'll render resized tiles until filling the output FBO or meeting a
// limit, so compute [wrapped] tile uv coords based on the output uv coords
// and the number of tiles that will fit in the FBO.
const float2 output_tiles_this_pass = output_size / pass_output_tile_size;
const float2 output_video_uv = tex_uv * texture_size / video_size;
const float2 tile_uv_wrap = output_video_uv * output_tiles_this_pass;
// The input LUT is just a single mask tile, so texture uv coords are the
// same as tile uv coords (save frac() for the fragment shader). The
// magnification scale is also straightforward:
OUT.src_tex_uv_wrap = tile_uv_wrap;
OUT.resize_magnification_scale =
pass_output_tile_size / mask_resize_src_lut_size;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
float4 PS_Mask_Resize_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p5 VAR) : SV_Target
{
// Resize the input phosphor mask tile to the final vertical size it will
// appear on screen. Keep 1x horizontal size if possible (IN.output_size
// >= mask_resize_src_lut_size), and otherwise linearly sample horizontally
// to fit exactly one tile. Lanczos-resizing the phosphor mask achieves
// much sharper results than mipmapping, and vertically resizing first
// minimizes the total number of taps required. We output a number of
// resized tiles >= mask_resize_num_tiles for easier tiled sampling later.
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
// Discard unneeded fragments in case our profile allows real branches.
const float2 tile_uv_wrap = VAR.src_tex_uv_wrap;
if(get_mask_sample_mode() < 0.5 &&
tile_uv_wrap.y <= mask_resize_num_tiles)
{
static const float src_dy = 1.0/mask_resize_src_lut_size.y;
const float2 src_tex_uv = frac(VAR.src_tex_uv_wrap);
float3 pixel_color;
// If mask_type is static, this branch will be resolved statically.
#ifdef PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT
if(mask_type < 0.5)
{
pixel_color = downsample_vertical_sinc_tiled(
mask_grille_texture_large, src_tex_uv, mask_resize_src_lut_size,
src_dy, VAR.resize_magnification_scale.y, 1.0);
}
else if(mask_type < 1.5)
{
pixel_color = downsample_vertical_sinc_tiled(
mask_slot_texture_large, src_tex_uv, mask_resize_src_lut_size,
src_dy, VAR.resize_magnification_scale.y, 1.0);
}
else
{
pixel_color = downsample_vertical_sinc_tiled(
mask_shadow_texture_large, src_tex_uv, mask_resize_src_lut_size,
src_dy, VAR.resize_magnification_scale.y, 1.0);
}
#else
if(mask_type < 0.5)
{
pixel_color = downsample_vertical_sinc_tiled(
mask_grille_texture_small, src_tex_uv, mask_resize_src_lut_size,
src_dy, VAR.resize_magnification_scale.y, 1.0);
}
else if(mask_type < 1.5)
{
pixel_color = downsample_vertical_sinc_tiled(
mask_slot_texture_small, src_tex_uv, mask_resize_src_lut_size,
src_dy, VAR.resize_magnification_scale.y, 1.0);
}
else
{
pixel_color = downsample_vertical_sinc_tiled(
mask_shadow_texture_small, src_tex_uv, mask_resize_src_lut_size,
src_dy, VAR.resize_magnification_scale.y, 1.0);
}
#endif
// The input LUT was linear RGB, and so is our output:
return float4(pixel_color, 1.0);
}
else
{
discard;
}
#else
discard;
return 1.0.xxxx;
#endif
}

View File

@ -0,0 +1,283 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
///////////////////////////// SETTINGS MANAGEMENT ////////////////////////////
#include "../include/user-settings.fxh"
#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
////////////////////////////////// INCLUDES //////////////////////////////////
#include "../include/scanline-functions.fxh"
#include "../include/phosphor-mask-resizing.fxh"
#include "../include/bloom-functions.fxh"
#include "../include/gamma-management.fxh"
/////////////////////////////////// HELPERS //////////////////////////////////
float4 tex2Dtiled_mask_linearize(const sampler2D tex,
const float2 tex_uv)
{
// If we're manually tiling a texture, anisotropic filtering can get
// confused. One workaround is to just select the lowest mip level:
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DLOD
// TODO: Use tex2Dlod_linearize with a calculated mip level.
return tex2Dlod_linearize(tex, float4(tex_uv, 0.0, 0.0));
#else
#ifdef ANISOTROPIC_TILING_COMPAT_TEX2DBIAS
return tex2Dbias_linearize(tex, float4(tex_uv, 0.0, -16.0));
#else
return tex2D_linearize(tex, tex_uv);
#endif
#endif
#else
return tex2D_linearize(tex, tex_uv);
#endif
}
///////////////////////////////// STRUCTURES /////////////////////////////////
struct out_vertex_p7
{
// Use explicit semantics so COLORx doesn't clamp values outside [0, 1].
float2 video_uv : TEXCOORD1;
float2 scanline_tex_uv : TEXCOORD2;
float2 blur3x3_tex_uv : TEXCOORD3;
float2 halation_tex_uv : TEXCOORD4;
float2 scanline_texture_size_inv : TEXCOORD5;
float4 mask_tile_start_uv_and_size : TEXCOORD6;
float2 mask_tiles_per_screen : TEXCOORD7;
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
void VS_Scanlines_Horizontal_Apply_Mask(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p7 OUT)
{
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
float2 tex_uv = texcoord;
float2 texture_size = MASKED_SCANLINES_texture_size;
float2 output_size = VIEWPORT_SIZE;
// Our various input textures use different coords.
const float2 video_uv = tex_uv * texture_size/video_size;
const float2 scanline_texture_size_inv =
1.0.xx/VERTICAL_SCANLINES_texture_size;
OUT.video_uv = video_uv;
OUT.scanline_tex_uv = video_uv * VERTICAL_SCANLINES_video_size *
scanline_texture_size_inv;
OUT.blur3x3_tex_uv = video_uv * BLOOM_APPROX_video_size /
BLOOM_APPROX_texture_size;
OUT.halation_tex_uv = video_uv * HALATION_BLUR_video_size /
HALATION_BLUR_texture_size;
OUT.scanline_texture_size_inv = scanline_texture_size_inv;
// Get a consistent name for the final mask texture size. Sample mode 0
// uses the manually resized mask, but ignore it if we never resized.
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
const float mask_sample_mode = get_mask_sample_mode();
const float2 mask_resize_texture_size = mask_sample_mode < 0.5 ?
MASKED_SCANLINES_texture_size : mask_texture_large_size;
const float2 mask_resize_video_size = mask_sample_mode < 0.5 ?
MASKED_SCANLINES_video_size : mask_texture_large_size;
#else
const float2 mask_resize_texture_size = mask_texture_large_size;
const float2 mask_resize_video_size = mask_texture_large_size;
#endif
// Compute mask tile dimensions, starting points, etc.:
float2 mask_tiles_per_screen;
OUT.mask_tile_start_uv_and_size = get_mask_sampling_parameters(
mask_resize_texture_size, mask_resize_video_size, output_size,
mask_tiles_per_screen);
OUT.mask_tiles_per_screen = mask_tiles_per_screen;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
float4 PS_Scanlines_Horizontal_Apply_Mask(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p7 VAR) : SV_Target
{
// This pass: Sample (misconverged?) scanlines to the final horizontal
// resolution, apply halation (bouncing electrons), and apply the phosphor
// mask. Fake a bloom if requested. Unless we fake a bloom, the output
// will be dim from the scanline auto-dim, mask dimming, and low gamma.
// Horizontally sample the current row (a vertically interpolated scanline)
// and account for horizontal convergence offsets, given in units of texels.
// float2 VERTICAL_SCANLINES_texture_size = float2(1.0/NormalizedNativePixelSize.x, ViewportSize.y*BufferToViewportRatio.y);
float2 output_size = VIEWPORT_SIZE;
const float3 scanline_color_dim = sample_rgb_scanline_horizontal(
VERTICAL_SCANLINES, VAR.scanline_tex_uv,
VERTICAL_SCANLINES_texture_size, VAR.scanline_texture_size_inv);
const float auto_dim_factor = levels_autodim_temp;
// Sample the phosphor mask:
const float2 tile_uv_wrap = VAR.video_uv * VAR.mask_tiles_per_screen;
const float2 mask_tex_uv = convert_phosphor_tile_uv_wrap_to_tex_uv(
tile_uv_wrap, VAR.mask_tile_start_uv_and_size);
float3 phosphor_mask_sample;
#ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
const bool sample_orig_luts = get_mask_sample_mode() > 0.5;
#else
static const bool sample_orig_luts = true;
#endif
if(sample_orig_luts)
{
// If mask_type is static, this branch will be resolved statically.
if(mask_type < 0.5)
{
phosphor_mask_sample = tex2D_linearize(
mask_grille_texture_large, mask_tex_uv).rgb;
}
else if(mask_type < 1.5)
{
phosphor_mask_sample = tex2D_linearize(
mask_slot_texture_large, mask_tex_uv).rgb;
}
else
{
phosphor_mask_sample = tex2D_linearize(
mask_shadow_texture_large, mask_tex_uv).rgb;
}
}
else
{
// Sample the resized mask, and avoid tiling artifacts:
phosphor_mask_sample = tex2Dtiled_mask_linearize(
MASK_RESIZE, mask_tex_uv).rgb;
}
// Sample the halation texture (auto-dim to match the scanlines), and
// account for both horizontal and vertical convergence offsets, given
// in units of texels horizontally and same-field scanlines vertically:
const float3 halation_color = tex2D_linearize(
HALATION_BLUR, VAR.halation_tex_uv).rgb;
// Apply halation: Halation models electrons flying around under the glass
// and hitting the wrong phosphors (of any color). It desaturates, so
// average the halation electrons to a scalar. Reduce the local scanline
// intensity accordingly to conserve energy.
const float3 halation_intensity_dim =
dot(halation_color, auto_dim_factor.xxx/3.0).xxx;
const float3 electron_intensity_dim = lerp(scanline_color_dim,
halation_intensity_dim, halation_weight);
// Apply the phosphor mask:
const float3 phosphor_emission_dim = electron_intensity_dim *
phosphor_mask_sample;
#ifdef PHOSPHOR_BLOOM_FAKE
// The BLOOM_APPROX pass approximates a blurred version of a masked
// and scanlined image. It's usually used to compute the brightpass,
// but we can also use it to fake the bloom stage entirely. Caveats:
// 1.) A fake bloom is conceptually different, since we're mixing in a
// fully blurred low-res image, and the biggest implication are:
// 2.) If mask_amplify is incorrect, results deteriorate more quickly.
// 3.) The inaccurate blurring hurts quality in high-contrast areas.
// 4.) The bloom_underestimate_levels parameter seems less sensitive.
// Reverse the auto-dimming and amplify to compensate for mask dimming:
#define PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
#ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
static const float blur_contrast = 1.05;
#else
static const float blur_contrast = 1.0;
#endif
const float mask_amplify = get_mask_amplify();
const float undim_factor = 1.0/auto_dim_factor;
const float3 phosphor_emission =
phosphor_emission_dim * undim_factor * mask_amplify;
// Get a phosphor blur estimate, accounting for convergence offsets:
const float3 electron_intensity = electron_intensity_dim * undim_factor;
const float3 phosphor_blur_approx_soft = tex2D_linearize(
BLOOM_APPROX, VAR.blur3x3_tex_uv).rgb;
const float3 phosphor_blur_approx = lerp(phosphor_blur_approx_soft,
electron_intensity, 0.1) * blur_contrast;
// We could blend between phosphor_emission and phosphor_blur_approx,
// solving for the minimum blend_ratio that avoids clipping past 1.0:
// 1.0 >= total_intensity
// 1.0 >= phosphor_emission * (1.0 - blend_ratio) +
// phosphor_blur_approx * blend_ratio
// blend_ratio = (phosphor_emission - 1.0)/
// (phosphor_emission - phosphor_blur_approx);
// However, this blurs far more than necessary, because it aims for
// full brightness, not minimal blurring. To fix it, base blend_ratio
// on a max area intensity only so it varies more smoothly:
const float3 phosphor_blur_underestimate =
phosphor_blur_approx * bloom_underestimate_levels;
const float3 area_max_underestimate =
phosphor_blur_underestimate * mask_amplify;
#ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
const float3 blend_ratio_temp =
(area_max_underestimate - 1.0.xxx) /
(area_max_underestimate - phosphor_blur_underestimate);
#else
// Try doing it like an area-based brightpass. This is nearly
// identical, but it's worth toying with the code in case I ever
// find a way to make it look more like a real bloom. (I've had
// some promising textures from combining an area-based blend ratio
// for the phosphor blur and a more brightpass-like blend-ratio for
// the phosphor emission, but I haven't found a way to make the
// brightness correct across the whole color range, especially with
// different bloom_underestimate_levels values.)
const float desired_triad_size = lerp(mask_triad_size_desired,
output_size.x/mask_num_triads_desired,
mask_specify_num_triads);
const float bloom_sigma = get_min_sigma_to_blur_triad(
desired_triad_size, bloom_diff_thresh);
const float center_weight = get_center_weight(bloom_sigma);
const float3 max_area_contribution_approx =
max(0.0.xxx, phosphor_blur_approx -
center_weight * phosphor_emission);
const float3 area_contrib_underestimate =
bloom_underestimate_levels * max_area_contribution_approx;
const float3 blend_ratio_temp =
((1.0.xxx - area_contrib_underestimate) /
area_max_underestimate - 1.0.xxx) / (center_weight - 1.0);
#endif
// Clamp blend_ratio in case it's out-of-range, but be SUPER careful:
// min/max/clamp are BIZARRELY broken with lerp (optimization bug?),
// and this redundant sequence avoids bugs, at least on nVidia cards:
const float3 blend_ratio_clamped = max(clamp(blend_ratio_temp, 0.0, 1.0), 0.0);
const float3 blend_ratio = lerp(blend_ratio_clamped, 1.0.xxx, bloom_excess);
// Blend the blurred and unblurred images:
const float3 phosphor_emission_unclipped =
lerp(phosphor_emission, phosphor_blur_approx, blend_ratio);
// Simulate refractive diffusion by reusing the halation sample.
const float3 pixel_color = lerp(phosphor_emission_unclipped,
halation_color, diffusion_weight);
#else
const float3 pixel_color = phosphor_emission_dim;
#endif
// Encode if necessary, and output.
return encode_output(float4(pixel_color, 1.0));
}

View File

@ -0,0 +1,241 @@
///////////////////////////// GPL LICENSE NOTICE /////////////////////////////
// crt-royale: A full-featured CRT shader, with cheese.
// Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
//
// This program is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by the Free
// Software Foundation; either version 2 of the License, or any later version.
//
// This program is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along with
// this program; if not, write to the Free Software Foundation, Inc., 59 Temple
// Place, Suite 330, Boston, MA 02111-1307 USA
#undef FIRST_PASS
////////////////////////////////// INCLUDES //////////////////////////////////
//#include "../include/user-settings.fxh"
//#include "../include/derived-settings-and-constants.fxh"
#include "../include/bind-shader-params.fxh"
#include "../include/scanline-functions.fxh"
//#include "../include/gamma-management.fxh"
///////////////////////////////// STRUCTURES /////////////////////////////////
struct out_vertex_p1
{
// Use explicit semantics so COLORx doesn't clamp values outside [0, 1].
float2 tex_uv : TEXCOORD1;
float2 uv_step : TEXCOORD2; // uv size of a texel (x) and scanline (y)
float2 il_step_multiple : TEXCOORD3; // (1, 1) = progressive, (1, 2) = interlaced
float pixel_height_in_scanlines : TEXCOORD4; // Height of an output pixel in scanlines
};
//////////////////////////////// VERTEX SHADER ///////////////////////////////
// Vertex shader generating a triangle covering the entire screen
void VS_Scanlines_Vertical_Interlacing(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p1 OUT)
{
texcoord.x = (id == 2) ? 2.0 : 0.0;
texcoord.y = (id == 1) ? 2.0 : 0.0;
position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
OUT.tex_uv = texcoord;
float2 texture_size = VERTICAL_SCANLINES_texture_size;
float2 output_size = float2(TEXTURE_SIZE.x, VIEWPORT_SIZE.y);
// Detect interlacing: il_step_multiple indicates the step multiple between
// lines: 1 is for progressive sources, and 2 is for interlaced sources.
// const float2 video_size = 1.0/NormalizedNativePixelSize;
const float y_step = 1.0 + float(is_interlaced(video_size.y));
OUT.il_step_multiple = float2(1.0, y_step);
// Get the uv tex coords step between one texel (x) and scanline (y):
OUT.uv_step = OUT.il_step_multiple / texture_size;
// If shader parameters are used, {min, max}_{sigma, shape} are runtime
// values. Compute {sigma, shape}_range outside of scanline_contrib() so
// they aren't computed once per scanline (6 times per fragment and up to
// 18 times per vertex):
/* const float sigma_range = max(beam_max_sigma, beam_min_sigma) -
beam_min_sigma;
const float shape_range = max(beam_max_shape, beam_min_shape) -
beam_min_shape;
*/
// We need the pixel height in scanlines for antialiased/integral sampling:
const float ph = (video_size.y / output_size.y) /
OUT.il_step_multiple.y;
OUT.pixel_height_in_scanlines = ph;
}
/////////////////////////////// FRAGMENT SHADER //////////////////////////////
float4 PS_Scanlines_Vertical_Interlacing(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p1 VAR) : SV_Target
{
// This pass: Sample multiple (misconverged?) scanlines to the final
// vertical resolution. Temporarily auto-dim the output to avoid clipping.
// Read some attributes into local variables:
const float2 texture_size = VERTICAL_SCANLINES_texture_size;
const float2 texture_size_inv = 1.0/texture_size;
const float2 uv_step = VAR.uv_step;
const float2 il_step_multiple = VAR.il_step_multiple;
const float frame_count = FrameCount;
const float ph = VAR.pixel_height_in_scanlines;
// Get the uv coords of the previous scanline (in this field), and the
// scanline's distance from this sample, in scanlines.
float dist;
const float2 scanline_uv = get_last_scanline_uv(VAR.tex_uv, texture_size,
texture_size_inv, il_step_multiple, frame_count, dist);
// Consider 2, 3, 4, or 6 scanlines numbered 0-5: The previous and next
// scanlines are numbered 2 and 3. Get scanline colors colors (ignore
// horizontal sampling, since since IN.output_size.x = video_size.x).
// NOTE: Anisotropic filtering creates interlacing artifacts, which is why
// ORIG_LINEARIZED bobbed any interlaced input before this pass.
const float2 v_step = float2(0.0, uv_step.y);
const float3 scanline2_color = tex2D_linearize(ORIG_LINEARIZED, scanline_uv).rgb;
const float3 scanline3_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + v_step).rgb;
float3 scanline0_color, scanline1_color, scanline4_color, scanline5_color,
scanline_outside_color;
float dist_round;
// Use scanlines 0, 1, 4, and 5 for a total of 6 scanlines:
if(beam_num_scanlines > 5.5)
{
scanline1_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
scanline4_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
scanline0_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv - 2.0 * v_step).rgb;
scanline5_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 3.0 * v_step).rgb;
}
// Use scanlines 1, 4, and either 0 or 5 for a total of 5 scanlines:
else if(beam_num_scanlines > 4.5)
{
scanline1_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
scanline4_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
// dist is in [0, 1]
dist_round = round(dist);
const float2 sample_0_or_5_uv_off =
lerp(-2.0 * v_step, 3.0 * v_step, dist_round);
// Call this "scanline_outside_color" to cope with the conditional
// scanline number:
scanline_outside_color = tex2D_linearize(
ORIG_LINEARIZED, scanline_uv + sample_0_or_5_uv_off).rgb;
}
// Use scanlines 1 and 4 for a total of 4 scanlines:
else if(beam_num_scanlines > 3.5)
{
scanline1_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
scanline4_color =
tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
}
// Use scanline 1 or 4 for a total of 3 scanlines:
else if(beam_num_scanlines > 2.5)
{
// dist is in [0, 1]
dist_round = round(dist);
const float2 sample_1or4_uv_off =
lerp(-v_step, 2.0 * v_step, dist_round);
scanline_outside_color = tex2D_linearize(
ORIG_LINEARIZED, scanline_uv + sample_1or4_uv_off).rgb;
}
// Compute scanline contributions, accounting for vertical convergence.
// Vertical convergence offsets are in units of current-field scanlines.
// dist2 means "positive sample distance from scanline 2, in scanlines:"
float3 dist2 = dist.xxx;
if(beam_misconvergence)
{
const float3 convergence_offsets_vert_rgb =
get_convergence_offsets_y_vector();
dist2 = dist.xxx - convergence_offsets_vert_rgb;
}
// Calculate {sigma, shape}_range outside of scanline_contrib so it's only
// done once per pixel (not 6 times) with runtime params. Don't reuse the
// vertex shader calculations, so static versions can be constant-folded.
const float sigma_range = max(beam_max_sigma, beam_min_sigma) -
beam_min_sigma;
const float shape_range = max(beam_max_shape, beam_min_shape) -
beam_min_shape;
// Calculate and sum final scanline contributions, starting with lines 2/3.
// There is no normalization step, because we're not interpolating a
// continuous signal. Instead, each scanline is an additive light source.
const float3 scanline2_contrib = scanline_contrib(dist2,
scanline2_color, ph, sigma_range, shape_range);
const float3 scanline3_contrib = scanline_contrib(abs(1.0.xxx - dist2),
scanline3_color, ph, sigma_range, shape_range);
float3 scanline_intensity = scanline2_contrib + scanline3_contrib;
if(beam_num_scanlines > 5.5)
{
const float3 scanline0_contrib =
scanline_contrib(dist2 + 2.0.xxx, scanline0_color,
ph, sigma_range, shape_range);
const float3 scanline1_contrib =
scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
ph, sigma_range, shape_range);
const float3 scanline4_contrib =
scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
ph, sigma_range, shape_range);
const float3 scanline5_contrib =
scanline_contrib(abs(3.0.xxx - dist2), scanline5_color,
ph, sigma_range, shape_range);
scanline_intensity += scanline0_contrib + scanline1_contrib +
scanline4_contrib + scanline5_contrib;
}
else if(beam_num_scanlines > 4.5)
{
const float3 scanline1_contrib =
scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
ph, sigma_range, shape_range);
const float3 scanline4_contrib =
scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
ph, sigma_range, shape_range);
const float3 dist0or5 = lerp(
dist2 + 2.0.xxx, 3.0.xxx - dist2, dist_round);
const float3 scanline0or5_contrib = scanline_contrib(
dist0or5, scanline_outside_color, ph, sigma_range, shape_range);
scanline_intensity += scanline1_contrib + scanline4_contrib +
scanline0or5_contrib;
}
else if(beam_num_scanlines > 3.5)
{
const float3 scanline1_contrib =
scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
ph, sigma_range, shape_range);
const float3 scanline4_contrib =
scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
ph, sigma_range, shape_range);
scanline_intensity += scanline1_contrib + scanline4_contrib;
}
else if(beam_num_scanlines > 2.5)
{
const float3 dist1or4 = lerp(
dist2 + 1.0.xxx, 2.0.xxx - dist2, dist_round);
const float3 scanline1or4_contrib = scanline_contrib(
dist1or4, scanline_outside_color, ph, sigma_range, shape_range);
scanline_intensity += scanline1or4_contrib;
}
// Auto-dim the image to avoid clipping, encode if necessary, and output.
// My original idea was to compute a minimal auto-dim factor and put it in
// the alpha channel, but it wasn't working, at least not reliably. This
// is faster anyway, levels_autodim_temp = 0.5 isn't causing banding.
return encode_output(float4(scanline_intensity * levels_autodim_temp, 1.0));
}