Add a new port of crt-royale.fx (#3260)

- A new port of crt-royale that is more faithful to the original. It uses the same mask textures. - The only thing not ported is the original geometry pass; it was replaced by the geom curvature code. - It's configured for 1080p displays. For 4k displays, change the param mask_triad_size_desired from 3.0 to 4.0. Note: it's up to you to decide whether both versions should be maintained.
2025-06-18 13:05:46 -04:00 · 2024-07-26 01:33:01 -03:00
parent e455a5e371
commit bf1b023f12
34 changed files with 8404 additions and 2 deletions
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-horizontal.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-horizontal.fxh
@ -0,0 +1,97 @@
+/////////////////////////////////  MIT LICENSE  ////////////////////////////////
+
+//  Copyright (C) 2014 TroggleMonkey
+//
+//  Permission is hereby granted, free of charge, to any person obtaining a copy
+//  of this software and associated documentation files (the "Software"), to
+//  deal in the Software without restriction, including without limitation the
+//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+//  sell copies of the Software, and to permit persons to whom the Software is
+//  furnished to do so, subject to the following conditions:
+//  
+//  The above copyright notice and this permission notice shall be included in
+//  all copies or substantial portions of the Software.
+//
+//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+//  IN THE SOFTWARE.
+
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+//  PASS SETTINGS:
+//  gamma-management.h needs to know what kind of pipeline we're using and
+//  what pass this is in that pipeline.  This will become obsolete if/when we
+//  can #define things like this in the .cgp preset file.
+//#define GAMMA_ENCODE_EVERY_FBO
+//#define FIRST_PASS
+//#define LAST_PASS
+//#define SIMULATE_CRT_ON_LCD
+//#define SIMULATE_GBA_ON_LCD
+//#define SIMULATE_LCD_ON_CRT
+//#define SIMULATE_GBA_ON_CRT
+
+
+//////////////////////////////////  INCLUDES  //////////////////////////////////
+
+//  #included by vertex shader:
+#include "../include/gamma-management.fxh"
+#include "../include/blur-functions.fxh"
+
+/////////////////////////////////  STRUCTURES  /////////////////////////////////
+
+struct out_vertex_p4    //  Extra interpolant written by VS_Blur9Fast_Horizontal and read by PS_Blur9Fast_Horizontal.
+{
+    float2 blur_dxdy        : TEXCOORD1;    //  uv sample distance for the blur; the vertex shader sets its y component to 0.0.
+};
+
+
+////////////////////////////////  VERTEX SHADER  ///////////////////////////////
+
+
+// Vertex shader for the horizontal 9-tap blur pass: emits a full-screen triangle
+void VS_Blur9Fast_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p4 OUT)
+{
+    //  Vertices 0/1/2 receive texcoords (0,0), (0,2) and (2,0).  Mapping those
+    //  to clip space yields a single triangle that covers the whole viewport.
+    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
+    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
+
+    //  These two locals deliberately keep their names: video_size is not
+    //  declared in this file, so it may be resolved in terms of them.
+    const float2 texture_size = HALATION_BLUR_texture_size;
+    const float2 output_size  = VIEWPORT_SIZE;
+
+    //  uv sample distance between output pixels.  The blurs are not generic
+    //  Gaussian resizers: they are always evaluated at the destination size,
+    //  because statically calculated weights combined with bilinear sample
+    //  exploitation would otherwise produce terrible artifacts.  The original
+    //  notes require output_size == video_size * 2^m for an integer m <= 0.
+    const float2 uv_step = (video_size/output_size)/texture_size;
+
+    //  Horizontal-only blur: keep the x step and zero the y step.
+    OUT.blur_dxdy = float2(uv_step.x, 0.0);
+}
+
+///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
+
+float4 PS_Blur9Fast_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p4 VAR) : SV_Target
+{
+    //  Run tex2Dblur9fast on the BLUR9FAST_VERTICAL sampler with the step
+    //  supplied by the vertex shader (its y component is zero).
+    const float3 blurred = tex2Dblur9fast(BLUR9FAST_VERTICAL, vTexCoord, VAR.blur_dxdy);
+    //  Alpha is forced to 1.0 before the result goes through encode_output().
+    const float4 opaque = float4(blurred, 1.0);
+    return encode_output(opaque);
+}
+
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-vertical.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/blur9fast-vertical.fxh
@ -0,0 +1,95 @@
+/////////////////////////////////  MIT LICENSE  ////////////////////////////////
+
+//  Copyright (C) 2014 TroggleMonkey
+//
+//  Permission is hereby granted, free of charge, to any person obtaining a copy
+//  of this software and associated documentation files (the "Software"), to
+//  deal in the Software without restriction, including without limitation the
+//  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+//  sell copies of the Software, and to permit persons to whom the Software is
+//  furnished to do so, subject to the following conditions:
+//  
+//  The above copyright notice and this permission notice shall be included in
+//  all copies or substantial portions of the Software.
+//
+//  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+//  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+//  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+//  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+//  IN THE SOFTWARE.
+
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+//  PASS SETTINGS:
+//  gamma-management.h needs to know what kind of pipeline we're using and
+//  what pass this is in that pipeline.  This will become obsolete if/when we
+//  can #define things like this in the .cgp preset file.
+//#define GAMMA_ENCODE_EVERY_FBO
+//#define FIRST_PASS
+//#define LAST_PASS
+//#define SIMULATE_CRT_ON_LCD
+//#define SIMULATE_GBA_ON_LCD
+//#define SIMULATE_LCD_ON_CRT
+//#define SIMULATE_GBA_ON_CRT
+
+
+//////////////////////////////////  INCLUDES  //////////////////////////////////
+
+#include "../include/gamma-management.fxh"
+#include "../include/blur-functions.fxh"
+
+/////////////////////////////////  STRUCTURES  /////////////////////////////////
+
+struct out_vertex_p3    //  Extra interpolant written by VS_Blur9Fast_Vertical and read by PS_Blur9Fast_Vertical.
+{
+    float2 blur_dxdy        : TEXCOORD1;    //  uv sample distance for the blur; the vertex shader sets its x component to 0.0.
+};
+
+
+////////////////////////////////  VERTEX SHADER  ///////////////////////////////
+
+
+// Vertex shader for the vertical 9-tap blur pass: emits a full-screen triangle
+void VS_Blur9Fast_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p3 OUT)
+{
+    //  Vertices 0/1/2 receive texcoords (0,0), (0,2) and (2,0).  Mapping those
+    //  to clip space yields a single triangle that covers the whole viewport.
+    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
+    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
+
+    //  These two locals deliberately keep their names: video_size is not
+    //  declared in this file, so it may be resolved in terms of them.
+    const float2 texture_size = BLUR9FAST_VERTICAL_texture_size;
+    const float2 output_size  = VIEWPORT_SIZE;
+
+    //  uv sample distance between output pixels.  The blurs are not generic
+    //  Gaussian resizers: they are always evaluated at the destination size,
+    //  because statically calculated weights combined with bilinear sample
+    //  exploitation would otherwise produce terrible artifacts.  The original
+    //  notes require output_size == video_size * 2^m for an integer m <= 0.
+    const float2 uv_step = (video_size/output_size)/texture_size;
+
+    //  Vertical-only blur: keep the y step and zero the x step.
+    OUT.blur_dxdy = float2(0.0, uv_step.y);
+}
+
+///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
+
+float4 PS_Blur9Fast_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p3 VAR) : SV_Target
+{
+    //  Run tex2Dblur9fast on the BLOOM_APPROX sampler with the step supplied
+    //  by the vertex shader (its x component is zero).
+    const float3 blurred = tex2Dblur9fast(BLOOM_APPROX, vTexCoord, VAR.blur_dxdy);
+    //  Alpha is forced to 1.0 before the result goes through encode_output().
+    const float4 opaque = float4(blurred, 1.0);
+    return encode_output(opaque);
+}
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-approx.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-approx.fxh
@ -0,0 +1,363 @@
+/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
+
+//  crt-royale: A full-featured CRT shader, with cheese.
+//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
+//
+//  This program is free software; you can redistribute it and/or modify it
+//  under the terms of the GNU General Public License as published by the Free
+//  Software Foundation; either version 2 of the License, or any later version.
+//
+//  This program is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+//  more details.
+//
+//  You should have received a copy of the GNU General Public License along with
+//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+//  Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+//////////////////////////////////  INCLUDES  //////////////////////////////////
+
+#define ORIG_LINEARIZEDvideo_size   VERTICAL_SCANLINES_texture_size
+#define ORIG_LINEARIZEDtexture_size VERTICAL_SCANLINES_video_size
+//  NOTE(review): the two aliases above map *video_size to a *_texture_size name and *texture_size to a *_video_size name; confirm the cross-over is intentional.
+#define bloom_approx_scale_x (4.0/3.0)
+static const float max_viewport_size_x = 1080.0*1024.0*(4.0/3.0);    //  Not referenced in the visible part of this file; presumably consumed by the headers included below -- verify.
+
+#include "../include/user-settings.fxh"
+#include "../include/derived-settings-and-constants.fxh"
+#include "../include/bind-shader-params.fxh"
+#include "../include/gamma-management.fxh"
+#include "../include/blur-functions.fxh"
+#include "../include/scanline-functions.fxh"
+#include "../include/bloom-functions.fxh"
+
+///////////////////////////////////  HELPERS  //////////////////////////////////
+
+float3 tex2Dresize_gaussian4x4(const sampler2D tex, const float2 tex_uv,
+    const float2 dxdy, const float2 texture_size, const float2 texture_size_inv,
+    const float2 tex_uv_to_pixel_scale, const float sigma)
+{
+    //  Requires:   1.) All requirements of gamma-management.fxh must be satisfied
+    //                  (every tap below is read through tex2D_linearize).
+    //              2.) Linear filtering must be enabled on tex, plus mipmapping
+    //                  if IN.output_size << SRC.video_size.
+    //              3.) dxdy should contain the uv pixel spacing:
+    //                      dxdy = max(float2(1.0),
+    //                          SRC.video_size/IN.output_size)/SRC.texture_size;
+    //              4.) texture_size == SRC.texture_size
+    //              5.) texture_size_inv == float2(1.0)/SRC.texture_size
+    //              6.) tex_uv_to_pixel_scale == IN.output_size *
+    //                      SRC.texture_size / SRC.video_size;
+    //              7.) sigma is the desired Gaussian standard deviation, in
+    //                  terms of output pixels.  It should be < ~0.66171875 to
+    //                  ensure the first unused sample (outside the 4x4 box) has
+    //                  a weight < 1.0/256.0.
+    //  Returns:    A true 4x4 Gaussian resize of the input (rgb only).
+    //  Description:
+    //  Given correct inputs, this Gaussian resizer samples 4 pixel locations
+    //  along each downsized dimension and/or 4 texel locations along each
+    //  upsized dimension.  It computes dynamic weights based on the pixel-space
+    //  distance of each sample from the destination pixel.  It is arbitrarily
+    //  resizable and higher quality than tex2Dblur3x3resize, but it's slower.
+    //  TODO: Move this to a more suitable file once there are others like it.
+    const float denom_inv = 0.5/(sigma*sigma);    //  1/(2*sigma^2): the factor applied to squared distances inside exp() below.
+    //  The 16 taps form a 4x4 grid numbered row-major (sample0..sample15).  On
+    //  upsized axes the grid is snapped to texels.  Anchor = sample 5 (second row, second column):
+    const float2 curr_texel = tex_uv * texture_size;    //  tex_uv expressed in texel units
+    const float2 prev_texel =
+        floor(curr_texel - under_half.xx) + 0.5.xx;    //  under_half is defined outside this file
+    const float2 prev_texel_uv = prev_texel * texture_size_inv;
+    const float2 snap = float2(dxdy <= texture_size_inv);    //  per axis: 1.0 when the tap spacing is at most one texel, else 0.0
+    const float2 sample5_downsize_uv = tex_uv - 0.5 * dxdy;
+    const float2 sample5_uv = lerp(sample5_downsize_uv, prev_texel_uv, snap);    //  snap == 1 selects prev_texel_uv, snap == 0 the half-step offset
+    //  Derive the other 15 tap coordinates from sample 5, the diagonal taps (0, 10, 15) and a horizontal step:
+    const float2 dx = float2(dxdy.x, 0.0);
+    const float2 sample0_uv = sample5_uv - dxdy;
+    const float2 sample10_uv = sample5_uv + dxdy;
+    const float2 sample15_uv = sample5_uv + 2.0 * dxdy;
+    const float2 sample1_uv = sample0_uv + dx;
+    const float2 sample2_uv = sample0_uv + 2.0 * dx;
+    const float2 sample3_uv = sample0_uv + 3.0 * dx;
+    const float2 sample4_uv = sample5_uv - dx;
+    const float2 sample6_uv = sample5_uv + dx;
+    const float2 sample7_uv = sample5_uv + 2.0 * dx;
+    const float2 sample8_uv = sample10_uv - 2.0 * dx;
+    const float2 sample9_uv = sample10_uv - dx;
+    const float2 sample11_uv = sample10_uv + dx;
+    const float2 sample12_uv = sample15_uv - 3.0 * dx;
+    const float2 sample13_uv = sample15_uv - 2.0 * dx;
+    const float2 sample14_uv = sample15_uv - dx;
+    //  Fetch every tap through tex2D_linearize and keep only rgb:
+    const float3 sample0 = tex2D_linearize(tex, sample0_uv).rgb;
+    const float3 sample1 = tex2D_linearize(tex, sample1_uv).rgb;
+    const float3 sample2 = tex2D_linearize(tex, sample2_uv).rgb;
+    const float3 sample3 = tex2D_linearize(tex, sample3_uv).rgb;
+    const float3 sample4 = tex2D_linearize(tex, sample4_uv).rgb;
+    const float3 sample5 = tex2D_linearize(tex, sample5_uv).rgb;
+    const float3 sample6 = tex2D_linearize(tex, sample6_uv).rgb;
+    const float3 sample7 = tex2D_linearize(tex, sample7_uv).rgb;
+    const float3 sample8 = tex2D_linearize(tex, sample8_uv).rgb;
+    const float3 sample9 = tex2D_linearize(tex, sample9_uv).rgb;
+    const float3 sample10 = tex2D_linearize(tex, sample10_uv).rgb;
+    const float3 sample11 = tex2D_linearize(tex, sample11_uv).rgb;
+    const float3 sample12 = tex2D_linearize(tex, sample12_uv).rgb;
+    const float3 sample13 = tex2D_linearize(tex, sample13_uv).rgb;
+    const float3 sample14 = tex2D_linearize(tex, sample14_uv).rgb;
+    const float3 sample15 = tex2D_linearize(tex, sample15_uv).rgb;
+    //  Convert each tap's uv to destination-pixel units and take its offset from the destination pixel:
+    const float2 dest_pixel = tex_uv * tex_uv_to_pixel_scale;
+    const float2 sample0_offset = sample0_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample1_offset = sample1_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample2_offset = sample2_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample3_offset = sample3_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample4_offset = sample4_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample5_offset = sample5_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample6_offset = sample6_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample7_offset = sample7_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample8_offset = sample8_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample9_offset = sample9_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample10_offset = sample10_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample11_offset = sample11_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample12_offset = sample12_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample13_offset = sample13_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample14_offset = sample14_uv * tex_uv_to_pixel_scale - dest_pixel;
+    const float2 sample15_offset = sample15_uv * tex_uv_to_pixel_scale - dest_pixel;
+    //  Unnormalized Gaussian weight per tap: exp(-LENGTH_SQ(offset) * denom_inv)
+    const float w0 = exp(-LENGTH_SQ(sample0_offset) * denom_inv);
+    const float w1 = exp(-LENGTH_SQ(sample1_offset) * denom_inv);
+    const float w2 = exp(-LENGTH_SQ(sample2_offset) * denom_inv);
+    const float w3 = exp(-LENGTH_SQ(sample3_offset) * denom_inv);
+    const float w4 = exp(-LENGTH_SQ(sample4_offset) * denom_inv);
+    const float w5 = exp(-LENGTH_SQ(sample5_offset) * denom_inv);
+    const float w6 = exp(-LENGTH_SQ(sample6_offset) * denom_inv);
+    const float w7 = exp(-LENGTH_SQ(sample7_offset) * denom_inv);
+    const float w8 = exp(-LENGTH_SQ(sample8_offset) * denom_inv);
+    const float w9 = exp(-LENGTH_SQ(sample9_offset) * denom_inv);
+    const float w10 = exp(-LENGTH_SQ(sample10_offset) * denom_inv);
+    const float w11 = exp(-LENGTH_SQ(sample11_offset) * denom_inv);
+    const float w12 = exp(-LENGTH_SQ(sample12_offset) * denom_inv);
+    const float w13 = exp(-LENGTH_SQ(sample13_offset) * denom_inv);
+    const float w14 = exp(-LENGTH_SQ(sample14_offset) * denom_inv);
+    const float w15 = exp(-LENGTH_SQ(sample15_offset) * denom_inv);
+    const float weight_sum_inv = 1.0/(
+        w0 + w1 + w2 + w3 + w4 + w5 + w6 + w7 +
+        w8 +w9 + w10 + w11 + w12 + w13 + w14 + w15);
+    //  Weighted sum of the taps, then normalize by the total weight:
+    const float3 sum = w0 * sample0 + w1 * sample1 + w2 * sample2 + w3 * sample3 +
+        w4 * sample4 + w5 * sample5 + w6 * sample6 + w7 * sample7 +
+        w8 * sample8 + w9 * sample9 + w10 * sample10 + w11 * sample11 +
+        w12 * sample12 + w13 * sample13 + w14 * sample14 + w15 * sample15;
+    return sum * weight_sum_inv;
+}
+
+/////////////////////////////////  STRUCTURES  /////////////////////////////////
+
+struct out_vertex_p2    //  Interpolants written by VS_Bloom_Approx and read by PS_Bloom_Approx.
+{
+    float2 tex_uv                       : TEXCOORD1;    //  uv used to sample ORIG_LINEARIZED
+    float2 blur_dxdy                    : TEXCOORD2;    //  tap spacing in uv: (ORIG_LINEARIZEDvideo_size/output_size, clamped to >= 1 for the 4x4 filter) / ORIG_LINEARIZEDtexture_size
+    float2 uv_scanline_step             : TEXCOORD3;    //  (1, 1 or 2 when is_interlaced) / ORIG_LINEARIZEDtexture_size; scales the convergence offsets
+    float estimated_viewport_size_x     : TEXCOORD4;    //  video_size.y * texture_size.x/texture_size.y; second argument of get_bloom_approx_sigma
+    float2 texture_size_inv             : TEXCOORD5;    //  1.0 / ORIG_LINEARIZEDtexture_size
+    float2 tex_uv_to_pixel_scale        : TEXCOORD6;    //  output_size * ORIG_LINEARIZEDtexture_size / ORIG_LINEARIZEDvideo_size
+    float2 output_size                  : TEXCOORD7;    //  VIEWPORT_SIZE as seen by the vertex shader
+};
+
+
+
+////////////////////////////////  VERTEX SHADER  ///////////////////////////////
+
+// Vertex shader for the bloom-approximation pass: emits a full-screen triangle
+// and precomputes the sampling constants consumed by PS_Bloom_Approx.
+void VS_Bloom_Approx(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p2 OUT)
+{
+    //  Vertices 0/1/2 receive texcoords (0,0), (0,2) and (2,0).  Mapping those
+    //  to clip space yields a single triangle that covers the whole viewport.
+    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
+    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
+
+    //  These two locals deliberately keep their names: video_size is not
+    //  declared in this file, so it may be resolved in terms of them.
+    const float2 texture_size = BLOOM_APPROX_texture_size;
+    const float2 output_size  = VIEWPORT_SIZE;
+    OUT.output_size = output_size;
+
+    //  Same recipe as blurs/vertex-shader-blur-one-pass-resize.h, except that
+    //  the source image is ORIG_LINEARIZED: rescale this pass's uv into the
+    //  source texture's uv space.
+    const float2 pass_uv = texcoord * texture_size/video_size;
+    OUT.tex_uv = pass_uv * ORIG_LINEARIZEDvideo_size /
+        ORIG_LINEARIZEDtexture_size;
+
+    //  Viewport width estimate used for a better runtime sigma.  This port
+    //  derives it from this pass's texture aspect ratio rather than from
+    //  geom_aspect_ratio_x/geom_aspect_ratio_y.
+    OUT.estimated_viewport_size_x = video_size.y * texture_size.x/texture_size.y;
+
+    //  uv sample distance between output pixels.  The 4x4 true Gaussian resize
+    //  (bloom_approx_filter > 1.5) snaps to texels and samples the nearest 4
+    //  when upsizing, so its step is never allowed below one source texel; the
+    //  other filters use the raw ratio.
+    const float2 src_texel_uv = 1.0.xx/ORIG_LINEARIZEDtexture_size;
+    float2 step_scale = ORIG_LINEARIZEDvideo_size/output_size;
+    if(bloom_approx_filter > 1.5)
+    {
+        step_scale = max(step_scale, 1.0.xx);
+    }
+    OUT.blur_dxdy = step_scale * src_texel_uv;
+
+    //  Extra constants that only tex2Dresize_gaussian4x4 needs:
+    OUT.texture_size_inv = src_texel_uv;
+    OUT.tex_uv_to_pixel_scale = output_size *
+        ORIG_LINEARIZEDtexture_size / ORIG_LINEARIZEDvideo_size;
+
+    //  Interlacing is detected again here so convergence offsets can be
+    //  applied in this pass.  The (texel, scanline) step multiple is 1 for
+    //  progressive content and 2 along y for interlaced content.
+    const float2 orig_video_size = ORIG_LINEARIZEDvideo_size;
+    const float field_step = 1.0 + float(is_interlaced(orig_video_size.y));
+    //  uv distance between (texels, same-field scanlines):
+    OUT.uv_scanline_step = float2(1.0, field_step) / ORIG_LINEARIZEDtexture_size;
+}
+
+
+///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
+
+float4 PS_Bloom_Approx(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p2 VAR) : SV_Target
+{
+    //  DESIGN NOTE -- why this pass keeps an absolute size instead of a
+    //  viewport-relative one:
+    //  In favor of viewport-relative sizing:
+    //    * A viewport scale could directly scale the viewport-resolution bloom
+    //      sigma and/or triad size, giving an optimal dynamic sigma however the
+    //      dot pitch is specified.
+    //  Against it (these win):
+    //    * Texel smearing gets much worse at small viewports while performance
+    //      gets much worse at large ones, so no single scale is decent.
+    //    * A constant-size blur would no longer be possible; like the real
+    //      phosphor bloom, the blur size would have to be picked by static
+    //      #ifdefs from the expected triad size, which is a dynamic value.
+    //    * There would be less control over making the blur size optically
+    //      correct.  We probably already overblur (to maintain brightness):
+    //      20/20 vision resolves ~1 arc minute, so at THX's 40.04 degree
+    //      viewing angle it separates about 2402.4 lines = 1201.2 light/dark
+    //      pairs = 2402.4 alternately lit phosphors = at most 800.8 triads; at
+    //      the more popular 30 degrees it is 1800 lines, or 600 triads.  This
+    //      shader blurs all the way down to 341.3 triads for full brightness.
+    //    * In practice this pass usually just uses bilinear filtering, which
+    //      only limits bandwidth well at a small constant scale.
+
+    //  Values shared by every sampling path:
+    const float2 base_uv = VAR.tex_uv;
+    const float2 step_uv = VAR.blur_dxdy;
+    const float2 texture_size = ORIG_LINEARIZEDtexture_size;
+    const float2 texel_uv = VAR.texture_size_inv;
+    const float2 uv_to_pixel = VAR.tex_uv_to_pixel_scale;
+    const float sigma = get_bloom_approx_sigma(VAR.output_size.x,
+        VAR.estimated_viewport_size_x);
+
+    //  bloom_approx_filter selects the resampler: > 1.5 is the 4x4 Gaussian
+    //  resize (slower but technically correct), > 0.5 is the 3x3 resize blur
+    //  (softest, since it blurs already blurry bilinear samples), anything
+    //  else is plain bilinear sampling (a much better match for a 4x4
+    //  Gaussian resize at the very small sigmas used at small output sizes).
+    float3 color;
+    if(beam_misconvergence)
+    {
+        //  Each beam samples at its own convergence offset.  This triples the
+        //  sample count (16/9/1 -> 48/27/3), but it is faster and easier than
+        //  sampling BLOOM_APPROX and HALATION_BLUR 3 times at full resolution
+        //  every time they're used.
+        const float2 scan_step = VAR.uv_scanline_step;
+        const float2 uv_r = base_uv - get_convergence_offsets_r_vector() * scan_step;
+        const float2 uv_g = base_uv - get_convergence_offsets_g_vector() * scan_step;
+        const float2 uv_b = base_uv - get_convergence_offsets_b_vector() * scan_step;
+        float3 beam_r, beam_g, beam_b;
+        if(bloom_approx_filter > 1.5)
+        {
+            beam_r = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, uv_r, step_uv,
+                texture_size, texel_uv, uv_to_pixel, sigma);
+            beam_g = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, uv_g, step_uv,
+                texture_size, texel_uv, uv_to_pixel, sigma);
+            beam_b = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, uv_b, step_uv,
+                texture_size, texel_uv, uv_to_pixel, sigma);
+        }
+        else if(bloom_approx_filter > 0.5)
+        {
+            beam_r = tex2Dblur3x3resize(ORIG_LINEARIZED, uv_r, step_uv, sigma);
+            beam_g = tex2Dblur3x3resize(ORIG_LINEARIZED, uv_g, step_uv, sigma);
+            beam_b = tex2Dblur3x3resize(ORIG_LINEARIZED, uv_b, step_uv, sigma);
+        }
+        else
+        {
+            beam_r = tex2D_linearize(ORIG_LINEARIZED, uv_r).rgb;
+            beam_g = tex2D_linearize(ORIG_LINEARIZED, uv_g).rgb;
+            beam_b = tex2D_linearize(ORIG_LINEARIZED, uv_b).rgb;
+        }
+        //  Pack the red/green/blue beam results into a single vector:
+        color = float3(beam_r.r, beam_g.g, beam_b.b);
+    }
+    else if(bloom_approx_filter > 1.5)
+    {
+        color = tex2Dresize_gaussian4x4(ORIG_LINEARIZED, base_uv, step_uv,
+            texture_size, texel_uv, uv_to_pixel, sigma);
+    }
+    else if(bloom_approx_filter > 0.5)
+    {
+        //  This path calls the three-argument form (no sigma), unlike the
+        //  per-beam calls above.
+        color = tex2Dblur3x3resize(ORIG_LINEARIZED, base_uv, step_uv);
+    }
+    else
+    {
+        color = tex2D_linearize(ORIG_LINEARIZED, base_uv).rgb;
+    }
+
+    //  Encode and output the blurred image with alpha forced to 1.0:
+    return encode_output(float4(color, 1.0));
+}
+
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.fxh
@ -0,0 +1,129 @@
+/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
+
+//  crt-royale: A full-featured CRT shader, with cheese.
+//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
+//
+//  This program is free software; you can redistribute it and/or modify it
+//  under the terms of the GNU General Public License as published by the Free
+//  Software Foundation; either version 2 of the License, or any later version.
+//
+//  This program is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+//  more details.
+//
+//  You should have received a copy of the GNU General Public License along with
+//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+//  Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+#include "../include/user-settings.fxh"
+#include "../include/derived-settings-and-constants.fxh"
+#include "../include/bind-shader-params.fxh"
+
+//////////////////////////////////  INCLUDES  //////////////////////////////////
+
+#include "../include/gamma-management.fxh"
+#include "../include/bloom-functions.fxh"
+#include "../include/phosphor-mask-resizing.fxh"
+#include "../include/scanline-functions.fxh"
+
+/////////////////////////////////  STRUCTURES  /////////////////////////////////
+
+//  Interpolants written by VS_Bloom_Horizontal and read by PS_Bloom_Horizontal.
+struct out_vertex_p10
+{
+    float2 video_uv            : TEXCOORD1;  //  uv used for every texture lookup in the pixel shader
+    float2 bloom_dxdy          : TEXCOORD2;  //  horizontal sample step: (1/texture_size.x, 0)
+    float bloom_sigma_runtime  : TEXCOORD3;  //  result of get_min_sigma_to_blur_triad()
+    float2 sinangle            : TEXCOORD4;  //  sin of (geom_x_tilt, geom_y_tilt)
+    float2 cosangle            : TEXCOORD5;  //  cos of (geom_x_tilt, geom_y_tilt)
+    float3 stretch             : TEXCOORD6;  //  maxscale(sinangle, cosangle)
+};
+
+
+////////////////////////////////  VERTEX SHADER  ///////////////////////////////
+
+//  Full-screen-triangle vertex shader for the horizontal bloom pass.  Besides
+//  the clip-space position it precomputes everything PS_Bloom_Horizontal
+//  consumes: the video uv, the horizontal texel step, a runtime bloom sigma,
+//  and the tilt terms handed to the curvature transform.
+void VS_Bloom_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p10 OUT)
+{
+    //  Vertex ids 0/1/2 produce uv (0,0), (0,2), (2,0):
+    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
+    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
+
+    //  Screen centering (centerx/centery are divided by 100 before use):
+    texcoord -= float2(centerx,centery)/100.0;
+
+    const float2 pass_texture_dims = BLOOM_HORIZONTAL_texture_size;
+    const float2 pass_output_dims  = VIEWPORT_SIZE;
+
+    //  Our various input textures use different coords; the pixel shader
+    //  samples all of them through this video uv:
+    OUT.video_uv = texcoord * pass_texture_dims/video_size;
+
+    //  The vertically blurred brightpass is blurred horizontally here and
+    //  this pass must NOT resize, so step exactly one input texel in x:
+    OUT.bloom_dxdy = float2(1.0/pass_texture_dims.x, 0.0);
+
+    //  Runtime bloom sigma, in case the pixel shader needs it:
+    const float2 resized_tile_dims = get_resized_mask_tile_size(
+        pass_output_dims, pass_output_dims * mask_resize_viewport_scale, false);
+    OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
+        resized_tile_dims.x / mask_triads_per_tile, bloom_diff_thresh);
+
+    //  Tilt terms for the curvature code, evaluated once per vertex rather
+    //  than once per pixel:
+    const float2 tilt_angles = float2(geom_x_tilt, geom_y_tilt);
+    OUT.sinangle = sin(tilt_angles);
+    OUT.cosangle = cos(tilt_angles);
+    OUT.stretch  = maxscale(OUT.sinangle, OUT.cosangle);
+}
+
+
+///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
+
+//  Pixel shader for the horizontal bloom pass.  Finishes the separable blur
+//  of the brightpass, recombines it with the unblurred remainder of the
+//  masked scanlines, mixes in the halation texture, and applies the output
+//  gamma before encoding.
+float4 PS_Bloom_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p10 VAR) : SV_Target
+{
+    //  Optionally warp the lookup coords through the curvature transform:
+    const float2 warped_uv = (geom_curvature == true) ? transform(VAR.video_uv, VAR.sinangle, VAR.cosangle, VAR.stretch) : VAR.video_uv;
+
+    //  Corner factor, evaluated on the (possibly warped) coords:
+    const float corner_fade = corner((warped_uv-0.5.xx) * BufferToViewportRatio + 0.5.xx);
+
+    //  Sample the masked scanlines and the unblurred brightpass.  The
+    //  auto-dim factor comes from levels_autodim_temp, not from alpha:
+    const float3 scan_dim =
+        tex2D_linearize(MASKED_SCANLINES, warped_uv).rgb;
+    const float3 bright_sharp = tex2D_linearize(BRIGHTPASS,
+        warped_uv).rgb;
+    const float dim_factor = levels_autodim_temp;
+    const float undim_gain = 1.0/dim_factor;
+    const float amplify_gain = get_mask_amplify();
+
+    //  Blur the vertically blurred brightpass horizontally by 9/17/25/43x:
+    const float final_sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
+    const float3 bright_blurred = tex2DblurNfast(BLOOM_VERTICAL,
+        warped_uv, VAR.bloom_dxdy, final_sigma);
+
+    //  The dimpass is what the brightpass left behind.  Add the blurred
+    //  brightpass back, then undim (scanline auto-dim) and amplify (mask dim):
+    const float3 dim_residual = scan_dim - bright_sharp;
+    const float3 glow_phosphor = (dim_residual + bright_blurred) *
+        amplify_gain * undim_gain * levels_contrast;
+
+    //  Let some halation light bleed into refractive diffusion.  Conceptually
+    //  this happens before the phosphor bloom, but adding it in earlier
+    //  passes causes black crush in the diffusion colors.
+    const float3 halo_diffuse = levels_contrast * tex2D_linearize(
+        HALATION_BLUR, warped_uv).rgb;
+    const float3 mixed_bloom = lerp(glow_phosphor,
+        halo_diffuse, diffusion_weight);
+
+    //  The corner factor only applies when curvature is enabled:
+    const float3 faded_bloom = (geom_curvature == true) ? mixed_bloom * corner_fade.xxx : mixed_bloom;
+
+    //  Apply the output gamma, then encode and output the bloomed image:
+    const float3 gamma_bloom = pow(faded_bloom, 1.0/get_output_gamma());
+    return encode_output(float4(gamma_bloom, 1.0));
+}
+
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-vertical.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-bloom-vertical.fxh
@ -0,0 +1,83 @@
+/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
+
+//  crt-royale: A full-featured CRT shader, with cheese.
+//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
+//
+//  This program is free software; you can redistribute it and/or modify it
+//  under the terms of the GNU General Public License as published by the Free
+//  Software Foundation; either version 2 of the License, or any later version.
+//
+//  This program is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+//  more details.
+//
+//  You should have received a copy of the GNU General Public License along with
+//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+//  Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+#include "../include/user-settings.fxh"
+#include "../include/derived-settings-and-constants.fxh"
+#include "../include/bind-shader-params.fxh"
+
+//////////////////////////////////  INCLUDES  //////////////////////////////////
+
+#include "../include/gamma-management.fxh"
+#include "../include/bloom-functions.fxh"
+#include "../include/phosphor-mask-resizing.fxh"
+
+
+/////////////////////////////////  STRUCTURES  /////////////////////////////////
+
+//  Interpolants written by VS_Bloom_Vertical and read by PS_Bloom_Vertical.
+struct out_vertex_p9
+{
+    float2 tex_uv               : TEXCOORD1;  //  full-screen uv, unmodified
+    float2 bloom_dxdy           : TEXCOORD2;  //  vertical sample step: (0, dxdy.y)
+    float bloom_sigma_runtime   : TEXCOORD3;  //  result of get_min_sigma_to_blur_triad()
+};
+
+
+////////////////////////////////  VERTEX SHADER  ///////////////////////////////
+
+//  Full-screen-triangle vertex shader for the vertical bloom pass.  Outputs
+//  the plain uv, a vertical-only sample step, and a runtime bloom sigma.
+void VS_Bloom_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p9 OUT)
+{
+    //  Vertex ids 0/1/2 produce uv (0,0), (0,2), (2,0):
+    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
+    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
+    OUT.tex_uv = texcoord;
+
+    const float2 pass_texture_dims = BLOOM_VERTICAL_texture_size;
+    const float2 pass_output_dims  = VIEWPORT_SIZE;
+
+    //  uv sample distance between output pixels, calculated like
+    //  blurs/vertex-shader-blur-fast-vertical.h:
+    const float2 texel_step = (video_size/pass_output_dims)/pass_texture_dims;
+    //  This blur is vertical-only, so zero out the horizontal offset:
+    OUT.bloom_dxdy = float2(0.0, texel_step.y);
+
+    //  Runtime bloom sigma, in case the pixel shader needs it:
+    const float2 resized_tile_dims = get_resized_mask_tile_size(
+        pass_output_dims, pass_output_dims * mask_resize_viewport_scale, false);
+    OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
+        resized_tile_dims.x / mask_triads_per_tile, bloom_diff_thresh);
+}
+
+
+///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
+
+//  Pixel shader for the vertical bloom pass: blurs BRIGHTPASS along the step
+//  supplied in VAR.bloom_dxdy and encodes the result.
+float4 PS_Bloom_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p9 VAR) : SV_Target
+{
+    //  Blur the brightpass vertically with a 9/17/25/43x blur (the step in
+    //  VAR.bloom_dxdy has a zero x component):
+    const float blur_sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
+    const float3 blurred_rgb = tex2DblurNfast(BRIGHTPASS, VAR.tex_uv,
+        VAR.bloom_dxdy, blur_sigma);
+
+    //  Encode and output the blurred image:
+    return encode_output(float4(blurred_rgb, 1.0));
+}
+
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-brightpass.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-brightpass.fxh
@ -0,0 +1,130 @@
+/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
+
+//  crt-royale: A full-featured CRT shader, with cheese.
+//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
+//
+//  This program is free software; you can redistribute it and/or modify it
+//  under the terms of the GNU General Public License as published by the Free
+//  Software Foundation; either version 2 of the License, or any later version.
+//
+//  This program is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+//  more details.
+//
+//  You should have received a copy of the GNU General Public License along with
+//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+//  Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+#include "../include/user-settings.fxh"
+#include "../include/derived-settings-and-constants.fxh"
+#include "../include/bind-shader-params.fxh"
+
+//////////////////////////////////  INCLUDES  //////////////////////////////////
+
+#include "../include/gamma-management.fxh"
+#include "../include/blur-functions.fxh"
+#include "../include/phosphor-mask-resizing.fxh"
+#include "../include/scanline-functions.fxh"
+#include "../include/bloom-functions.fxh"
+
+/////////////////////////////////  STRUCTURES  /////////////////////////////////
+
+//  Interpolants written by VS_Brightpass and read by PS_Brightpass.
+struct out_vertex_p8
+{
+    float2 video_uv                     : TEXCOORD1;  //  tex_uv * BRIGHTPASS_texture_size / video_size
+    float2 scanline_tex_uv              : TEXCOORD2;  //  video_uv rescaled for sampling MASKED_SCANLINES
+    float2 blur3x3_tex_uv               : TEXCOORD3;  //  video_uv rescaled for sampling BLOOM_APPROX
+    float bloom_sigma_runtime           : TEXCOORD4;  //  result of get_min_sigma_to_blur_triad()
+};
+
+
+////////////////////////////////  VERTEX SHADER  ///////////////////////////////
+
+//  Full-screen-triangle vertex shader for the brightpass.  Derives the uv
+//  sets used to sample MASKED_SCANLINES and BLOOM_APPROX, plus a runtime
+//  bloom sigma.
+void VS_Brightpass(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p8 OUT)
+{
+    //  Vertex ids 0/1/2 produce uv (0,0), (0,2), (2,0):
+    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
+    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
+
+    const float2 pass_texture_dims = BRIGHTPASS_texture_size;
+    const float2 pass_output_dims  = VIEWPORT_SIZE;
+
+    //  Our various input textures use different coords.  Start from the
+    //  video uv and rescale it once per input texture:
+    const float2 uv_in_video = texcoord * pass_texture_dims/video_size;
+    OUT.video_uv = uv_in_video;
+    OUT.scanline_tex_uv = uv_in_video * MASKED_SCANLINES_video_size /
+        MASKED_SCANLINES_texture_size;
+    OUT.blur3x3_tex_uv = uv_in_video * BLOOM_APPROX_video_size / BLOOM_APPROX_texture_size;
+
+    //  Runtime bloom sigma, in case the pixel shader needs it:
+    const float2 resized_tile_dims = get_resized_mask_tile_size(
+        pass_output_dims, pass_output_dims * mask_resize_viewport_scale, false);
+    OUT.bloom_sigma_runtime = get_min_sigma_to_blur_triad(
+        resized_tile_dims.x / mask_triads_per_tile, bloom_diff_thresh);
+}
+
+
+///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
+
+//  Pixel shader for the brightpass.  For each pixel it decides what fraction
+//  (blur_ratio) of the masked-scanline intensity should be blurred by the
+//  bloom passes, and outputs intensity_dim scaled by that fraction.
+//  Inputs: MASKED_SCANLINES (via VAR.scanline_tex_uv) and BLOOM_APPROX (via
+//  VAR.blur3x3_tex_uv).  Returns the encoded brightpass color with alpha 1.0.
+float4 PS_Brightpass(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p8 VAR) : SV_Target
+{
+    //  Sample the masked scanlines:
+    const float3 intensity_dim =
+        tex2D_linearize(MASKED_SCANLINES, VAR.scanline_tex_uv).rgb;
+    //  Get the full intensity, including auto-undimming, and mask compensation:
+    const float auto_dim_factor = levels_autodim_temp;
+    const float undim_factor = 1.0/auto_dim_factor;
+    const float mask_amplify = get_mask_amplify();
+    const float3 intensity = intensity_dim * undim_factor * mask_amplify *
+        levels_contrast;
+
+    //  Sample BLOOM_APPROX to estimate what a straight blur of masked scanlines
+    //  would look like, so we can estimate how much energy we'll receive from
+    //  blooming neighbors:
+    const float3 phosphor_blur_approx = levels_contrast * tex2D_linearize(
+        BLOOM_APPROX, VAR.blur3x3_tex_uv).rgb;
+
+    //  Compute the blur weight for the center texel and the maximum energy we
+    //  expect to receive from neighbors:
+    const float bloom_sigma = get_final_bloom_sigma(VAR.bloom_sigma_runtime);
+    const float center_weight = get_center_weight(bloom_sigma);
+    //  Clamped at zero so the neighbor contribution is never negative:
+    const float3 max_area_contribution_approx =
+        max(0.0.xxx, phosphor_blur_approx - center_weight * intensity);
+    //  Assume neighbors will blur 100% of their intensity (blur_ratio = 1.0),
+    //  because it actually gets better results (on top of being very simple),
+    //  but adjust all intensities for the user's desired underestimate factor:
+    const float3 area_contrib_underestimate =
+        bloom_underestimate_levels * max_area_contribution_approx;
+    const float3 intensity_underestimate =
+        bloom_underestimate_levels * intensity;
+    //  Calculate the blur_ratio, the ratio of intensity we want to blur:
+    #ifdef BRIGHTPASS_AREA_BASED
+        //  This area-based version changes blur_ratio more smoothly and blurs
+        //  more, clipping less but offering less phosphor differentiation:
+        const float3 phosphor_blur_underestimate = bloom_underestimate_levels *
+            phosphor_blur_approx;
+        const float3 soft_intensity = max(intensity_underestimate,
+            phosphor_blur_underestimate * mask_amplify);
+        const float3 blur_ratio_temp =
+            ((1.0.xxx - area_contrib_underestimate) /
+            soft_intensity - 1.0.xxx) / (center_weight - 1.0);
+    #else
+        //  Default version: same formula, using the per-pixel intensity
+        //  underestimate directly as the divisor.
+        const float3 blur_ratio_temp =
+            ((1.0.xxx - area_contrib_underestimate) /
+            intensity_underestimate - 1.0.xxx) / (center_weight - 1.0);
+    #endif
+    //  The raw ratio is unbounded (the divisor above can be zero), so limit
+    //  it to [0, 1] before use:
+    const float3 blur_ratio = clamp(blur_ratio_temp, 0.0, 1.0);
+    //  Calculate the brightpass based on the auto-dimmed, unamplified, masked
+    //  scanlines, encode if necessary, and return!
+    //  bloom_excess pushes the blurred fraction from blur_ratio toward 1.0:
+    const float3 brightpass = intensity_dim *
+        lerp(blur_ratio, 1.0.xxx, bloom_excess);
+    return encode_output(float4(brightpass, 1.0));
+}
+
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.fxh
@ -0,0 +1,109 @@
+/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
+
+//  crt-royale: A full-featured CRT shader, with cheese.
+//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
+//
+//  This program is free software; you can redistribute it and/or modify it
+//  under the terms of the GNU General Public License as published by the Free
+//  Software Foundation; either version 2 of the License, or any later version.
+//
+//  This program is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+//  more details.
+//
+//  You should have received a copy of the GNU General Public License along with
+//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+//  Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+//  PASS SETTINGS:
+//  gamma-management.h needs to know what kind of pipeline we're using and
+//  what pass this is in that pipeline.  This will become obsolete if/when we
+//  can #define things like this in the .cgp preset file.
+#define FIRST_PASS
+#define SIMULATE_CRT_ON_LCD
+
+
+//////////////////////////////////  INCLUDES  //////////////////////////////////
+
+#include "../include/user-settings.fxh"
+#include "../include/bind-shader-params.fxh"
+#include "../include/gamma-management.fxh"
+#include "../include/scanline-functions.fxh"
+
+
+/////////////////////////////////  STRUCTURES  /////////////////////////////////
+
+//  Interpolants written by VS_Linearize and read by PS_Linearize.
+struct out_vertex
+{
+    float2 tex_uv           : TEXCOORD1;  //  full-screen uv, unmodified
+    float2 uv_step          : TEXCOORD2;  //  NormalizedNativePixelSize (uv distance between texels)
+    float interlaced        : TEXCOORD3;  //  is_interlaced() result: 1.0 = true, 0.0 = false
+};
+
+////////////////////////////////  VERTEX SHADER  ///////////////////////////////
+
+//  Full-screen-triangle vertex shader for the first (linearize) pass.
+//  Forwards the uv, the texel step of the native-resolution source, and the
+//  interlacing flag.
+void VS_Linearize(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex OUT)
+{
+    //  Vertex ids 0/1/2 produce uv (0,0), (0,2), (2,0):
+    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
+    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
+
+    //  Detect interlacing from the native line count: 1.0 = true, 0.0 = false.
+    OUT.interlaced = is_interlaced(1.0/NormalizedNativePixelSize.y);
+
+    //  Save the uv distance between texels:
+    OUT.uv_step = NormalizedNativePixelSize;
+
+    //  Pass the uv through as-is.  Disabled alternative (snap to texel centers):
+//    OUT.tex_uv = (floor(texcoord / NormalizedNativePixelSize)+float2(0.5,0.5)) * NormalizedNativePixelSize;
+    OUT.tex_uv = texcoord;
+}
+
+
+///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
+
+//  Point-filtered, border-addressed sampler over the ReShade back buffer.
+//  It is the input of the linearize pass, referred to as input_texture below.
+sampler2D sBackBuffer{Texture=ReShade::BackBufferTex;AddressU=BORDER;AddressV=BORDER;AddressW=BORDER;MagFilter=POINT;MinFilter=POINT;};
+
+#define input_texture sBackBuffer
+
+//  Pixel shader for the first pass: linearizes the input based on CRT gamma
+//  and, when interlace_detect is set, bobs interlaced fields so later passes
+//  can blur immediately without artifacts.
+//  Note: TFF/BFF won't matter for sources that double-weave or similar.
+float4 PS_Linearize(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex VAR) : SV_Target
+{
+   // Disabled: VAR.tex_uv = (floor(VAR.tex_uv / NormalizedNativePixelSize)+float2(0.5,0.5)) * NormalizedNativePixelSize;
+
+    if(interlace_detect)
+    {
+        const float2 uv_here = VAR.tex_uv;
+        const float2 one_line = float2(0.0, VAR.uv_step.y);
+
+        //  Fetch the lines above and below plus the current line.  All three
+        //  are always sampled (no branching); tex2D_linearize_first decodes
+        //  CRT gamma:
+        const float3 line_above = tex2D_linearize_first(
+            input_texture, uv_here - one_line).rgb;
+        const float3 line_below = tex2D_linearize_first(
+            input_texture, uv_here + one_line).rgb;
+        const float3 line_here = tex2D_linearize_first(
+            input_texture, uv_here).rgb;
+        const float3 bobbed_line = 0.5 * (line_above + line_below);
+
+        //  Field bookkeeping: the period is VAR.interlaced + 1.0, and the
+        //  phase advances with FrameCount (shifted by interlace_bff):
+        const float field_period = VAR.interlaced + 1.0;
+        const float field_phase =
+            fmod(FrameCount + float(interlace_bff), field_period);
+
+        //  Line number of this pixel; under_half fixes a rounding bug around
+        //  exact texel locations:
+        const float texel_row = uv_here.y / NormalizedNativePixelSize.y;
+        const float row_index = floor(texel_row - under_half);
+
+        //  Lerp weight selecting the interpolated line over the real one:
+        const float in_other_field = fmod(row_index + field_phase, field_period);
+        return encode_output(float4(
+            lerp(line_here, bobbed_line, in_other_field), 1.0));
+    }
+    else
+    {
+        //  No interlace handling: linearize the sample as-is.
+        return encode_output(tex2D_linearize_first(input_texture, VAR.tex_uv));
+    }
+}
+
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-horizontal.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-horizontal.fxh
@ -0,0 +1,130 @@
+/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
+
+//  crt-royale: A full-featured CRT shader, with cheese.
+//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
+//
+//  This program is free software; you can redistribute it and/or modify it
+//  under the terms of the GNU General Public License as published by the Free
+//  Software Foundation; either version 2 of the License, or any later version.
+//
+//  This program is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+//  more details.
+//
+//  You should have received a copy of the GNU General Public License along with
+//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+//  Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+#include "../include/user-settings.fxh"
+#include "../include/derived-settings-and-constants.fxh"
+#include "../include/bind-shader-params.fxh"
+
+
+//////////////////////////////////  INCLUDES  //////////////////////////////////
+
+#include "../include/phosphor-mask-resizing.fxh"
+
+
+/////////////////////////////////  STRUCTURES  /////////////////////////////////
+
+//  Interpolants written by VS_Mask_Resize_Horizontal and read by
+//  PS_Mask_Resize_Horizontal.
+struct out_vertex_p6
+{
+    float2 src_tex_uv_wrap              : TEXCOORD1;  //  tile_uv_wrap * tile_size_uv; frac() is taken in the pixel shader
+    float2 tile_uv_wrap                 : TEXCOORD2;  //  output position measured in tiles (not wrapped)
+    float2 resize_magnification_scale   : TEXCOORD3;  //  mask_resize_tile_size / input_tile_size
+    float2 src_dxdy                     : TEXCOORD4;  //  horizontal texel step: (1/texture_size.x, 0)
+    float2 tile_size_uv                 : TEXCOORD5;  //  input_tile_size / texture_size
+    float2 input_tiles_per_texture      : TEXCOORD6;  //  texture_size / input_tile_size
+};
+
+
+////////////////////////////////  VERTEX SHADER  ///////////////////////////////
+
+//  Full-screen-triangle vertex shader for the horizontal phosphor-mask
+//  resize pass.  Works out the final on-screen tile size and converts the
+//  output uv into [wrapped] tile and source-texture coords for the pixel
+//  shader.
+void VS_Mask_Resize_Horizontal(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p6 OUT)
+{
+    //  Vertex ids 0/1/2 produce uv (0,0), (0,2), (2,0):
+    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
+    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
+
+    const float2 pass_texture_dims = MASK_RESIZE_texture_size;
+    //  The output size used here is 1/16 of the viewport on each axis:
+    const float2 pass_output_dims  = 0.0625*(VIEWPORT_SIZE);
+
+    //  Estimate the viewport size first (the user will get the wrong number
+    //  of triads if it's wrong and mask_specify_num_triads is 1.0/true), then
+    //  find the final size of the resized mask tiles.  The previous pass
+    //  probably estimated these sizes differently, so don't swear they match.
+    const float2 viewport_guess =
+        pass_output_dims / mask_resize_viewport_scale;
+    const float2 final_tile_dims = get_resized_mask_tile_size(
+        viewport_guess, pass_output_dims, false);
+
+    //  Texel size of one input tile, and the values derived from it:
+    const float2 src_tile_dims = float2(min(
+        mask_resize_src_lut_size.x, video_size.x), final_tile_dims.y);
+    const float2 src_tile_uv = src_tile_dims / pass_texture_dims;
+    OUT.tile_size_uv = src_tile_uv;
+    OUT.input_tiles_per_texture = pass_texture_dims / src_tile_dims;
+    OUT.resize_magnification_scale = final_tile_dims / src_tile_dims;
+    OUT.src_dxdy = float2(1.0/pass_texture_dims.x, 0.0);
+
+    //  Resized tiles are rendered until the output FBO is full or a limit is
+    //  met, so express the output uv in tiles, based on how many fit:
+    const float2 tiles_across_output = pass_output_dims / final_tile_dims;
+    const float2 uv_in_video = texcoord * pass_texture_dims / video_size;
+    const float2 uv_in_tiles = uv_in_video * tiles_across_output;
+    OUT.tile_uv_wrap = uv_in_tiles;
+
+    //  [Wrapped] texture uv from [wrapped] tile uv and the tile size in uv;
+    //  frac() is left for the fragment shader:
+    OUT.src_tex_uv_wrap = uv_in_tiles * src_tile_uv;
+}
+
+
+///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
+
+//  Pixel shader for the horizontal phosphor-mask resize pass.
+//  With PHOSPHOR_MASK_MANUALLY_RESIZE defined, fragments inside the first
+//  mask_resize_num_tiles tiles are sinc-downsampled from MASK_RESIZE_VERTICAL
+//  and returned with alpha 1.0; all other fragments are discarded.  Without
+//  the define every fragment is discarded.
+float4 PS_Mask_Resize_Horizontal(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p6 VAR) : SV_Target
+{
+    //  The input contains one mask tile horizontally and a number vertically.
+    //  Resize the tile horizontally to its final screen size and repeat it
+    //  until drawing at least mask_resize_num_tiles, leaving it unchanged
+    //  vertically.  Lanczos-resizing the phosphor mask achieves much sharper
+    //  results than mipmapping, outputting >= mask_resize_num_tiles makes for
+    //  easier tiled sampling later.
+    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
+        //  Discard unneeded fragments in case our profile allows real branches.
+        float2 texture_size = MASK_RESIZE_texture_size;
+        const float2 tile_uv_wrap = VAR.tile_uv_wrap;
+        if(get_mask_sample_mode() < 0.5 &&
+            max(tile_uv_wrap.x, tile_uv_wrap.y) <= mask_resize_num_tiles)
+        {
+            //  Horizontal texel step of the source; passed to the resampler
+            //  below (the local was previously declared but never used).
+            const float src_dx = VAR.src_dxdy.x;
+            //  Wrap the source coords into a single texture repeat:
+            const float2 src_tex_uv = frac(VAR.src_tex_uv_wrap);
+            const float3 pixel_color = downsample_horizontal_sinc_tiled(MASK_RESIZE_VERTICAL,
+                src_tex_uv, texture_size, src_dx,
+                VAR.resize_magnification_scale.x, VAR.tile_size_uv.x);
+            //  The input LUT was linear RGB, and so is our output:
+            return float4(pixel_color, 1.0);
+        }
+        else
+        {
+            discard;
+        }
+    #else
+        discard;
+        return 1.0.xxxx;
+    #endif
+}
+
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-vertical.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-mask-resize-vertical.fxh
@ -0,0 +1,164 @@
+/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
+
+//  crt-royale: A full-featured CRT shader, with cheese.
+//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
+//
+//  This program is free software; you can redistribute it and/or modify it
+//  under the terms of the GNU General Public License as published by the Free
+//  Software Foundation; either version 2 of the License, or any later version.
+//
+//  This program is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+//  more details.
+//
+//  You should have received a copy of the GNU General Public License along with
+//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+//  Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+#include "../include/user-settings.fxh"
+#include "../include/derived-settings-and-constants.fxh"
+#include "../include/bind-shader-params.fxh"
+
+
+//////////////////////////////////  INCLUDES  //////////////////////////////////
+
+#include "../include/phosphor-mask-resizing.fxh"
+
+
+/////////////////////////////////  STRUCTURES  /////////////////////////////////
+
+//  Interpolants written by VS_Mask_Resize_Vertical and read by
+//  PS_Mask_Resize_Vertical.
+struct out_vertex_p5
+{
+    float2 src_tex_uv_wrap              : TEXCOORD1;  //  output position measured in tiles; frac() is taken in the pixel shader
+    float2 resize_magnification_scale   : TEXCOORD2;  //  pass_output_tile_size / mask_resize_src_lut_size
+};
+
+
+////////////////////////////////  VERTEX SHADER  ///////////////////////////////
+
+//  Full-screen-triangle vertex shader for the vertical phosphor-mask resize
+//  pass.  Estimates the final tile height and converts the output uv into
+//  [wrapped] tile coords for the pixel shader.
+void VS_Mask_Resize_Vertical(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p5 OUT)
+{
+    //  Vertex ids 0/1/2 produce uv (0,0), (0,2), (2,0):
+    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
+    position = float4(texcoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
+
+    const float2 pass_texture_dims = MASK_RESIZE_VERT_texture_size;
+    //  The output size used here: 64 wide, 1/16 of the viewport height tall.
+    const float2 pass_output_dims  = float2(64.0, 0.0625*((VIEWPORT_SIZE).y));
+
+    //  The aspect ratio is taken from the texture size (it used to be
+    //  geom_aspect_ratio_x / geom_aspect_ratio_y):
+    const float tex_aspect_ratio = pass_texture_dims.x / pass_texture_dims.y;
+
+    //  Estimate the viewport size (the user will get the wrong number of
+    //  triads if it's wrong and mask_specify_num_triads is 1.0/true):
+    const float viewport_h = pass_output_dims.y / mask_resize_viewport_scale.y;
+    const float2 viewport_guess =
+        float2(viewport_h * tex_aspect_ratio, viewport_h);
+
+    //  Estimate the output size of MASK_RESIZE (the next pass).  The x
+    //  component shouldn't matter: the x result is unused, and we're not
+    //  swearing it's correct (if we did, it would influence the y result to
+    //  maintain the tile aspect ratio).
+    const float2 next_pass_output_guess =
+        float2(pass_output_dims.y * tex_aspect_ratio, pass_output_dims.y);
+
+    //  Final intended [y] size of the resized mask tiles, then the tile size
+    //  for the current pass (resize y only):
+    const float2 final_tile_dims = get_resized_mask_tile_size(
+        viewport_guess, next_pass_output_guess, false);
+    const float2 tile_dims_this_pass = float2(min(
+        mask_resize_src_lut_size.x, pass_output_dims.x), final_tile_dims.y);
+
+    //  Resized tiles are rendered until the output FBO is full or a limit is
+    //  met, so express the output uv in tiles, based on how many fit.  The
+    //  input LUT is a single mask tile, so texture uv equals tile uv (frac()
+    //  is left for the fragment shader):
+    const float2 tiles_across_output = pass_output_dims / tile_dims_this_pass;
+    const float2 uv_in_video = texcoord * pass_texture_dims / video_size;
+    OUT.src_tex_uv_wrap = uv_in_video * tiles_across_output;
+
+    //  The magnification scale is also straightforward:
+    OUT.resize_magnification_scale =
+        tile_dims_this_pass / mask_resize_src_lut_size;
+}
+
+
+///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
+
+//  Pixel shader for the vertical phosphor-mask resize pass.
+//  With PHOSPHOR_MASK_MANUALLY_RESIZE defined, fragments whose tile row is
+//  within mask_resize_num_tiles are sinc-downsampled vertically from the mask
+//  LUT chosen by mask_type and returned with alpha 1.0; all other fragments
+//  are discarded.  Without the define every fragment is discarded.
+float4 PS_Mask_Resize_Vertical(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p5 VAR) : SV_Target
+{
+    //  Resize the input phosphor mask tile to the final vertical size it will
+    //  appear on screen.  Keep 1x horizontal size if possible (the pass output
+    //  width >= mask_resize_src_lut_size), and otherwise linearly sample
+    //  horizontally to fit exactly one tile.  Lanczos-resizing the phosphor mask
+    //  achieves much sharper results than mipmapping, and vertically resizing
+    //  first minimizes the total number of taps required.  We output a number
+    //  of resized tiles >= mask_resize_num_tiles for easier tiled sampling later.
+    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
+        //  Discard unneeded fragments in case our profile allows real branches.
+        const float2 tile_uv_wrap = VAR.src_tex_uv_wrap;
+        if(get_mask_sample_mode() < 0.5 &&
+            tile_uv_wrap.y <= mask_resize_num_tiles)
+        {
+            //  Vertical texel step of the source LUT:
+            static const float src_dy = 1.0/mask_resize_src_lut_size.y;
+            //  Wrap the tile coords into a single repeat of the LUT:
+            const float2 src_tex_uv = frac(VAR.src_tex_uv_wrap);
+            float3 pixel_color;
+            //  If mask_type is static, this branch will be resolved statically.
+            //  mask_type selects the LUT: < 0.5 grille, < 1.5 slot, else shadow.
+            //  The *_large textures are used with PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT,
+            //  the *_small ones otherwise.
+			#ifdef PHOSPHOR_MASK_RESIZE_MIPMAPPED_LUT
+				if(mask_type < 0.5)
+				{
+					pixel_color = downsample_vertical_sinc_tiled(
+						mask_grille_texture_large, src_tex_uv, mask_resize_src_lut_size,
+						src_dy, VAR.resize_magnification_scale.y, 1.0);
+				}
+				else if(mask_type < 1.5)
+				{
+					pixel_color = downsample_vertical_sinc_tiled(
+						mask_slot_texture_large, src_tex_uv, mask_resize_src_lut_size,
+						src_dy, VAR.resize_magnification_scale.y, 1.0);
+				}
+				else
+				{
+					pixel_color = downsample_vertical_sinc_tiled(
+						mask_shadow_texture_large, src_tex_uv, mask_resize_src_lut_size,
+						src_dy, VAR.resize_magnification_scale.y, 1.0);
+				}
+			#else
+				if(mask_type < 0.5)
+				{
+					pixel_color = downsample_vertical_sinc_tiled(
+						mask_grille_texture_small, src_tex_uv, mask_resize_src_lut_size,
+						src_dy, VAR.resize_magnification_scale.y, 1.0);
+				}
+				else if(mask_type < 1.5)
+				{
+					pixel_color = downsample_vertical_sinc_tiled(
+						mask_slot_texture_small, src_tex_uv, mask_resize_src_lut_size,
+						src_dy, VAR.resize_magnification_scale.y, 1.0);
+				}
+				else
+				{
+					pixel_color = downsample_vertical_sinc_tiled(
+						mask_shadow_texture_small, src_tex_uv, mask_resize_src_lut_size,
+						src_dy, VAR.resize_magnification_scale.y, 1.0);
+				}
+			#endif
+            //  The input LUT was linear RGB, and so is our output:
+            return float4(pixel_color, 1.0);
+        }
+        else
+        {
+            discard;
+        }
+    #else
+        discard;
+        return 1.0.xxxx;
+    #endif
+}
+
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.fxh
@ -0,0 +1,283 @@
+/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
+
+//  crt-royale: A full-featured CRT shader, with cheese.
+//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
+//
+//  This program is free software; you can redistribute it and/or modify it
+//  under the terms of the GNU General Public License as published by the Free
+//  Software Foundation; either version 2 of the License, or any later version.
+//
+//  This program is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+//  more details.
+//
+//  You should have received a copy of the GNU General Public License along with
+//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+//  Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+/////////////////////////////  SETTINGS MANAGEMENT  ////////////////////////////
+
+#include "../include/user-settings.fxh"
+#include "../include/derived-settings-and-constants.fxh"
+#include "../include/bind-shader-params.fxh"
+
+//////////////////////////////////  INCLUDES  //////////////////////////////////
+
+#include "../include/scanline-functions.fxh"
+#include "../include/phosphor-mask-resizing.fxh"
+#include "../include/bloom-functions.fxh"
+#include "../include/gamma-management.fxh"
+
+
+///////////////////////////////////  HELPERS  //////////////////////////////////
+
+//  Sample a phosphor mask texture that may be tiled manually and return the
+//  linearized texel.  Anisotropic filtering can get confused by manual tiling,
+//  so when PHOSPHOR_MASK_MANUALLY_RESIZE is defined one of two compatibility
+//  paths is taken if available; every other configuration is a plain
+//  linearized fetch.
+//      tex:    mask sampler to read
+//      tex_uv: texture coordinates of the sample
+float4 tex2Dtiled_mask_linearize(const sampler2D tex,
+    const float2 tex_uv)
+{
+    #if defined(PHOSPHOR_MASK_MANUALLY_RESIZE) && defined(ANISOTROPIC_TILING_COMPAT_TEX2DLOD)
+        //  First choice: an explicit-LOD fetch with the LOD argument at 0.
+        //  TODO: Use tex2Dlod_linearize with a calculated mip level.
+        const float4 lod_coords = float4(tex_uv, 0.0, 0.0);
+        return tex2Dlod_linearize(tex, lod_coords);
+    #elif defined(PHOSPHOR_MASK_MANUALLY_RESIZE) && defined(ANISOTROPIC_TILING_COMPAT_TEX2DBIAS)
+        //  Second choice: a biased fetch with a large negative bias (-16).
+        const float4 bias_coords = float4(tex_uv, 0.0, -16.0);
+        return tex2Dbias_linearize(tex, bias_coords);
+    #else
+        //  No manual tiling, or no compatibility path enabled.
+        return tex2D_linearize(tex, tex_uv);
+    #endif
+}
+
+
+/////////////////////////////////  STRUCTURES  /////////////////////////////////
+
+
+//  Interpolants passed from VS_Scanlines_Horizontal_Apply_Mask to
+//  PS_Scanlines_Horizontal_Apply_Mask.  TEXCOORD0 is not used here; the
+//  shaders declare a separate texcoord output/input with the TEXCOORD semantic.
+struct out_vertex_p7
+{
+    //  Use explicit semantics so COLORx doesn't clamp values outside [0, 1].
+    float2 video_uv                     : TEXCOORD1;    //  tex_uv * texture_size / video_size
+    float2 scanline_tex_uv              : TEXCOORD2;    //  uv for sampling VERTICAL_SCANLINES
+    float2 blur3x3_tex_uv               : TEXCOORD3;    //  uv for sampling BLOOM_APPROX
+    float2 halation_tex_uv              : TEXCOORD4;    //  uv for sampling HALATION_BLUR
+    float2 scanline_texture_size_inv    : TEXCOORD5;    //  1.0 / VERTICAL_SCANLINES_texture_size
+    float4 mask_tile_start_uv_and_size  : TEXCOORD6;    //  return value of get_mask_sampling_parameters()
+    float2 mask_tiles_per_screen        : TEXCOORD7;    //  out-parameter of get_mask_sampling_parameters()
+};
+
+
+////////////////////////////////  VERTEX SHADER  ///////////////////////////////
+
+
+//  Vertex shader for the horizontal-scanline / phosphor-mask pass.  Emits one
+//  oversized triangle that covers the whole screen and precomputes the per-pass
+//  texture coordinates and mask tiling parameters consumed by
+//  PS_Scanlines_Horizontal_Apply_Mask.
+//      id:       vertex index (0, 1 or 2)
+//      position: clip-space position of the vertex
+//      texcoord: uv of the vertex, (0,0), (0,2) or (2,0)
+//      OUT:      interpolants described by out_vertex_p7
+void VS_Scanlines_Horizontal_Apply_Mask(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p7 OUT)
+{
+    //  Vertex 1 is pushed to v = 2 and vertex 2 to u = 2, so the triangle
+    //  spans clip space from (-1, 1) to (3, 1) and (-1, -3).
+    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
+    position = float4(float2(-1.0, 1.0) + float2(2.0, -2.0) * texcoord, 0.0, 1.0);
+
+    float2 masked_pass_tex_size = MASKED_SCANLINES_texture_size;
+    float2 screen_size_px       = VIEWPORT_SIZE;
+
+    //  Each input texture has its own coordinate space; derive them all from
+    //  a common video-relative uv.
+    const float2 inv_scanline_tex_size =
+        1.0.xx/VERTICAL_SCANLINES_texture_size;
+    const float2 uv_in_video = texcoord * masked_pass_tex_size/video_size;
+
+    OUT.video_uv = uv_in_video;
+    OUT.scanline_texture_size_inv = inv_scanline_tex_size;
+    OUT.scanline_tex_uv = uv_in_video * VERTICAL_SCANLINES_video_size *
+        inv_scanline_tex_size;
+    OUT.halation_tex_uv = uv_in_video * HALATION_BLUR_video_size /
+        HALATION_BLUR_texture_size;
+    OUT.blur3x3_tex_uv = uv_in_video * BLOOM_APPROX_video_size /
+        BLOOM_APPROX_texture_size;
+
+    //  Pick the size of the mask texture that will actually be sampled.  With
+    //  manual resizing, sample mode 0 (mode < 0.5) reads the resized mask;
+    //  every other case reads the original large LUT.
+    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
+        const bool use_resized_mask = get_mask_sample_mode() < 0.5;
+        const float2 mask_src_texture_size = use_resized_mask ?
+            MASKED_SCANLINES_texture_size : mask_texture_large_size;
+        const float2 mask_src_video_size = use_resized_mask ?
+            MASKED_SCANLINES_video_size : mask_texture_large_size;
+    #else
+        const float2 mask_src_texture_size = mask_texture_large_size;
+        const float2 mask_src_video_size = mask_texture_large_size;
+    #endif
+
+    //  Mask tile start/size plus the number of tiles across the screen:
+    float2 tiles_per_screen;
+    OUT.mask_tile_start_uv_and_size = get_mask_sampling_parameters(
+        mask_src_texture_size, mask_src_video_size, screen_size_px,
+        tiles_per_screen);
+    OUT.mask_tiles_per_screen = tiles_per_screen;
+}
+
+
+///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
+
+//  Pixel shader for the horizontal-scanline / phosphor-mask pass.
+//      vpos:      pixel position (not referenced in the body)
+//      vTexCoord: fullscreen-triangle uv (not referenced in the body)
+//      VAR:       interpolants written by VS_Scanlines_Horizontal_Apply_Mask
+//  Returns encode_output() of the masked (and, with PHOSPHOR_BLOOM_FAKE,
+//  fake-bloomed) color with alpha 1.0.
+float4 PS_Scanlines_Horizontal_Apply_Mask(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p7 VAR) : SV_Target
+{
+    //  This pass: Sample (misconverged?) scanlines to the final horizontal
+    //  resolution, apply halation (bouncing electrons), and apply the phosphor
+    //  mask.  Fake a bloom if requested.  Unless we fake a bloom, the output
+    //  will be dim from the scanline auto-dim, mask dimming, and low gamma.
+
+    //  Horizontally sample the current row (a vertically interpolated scanline)
+    //  and account for horizontal convergence offsets, given in units of texels.
+  //  float2 VERTICAL_SCANLINES_texture_size = float2(1.0/NormalizedNativePixelSize.x, ViewportSize.y*BufferToViewportRatio.y);
+
+    //  Only read by the non-simple-blend fake-bloom path further down.
+    float2 output_size  = VIEWPORT_SIZE;
+
+    const float3 scanline_color_dim = sample_rgb_scanline_horizontal(
+        VERTICAL_SCANLINES, VAR.scanline_tex_uv,
+        VERTICAL_SCANLINES_texture_size, VAR.scanline_texture_size_inv);
+    const float auto_dim_factor = levels_autodim_temp;
+
+    //  Sample the phosphor mask:
+    const float2 tile_uv_wrap = VAR.video_uv * VAR.mask_tiles_per_screen;
+    const float2 mask_tex_uv = convert_phosphor_tile_uv_wrap_to_tex_uv(
+        tile_uv_wrap, VAR.mask_tile_start_uv_and_size);
+    float3 phosphor_mask_sample;
+    //  Without manual resizing the original LUTs are always sampled; with it,
+    //  they are sampled only when the mask sample mode is above 0.5.
+    #ifdef PHOSPHOR_MASK_MANUALLY_RESIZE
+        const bool sample_orig_luts = get_mask_sample_mode() > 0.5;
+    #else
+        static const bool sample_orig_luts = true;
+    #endif
+    if(sample_orig_luts)
+    {
+        //  If mask_type is static, this branch will be resolved statically.
+        //  mask_type < 0.5 selects the grille LUT, < 1.5 the slot LUT, and
+        //  anything else the shadow LUT.
+        if(mask_type < 0.5)
+        {
+            phosphor_mask_sample = tex2D_linearize(
+                mask_grille_texture_large, mask_tex_uv).rgb;
+        }
+        else if(mask_type < 1.5)
+        {
+            phosphor_mask_sample = tex2D_linearize(
+                mask_slot_texture_large, mask_tex_uv).rgb;
+        }
+        else
+        {
+            phosphor_mask_sample = tex2D_linearize(
+                mask_shadow_texture_large, mask_tex_uv).rgb;
+        }
+    }
+    else
+    {
+        //  Sample the resized mask, and avoid tiling artifacts:
+        phosphor_mask_sample = tex2Dtiled_mask_linearize(
+            MASK_RESIZE, mask_tex_uv).rgb;
+    }
+
+    //  Sample the halation texture (auto-dim to match the scanlines), and
+    //  account for both horizontal and vertical convergence offsets, given
+    //  in units of texels horizontally and same-field scanlines vertically:
+    const float3 halation_color = tex2D_linearize(
+        HALATION_BLUR, VAR.halation_tex_uv).rgb;
+
+    //  Apply halation: Halation models electrons flying around under the glass
+    //  and hitting the wrong phosphors (of any color).  It desaturates, so
+    //  average the halation electrons to a scalar.  Reduce the local scanline
+    //  intensity accordingly to conserve energy.
+    const float3 halation_intensity_dim =
+        dot(halation_color, auto_dim_factor.xxx/3.0).xxx;
+    const float3 electron_intensity_dim = lerp(scanline_color_dim,
+        halation_intensity_dim, halation_weight);
+
+    //  Apply the phosphor mask:
+    const float3 phosphor_emission_dim = electron_intensity_dim *
+        phosphor_mask_sample;
+
+    #ifdef PHOSPHOR_BLOOM_FAKE
+        //  The BLOOM_APPROX pass approximates a blurred version of a masked
+        //  and scanlined image.  It's usually used to compute the brightpass,
+        //  but we can also use it to fake the bloom stage entirely.  Caveats:
+        //  1.) A fake bloom is conceptually different, since we're mixing in a
+        //      fully blurred low-res image, and the biggest implications are:
+        //  2.) If mask_amplify is incorrect, results deteriorate more quickly.
+        //  3.) The inaccurate blurring hurts quality in high-contrast areas.
+        //  4.) The bloom_underestimate_levels parameter seems less sensitive.
+        //  Reverse the auto-dimming and amplify to compensate for mask dimming:
+        //  NOTE: the macro below is defined unconditionally here, so the
+        //  #else alternatives guarded by it are not compiled as written.
+        #define PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
+        #ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
+            static const float blur_contrast = 1.05;
+        #else
+            static const float blur_contrast = 1.0;
+        #endif
+        const float mask_amplify = get_mask_amplify();
+        const float undim_factor = 1.0/auto_dim_factor;
+        const float3 phosphor_emission =
+            phosphor_emission_dim * undim_factor * mask_amplify;
+        //  Get a phosphor blur estimate, accounting for convergence offsets:
+        const float3 electron_intensity = electron_intensity_dim * undim_factor;
+        const float3 phosphor_blur_approx_soft = tex2D_linearize(
+            BLOOM_APPROX, VAR.blur3x3_tex_uv).rgb;
+        const float3 phosphor_blur_approx = lerp(phosphor_blur_approx_soft,
+            electron_intensity, 0.1) * blur_contrast;
+        //  We could blend between phosphor_emission and phosphor_blur_approx,
+        //  solving for the minimum blend_ratio that avoids clipping past 1.0:
+        //      1.0 >= total_intensity
+        //      1.0 >= phosphor_emission * (1.0 - blend_ratio) +
+        //              phosphor_blur_approx * blend_ratio
+        //      blend_ratio = (phosphor_emission - 1.0)/
+        //          (phosphor_emission - phosphor_blur_approx);
+        //  However, this blurs far more than necessary, because it aims for
+        //  full brightness, not minimal blurring.  To fix it, base blend_ratio
+        //  on a max area intensity only so it varies more smoothly:
+        const float3 phosphor_blur_underestimate =
+            phosphor_blur_approx * bloom_underestimate_levels;
+        const float3 area_max_underestimate =
+            phosphor_blur_underestimate * mask_amplify;
+        #ifdef PHOSPHOR_BLOOM_FAKE_WITH_SIMPLE_BLEND
+            const float3 blend_ratio_temp =
+                (area_max_underestimate - 1.0.xxx) /
+                (area_max_underestimate - phosphor_blur_underestimate);
+        #else
+            //  Try doing it like an area-based brightpass.  This is nearly
+            //  identical, but it's worth toying with the code in case I ever
+            //  find a way to make it look more like a real bloom.  (I've had
+            //  some promising textures from combining an area-based blend ratio
+            //  for the phosphor blur and a more brightpass-like blend-ratio for
+            //  the phosphor emission, but I haven't found a way to make the
+            //  brightness correct across the whole color range, especially with
+            //  different bloom_underestimate_levels values.)
+            const float desired_triad_size = lerp(mask_triad_size_desired,
+                output_size.x/mask_num_triads_desired,
+                mask_specify_num_triads);
+            const float bloom_sigma = get_min_sigma_to_blur_triad(
+                desired_triad_size, bloom_diff_thresh);
+            const float center_weight = get_center_weight(bloom_sigma);
+            const float3 max_area_contribution_approx =
+                max(0.0.xxx, phosphor_blur_approx -
+                center_weight * phosphor_emission);
+            const float3 area_contrib_underestimate =
+                bloom_underestimate_levels * max_area_contribution_approx;
+            const float3 blend_ratio_temp =
+                ((1.0.xxx - area_contrib_underestimate) /
+                area_max_underestimate - 1.0.xxx) / (center_weight - 1.0);
+        #endif
+        //  Clamp blend_ratio in case it's out-of-range, but be SUPER careful:
+        //  min/max/clamp are BIZARRELY broken with lerp (optimization bug?),
+        //  and this redundant sequence avoids bugs, at least on nVidia cards:
+        //  (Do not simplify the max(clamp(...)) below; the redundancy is the
+        //  workaround described above.)
+        const float3 blend_ratio_clamped = max(clamp(blend_ratio_temp, 0.0, 1.0), 0.0);
+        const float3 blend_ratio = lerp(blend_ratio_clamped, 1.0.xxx, bloom_excess);
+        //  Blend the blurred and unblurred images:
+        const float3 phosphor_emission_unclipped =
+            lerp(phosphor_emission, phosphor_blur_approx, blend_ratio);
+        //  Simulate refractive diffusion by reusing the halation sample.
+        const float3 pixel_color = lerp(phosphor_emission_unclipped,
+            halation_color, diffusion_weight);
+    #else
+        //  No fake bloom: pass the still-dimmed masked emission through.
+        const float3 pixel_color = phosphor_emission_dim;
+    #endif
+    //  Encode if necessary, and output.
+    return encode_output(float4(pixel_color, 1.0));
+}
+
--- a/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-vertical-interlacing.fxh
+++ b/data/resources/shaders/reshade/Shaders/crt/crt-royale/src/crt-royale-scanlines-vertical-interlacing.fxh
@ -0,0 +1,241 @@
+/////////////////////////////  GPL LICENSE NOTICE  /////////////////////////////
+
+//  crt-royale: A full-featured CRT shader, with cheese.
+//  Copyright (C) 2014 TroggleMonkey <trogglemonkey@gmx.com>
+//
+//  This program is free software; you can redistribute it and/or modify it
+//  under the terms of the GNU General Public License as published by the Free
+//  Software Foundation; either version 2 of the License, or any later version.
+//
+//  This program is distributed in the hope that it will be useful, but WITHOUT
+//  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+//  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+//  more details.
+//
+//  You should have received a copy of the GNU General Public License along with
+//  this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+//  Place, Suite 330, Boston, MA 02111-1307 USA
+
+#undef FIRST_PASS
+//////////////////////////////////  INCLUDES  //////////////////////////////////
+
+//#include "../include/user-settings.fxh"
+//#include "../include/derived-settings-and-constants.fxh"
+#include "../include/bind-shader-params.fxh"
+#include "../include/scanline-functions.fxh"
+//#include "../include/gamma-management.fxh"
+
+/////////////////////////////////  STRUCTURES  /////////////////////////////////
+
+//  Interpolants passed from VS_Scanlines_Vertical_Interlacing to
+//  PS_Scanlines_Vertical_Interlacing.  TEXCOORD0 is not used here; the
+//  shaders declare a separate texcoord output/input with the TEXCOORD semantic.
+struct out_vertex_p1
+{
+    //  Use explicit semantics so COLORx doesn't clamp values outside [0, 1].
+    float2 tex_uv                   : TEXCOORD1;
+    float2 uv_step                  : TEXCOORD2;    //  uv size of a texel (x) and scanline (y)
+    float2 il_step_multiple         : TEXCOORD3;    //  (1, 1) = progressive, (1, 2) = interlaced
+    float pixel_height_in_scanlines : TEXCOORD4;    //  Height of an output pixel in scanlines
+};
+
+
+////////////////////////////////  VERTEX SHADER  ///////////////////////////////
+
+//  Vertex shader for the vertical-scanline / interlacing pass.  Emits one
+//  oversized triangle that covers the whole screen and precomputes the
+//  scanline stepping data consumed by PS_Scanlines_Vertical_Interlacing.
+//      id:       vertex index (0, 1 or 2)
+//      position: clip-space position of the vertex
+//      texcoord: uv of the vertex, (0,0), (0,2) or (2,0)
+//      OUT:      interpolants described by out_vertex_p1
+void VS_Scanlines_Vertical_Interlacing(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 texcoord : TEXCOORD, out out_vertex_p1 OUT)
+{
+    //  Vertex 1 is pushed to v = 2 and vertex 2 to u = 2, so the triangle
+    //  spans clip space from (-1, 1) to (3, 1) and (-1, -3).
+    texcoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
+    position = float4(float2(-1.0, 1.0) + float2(2.0, -2.0) * texcoord, 0.0, 1.0);
+
+    OUT.tex_uv = texcoord;
+
+    //  Step multiple between consecutive lines of one field: 1 for a
+    //  progressive source, 2 when is_interlaced() reports an interlaced one.
+    const float field_line_step = 1.0 + float(is_interlaced(video_size.y));
+    const float2 step_multiple = float2(1.0, field_line_step);
+    OUT.il_step_multiple = step_multiple;
+
+    //  uv distance covered by one texel (x) and one same-field scanline (y):
+    float2 scanline_tex_size = VERTICAL_SCANLINES_texture_size;
+    OUT.uv_step = step_multiple / scanline_tex_size;
+
+    //  The {sigma, shape} ranges of the beam are intentionally not computed
+    //  here; the pixel shader derives them itself.
+
+    //  Height of one output pixel measured in scanlines, needed for
+    //  antialiased/integral sampling.  Only the viewport height matters.
+    const float viewport_height_px = VIEWPORT_SIZE.y;
+    OUT.pixel_height_in_scanlines =
+        (video_size.y / viewport_height_px) / field_line_step;
+}
+
+
+///////////////////////////////  FRAGMENT SHADER  //////////////////////////////
+
+//  Pixel shader for the vertical-scanline / interlacing pass.
+//      vpos:      pixel position (not referenced in the body)
+//      vTexCoord: fullscreen-triangle uv (not referenced in the body)
+//      VAR:       interpolants written by VS_Scanlines_Vertical_Interlacing
+//  Returns encode_output() of the summed scanline contributions scaled by
+//  levels_autodim_temp, with alpha 1.0.
+float4 PS_Scanlines_Vertical_Interlacing(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in out_vertex_p1 VAR) : SV_Target
+{
+    //  This pass: Sample multiple (misconverged?) scanlines to the final
+    //  vertical resolution.  Temporarily auto-dim the output to avoid clipping.
+
+    //  Read some attributes into local variables:
+    const float2 texture_size = VERTICAL_SCANLINES_texture_size;
+    const float2 texture_size_inv = 1.0/texture_size;
+    const float2 uv_step = VAR.uv_step;
+    const float2 il_step_multiple = VAR.il_step_multiple;
+    const float frame_count = FrameCount;
+    const float ph = VAR.pixel_height_in_scanlines;
+
+    //  Get the uv coords of the previous scanline (in this field), and the
+    //  scanline's distance from this sample, in scanlines.
+    float dist;
+    const float2 scanline_uv = get_last_scanline_uv(VAR.tex_uv, texture_size,
+        texture_size_inv, il_step_multiple, frame_count, dist);
+
+    //  Consider 2, 3, 4, or 6 scanlines numbered 0-5: The previous and next
+    //  scanlines are numbered 2 and 3.  Get scanline colors (ignore
+    //  horizontal sampling, since IN.output_size.x = video_size.x).
+    //  NOTE: Anisotropic filtering creates interlacing artifacts, which is why
+    //  ORIG_LINEARIZED bobbed any interlaced input before this pass.
+    const float2 v_step = float2(0.0, uv_step.y);
+    const float3 scanline2_color = tex2D_linearize(ORIG_LINEARIZED, scanline_uv).rgb;
+    const float3 scanline3_color =
+        tex2D_linearize(ORIG_LINEARIZED, scanline_uv + v_step).rgb;
+    //  The variables below are assigned only in the branch that needs them;
+    //  the contribution chain further down reads them under the same
+    //  beam_num_scanlines conditions, so the two chains must stay in sync.
+    float3 scanline0_color, scanline1_color, scanline4_color, scanline5_color,
+        scanline_outside_color;
+    float dist_round;
+    //  Use scanlines 0, 1, 4, and 5 for a total of 6 scanlines:
+    if(beam_num_scanlines > 5.5)
+    {
+        scanline1_color =
+            tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
+        scanline4_color =
+            tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
+        scanline0_color =
+            tex2D_linearize(ORIG_LINEARIZED, scanline_uv - 2.0 * v_step).rgb;
+        scanline5_color =
+            tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 3.0 * v_step).rgb;
+    }
+    //  Use scanlines 1, 4, and either 0 or 5 for a total of 5 scanlines:
+    else if(beam_num_scanlines > 4.5)
+    {
+        scanline1_color =
+            tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
+        scanline4_color =
+            tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
+        //  dist is in [0, 1]
+        //  Rounding it picks the offset of scanline 0 (dist_round = 0) or
+        //  scanline 5 (dist_round = 1) through the lerp below.
+        dist_round = round(dist);
+        const float2 sample_0_or_5_uv_off =
+            lerp(-2.0 * v_step, 3.0 * v_step, dist_round);
+        //  Call this "scanline_outside_color" to cope with the conditional
+        //  scanline number:
+        scanline_outside_color = tex2D_linearize(
+            ORIG_LINEARIZED, scanline_uv + sample_0_or_5_uv_off).rgb;
+    }
+    //  Use scanlines 1 and 4 for a total of 4 scanlines:
+    else if(beam_num_scanlines > 3.5)
+    {
+        scanline1_color =
+            tex2D_linearize(ORIG_LINEARIZED, scanline_uv - v_step).rgb;
+        scanline4_color =
+            tex2D_linearize(ORIG_LINEARIZED, scanline_uv + 2.0 * v_step).rgb;
+    }
+    //  Use scanline 1 or 4 for a total of 3 scanlines:
+    else if(beam_num_scanlines > 2.5)
+    {
+        //  dist is in [0, 1]
+        //  Rounding it picks the offset of scanline 1 (dist_round = 0) or
+        //  scanline 4 (dist_round = 1) through the lerp below.
+        dist_round = round(dist);
+        const float2 sample_1or4_uv_off =
+            lerp(-v_step, 2.0 * v_step, dist_round);
+        scanline_outside_color = tex2D_linearize(
+            ORIG_LINEARIZED, scanline_uv + sample_1or4_uv_off).rgb;
+    }
+    
+    //  Compute scanline contributions, accounting for vertical convergence.
+    //  Vertical convergence offsets are in units of current-field scanlines.
+    //  dist2 means "positive sample distance from scanline 2, in scanlines:"
+    float3 dist2 = dist.xxx;
+    if(beam_misconvergence)
+    {
+        const float3 convergence_offsets_vert_rgb =
+            get_convergence_offsets_y_vector();
+        dist2 = dist.xxx - convergence_offsets_vert_rgb;
+    }
+    //  Calculate {sigma, shape}_range outside of scanline_contrib so it's only
+    //  done once per pixel (not 6 times) with runtime params.  Don't reuse the
+    //  vertex shader calculations, so static versions can be constant-folded.
+    const float sigma_range = max(beam_max_sigma, beam_min_sigma) -
+        beam_min_sigma;
+    const float shape_range = max(beam_max_shape, beam_min_shape) -
+        beam_min_shape;
+    //  Calculate and sum final scanline contributions, starting with lines 2/3.
+    //  There is no normalization step, because we're not interpolating a
+    //  continuous signal.  Instead, each scanline is an additive light source.
+    const float3 scanline2_contrib = scanline_contrib(dist2,
+        scanline2_color, ph, sigma_range, shape_range);
+    const float3 scanline3_contrib = scanline_contrib(abs(1.0.xxx - dist2),
+        scanline3_color, ph, sigma_range, shape_range);
+    float3 scanline_intensity = scanline2_contrib + scanline3_contrib;
+    //  Same branch conditions as the sampling chain above: each branch adds
+    //  the contributions of exactly the scanlines that were sampled there.
+    if(beam_num_scanlines > 5.5)
+    {
+        const float3 scanline0_contrib =
+            scanline_contrib(dist2 + 2.0.xxx, scanline0_color,
+                ph, sigma_range, shape_range);
+        const float3 scanline1_contrib =
+            scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
+                ph, sigma_range, shape_range);
+        const float3 scanline4_contrib =
+            scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
+                ph, sigma_range, shape_range);
+        const float3 scanline5_contrib =
+            scanline_contrib(abs(3.0.xxx - dist2), scanline5_color,
+                ph, sigma_range, shape_range);
+        scanline_intensity += scanline0_contrib + scanline1_contrib +
+            scanline4_contrib + scanline5_contrib;
+    }
+    else if(beam_num_scanlines > 4.5)
+    {
+        const float3 scanline1_contrib =
+            scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
+                ph, sigma_range, shape_range);
+        const float3 scanline4_contrib =
+            scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
+                ph, sigma_range, shape_range);
+        //  Distance to whichever of scanline 0 or 5 was sampled above:
+        const float3 dist0or5 = lerp(
+            dist2 + 2.0.xxx, 3.0.xxx - dist2, dist_round);
+        const float3 scanline0or5_contrib = scanline_contrib(
+            dist0or5, scanline_outside_color, ph, sigma_range, shape_range);
+        scanline_intensity += scanline1_contrib + scanline4_contrib +
+            scanline0or5_contrib;
+    }
+    else if(beam_num_scanlines > 3.5)
+    {
+        const float3 scanline1_contrib =
+            scanline_contrib(dist2 + 1.0.xxx, scanline1_color,
+                ph, sigma_range, shape_range);
+        const float3 scanline4_contrib =
+            scanline_contrib(abs(2.0.xxx - dist2), scanline4_color,
+                ph, sigma_range, shape_range);
+        scanline_intensity += scanline1_contrib + scanline4_contrib;
+    }
+    else if(beam_num_scanlines > 2.5)
+    {
+        //  Distance to whichever of scanline 1 or 4 was sampled above:
+        const float3 dist1or4 = lerp(
+            dist2 + 1.0.xxx, 2.0.xxx - dist2, dist_round);
+        const float3 scanline1or4_contrib = scanline_contrib(
+            dist1or4, scanline_outside_color, ph, sigma_range, shape_range);
+        scanline_intensity += scanline1or4_contrib;
+    }
+
+    //  Auto-dim the image to avoid clipping, encode if necessary, and output.
+    //  My original idea was to compute a minimal auto-dim factor and put it in
+    //  the alpha channel, but it wasn't working, at least not reliably.  This
+    //  is faster anyway, levels_autodim_temp = 0.5 isn't causing banding.
+    return encode_output(float4(scanline_intensity * levels_autodim_temp, 1.0));
+}
+