duckstation

duckstation, archived from the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

ntsc-adaptive-lite.fx (15289B)


      1 #include "ReShade.fxh"
      2 
      3 // NTSC-Adaptive-Lite  -  Faster for 2-Phase games (only 15 taps!)
      4 // based on Themaister's NTSC shader
      5 
      6 
      7 uniform int quality <
      8     ui_type = "combo";
      9     ui_items = "Custom\0Svideo\0Composite\0RF\0";
     10     ui_label = "NTSC Preset";
     11 > = 2; // Preset index; the default 2 is "Composite". The pass-0 vertex shader works with (quality - 1),
        // so "Custom" (0) selects cust_fringing / cust_artifacting instead of preset-derived strengths.
     12 
        // Field-merge toggle. Only read when the preset is "Custom"; for the other presets the
        // merge flag is derived from the preset and the phase mode in the pass-0 vertex shader.
     13 uniform bool ntsc_fields <
     14     ui_type = "radio";
     15     ui_label = "NTSC Merge Fields";
     16 > = false;
     17 
        // Phase mode: 0 = Auto (2-phase when the native width is above 300 pixels, otherwise 3-phase),
        // 1 = force 2-phase, 2 = force 3-phase.
     18 uniform int ntsc_phase <
     19     ui_type = "combo";
     20     ui_items = "Auto\0(2-Phase)\0(3-Phase)\0";
     21     ui_label = "NTSC Phase";
     22 > = 0;
     23 
        // Scales the pixel counter used for chroma modulation in pass 0 and divides the
        // tap spacing of the pass-1 filter.
     24 uniform float ntsc_scale <
     25     ui_type = "drag";
     26     ui_min = 0.20;
     27     ui_max = 3.0;
     28     ui_step = 0.05;
     29     ui_label = "NTSC Resolution Scaling";
     30 > = 1.0;
     31 
        // Chroma gain; enters the cross-talk matrix (mix_mat) as 2.0 * ntsc_sat on the I and Q diagonals.
     32 uniform float ntsc_sat <
     33     ui_type = "drag";
     34     ui_min = 0.0;
     35     ui_max = 2.0;
     36     ui_step = 0.01;
     37     ui_label = "NTSC Color Saturation";
     38 > = 1.0;
     39 
        // Luma gain; the top-left entry of the cross-talk matrix (mix_mat).
     40 uniform float ntsc_bright <
     41     ui_type = "drag";
     42     ui_min = 0.0;
     43     ui_max = 1.5;
     44     ui_step = 0.01;
     45     ui_label = "NTSC Brightness";
     46 > = 1.0;
     47 
        // Fringing strength used only when the preset is "Custom".
     48 uniform float cust_fringing <
     49     ui_type = "drag";
     50     ui_min = 0.0;
     51     ui_max = 5.0;
     52     ui_step = 0.1;
     53     ui_label = "NTSC Custom Fringing Value";
     54 > = 0.0;
     55 
        // Artifacting strength used only when the preset is "Custom".
     56 uniform float cust_artifacting <
     57     ui_type = "drag";
     58     ui_min = 0.0;
     59     ui_max = 5.0;
     60     ui_step = 0.1;
     61     ui_label = "NTSC Custom Artifacting Value";
     62 > = 0.0;
     63 
        // Chroma tap-spacing divisor for pass 1. In 3-phase mode it is capped at 2.2;
        // in 2-phase mode half of this value is used.
     64 uniform float chroma_scale <
     65     ui_type = "drag";
     66     ui_min = 0.2;
     67     ui_max = 4.0;
     68     ui_step = 0.1;
     69     ui_label = "NTSC Chroma Scaling";
     70 > = 1.0;
     71 
        // Scales the per-pixel "blend" term that pass 0 adds to the chroma phase multiplier;
        // 0.0 leaves the chroma phase unmodified.
     72 uniform float ntsc_artifacting_rainbow <
     73     ui_type = "drag";
     74     ui_min = -1.0;
     75     ui_max = 1.0;
     76     ui_step = 0.1;
     77     ui_label = "NTSC Artifacting Rainbow Effect";
     78 > = 0.0;
     79 
        // When true, pass 1 raises its output (including alpha, which is 1.0) to the power 2.2.
     80 uniform bool linearize <
     81     ui_type = "radio";
     82     ui_label = "NTSC Linearize Output Gamma";
     83 > = false;
     84 
     85 
     86 uniform float  FrameCount < source = "framecount"; >; // Host-supplied frame counter; pass 0 uses its parity to alternate the chroma phase.
     87 uniform float2 NormalizedNativePixelSize < source = "normalized_native_pixel_size"; >; // Host-supplied; the shaders use 1.0 / this value as the source size in pixels.
     88 uniform float  BufferWidth < source = "bufferwidth"; >; // Host-supplied; combined with BufferHeight into OutputSize.
     89 uniform float  BufferHeight < source = "bufferheight"; >; // Host-supplied; combined with BufferWidth into OutputSize.
     90 
     91 
     92 // RGBA16f is the same as float_framebuffer.
        // Intermediate target: pass 0 renders its modulated YIQ signal here and pass 1 samples it
        // through sNTSC_P0 (linear filtering, clamped addressing).
     93 texture2D tNTSC_P0 < pooled = false; > {Width=BUFFER_WIDTH;Height=BUFFER_HEIGHT;Format=RGBA16f;};
     94 sampler2D sNTSC_P0{Texture=tNTSC_P0;AddressU=CLAMP;AddressV=CLAMP;AddressW=CLAMP;MagFilter=LINEAR;MinFilter=LINEAR;};
     95 
     96 #define PI 3.14159265
        // Output size in pixels, assembled from the host-supplied BufferWidth / BufferHeight uniforms.
     97 #define OutputSize float2(BufferWidth,BufferHeight)
     98 
     99 struct ST_VertexOut
    100 {
            // Values computed in VS_NTSC_ADAPTIVE_P0 and read by PS_NTSC_ADAPTIVE_P0.
    101     float2 pix_no          : TEXCOORD1; // texcoord scaled to output-pixel units and by ntsc_scale
    102     float  phase           : TEXCOORD2; // 2.0 (2-phase) or 3.0 (3-phase)
    103     float  BRIGHTNESS      : TEXCOORD3; // copy of ntsc_bright
    104     float  SATURATION      : TEXCOORD4; // copy of ntsc_sat
    105     float  FRINGING        : TEXCOORD5; // preset-derived value, or cust_fringing for the "Custom" preset
    106     float  ARTIFACTING     : TEXCOORD6; // preset-derived value, or cust_artifacting for the "Custom" preset
    107     float  CHROMA_MOD_FREQ : TEXCOORD7; // 4*PI/15 in 2-phase mode, PI/3 in 3-phase mode
    108     float  MERGE           : TEXCOORD8; // > 0.5 makes the pixel shader average two chroma-phase variants
    109 };
    110 
    111 
    112 // Vertex shader generating a triangle covering the entire screen.
        // Besides position and texcoord it fills ST_VertexOut with the values the pass-0
        // pixel shader needs: the scaled pixel counter, the phase mode, the chroma
        // modulation frequency, the cross-talk strengths and the field-merge flag.
    113 void VS_NTSC_ADAPTIVE_P0(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 TexCoord : TEXCOORD, out ST_VertexOut vVARS)
    114 {
    115     TexCoord.x = (id == 2) ? 2.0 : 0.0;
    116     TexCoord.y = (id == 1) ? 2.0 : 0.0;
    117     position = float4(TexCoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0);
    118 
    119     float res = ntsc_scale;
    120     float OriginalSize = 1.0/NormalizedNativePixelSize.x;
    121     float2 SourceSize  = 1.0/NormalizedNativePixelSize;
    122 
            // Scales below 1.0 shrink the pixel counter on both axes; scales of 1.0 and above
            // are applied to x only, further down.
    123     if (res < 1.0) vVARS.pix_no = TexCoord * SourceSize.xy * (res * OutputSize.xy / SourceSize.xy); else
    124                    vVARS.pix_no = TexCoord * SourceSize.xy * (      OutputSize.xy / SourceSize.xy);
            // ntsc_phase is a combo with values 0 (Auto), 1 (2-Phase) and 2 (3-Phase).
            // The forced-3-phase test must be "> 1": the previous "> 2" could never be true, so
            // selecting "(3-Phase)" produced 2-phase modulation here while PS_NTSC_ADAPTIVE_P1
            // (which tests "> 1") filtered with the 3-phase kernel.
    125     vVARS.phase = (ntsc_phase < 1) ? ((OriginalSize > 300.0) ? 2.0 : 3.0) : ((ntsc_phase > 1) ? 3.0 : 2.0);
    126     
            // -1 = Custom, 0 = Svideo, 1 = Composite, 2 = RF.
    127     float Quality = float(quality-1);
    128 
    129     res = max(res, 1.0);    
    130     vVARS.CHROMA_MOD_FREQ = (vVARS.phase < 2.5) ? (4.0 * PI / 15.0) : (PI / 3.0);
            // Non-custom presets derive both strengths from the preset index; "Custom" uses the sliders.
    131     vVARS.ARTIFACTING = (Quality > -0.5) ? Quality * 0.5*(res+1.0) : cust_artifacting;
    132     vVARS.FRINGING = (Quality > -0.5) ? Quality : cust_fringing;
    133     vVARS.SATURATION = ntsc_sat;
    134     vVARS.BRIGHTNESS = ntsc_bright;    
    135     vVARS.pix_no.x = vVARS.pix_no.x * res;
    136 
            // Presets merge fields only in 3-phase mode and never for RF; "Custom" follows ntsc_fields.
    137     vVARS.MERGE = (Quality == 2.0 || vVARS.phase < 2.5) ? 0.0 : 1.0;
    138     vVARS.MERGE = (Quality == -1.0) ? float(ntsc_fields == true) : vVARS.MERGE;
    139 }
    140 
    141 #define mix_mat float3x3(vVARS.BRIGHTNESS, vVARS.FRINGING, vVARS.FRINGING, vVARS.ARTIFACTING, 2.0 * vVARS.SATURATION, 0.0, vVARS.ARTIFACTING, 0.0, 2.0 * vVARS.SATURATION)
        // mix_mat (above) is the luma/chroma cross-talk matrix applied to a YIQ vector via mul(mix_mat, yiq):
        // row 0 mixes I and Q into Y with weight FRINGING, rows 1-2 mix Y into I and Q with weight
        // ARTIFACTING. It expands in place, so a variable named vVARS must be in scope where it is used.
    142 
    143 static const float3x3 yiq2rgb_mat = float3x3(
            // Each row holds the (Y, I, Q) weights of one output channel: R, G, B.
    144    1.0, 0.956, 0.6210,
    145    1.0, -0.2720, -0.6474,
    146    1.0, -1.1060, 1.7046);
    147 
    148 float3 yiq2rgb(float3 yiq)
    149 {
           // Convert a YIQ triple to RGB by multiplying with yiq2rgb_mat (matrix on the left).
    150    float3 rgb = mul(yiq2rgb_mat, yiq);
           return rgb;
    151 }
    152 
    153 static const float3x3 yiq_mat = float3x3(
              // Each row holds the (R, G, B) weights of one output component: Y, I, Q.
    154       0.2989, 0.5870, 0.1140,
    155       0.5959, -0.2744, -0.3216,
    156       0.2115, -0.5229, 0.3114
    157 );
    158 
    159 float3 rgb2yiq(float3 col)
    160 {
           // Convert an RGB triple to YIQ by multiplying with yiq_mat (matrix on the left).
    161    float3 yiq = mul(yiq_mat, col);
           return yiq;
    162 }
    163 
    164 static const float3 Y = float3( 0.299,  0.587,  0.114); // RGB weights used by df3 to collapse a per-channel result to one scalar.
    165 
    166 float df3(float3 a, float3 b, float3 c)
    167 {
            // Per channel, (b - a) * (b - c) is positive only when b lies above both neighbours
            // or below both, i.e. b is a local extremum between a and c.
    168     float3 extremum = 3.0*(b - a) * (b - c);
            // Soft-threshold each channel to [0, 1] and weight the channels with Y.
            float3 response = smoothstep(0.0, 0.56, extremum);
            return dot(response, Y);
    169 }
    170 
    171 
    172 float4 PS_NTSC_ADAPTIVE_P0(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD, in ST_VertexOut vVARS) : SV_Target
    173 {
           // Pass 0: convert the back buffer to YIQ, modulate chroma with a per-pixel carrier phase,
           // apply luma/chroma cross-talk (mix_mat) and demodulate. The result is returned as YIQ
           // (alpha 1.0); the second pass filters it and converts back to RGB.
    174    float3 col = tex2D(ReShade::BackBuffer, vTexCoord).rgb;
    175    float3 yiq = rgb2yiq(col);
    176    float3 yiq2 = yiq;
    177 
    178     float4 SourceSize  = float4(1.0/NormalizedNativePixelSize, NormalizedNativePixelSize);
    179 
    180    float mod1 = 2.0;
    181    float mod2 = 3.0;
    182 
           // One native-pixel step in texture coordinates, horizontally and vertically.
    183    float2 dx = float2(1.0, 0.0)*SourceSize.zw;
    184    float2 dy = float2(0.0, 1.0)*SourceSize.zw;
    185 
           // 3x3 neighbourhood around the current pixel.
    186    float3 C = tex2D(ReShade::BackBuffer, vTexCoord    ).xyz;
    187    float3 L = tex2D(ReShade::BackBuffer, vTexCoord -dx).xyz;
    188    float3 R = tex2D(ReShade::BackBuffer, vTexCoord +dx).xyz;
    189    float3 U = tex2D(ReShade::BackBuffer, vTexCoord -dy).xyz;
    190    float3 D = tex2D(ReShade::BackBuffer, vTexCoord +dy).xyz;
    191    float3 UL = tex2D(ReShade::BackBuffer, vTexCoord -dx -dy).xyz;
    192    float3 UR = tex2D(ReShade::BackBuffer, vTexCoord +dx -dy).xyz;
    193    float3 DL = tex2D(ReShade::BackBuffer, vTexCoord -dx +dy).xyz;
    194    float3 DR = tex2D(ReShade::BackBuffer, vTexCoord +dx +dy).xyz;
    195 
           // hori is 1 when all three rows show a horizontal extremum at the centre column;
           // vert is 1 unless all three columns show a vertical extremum at the centre row.
    196    float hori = step(0.01,(df3(L, C, R) * df3(UL, U, UR) * df3(DL, D, DR)));
    197    float vert = 1.0 - step(0.01,(df3(U, C, D) * df3(UL, L, DL) * df3(UR, R, DR)));
    198 
           // Rainbow term: nonzero only where hori and vert are both 1, scaled by the user setting.
    199    float blend = hori * vert * ntsc_artifacting_rainbow;
    200 
    201 if (vVARS.MERGE > 0.5)
    202 {
           // Second field for merging. It must use the opposite frame parity from chroma_phase
           // below; previously both used (FrameCount+1.) % 2., so yiq2 equalled yiq and the
           // final average left the signal unchanged.
    203    float chroma_phase2 = (vVARS.phase < 2.5) ? PI * ((vVARS.pix_no.y % mod1) + (FrameCount % 2.)) : 0.6667 * PI * ((vVARS.pix_no.y % mod2) + (FrameCount % 2.));
    204    float mod_phase2 = (blend + 1.0) * chroma_phase2 + vVARS.pix_no.x * vVARS.CHROMA_MOD_FREQ;
    205    float i_mod2 = cos(mod_phase2);
    206    float q_mod2 = sin(mod_phase2);
    207    yiq2.yz *= float2(i_mod2, q_mod2); // Modulate.
    208    yiq2 = mul(mix_mat, yiq2); // Cross-talk.
    209    yiq2.yz *= float2(i_mod2, q_mod2); // Demodulate.   
    210 }
    211   
           // Carrier phase for the current frame: advances per scanline (period 2 or 3 lines
           // depending on the phase mode) and alternates with frame parity.
    212    float chroma_phase = (vVARS.phase < 2.5) ? PI * ((vVARS.pix_no.y % mod1) + ((FrameCount+1.) % 2.)) : 0.6667 * PI * ((vVARS.pix_no.y % mod2) + ((FrameCount+1.) % 2.));
    213    float mod_phase = (blend + 1.0) * chroma_phase + vVARS.pix_no.x * vVARS.CHROMA_MOD_FREQ;
    214 
    215 
    216    float i_mod = cos(mod_phase);
    217    float q_mod = sin(mod_phase);
    218 
    219    yiq.yz *= float2(i_mod, q_mod); // Modulate.
    220    yiq = mul(mix_mat, yiq); // Cross-talk.
    221    yiq.yz *= float2(i_mod, q_mod); // Demodulate.
    222       
           // With merging enabled, output the mean of the two opposite-parity fields.
    223    yiq = (vVARS.MERGE < 0.5) ? yiq : 0.5*(yiq+yiq2);
    224    
    225    return float4(yiq, 1.0);
    226 }
    227 
    228 
    229 // Full-screen triangle vertex shader for the second pass.
        // NOTE(review): the technique in this file binds PostProcessVS for its second pass,
        // so this function appears to be unused here.
    230 void VS_NTSC_ADAPTIVE_P1(in uint id : SV_VertexID, out float4 position : SV_Position, out float2 TexCoord : TEXCOORD)
    231 {
            // Vertex 1 -> (0, 2), vertex 2 -> (2, 0), any other id -> (0, 0).
    232     TexCoord = float2((id == 2) ? 2.0 : 0.0, (id == 1) ? 2.0 : 0.0);
            // Map the [0, 2] texcoord range to clip space, flipping the Y axis.
    234     position = float4(TexCoord.x * 2.0 - 1.0, 1.0 - TexCoord.y * 2.0, 0.0, 1.0);
    235 }
    236 
    237 
    238 float3 fetch_offset(sampler2D Source, float2 tex, float offset, float2 one_x)
    239 {
           /* Chroma scaling, thanks to guest.r's ideas: luma and chroma are fetched at
              different horizontal distances. one_x.x is the per-tap step for luma (x channel),
              one_x.y the per-tap step for chroma (y and z channels). */
    244    float2 luma_uv   = tex + float2((offset) * (one_x.x), 0.0);
    245    float2 chroma_uv = tex + float2((offset) * (one_x.y), 0.0);
    246 
    247    return float3(tex2D(Source, luma_uv).x, tex2D(Source, chroma_uv).yz);
    252 }
    253 
    254 /* These are accurate and normalized coeffs. */
        /* Half of a symmetric kernel: in PS_NTSC_ADAPTIVE_P1, entry i weights the two taps at
           offsets -(TAPS_3_phase - i) and +(TAPS_3_phase - i), and the last entry
           (index TAPS_3_phase) weights the centre tap. This table filters the luma (x) channel. */
    255 static const int TAPS_3_phase = 24;
    256 static const float luma_filter_3_phase[25] = {
    257 -0.0000120203033684164,
    258 -0.0000221465589348544,
    259 -0.0000131553320142694,
    260 -0.0000120203033684164,
    261 -0.0000499802614018372,
    262 -0.000113942875690297,
    263 -0.000122153082899506,
    264 -5.61214E-06,
    265 0.000170520303591422,
    266 0.000237204986579451,
    267 0.000169644281482376,
    268 0.000285695210375719,
    269 0.000984598849305758,
    270 0.0020187339488074,
    271 0.00200232553469184,
    272 -0.000909904964181485,
    273 -0.00704925890919635,
    274 -0.0132231937269633,
    275 -0.0126072491817548,
    276 0.00246092210875218,
    277 0.0358691302651096,
    278 0.0840185734607569,
    279 0.135566921437963,
    280 0.175265691355518,
    281 0.190181351796957};
    282 
    283 /* These are accurate and normalized coeffs. */
        /* Chroma (y and z channel) counterpart of the 3-phase luma table, indexed the same way
           in PS_NTSC_ADAPTIVE_P1: entries 0..23 weight symmetric tap pairs, entry 24 the centre tap. */
    284 static const float chroma_filter_3_phase[25] = {
    285 -0.000135741056915795,
    286 -0.000568115749081878,
    287 -0.00130605691082327,
    288 -0.00231369942971182,
    289 -0.00350569685928248,
    290 -0.00474731062446688,
    291 -0.00585980203774502,
    292 -0.00663114046295865,
    293 -0.00683148404964774,
    294 -0.00623234997205773,
    295 -0.00462792764511295,
    296 -0.00185665431957684,
    297 0.00217899013894782,
    298 0.00749647783836479,
    299 0.0140227874371299,
    300 0.021590863169257,
    301 0.0299437436530477,
    302 0.0387464461271303,
    303 0.0476049759842373,
    304 0.0560911497485196,
    305 0.0637713405314321,
    306 0.0702368383153846,
    307 0.0751333078160781,
    308 0.0781868487834974,
    309 0.0792244191487085};
    310 
    311 
    312 /* These are accurate and normalized coeffs. Though they don't produce ideal smooth vertical lines transparency. */
        /* Half of a symmetric kernel for the luma (x) channel in 2-phase mode. In
           PS_NTSC_ADAPTIVE_P1, entry i weights the taps at offsets -(15 - i) and +(15 - i); that
           code is unrolled with the literal 15.0, and TAPS_2_phase is used only to index the
           centre-tap weight (the last entry). */
    313 static const int TAPS_2_phase = 15;
    314 static const float luma_filter_2_phase[16] = {
    315 0.00134372867555492,
    316 0.00294231678339247,
    317 0.00399617683765551,
    318 0.00303632635732925,
    319 -0.00110556727614119,
    320 -0.00839970341605087,
    321 -0.0169515379999301,
    322 -0.0229874881474188,
    323 -0.0217113019865528,
    324 -0.00889151239892142,
    325 0.0173269874254282,
    326 0.0550969075027442,
    327 0.098655909675851,
    328 0.139487291941771,
    329 0.168591277052964,
    330 0.17914037794465};
    331 
    332 
    333 /* These are accurate and normalized coeffs. */
        /* Chroma (y and z channel) counterpart of the 2-phase luma table, indexed the same way
           in PS_NTSC_ADAPTIVE_P1: entries 0..14 weight symmetric tap pairs, entry 15 the centre tap. */
    334 static const float chroma_filter_2_phase[16] = {
    335 0.00406084767413046,
    336 0.00578573638571078,
    337 0.00804447474387669,
    338 0.0109152541019797,
    339 0.0144533032717188,
    340 0.0186765858322351,
    341 0.0235518468184291,
    342 0.0289834149989225,
    343 0.034807373222651,
    344 0.0407934139180355,
    345 0.0466558344725586,
    346 0.0520737649339226,
    347 0.0567190701585739,
    348 0.0602887575746322,
    349 0.0625375226221969,
    350 0.0633055985408521};
    351 
    352 
    353 
    354 float4 PS_NTSC_ADAPTIVE_P1(float4 vpos: SV_Position, float2 vTexCoord : TEXCOORD) : SV_Target
    355 {
            // Pass 1: horizontally filter the YIQ signal written by pass 0 (sNTSC_P0) with the
            // symmetric 2-phase or 3-phase kernel, convert the result to RGB and optionally
            // apply a 2.2 power curve.
    356     float4 SourceSize  = float4(BufferWidth, 1.0/NormalizedNativePixelSize.y, 1.0/BufferWidth, NormalizedNativePixelSize.y);
    357 
    358    float res = ntsc_scale;
    359    float OriginalSize = 1.0/NormalizedNativePixelSize.x;
    360    float3 signal = float3(0.0, 0.0, 0.0);
           // ntsc_phase: 0 = Auto (by native width), 1 = force 2-phase, 2 = force 3-phase.
    361    float phase = (ntsc_phase < 1) ? ((OriginalSize > 300.0) ? 2.0 : 3.0) : ((ntsc_phase > 1) ? 3.0 : 2.0);
    362 
           // Effective chroma divisor derived from the chroma_scale uniform. It has its own name
           // so the initializer unambiguously reads the uniform; it was previously a local also
           // called chroma_scale, which shadowed the uniform inside its own initializer.
    363    float chroma_div = phase > 2.5 ? min(chroma_scale, 2.2) : chroma_scale/2.0;
           // Per-tap horizontal step: x for luma, y for chroma.
    364    float2 one_x = (SourceSize.z / res) * float2(1.0, 1.0 / chroma_div);
    365 
    366    float2 tex = vTexCoord;
    367 
    368    if(phase < 2.5)
    369    {
              // 2-phase kernel, manually unrolled: coefficient i weights the taps at -(15 - i) and +(15 - i).
    370       float3 sums = fetch_offset(sNTSC_P0, tex, 0.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 0.0, one_x);
    371       signal += sums * float3(luma_filter_2_phase[0], chroma_filter_2_phase[0], chroma_filter_2_phase[0]);
    372       sums = fetch_offset(sNTSC_P0, tex, 1.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 1.0, one_x);
    373       signal += sums * float3(luma_filter_2_phase[1], chroma_filter_2_phase[1], chroma_filter_2_phase[1]);
    374       sums = fetch_offset(sNTSC_P0, tex, 2.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 2.0, one_x);
    375       signal += sums * float3(luma_filter_2_phase[2], chroma_filter_2_phase[2], chroma_filter_2_phase[2]);
    376       sums = fetch_offset(sNTSC_P0, tex, 3.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 3.0, one_x);
    377       signal += sums * float3(luma_filter_2_phase[3], chroma_filter_2_phase[3], chroma_filter_2_phase[3]);
    378       sums = fetch_offset(sNTSC_P0, tex, 4.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 4.0, one_x);
    379       signal += sums * float3(luma_filter_2_phase[4], chroma_filter_2_phase[4], chroma_filter_2_phase[4]);
    380       sums = fetch_offset(sNTSC_P0, tex, 5.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 5.0, one_x);
    381       signal += sums * float3(luma_filter_2_phase[5], chroma_filter_2_phase[5], chroma_filter_2_phase[5]);
    382       sums = fetch_offset(sNTSC_P0, tex, 6.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 6.0, one_x);
    383       signal += sums * float3(luma_filter_2_phase[6], chroma_filter_2_phase[6], chroma_filter_2_phase[6]);
    384       sums = fetch_offset(sNTSC_P0, tex, 7.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 7.0, one_x);
    385       signal += sums * float3(luma_filter_2_phase[7], chroma_filter_2_phase[7], chroma_filter_2_phase[7]);
    386       sums = fetch_offset(sNTSC_P0, tex, 8.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 8.0, one_x);
    387       signal += sums * float3(luma_filter_2_phase[8], chroma_filter_2_phase[8], chroma_filter_2_phase[8]);
    388       sums = fetch_offset(sNTSC_P0, tex, 9.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 9.0, one_x);
    389       signal += sums * float3(luma_filter_2_phase[9], chroma_filter_2_phase[9], chroma_filter_2_phase[9]);
    390       sums = fetch_offset(sNTSC_P0, tex, 10.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 10.0, one_x);
    391       signal += sums * float3(luma_filter_2_phase[10], chroma_filter_2_phase[10], chroma_filter_2_phase[10]);
    392       sums = fetch_offset(sNTSC_P0, tex, 11.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 11.0, one_x);
    393       signal += sums * float3(luma_filter_2_phase[11], chroma_filter_2_phase[11], chroma_filter_2_phase[11]);
    394       sums = fetch_offset(sNTSC_P0, tex, 12.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 12.0, one_x);
    395       signal += sums * float3(luma_filter_2_phase[12], chroma_filter_2_phase[12], chroma_filter_2_phase[12]);
    396       sums = fetch_offset(sNTSC_P0, tex, 13.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 13.0, one_x);
    397       signal += sums * float3(luma_filter_2_phase[13], chroma_filter_2_phase[13], chroma_filter_2_phase[13]);
    398       sums = fetch_offset(sNTSC_P0, tex, 14.0 - 15.0, one_x) + fetch_offset(sNTSC_P0, tex, 15.0 - 14.0, one_x);
    399       signal += sums * float3(luma_filter_2_phase[14], chroma_filter_2_phase[14], chroma_filter_2_phase[14]);
    400       
              // Centre tap.
    401       signal += tex2D(sNTSC_P0, vTexCoord).xyz *
    402          float3(luma_filter_2_phase[TAPS_2_phase], chroma_filter_2_phase[TAPS_2_phase], chroma_filter_2_phase[TAPS_2_phase]);
    403    }
    404    else if(phase > 2.5)
    405    {
              // 3-phase kernel: coefficient i weights the taps at -(TAPS_3_phase - i) and +(TAPS_3_phase - i).
    406       for (int i = 0; i < TAPS_3_phase; i++)
    407       {
    408          float offset = float(i);
    409 
    410          float3 sums = fetch_offset(sNTSC_P0, tex, offset - float(TAPS_3_phase), one_x) +
    411             fetch_offset(sNTSC_P0, tex, float(TAPS_3_phase) - offset, one_x);
    412          signal += sums * float3(luma_filter_3_phase[i], chroma_filter_3_phase[i], chroma_filter_3_phase[i]);
    413       }
              // Centre tap.
    414       signal += tex2D(sNTSC_P0, vTexCoord).xyz *
    415          float3(luma_filter_3_phase[TAPS_3_phase], chroma_filter_3_phase[TAPS_3_phase], chroma_filter_3_phase[TAPS_3_phase]);
    416    }
    417 
    418    float3 rgb = yiq2rgb(signal);
    419 
    420    if(linearize == false) return float4(rgb, 1.0);
    421    else return pow(float4(rgb, 1.0), float4(2.2, 2.2, 2.2, 2.2));
    422 }
    423 
    424 technique NTSC_ADAPTIVE
    425 {
            // Pass 0: modulate the back buffer into a cross-talked YIQ signal, rendered into tNTSC_P0.
    426     pass
    427     {
    428         VertexShader = VS_NTSC_ADAPTIVE_P0;
    429         PixelShader  = PS_NTSC_ADAPTIVE_P0;
    430         RenderTarget = tNTSC_P0;
    431     }
            // Pass 1: filter tNTSC_P0 and convert to RGB. No RenderTarget is set for this pass, and
            // it uses PostProcessVS rather than VS_NTSC_ADAPTIVE_P1.
    432     pass
    433     {
    434         VertexShader = PostProcessVS;
    435         PixelShader  = PS_NTSC_ADAPTIVE_P1;
    436     }
    437 }
    437 }