From 1d1296e7aeefdf774e2c8a473fe0415755acc060 Mon Sep 17 00:00:00 2001 From: Daniel Koukola Date: Thu, 20 Nov 2025 02:44:36 +0100 Subject: [PATCH 1/8] shaders: add horizontal RGB subpixel-aware downscaling filter --- src/Backends/DRMBackend.cpp | 3 +- src/Backends/OpenVRBackend.cpp | 2 +- src/Backends/WaylandBackend.cpp | 2 +- src/main.cpp | 4 +- src/main.hpp | 18 +++--- src/rendervulkan.cpp | 98 +++++++++++++++++++++++++--- src/shaders/composite.h | 31 ++++++--- src/shaders/descriptor_set.h | 1 + src/shaders/subpixel_scaler.h | 110 ++++++++++++++++++++++++++++++++ 9 files changed, 237 insertions(+), 32 deletions(-) create mode 100644 src/shaders/subpixel_scaler.h diff --git a/src/Backends/DRMBackend.cpp b/src/Backends/DRMBackend.cpp index 1c7345ddcf..b77d38e8ad 100644 --- a/src/Backends/DRMBackend.cpp +++ b/src/Backends/DRMBackend.cpp @@ -3451,7 +3451,7 @@ namespace gamescope bool bLayer0ScreenSize = close_enough(pFrameInfo->layers[0].scale.x, 1.0f) && close_enough(pFrameInfo->layers[0].scale.y, 1.0f); - bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL) && !bLayer0ScreenSize; + bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB) && !bLayer0ScreenSize; bool bNeedsFullComposite = false; bNeedsFullComposite |= cv_composite_force; @@ -4009,4 +4009,3 @@ int HackyDRMPresent( const FrameInfo_t *pFrameInfo, bool bAsync ) { return static_cast( GetBackend() )->Present( pFrameInfo, bAsync ); } - diff --git a/src/Backends/OpenVRBackend.cpp b/src/Backends/OpenVRBackend.cpp index 13e28e5438..8989f32233 100644 --- a/src/Backends/OpenVRBackend.cpp +++ b/src/Backends/OpenVRBackend.cpp @@ -1494,7 +1494,7 @@ namespace gamescope // TODO: Dedupe some of this composite check code between us and drm.cpp bool bLayer0ScreenSize = close_enough(pFrameInfo->layers[0].scale.x, 1.0f) && close_enough(pFrameInfo->layers[0].scale.y, 1.0f); - bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL) && !bLayer0ScreenSize; + bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB) && !bLayer0ScreenSize; bNeedsFullComposite |= cv_composite_force; bNeedsFullComposite |= pFrameInfo->useFSRLayer0; diff --git a/src/Backends/WaylandBackend.cpp b/src/Backends/WaylandBackend.cpp index ab6d30c397..1bceafb072 100644 --- a/src/Backends/WaylandBackend.cpp +++ b/src/Backends/WaylandBackend.cpp @@ -1057,7 +1057,7 @@ namespace gamescope // TODO: Dedupe some of this composite check code between us and drm.cpp bool bLayer0ScreenSize = close_enough(pFrameInfo->layers[0].scale.x, 1.0f) && close_enough(pFrameInfo->layers[0].scale.y, 1.0f); - bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL) && !bLayer0ScreenSize; + bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB) && !bLayer0ScreenSize; bNeedsFullComposite |= cv_composite_force; bNeedsFullComposite |= pFrameInfo->useFSRLayer0; diff --git a/src/main.cpp b/src/main.cpp index 1397028e7f..5d6ec6e1fb 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -172,7 +172,7 @@ const char usage[] = " -r, --nested-refresh game refresh rate (frames per second)\n" " -m, --max-scale maximum scale factor\n" " -S, --scaler upscaler type (auto, integer, fit, fill, stretch)\n" - " -F, --filter upscaler filter (linear, nearest, fsr, nis, pixel)\n" + " -F, --filter upscaler filter (linear, nearest, fsr, nis, pixel, subpixel_rgb)\n" " fsr => AMD FidelityFX™ Super Resolution 1.0\n" " nis => NVIDIA Image Scaling v1.0.3\n" " --sharpness, --fsr-sharpness upscaler sharpness from 0 (max) to 20 (min)\n" @@ -410,6 +410,8 @@ static enum GamescopeUpscaleFilter parse_upscaler_filter(const char *str) return GamescopeUpscaleFilter::NIS; } else if (strcmp(str, "pixel") == 0) { return GamescopeUpscaleFilter::PIXEL; + } else if (strcmp(str, "subpixel_rgb") == 0) { + return GamescopeUpscaleFilter::SUBPIXEL_RGB; } else { fprintf( stderr, "gamescope: invalid value for --filter\n" ); exit(1); diff --git a/src/main.hpp b/src/main.hpp index 2e6fb833af..ad28c9c1b5 100644 --- a/src/main.hpp +++ b/src/main.hpp @@ -29,15 +29,16 @@ extern bool g_bGrabbed; extern float g_mouseSensitivity; extern const char *g_sOutputName; -enum class GamescopeUpscaleFilter : uint32_t + enum class GamescopeUpscaleFilter : uint32_t { - LINEAR = 0, - NEAREST, - FSR, - NIS, - PIXEL, - - FROM_VIEW = 0xF, // internal + LINEAR = 0, + NEAREST, + FSR, + NIS, + PIXEL, + SUBPIXEL_RGB, + + FROM_VIEW = 0xF, // internal }; static constexpr bool DoesHardwareSupportUpscaleFilter( GamescopeUpscaleFilter eFilter ) @@ -72,4 +73,3 @@ extern int g_nXWaylandCount; extern uint32_t g_preferVendorID; extern uint32_t g_preferDeviceID; - diff --git a/src/rendervulkan.cpp b/src/rendervulkan.cpp index df013147e4..6c4401c182 100644 --- a/src/rendervulkan.cpp +++ b/src/rendervulkan.cpp @@ -3635,6 +3635,93 @@ float g_flInternalDisplayBrightnessNits = 500.0f; float g_flHDRItmSdrNits = 100.f; float g_flHDRItmTargetNits = 1000.f; +struct SubpixelFilterDefinition +{ + GamescopeUpscaleFilter eFilter; + vec2_t downscaleRatio; +}; + +static constexpr std::array g_SubpixelFilterDefinitions = {{ + { GamescopeUpscaleFilter::SUBPIXEL_RGB, { 3.0f, 3.0f } }, +}}; + +static const SubpixelFilterDefinition *FindSubpixelFilterDefinition( GamescopeUpscaleFilter eFilter ) +{ + for ( const auto &definition : g_SubpixelFilterDefinitions ) + { + if ( definition.eFilter == eFilter ) + return &definition; + } + + return nullptr; +} + +static GamescopeUpscaleFilter GetLayerShaderFilter( const FrameInfo_t::Layer_t &layer ) +{ + if ( layer.isScreenSize() || ( layer.filter == GamescopeUpscaleFilter::LINEAR && layer.viewConvertsToLinearAutomatically() ) ) + return GamescopeUpscaleFilter::FROM_VIEW; + + if ( layer.filter == GamescopeUpscaleFilter::SUBPIXEL_RGB ) + { + static int s_lastState = -1; // -1 unknown, 0 inactive, 1 active + + const auto *definition = FindSubpixelFilterDefinition( layer.filter ); + bool haveDefinition = definition != nullptr; + + float dimRatioX = 0.0f; + float dimRatioY = 0.0f; + if ( layer.tex ) + { + dimRatioX = layer.tex->width() / (float)std::max(1u, currentOutputWidth); + dimRatioY = layer.tex->height() / (float)std::max(1u, currentOutputHeight); + } + + auto scaleToRatio = []( float s ) -> float { + if ( s == 0.0f ) + return 0.0f; + return s >= 1.0f ? s : (1.0f / s); + }; + + float scaleRatioX = scaleToRatio( layer.scale.x ); + float scaleRatioY = scaleToRatio( layer.scale.y ); + + // Prefer dimensional ratio; fall back to scale-derived ratio. + float observedX = dimRatioX > 0.0f ? dimRatioX : scaleRatioX; + float observedY = dimRatioY > 0.0f ? dimRatioY : scaleRatioY; + + const float tolerance = 0.05f; + bool ratioOk = haveDefinition + && close_enough( observedX, definition->downscaleRatio.x, tolerance ) + && close_enough( observedY, definition->downscaleRatio.y, tolerance ); + + int state = ratioOk ? 1 : 0; + if ( state != s_lastState ) + { + if ( ratioOk ) + { + vk_log.infof( "Subpixel RGB filter active: scale=(%.3f, %.3f) ratio=(%.3f, %.3f) tex=%ux%u out=%ux%u target=(%.1f, %.1f)", + layer.scale.x, layer.scale.y, observedX, observedY, + layer.tex ? layer.tex->width() : 0, layer.tex ? layer.tex->height() : 0, + currentOutputWidth, currentOutputHeight, + definition ? definition->downscaleRatio.x : 0.0f, definition ? definition->downscaleRatio.y : 0.0f ); + } + else + { + vk_log.warnf( "Subpixel RGB filter disabled (ratio mismatch): scale=(%.3f, %.3f) ratio=(%.3f, %.3f) tex=%ux%u out=%ux%u target=(3.0, 3.0)", + layer.scale.x, layer.scale.y, observedX, observedY, + layer.tex ? layer.tex->width() : 0, layer.tex ? layer.tex->height() : 0, + currentOutputWidth, currentOutputHeight ); + } + s_lastState = state; + } + + if ( !haveDefinition ) + return GamescopeUpscaleFilter::LINEAR; + } + + return layer.filter; +} + #pragma pack(push, 1) struct BlitPushData_t { @@ -3664,10 +3751,7 @@ struct BlitPushData_t scale[i] = layer->scale; offset[i] = layer->offsetPixelCenter(); opacity[i] = layer->opacity; - if (layer->isScreenSize() || (layer->filter == GamescopeUpscaleFilter::LINEAR && layer->viewConvertsToLinearAutomatically())) - u_shaderFilter |= ((uint32_t)GamescopeUpscaleFilter::FROM_VIEW) << (i * 4); - else - u_shaderFilter |= ((uint32_t)layer->filter) << (i * 4); + u_shaderFilter |= ((uint32_t)GetLayerShaderFilter(*layer)) << (i * 4); u_alphaMode |= ((uint32_t)layer->eAlphaBlendingMode) << ( i * 4 ); @@ -3804,10 +3888,8 @@ struct RcasPushData_t { const FrameInfo_t::Layer_t *layer = &frameInfo->layers[i]; - if (i == 0 || layer->isScreenSize() || (layer->filter == GamescopeUpscaleFilter::LINEAR && layer->viewConvertsToLinearAutomatically())) - u_shaderFilter |= ((uint32_t)GamescopeUpscaleFilter::FROM_VIEW) << (i * 4); - else - u_shaderFilter |= ((uint32_t)layer->filter) << (i * 4); + GamescopeUpscaleFilter shaderFilter = i == 0 ? GamescopeUpscaleFilter::FROM_VIEW : GetLayerShaderFilter(*layer); + u_shaderFilter |= ((uint32_t)shaderFilter) << (i * 4); u_alphaMode |= ((uint32_t)layer->eAlphaBlendingMode) << ( i * 4 ); diff --git a/src/shaders/composite.h b/src/shaders/composite.h index 961379b0ab..5f1c85af6b 100644 --- a/src/shaders/composite.h +++ b/src/shaders/composite.h @@ -2,6 +2,7 @@ #include "shaderfilter.h" #include "alphamode.h" +#include "subpixel_scaler.h" vec4 sampleRegular(sampler2D tex, vec2 coord, uint colorspace) { vec4 color = textureLod(tex, coord, 0); @@ -148,7 +149,9 @@ vec4 sampleBilinear(sampler2D tex, vec2 coord, uint colorspace, bool unnormalize vec4 sampleLayerEx(sampler2D layerSampler, uint offsetLayerIdx, uint colorspaceLayerIdx, vec2 uv, bool unnormalized) { vec2 coord = ((uv + u_offset[offsetLayerIdx]) * u_scale[offsetLayerIdx]); - vec2 texSize = textureSize(layerSampler, 0); + vec2 unnormalizedCoord = coord; + ivec2 texSizeInt = textureSize(layerSampler, 0); + vec2 texSize = vec2(texSizeInt); if (coord.x < 0.0f || coord.y < 0.0f || coord.x >= texSize.x || coord.y >= texSize.y) { @@ -165,16 +168,24 @@ vec4 sampleLayerEx(sampler2D layerSampler, uint offsetLayerIdx, uint colorspaceL uint colorspace = get_layer_colorspace(colorspaceLayerIdx); vec4 color; - if (get_layer_shaderfilter(offsetLayerIdx) == filter_pixel) { - vec2 output_res = texSize / u_scale[offsetLayerIdx]; - vec2 extent = max((texSize / output_res), vec2(1.0 / 256.0)); - color = sampleBandLimited(layerSampler, coord, unnormalized ? vec2(1.0f) : texSize, unnormalized ? vec2(1.0f) : vec2(1.0f) / texSize, extent, colorspace, unnormalized); + bool sampled = false; + uint shaderFilter = get_layer_shaderfilter(offsetLayerIdx); + if (shaderFilter == filter_subpixel_rgb && unnormalized) { + sampled = try_sample_subpixel_filter(shaderFilter, layerSampler, unnormalizedCoord, texSizeInt, u_scale[offsetLayerIdx], colorspace, color); } - else if (get_layer_shaderfilter(offsetLayerIdx) == filter_linear_emulated) { - color = sampleBilinear(layerSampler, coord, colorspace, unnormalized); - } - else { - color = sampleRegular(layerSampler, coord, colorspace); + + if (!sampled) { + if (shaderFilter == filter_pixel) { + vec2 output_res = texSize / u_scale[offsetLayerIdx]; + vec2 extent = max((texSize / output_res), vec2(1.0 / 256.0)); + color = sampleBandLimited(layerSampler, coord, unnormalized ? vec2(1.0f) : texSize, unnormalized ? vec2(1.0f) : vec2(1.0f) / texSize, extent, colorspace, unnormalized); + } + else if (shaderFilter == filter_linear_emulated) { + color = sampleBilinear(layerSampler, coord, colorspace, unnormalized); + } + else { + color = sampleRegular(layerSampler, coord, colorspace); + } } // JoshA: AMDGPU applies 3x4 CTM like this, where A is 1.0, but it only affects .rgb. color.rgb = vec4(color.rgb, 1.0f) * u_ctm[colorspaceLayerIdx]; diff --git a/src/shaders/descriptor_set.h b/src/shaders/descriptor_set.h index f2b8527c89..5b6d51a7ca 100644 --- a/src/shaders/descriptor_set.h +++ b/src/shaders/descriptor_set.h @@ -21,6 +21,7 @@ const int filter_nearest = 1; const int filter_fsr = 2; const int filter_nis = 3; const int filter_pixel = 4; +const int filter_subpixel_rgb = 5; const int filter_from_view = 255; const int EOTF_Gamma22 = 0; diff --git a/src/shaders/subpixel_scaler.h b/src/shaders/subpixel_scaler.h new file mode 100644 index 0000000000..5556a2f10a --- /dev/null +++ b/src/shaders/subpixel_scaler.h @@ -0,0 +1,110 @@ +#ifndef SUBPIXEL_SCALER_H +#define SUBPIXEL_SCALER_H + +// Generic subpixel-aware downscale helpers. +// Current implementation targets RGB vertical stripe layouts with a fixed 3:1 ratio. + +const float kSubpixelRatioTolerance = 0.05f; +const vec2 kSubpixelRGBDownscale = vec2(3.0f, 3.0f); +const ivec2 kSubpixelRGBDownscaleInt = ivec2(3); +const float kSubpixelAlphaWeight = 1.0f / 49.0f; + +const vec3 kSubpixelRGBVerticalKernel[7][7] = vec3[7][7]( + vec3[7]( + vec3(-1.0526e-02f, 3.8514e-03f, 1.1707e-02f), + vec3(-5.1071e-02f, -5.4484e-03f, -1.5588e-02f), + vec3(-8.5119e-02f, -5.0128e-02f, -2.6335e-02f), + vec3(-6.1192e-02f, -8.9105e-02f, -5.8017e-02f), + vec3(-9.1344e-03f, -5.3422e-02f, -5.6514e-02f), + vec3( 3.5897e-03f, -6.5463e-03f, -3.8170e-02f), + vec3( 5.5279e-03f, 4.3577e-03f, -2.2234e-02f) + ), + vec3[7]( + vec3(-2.4249e-03f, -4.5839e-03f, -2.0525e-02f), + vec3( 2.6682e-02f, 1.0168e-02f, 2.2323e-02f), + vec3( 2.3826e-02f, 2.2712e-02f, -5.5703e-03f), + vec3( 2.4192e-02f, 2.5182e-02f, 4.2159e-02f), + vec3( 1.0538e-02f, 2.1549e-02f, 1.9901e-03f), + vec3( 1.9939e-03f, 1.0217e-02f, 3.5535e-02f), + vec3(-1.1452e-03f, -3.9492e-03f, -1.5025e-02f) + ), + vec3[7]( + vec3(-1.9276e-03f, -3.3225e-02f, -1.5280e-02f), + vec3( 1.1129e-01f, 1.9505e-02f, -4.1840e-02f), + vec3( 1.5322e-01f, 1.0794e-01f, 2.8163e-02f), + vec3( 1.1598e-01f, 1.6799e-01f, 1.2661e-01f), + vec3( 2.3158e-02f, 1.0714e-01f, 1.7142e-01f), + vec3(-2.1166e-02f, 1.9900e-02f, 1.0636e-01f), + vec3(-1.9158e-02f, -3.2187e-02f, -3.2487e-03f) + ), + vec3[7]( + vec3(-9.8290e-03f, -5.6365e-02f, -4.0351e-02f), + vec3( 1.7032e-01f, 2.1324e-02f, 5.7782e-03f), + vec3( 2.3965e-01f, 1.6300e-01f, 5.7683e-02f), + vec3( 1.8491e-01f, 2.5932e-01f, 1.6544e-01f), + vec3( 2.8468e-02f, 1.6246e-01f, 1.8679e-01f), + vec3(-4.0587e-02f, 2.2935e-02f, 1.5421e-01f), + vec3(-4.3002e-02f, -5.5869e-02f, 1.2911e-02f) + ), + vec3[7]( + vec3(-2.0463e-03f, -3.3211e-02f, -4.0655e-03f), + vec3( 1.1078e-01f, 1.9467e-02f, -3.8520e-02f), + vec3( 1.5392e-01f, 1.0804e-01f, 4.1863e-02f), + vec3( 1.1533e-01f, 1.6802e-01f, 9.2533e-02f), + vec3( 2.3271e-02f, 1.0719e-01f, 1.6531e-01f), + vec3(-2.1582e-02f, 1.9986e-02f, 1.0219e-01f), + vec3(-1.7926e-02f, -3.2075e-02f, 1.5415e-02f) + ), + vec3[7]( + vec3(-2.5467e-03f, -4.5877e-03f, -1.0176e-02f), + vec3( 2.6298e-02f, 1.0157e-02f, 2.1141e-02f), + vec3( 2.4420e-02f, 2.2687e-02f, 9.2225e-03f), + vec3( 2.3553e-02f, 2.5167e-02f, 1.2310e-02f), + vec3( 1.0961e-02f, 2.1636e-02f, 1.4885e-03f), + vec3( 1.5187e-03f, 1.0232e-02f, 3.3014e-02f), + vec3(-1.6591e-04f, -3.8778e-03f, -4.7787e-04f) + ), + vec3[7]( + vec3(-1.0415e-02f, 3.8308e-03f, 9.5637e-03f), + vec3(-5.1437e-02f, -5.4800e-03f, -2.3571e-02f), + vec3(-8.4871e-02f, -5.0085e-02f, -2.7867e-02f), + vec3(-6.2043e-02f, -8.9107e-02f, -4.9899e-02f), + vec3(-8.7220e-03f, -5.3422e-02f, -4.5404e-02f), + vec3( 3.4067e-03f, -6.5015e-03f, -3.7778e-02f), + vec3( 6.0927e-03f, 4.4328e-03f, -3.0901e-02f) + ) +); + +bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 coord, ivec2 texSize, vec2 scale, uint colorspace, out vec4 outColor) +{ + if (shaderFilter != filter_subpixel_rgb) + return false; + + if (any(greaterThan(abs(scale - kSubpixelRGBDownscale), vec2(kSubpixelRatioTolerance)))) + return false; + + ivec2 outputIndex = ivec2(floor((coord - vec2(0.5f)) / kSubpixelRGBDownscale)); + ivec2 start = outputIndex * kSubpixelRGBDownscaleInt - ivec2(2); + ivec2 maxCoord = texSize - ivec2(1); + + vec3 accum = vec3(0.0f); + float alpha = 0.0f; + + for (int ky = 0; ky < 7; ky++) { + int sy = clamp(start.y + ky, 0, maxCoord.y); + for (int kx = 0; kx < 7; kx++) { + int sx = clamp(start.x + kx, 0, maxCoord.x); + vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); + vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); + + vec3 kernel = kSubpixelRGBVerticalKernel[ky][kx]; + accum += linearSample * kernel; + alpha += texel.a * kSubpixelAlphaWeight; + } + } + + outColor = vec4(accum, clamp(alpha, 0.0f, 1.0f)); + return true; +} + +#endif From 4f9f9f68ef6e8f3ba9702faa540c2ad91d4326c2 Mon Sep 17 00:00:00 2001 From: Daniel Koukola Date: Fri, 21 Nov 2025 22:45:11 +0100 Subject: [PATCH 2/8] shaders: update horizontal RGB subpixel-aware downscaling filter weights --- src/shaders/subpixel_scaler.h | 104 +++++++++++++++++----------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/src/shaders/subpixel_scaler.h b/src/shaders/subpixel_scaler.h index 5556a2f10a..338bf5aa48 100644 --- a/src/shaders/subpixel_scaler.h +++ b/src/shaders/subpixel_scaler.h @@ -2,76 +2,76 @@ #define SUBPIXEL_SCALER_H // Generic subpixel-aware downscale helpers. -// Current implementation targets RGB vertical stripe layouts with a fixed 3:1 ratio. +// Current implementation targets a horizontal RGB subpixel-aware downscale filter with a fixed 3:1 ratio. const float kSubpixelRatioTolerance = 0.05f; const vec2 kSubpixelRGBDownscale = vec2(3.0f, 3.0f); const ivec2 kSubpixelRGBDownscaleInt = ivec2(3); const float kSubpixelAlphaWeight = 1.0f / 49.0f; -const vec3 kSubpixelRGBVerticalKernel[7][7] = vec3[7][7]( +const vec3 kSubpixelHorizontalRGBKernel[7][7] = vec3[7][7]( vec3[7]( - vec3(-1.0526e-02f, 3.8514e-03f, 1.1707e-02f), - vec3(-5.1071e-02f, -5.4484e-03f, -1.5588e-02f), - vec3(-8.5119e-02f, -5.0128e-02f, -2.6335e-02f), - vec3(-6.1192e-02f, -8.9105e-02f, -5.8017e-02f), - vec3(-9.1344e-03f, -5.3422e-02f, -5.6514e-02f), - vec3( 3.5897e-03f, -6.5463e-03f, -3.8170e-02f), - vec3( 5.5279e-03f, 4.3577e-03f, -2.2234e-02f) + vec3( 7.1068e-03f, -5.4910e-03f, -1.7302e-03f), + vec3(-4.9596e-02f, 1.4541e-03f, -8.1770e-03f), + vec3(-1.1168e-01f, -3.9356e-02f, 1.0656e-02f), + vec3(-3.7548e-02f, -8.4949e-02f, -6.5272e-02f), + vec3( 1.8490e-02f, -4.7630e-02f, -5.5001e-02f), + vec3(-2.2353e-03f, 2.1439e-03f, -4.2040e-02f), + vec3(-9.0465e-04f, -3.0190e-03f, -8.3075e-03f) ), vec3[7]( - vec3(-2.4249e-03f, -4.5839e-03f, -2.0525e-02f), - vec3( 2.6682e-02f, 1.0168e-02f, 2.2323e-02f), - vec3( 2.3826e-02f, 2.2712e-02f, -5.5703e-03f), - vec3( 2.4192e-02f, 2.5182e-02f, 4.2159e-02f), - vec3( 1.0538e-02f, 2.1549e-02f, 1.9901e-03f), - vec3( 1.9939e-03f, 1.0217e-02f, 3.5535e-02f), - vec3(-1.1452e-03f, -3.9492e-03f, -1.5025e-02f) + vec3(-6.2506e-03f, -3.5835e-03f, -8.5086e-03f), + vec3( 3.9232e-02f, 8.1267e-03f, 1.0763e-02f), + vec3( 1.3306e-02f, 2.3506e-02f, 3.9553e-03f), + vec3( 2.7662e-02f, 1.9066e-02f, 2.0329e-02f), + vec3(-5.6325e-03f, 2.6609e-02f, 2.3552e-02f), + vec3( 7.6947e-03f, 5.1926e-03f, 2.7644e-02f), + vec3(-1.1685e-02f, -3.7455e-03f, 2.3616e-03f) ), vec3[7]( - vec3(-1.9276e-03f, -3.3225e-02f, -1.5280e-02f), - vec3( 1.1129e-01f, 1.9505e-02f, -4.1840e-02f), - vec3( 1.5322e-01f, 1.0794e-01f, 2.8163e-02f), - vec3( 1.1598e-01f, 1.6799e-01f, 1.2661e-01f), - vec3( 2.3158e-02f, 1.0714e-01f, 1.7142e-01f), - vec3(-2.1166e-02f, 1.9900e-02f, 1.0636e-01f), - vec3(-1.9158e-02f, -3.2187e-02f, -3.2487e-03f) + vec3(-5.0055e-03f, -1.6617e-02f, -1.8024e-02f), + vec3( 8.8724e-02f, 2.8274e-02f, 2.3960e-04f), + vec3( 1.6728e-01f, 8.3065e-02f, 3.8955e-02f), + vec3( 8.9757e-02f, 1.4706e-01f, 1.0101e-01f), + vec3( 1.7448e-02f, 8.5608e-02f, 1.0663e-01f), + vec3(-1.6968e-02f, 2.8939e-02f, 6.7206e-02f), + vec3(-5.2292e-03f, -1.8133e-02f, 2.9039e-02f) ), vec3[7]( - vec3(-9.8290e-03f, -5.6365e-02f, -4.0351e-02f), - vec3( 1.7032e-01f, 2.1324e-02f, 5.7782e-03f), - vec3( 2.3965e-01f, 1.6300e-01f, 5.7683e-02f), - vec3( 1.8491e-01f, 2.5932e-01f, 1.6544e-01f), - vec3( 2.8468e-02f, 1.6246e-01f, 1.8679e-01f), - vec3(-4.0587e-02f, 2.2935e-02f, 1.5421e-01f), - vec3(-4.3002e-02f, -5.5869e-02f, 1.2911e-02f) + vec3(-6.1220e-02f, -4.9825e-02f, -5.5558e-02f), + vec3( 1.8421e-01f, 1.9944e-02f, -1.7329e-02f), + vec3( 2.6878e-01f, 1.4827e-01f, 3.4439e-02f), + vec3( 1.8127e-01f, 2.5076e-01f, 2.2284e-01f), + vec3(-6.3037e-03f, 1.6033e-01f, 1.7886e-01f), + vec3(-4.9562e-02f, 2.8671e-02f, 1.3686e-01f), + vec3(-1.5705e-02f, -5.4865e-02f, 5.6473e-03f) ), vec3[7]( - vec3(-2.0463e-03f, -3.3211e-02f, -4.0655e-03f), - vec3( 1.1078e-01f, 1.9467e-02f, -3.8520e-02f), - vec3( 1.5392e-01f, 1.0804e-01f, 4.1863e-02f), - vec3( 1.1533e-01f, 1.6802e-01f, 9.2533e-02f), - vec3( 2.3271e-02f, 1.0719e-01f, 1.6531e-01f), - vec3(-2.1582e-02f, 1.9986e-02f, 1.0219e-01f), - vec3(-1.7926e-02f, -3.2075e-02f, 1.5415e-02f) + vec3(-5.4844e-03f, -1.6043e-02f, -1.6392e-02f), + vec3( 8.8965e-02f, 2.8861e-02f, 2.0795e-03f), + vec3( 1.6636e-01f, 8.2952e-02f, 3.8352e-02f), + vec3( 8.7956e-02f, 1.4735e-01f, 1.0193e-01f), + vec3( 1.8432e-02f, 8.5476e-02f, 1.0605e-01f), + vec3(-1.5480e-02f, 2.9659e-02f, 6.8861e-02f), + vec3(-5.6236e-03f, -1.8234e-02f, 2.9642e-02f) ), vec3[7]( - vec3(-2.5467e-03f, -4.5877e-03f, -1.0176e-02f), - vec3( 2.6298e-02f, 1.0157e-02f, 2.1141e-02f), - vec3( 2.4420e-02f, 2.2687e-02f, 9.2225e-03f), - vec3( 2.3553e-02f, 2.5167e-02f, 1.2310e-02f), - vec3( 1.0961e-02f, 2.1636e-02f, 1.4885e-03f), - vec3( 1.5187e-03f, 1.0232e-02f, 3.3014e-02f), - vec3(-1.6591e-04f, -3.8778e-03f, -4.7787e-04f) + vec3(-6.7048e-03f, -3.8673e-03f, -1.0044e-02f), + vec3( 3.9388e-02f, 6.9483e-03f, 1.0357e-02f), + vec3( 1.3296e-02f, 2.3415e-02f, 4.4685e-03f), + vec3( 2.9055e-02f, 1.8918e-02f, 2.0069e-02f), + vec3(-6.1559e-03f, 2.5721e-02f, 2.3438e-02f), + vec3( 7.0322e-03f, 5.6895e-03f, 2.6485e-02f), + vec3(-1.2151e-02f, -4.4146e-03f, 1.3875e-03f) ), vec3[7]( - vec3(-1.0415e-02f, 3.8308e-03f, 9.5637e-03f), - vec3(-5.1437e-02f, -5.4800e-03f, -2.3571e-02f), - vec3(-8.4871e-02f, -5.0085e-02f, -2.7867e-02f), - vec3(-6.2043e-02f, -8.9107e-02f, -4.9899e-02f), - vec3(-8.7220e-03f, -5.3422e-02f, -4.5404e-02f), - vec3( 3.4067e-03f, -6.5015e-03f, -3.7778e-02f), - vec3( 6.0927e-03f, 4.4328e-03f, -3.0901e-02f) + vec3( 7.2912e-03f, -5.3521e-03f, -1.2737e-03f), + vec3(-4.9607e-02f, 1.6422e-03f, -7.8235e-03f), + vec3(-1.1154e-01f, -3.9406e-02f, 1.0291e-02f), + vec3(-3.7869e-02f, -8.5334e-02f, -6.5227e-02f), + vec3( 1.8729e-02f, -4.6851e-02f, -5.4638e-02f), + vec3(-1.2923e-03f, 1.7708e-03f, -4.1082e-02f), + vec3(-2.4411e-04f, -2.8019e-03f, -8.5312e-03f) ) ); @@ -97,7 +97,7 @@ bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); - vec3 kernel = kSubpixelRGBVerticalKernel[ky][kx]; + vec3 kernel = kSubpixelHorizontalRGBKernel[ky][kx]; accum += linearSample * kernel; alpha += texel.a * kSubpixelAlphaWeight; } From 4d91cde39d3204f24161ffe5b11f1d76a09534ae Mon Sep 17 00:00:00 2001 From: Daniel Koukola Date: Fri, 21 Nov 2025 23:57:35 +0100 Subject: [PATCH 3/8] shaders: add RG/B subpixel-aware downscaling filter --- src/Backends/DRMBackend.cpp | 2 +- src/Backends/OpenVRBackend.cpp | 2 +- src/Backends/WaylandBackend.cpp | 2 +- src/main.cpp | 4 +- src/main.hpp | 15 +++--- src/rendervulkan.cpp | 39 +++++++------- src/shaders/descriptor_set.h | 1 + src/shaders/subpixel_scaler.h | 93 +++++++++++++++++++++++++++------ subprojects/glm | 1 + 9 files changed, 113 insertions(+), 46 deletions(-) create mode 160000 subprojects/glm diff --git a/src/Backends/DRMBackend.cpp b/src/Backends/DRMBackend.cpp index b77d38e8ad..c03195b194 100644 --- a/src/Backends/DRMBackend.cpp +++ b/src/Backends/DRMBackend.cpp @@ -3451,7 +3451,7 @@ namespace gamescope bool bLayer0ScreenSize = close_enough(pFrameInfo->layers[0].scale.x, 1.0f) && close_enough(pFrameInfo->layers[0].scale.y, 1.0f); - bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB) && !bLayer0ScreenSize; + bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_OLED) && !bLayer0ScreenSize; bool bNeedsFullComposite = false; bNeedsFullComposite |= cv_composite_force; diff --git a/src/Backends/OpenVRBackend.cpp b/src/Backends/OpenVRBackend.cpp index 8989f32233..23d3075827 100644 --- a/src/Backends/OpenVRBackend.cpp +++ b/src/Backends/OpenVRBackend.cpp @@ -1494,7 +1494,7 @@ namespace gamescope // TODO: Dedupe some of this composite check code between us and drm.cpp bool bLayer0ScreenSize = close_enough(pFrameInfo->layers[0].scale.x, 1.0f) && close_enough(pFrameInfo->layers[0].scale.y, 1.0f); - bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB) && !bLayer0ScreenSize; + bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_OLED) && !bLayer0ScreenSize; bNeedsFullComposite |= cv_composite_force; bNeedsFullComposite |= pFrameInfo->useFSRLayer0; diff --git a/src/Backends/WaylandBackend.cpp b/src/Backends/WaylandBackend.cpp index 1bceafb072..4f8e5aa166 100644 --- a/src/Backends/WaylandBackend.cpp +++ b/src/Backends/WaylandBackend.cpp @@ -1057,7 +1057,7 @@ namespace gamescope // TODO: Dedupe some of this composite check code between us and drm.cpp bool bLayer0ScreenSize = close_enough(pFrameInfo->layers[0].scale.x, 1.0f) && close_enough(pFrameInfo->layers[0].scale.y, 1.0f); - bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB) && !bLayer0ScreenSize; + bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_OLED) && !bLayer0ScreenSize; bNeedsFullComposite |= cv_composite_force; bNeedsFullComposite |= pFrameInfo->useFSRLayer0; diff --git a/src/main.cpp b/src/main.cpp index 5d6ec6e1fb..b2316ed120 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -172,7 +172,7 @@ const char usage[] = " -r, --nested-refresh game refresh rate (frames per second)\n" " -m, --max-scale maximum scale factor\n" " -S, --scaler upscaler type (auto, integer, fit, fill, stretch)\n" - " -F, --filter upscaler filter (linear, nearest, fsr, nis, pixel, subpixel_rgb)\n" + " -F, --filter upscaler filter (linear, nearest, fsr, nis, pixel, subpixel_rgb, subpixel_oled)\n" " fsr => AMD FidelityFX™ Super Resolution 1.0\n" " nis => NVIDIA Image Scaling v1.0.3\n" " --sharpness, --fsr-sharpness upscaler sharpness from 0 (max) to 20 (min)\n" @@ -412,6 +412,8 @@ static enum GamescopeUpscaleFilter parse_upscaler_filter(const char *str) return GamescopeUpscaleFilter::PIXEL; } else if (strcmp(str, "subpixel_rgb") == 0) { return GamescopeUpscaleFilter::SUBPIXEL_RGB; + } else if (strcmp(str, "subpixel_oled") == 0) { + return GamescopeUpscaleFilter::SUBPIXEL_OLED; } else { fprintf( stderr, "gamescope: invalid value for --filter\n" ); exit(1); diff --git a/src/main.hpp b/src/main.hpp index ad28c9c1b5..323d80d998 100644 --- a/src/main.hpp +++ b/src/main.hpp @@ -32,13 +32,14 @@ extern const char *g_sOutputName; enum class GamescopeUpscaleFilter : uint32_t { LINEAR = 0, - NEAREST, - FSR, - NIS, - PIXEL, - SUBPIXEL_RGB, - - FROM_VIEW = 0xF, // internal + NEAREST, + FSR, + NIS, + PIXEL, + SUBPIXEL_RGB, + SUBPIXEL_OLED, + + FROM_VIEW = 0xF, // internal }; static constexpr bool DoesHardwareSupportUpscaleFilter( GamescopeUpscaleFilter eFilter ) diff --git a/src/rendervulkan.cpp b/src/rendervulkan.cpp index 6c4401c182..09eedcc5ae 100644 --- a/src/rendervulkan.cpp +++ b/src/rendervulkan.cpp @@ -3639,10 +3639,12 @@ struct SubpixelFilterDefinition { GamescopeUpscaleFilter eFilter; vec2_t downscaleRatio; + const char *pName; }; -static constexpr std::array g_SubpixelFilterDefinitions = {{ - { GamescopeUpscaleFilter::SUBPIXEL_RGB, { 3.0f, 3.0f } }, +static constexpr std::array g_SubpixelFilterDefinitions = {{ + { GamescopeUpscaleFilter::SUBPIXEL_RGB, { 3.0f, 3.0f }, "horizontal RGB" }, + { GamescopeUpscaleFilter::SUBPIXEL_OLED, { 2.0f, 2.0f }, "RG/B OLED" }, }}; static const SubpixelFilterDefinition *FindSubpixelFilterDefinition( GamescopeUpscaleFilter eFilter ) @@ -3661,12 +3663,10 @@ static GamescopeUpscaleFilter GetLayerShaderFilter( const FrameInfo_t::Layer_t & if ( layer.isScreenSize() || ( layer.filter == GamescopeUpscaleFilter::LINEAR && layer.viewConvertsToLinearAutomatically() ) ) return GamescopeUpscaleFilter::FROM_VIEW; - if ( layer.filter == GamescopeUpscaleFilter::SUBPIXEL_RGB ) + if ( const auto *definition = FindSubpixelFilterDefinition( layer.filter ) ) { - static int s_lastState = -1; // -1 unknown, 0 inactive, 1 active - - const auto *definition = FindSubpixelFilterDefinition( layer.filter ); - bool haveDefinition = definition != nullptr; + static int s_lastState[ g_SubpixelFilterDefinitions.size() ] = { -1, -1 }; + size_t idx = definition - g_SubpixelFilterDefinitions.data(); float dimRatioX = 0.0f; float dimRatioY = 0.0f; @@ -3685,38 +3685,37 @@ static GamescopeUpscaleFilter GetLayerShaderFilter( const FrameInfo_t::Layer_t & float scaleRatioX = scaleToRatio( layer.scale.x ); float scaleRatioY = scaleToRatio( layer.scale.y ); - // Prefer dimensional ratio; fall back to scale-derived ratio. float observedX = dimRatioX > 0.0f ? dimRatioX : scaleRatioX; float observedY = dimRatioY > 0.0f ? dimRatioY : scaleRatioY; const float tolerance = 0.05f; - bool ratioOk = haveDefinition - && close_enough( observedX, definition->downscaleRatio.x, tolerance ) - && close_enough( observedY, definition->downscaleRatio.y, tolerance ); + bool ratioOk = + close_enough( observedX, definition->downscaleRatio.x, tolerance ) && + close_enough( observedY, definition->downscaleRatio.y, tolerance ); int state = ratioOk ? 1 : 0; - if ( state != s_lastState ) + if ( state != s_lastState[idx] ) { if ( ratioOk ) { - vk_log.infof( "Subpixel RGB filter active: scale=(%.3f, %.3f) ratio=(%.3f, %.3f) tex=%ux%u out=%ux%u target=(%.1f, %.1f)", + vk_log.infof( "Subpixel %s filter active: scale=(%.3f, %.3f) ratio=(%.3f, %.3f) tex=%ux%u out=%ux%u target=(%.1f, %.1f)", + definition->pName, layer.scale.x, layer.scale.y, observedX, observedY, layer.tex ? layer.tex->width() : 0, layer.tex ? layer.tex->height() : 0, currentOutputWidth, currentOutputHeight, - definition ? definition->downscaleRatio.x : 0.0f, definition ? definition->downscaleRatio.y : 0.0f ); + definition->downscaleRatio.x, definition->downscaleRatio.y ); } else { - vk_log.warnf( "Subpixel RGB filter disabled (ratio mismatch): scale=(%.3f, %.3f) ratio=(%.3f, %.3f) tex=%ux%u out=%ux%u target=(3.0, 3.0)", + vk_log.warnf( "Subpixel %s filter disabled (ratio mismatch): scale=(%.3f, %.3f) ratio=(%.3f, %.3f) tex=%ux%u out=%ux%u target=(%.1f, %.1f)", + definition->pName, layer.scale.x, layer.scale.y, observedX, observedY, layer.tex ? layer.tex->width() : 0, layer.tex ? layer.tex->height() : 0, - currentOutputWidth, currentOutputHeight ); + currentOutputWidth, currentOutputHeight, + definition->downscaleRatio.x, definition->downscaleRatio.y ); } - s_lastState = state; + s_lastState[idx] = state; } - - if ( !haveDefinition ) - return GamescopeUpscaleFilter::LINEAR; } return layer.filter; diff --git a/src/shaders/descriptor_set.h b/src/shaders/descriptor_set.h index 5b6d51a7ca..148db7aa5d 100644 --- a/src/shaders/descriptor_set.h +++ b/src/shaders/descriptor_set.h @@ -22,6 +22,7 @@ const int filter_fsr = 2; const int filter_nis = 3; const int filter_pixel = 4; const int filter_subpixel_rgb = 5; +const int filter_subpixel_oled = 6; const int filter_from_view = 255; const int EOTF_Gamma22 = 0; diff --git a/src/shaders/subpixel_scaler.h b/src/shaders/subpixel_scaler.h index 338bf5aa48..cc8c353531 100644 --- a/src/shaders/subpixel_scaler.h +++ b/src/shaders/subpixel_scaler.h @@ -2,12 +2,18 @@ #define SUBPIXEL_SCALER_H // Generic subpixel-aware downscale helpers. -// Current implementation targets a horizontal RGB subpixel-aware downscale filter with a fixed 3:1 ratio. +// Current implementations: +// - horizontal RGB subpixel-aware downscale filter with a fixed 3:1 ratio. +// - RG/B OLED subpixel-aware downscale filter with a fixed 2:1 ratio. const float kSubpixelRatioTolerance = 0.05f; const vec2 kSubpixelRGBDownscale = vec2(3.0f, 3.0f); const ivec2 kSubpixelRGBDownscaleInt = ivec2(3); -const float kSubpixelAlphaWeight = 1.0f / 49.0f; +const float kSubpixelRGBAlphaWeight = 1.0f / 49.0f; + +const vec2 kSubpixelOLEDDownscale = vec2(2.0f, 2.0f); +const ivec2 kSubpixelOLEDDownscaleInt = ivec2(2); +const float kSubpixelOLEDAlphaWeight = 1.0f / 25.0f; const vec3 kSubpixelHorizontalRGBKernel[7][7] = vec3[7][7]( vec3[7]( @@ -75,31 +81,88 @@ const vec3 kSubpixelHorizontalRGBKernel[7][7] = vec3[7][7]( ) ); +const vec3 kSubpixelOLEDKernel[5][5] = vec3[5][5]( + vec3[5]( + vec3( 8.8942e-03f, -3.2269e-03f, -4.4989e-04f), + vec3(-3.0907e-03f, 9.8704e-03f, -1.9181e-02f), + vec3(-1.3623e-01f, -1.7137e-02f, -8.9794e-03f), + vec3(-6.1139e-03f, -9.3100e-02f, -6.3452e-02f), + vec3( 1.2717e-02f, -1.0195e-02f, 6.3495e-04f) + ), + vec3[5]( + vec3(-1.4033e-02f, -1.0390e-02f, 5.3642e-03f), + vec3( 7.0923e-02f, -2.0851e-02f, 1.5784e-02f), + vec3( 1.5164e-01f, 9.3378e-02f, -2.6439e-02f), + vec3( 6.2676e-02f, 1.6882e-01f, -9.8268e-03f), + vec3(-1.4588e-02f, 5.1943e-02f, 1.3150e-02f) + ), + vec3[5]( + vec3(-1.3383e-01f, -1.8797e-02f, -3.9692e-02f), + vec3( 1.8129e-01f, -6.1458e-02f, 4.2175e-02f), + vec3( 5.6204e-01f, 1.6187e-01f, 1.4268e-01f), + vec3( 1.9378e-01f, 4.4632e-01f, 2.1392e-01f), + vec3(-1.2192e-01f, 8.3973e-02f, -8.6816e-03f) + ), + vec3[5]( + vec3(-1.5648e-02f, -1.0165e-02f, -6.0513e-02f), + vec3( 6.6628e-02f, -2.8706e-02f, -1.8292e-02f), + vec3( 1.5260e-01f, 9.4369e-02f, 2.9209e-01f), + vec3( 5.9264e-02f, 1.6689e-01f, 3.2808e-01f), + vec3(-1.6114e-02f, 4.3363e-02f, -8.8848e-02f) + ), + vec3[5]( + vec3( 1.1299e-02f, -1.3180e-03f, -3.0147e-02f), + vec3(-3.8403e-04f, 1.3891e-02f, 3.9877e-02f), + vec3(-1.2776e-01f, -1.7060e-02f, 9.1108e-02f), + vec3(-4.5907e-03f, -7.7945e-02f, 1.5202e-01f), + vec3( 1.4749e-02f, -4.2571e-03f, -8.5528e-03f) + ) +); + bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 coord, ivec2 texSize, vec2 scale, uint colorspace, out vec4 outColor) { - if (shaderFilter != filter_subpixel_rgb) + if (shaderFilter != filter_subpixel_rgb && shaderFilter != filter_subpixel_oled) return false; - if (any(greaterThan(abs(scale - kSubpixelRGBDownscale), vec2(kSubpixelRatioTolerance)))) + vec2 ratio = shaderFilter == filter_subpixel_oled ? kSubpixelOLEDDownscale : kSubpixelRGBDownscale; + ivec2 ratioInt = shaderFilter == filter_subpixel_oled ? kSubpixelOLEDDownscaleInt : kSubpixelRGBDownscaleInt; + float alphaWeight = shaderFilter == filter_subpixel_oled ? kSubpixelOLEDAlphaWeight : kSubpixelRGBAlphaWeight; + + if (any(greaterThan(abs(scale - ratio), vec2(kSubpixelRatioTolerance)))) return false; - ivec2 outputIndex = ivec2(floor((coord - vec2(0.5f)) / kSubpixelRGBDownscale)); - ivec2 start = outputIndex * kSubpixelRGBDownscaleInt - ivec2(2); + ivec2 outputIndex = ivec2(floor((coord - vec2(0.5f)) / ratio)); + ivec2 start = outputIndex * ratioInt - ivec2(2); ivec2 maxCoord = texSize - ivec2(1); vec3 accum = vec3(0.0f); float alpha = 0.0f; - for (int ky = 0; ky < 7; ky++) { - int sy = clamp(start.y + ky, 0, maxCoord.y); - for (int kx = 0; kx < 7; kx++) { - int sx = clamp(start.x + kx, 0, maxCoord.x); - vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); - vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); + if (shaderFilter == filter_subpixel_oled) { + for (int ky = 0; ky < 5; ky++) { + int sy = clamp(start.y + ky, 0, maxCoord.y); + for (int kx = 0; kx < 5; kx++) { + int sx = clamp(start.x + kx, 0, maxCoord.x); + vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); + vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); + + vec3 kernel = kSubpixelOLEDKernel[ky][kx]; + accum += linearSample * kernel; + alpha += texel.a * alphaWeight; + } + } + } else { + for (int ky = 0; ky < 7; ky++) { + int sy = clamp(start.y + ky, 0, maxCoord.y); + for (int kx = 0; kx < 7; kx++) { + int sx = clamp(start.x + kx, 0, maxCoord.x); + vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); + vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); - vec3 kernel = kSubpixelHorizontalRGBKernel[ky][kx]; - accum += linearSample * kernel; - alpha += texel.a * kSubpixelAlphaWeight; + vec3 kernel = kSubpixelHorizontalRGBKernel[ky][kx]; + accum += linearSample * kernel; + alpha += texel.a * alphaWeight; + } } } diff --git a/subprojects/glm b/subprojects/glm new file mode 160000 index 0000000000..0af55ccecd --- /dev/null +++ b/subprojects/glm @@ -0,0 +1 @@ +Subproject commit 0af55ccecd98d4e5a8d1fad7de25ba429d60e863 From ea5576c6d9afcac21080ffe235734690948575bf Mon Sep 17 00:00:00 2001 From: Daniel Koukola Date: Mon, 24 Nov 2025 03:31:27 +0100 Subject: [PATCH 4/8] shaders: fix subpixel-aware downscaling filters --- src/shaders/composite.h | 2 +- src/shaders/subpixel_scaler.h | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/shaders/composite.h b/src/shaders/composite.h index 5f1c85af6b..4c317dcfa2 100644 --- a/src/shaders/composite.h +++ b/src/shaders/composite.h @@ -170,7 +170,7 @@ vec4 sampleLayerEx(sampler2D layerSampler, uint offsetLayerIdx, uint colorspaceL vec4 color; bool sampled = false; uint shaderFilter = get_layer_shaderfilter(offsetLayerIdx); - if (shaderFilter == filter_subpixel_rgb && unnormalized) { + if ((shaderFilter == filter_subpixel_rgb || shaderFilter == filter_subpixel_oled) && unnormalized) { sampled = try_sample_subpixel_filter(shaderFilter, layerSampler, unnormalizedCoord, texSizeInt, u_scale[offsetLayerIdx], colorspace, color); } diff --git a/src/shaders/subpixel_scaler.h b/src/shaders/subpixel_scaler.h index cc8c353531..8da9a802f7 100644 --- a/src/shaders/subpixel_scaler.h +++ b/src/shaders/subpixel_scaler.h @@ -119,6 +119,14 @@ const vec3 kSubpixelOLEDKernel[5][5] = vec3[5][5]( ) ); +// Explicit sRGB decode for subpixel sampling when source is sRGB/linear. +// Avoids depending on colorspace state for OLED path. +vec3 SubpixelSRGBToLinear(vec3 c) +{ + // If the source is already linear, skip decode. + return c; +} + bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 coord, ivec2 texSize, vec2 scale, uint colorspace, out vec4 outColor) { if (shaderFilter != filter_subpixel_rgb && shaderFilter != filter_subpixel_oled) @@ -144,7 +152,7 @@ bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 for (int kx = 0; kx < 5; kx++) { int sx = clamp(start.x + kx, 0, maxCoord.x); vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); - vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); + vec3 linearSample = SubpixelSRGBToLinear(texel.rgb); vec3 kernel = kSubpixelOLEDKernel[ky][kx]; accum += linearSample * kernel; @@ -156,10 +164,17 @@ bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 int sy = clamp(start.y + ky, 0, maxCoord.y); for (int kx = 0; kx < 7; kx++) { int sx = clamp(start.x + kx, 0, maxCoord.x); + // Shift green footprint by +2 taps horizontally (tuned for RGB path). + int sxG = clamp(start.x + 2 + kx, 0, maxCoord.x); + vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); + vec4 texelG = texelFetch(layerSampler, ivec2(sxG, sy), 0); + vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); + linearSample.g = colorspace_plane_degamma_tf(texelG.rgb, colorspace).g; - vec3 kernel = kSubpixelHorizontalRGBKernel[ky][kx]; + // Apply kernel with channel permutation r->r, g->b, b->g. + vec3 kernel = kSubpixelHorizontalRGBKernel[ky][kx].rbg; accum += linearSample * kernel; alpha += texel.a * alphaWeight; } From 8351cb10c65fe277b110109a7be38b44ae4de7d3 Mon Sep 17 00:00:00 2001 From: Daniel Koukola Date: Mon, 24 Nov 2025 03:35:01 +0100 Subject: [PATCH 5/8] submodules: drop stray glm gitlink --- subprojects/glm | 1 - 1 file changed, 1 deletion(-) delete mode 160000 subprojects/glm diff --git a/subprojects/glm b/subprojects/glm deleted file mode 160000 index 0af55ccecd..0000000000 --- a/subprojects/glm +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 0af55ccecd98d4e5a8d1fad7de25ba429d60e863 From e5b69c4452e5aed01906489c741b91824b4f0f26 Mon Sep 17 00:00:00 2001 From: Daniel Koukola Date: Mon, 24 Nov 2025 05:58:39 +0100 Subject: [PATCH 6/8] shaders: fix subpixel-aware downscaling filters kernels with aligned vec4 storage --- src/shaders/subpixel_scaler.h | 197 ++++++++++++++++------------------ 1 file changed, 91 insertions(+), 106 deletions(-) diff --git a/src/shaders/subpixel_scaler.h b/src/shaders/subpixel_scaler.h index 8da9a802f7..37c3954d58 100644 --- a/src/shaders/subpixel_scaler.h +++ b/src/shaders/subpixel_scaler.h @@ -15,118 +15,110 @@ const vec2 kSubpixelOLEDDownscale = vec2(2.0f, 2.0f); const ivec2 kSubpixelOLEDDownscaleInt = ivec2(2); const float kSubpixelOLEDAlphaWeight = 1.0f / 25.0f; -const vec3 kSubpixelHorizontalRGBKernel[7][7] = vec3[7][7]( - vec3[7]( - vec3( 7.1068e-03f, -5.4910e-03f, -1.7302e-03f), - vec3(-4.9596e-02f, 1.4541e-03f, -8.1770e-03f), - vec3(-1.1168e-01f, -3.9356e-02f, 1.0656e-02f), - vec3(-3.7548e-02f, -8.4949e-02f, -6.5272e-02f), - vec3( 1.8490e-02f, -4.7630e-02f, -5.5001e-02f), - vec3(-2.2353e-03f, 2.1439e-03f, -4.2040e-02f), - vec3(-9.0465e-04f, -3.0190e-03f, -8.3075e-03f) +const vec4 kSubpixelHorizontalRGBKernel[7][7] = vec4[7][7]( + vec4[7]( + vec4( 7.1068e-03f, -5.4910e-03f, -1.7302e-03f, 0.0f), + vec4(-4.9596e-02f, 1.4541e-03f, -8.1770e-03f, 0.0f), + vec4(-1.1168e-01f, -3.9356e-02f, 1.0656e-02f, 0.0f), + vec4(-3.7548e-02f, -8.4949e-02f, -6.5272e-02f, 0.0f), + vec4( 1.8490e-02f, -4.7630e-02f, -5.5001e-02f, 0.0f), + vec4(-2.2353e-03f, 2.1439e-03f, -4.2040e-02f, 0.0f), + vec4(-9.0465e-04f, -3.0190e-03f, -8.3075e-03f, 0.0f) ), - vec3[7]( - vec3(-6.2506e-03f, -3.5835e-03f, -8.5086e-03f), - vec3( 3.9232e-02f, 8.1267e-03f, 1.0763e-02f), - vec3( 1.3306e-02f, 2.3506e-02f, 3.9553e-03f), - vec3( 2.7662e-02f, 1.9066e-02f, 2.0329e-02f), - vec3(-5.6325e-03f, 2.6609e-02f, 2.3552e-02f), - vec3( 7.6947e-03f, 5.1926e-03f, 2.7644e-02f), - vec3(-1.1685e-02f, -3.7455e-03f, 2.3616e-03f) + vec4[7]( + vec4(-6.2506e-03f, -3.5835e-03f, -8.5086e-03f, 0.0f), + vec4( 3.9232e-02f, 8.1267e-03f, 1.0763e-02f, 0.0f), + vec4( 1.3306e-02f, 2.3506e-02f, 3.9553e-03f, 0.0f), + vec4( 2.7662e-02f, 1.9066e-02f, 2.0329e-02f, 0.0f), + vec4(-5.6325e-03f, 2.6609e-02f, 2.3552e-02f, 0.0f), + vec4( 7.6947e-03f, 5.1926e-03f, 2.7644e-02f, 0.0f), + vec4(-1.1685e-02f, -3.7455e-03f, 2.3616e-03f, 0.0f) ), - vec3[7]( - vec3(-5.0055e-03f, -1.6617e-02f, -1.8024e-02f), - vec3( 8.8724e-02f, 2.8274e-02f, 2.3960e-04f), - vec3( 1.6728e-01f, 8.3065e-02f, 3.8955e-02f), - vec3( 8.9757e-02f, 1.4706e-01f, 1.0101e-01f), - vec3( 1.7448e-02f, 8.5608e-02f, 1.0663e-01f), - vec3(-1.6968e-02f, 2.8939e-02f, 6.7206e-02f), - vec3(-5.2292e-03f, -1.8133e-02f, 2.9039e-02f) + vec4[7]( + vec4(-5.0055e-03f, -1.6617e-02f, -1.8024e-02f, 0.0f), + vec4( 8.8724e-02f, 2.8274e-02f, 2.3960e-04f, 0.0f), + vec4( 1.6728e-01f, 8.3065e-02f, 3.8955e-02f, 0.0f), + vec4( 8.9757e-02f, 1.4706e-01f, 1.0101e-01f, 0.0f), + vec4( 1.7448e-02f, 8.5608e-02f, 1.0663e-01f, 0.0f), + vec4(-1.6968e-02f, 2.8939e-02f, 6.7206e-02f, 0.0f), + vec4(-5.2292e-03f, -1.8133e-02f, 2.9039e-02f, 0.0f) ), - vec3[7]( - vec3(-6.1220e-02f, -4.9825e-02f, -5.5558e-02f), - vec3( 1.8421e-01f, 1.9944e-02f, -1.7329e-02f), - vec3( 2.6878e-01f, 1.4827e-01f, 3.4439e-02f), - vec3( 1.8127e-01f, 2.5076e-01f, 2.2284e-01f), - vec3(-6.3037e-03f, 1.6033e-01f, 1.7886e-01f), - vec3(-4.9562e-02f, 2.8671e-02f, 1.3686e-01f), - vec3(-1.5705e-02f, -5.4865e-02f, 5.6473e-03f) + vec4[7]( + vec4(-6.1220e-02f, -4.9825e-02f, -5.5558e-02f, 0.0f), + vec4( 1.8421e-01f, 1.9944e-02f, -1.7329e-02f, 0.0f), + vec4( 2.6878e-01f, 1.4827e-01f, 3.4439e-02f, 0.0f), + vec4( 1.8127e-01f, 2.5076e-01f, 2.2284e-01f, 0.0f), + vec4(-6.3037e-03f, 1.6033e-01f, 1.7886e-01f, 0.0f), + vec4(-4.9562e-02f, 2.8671e-02f, 1.3686e-01f, 0.0f), + vec4(-1.5705e-02f, -5.4865e-02f, 5.6473e-03f, 0.0f) ), - vec3[7]( - vec3(-5.4844e-03f, -1.6043e-02f, -1.6392e-02f), - vec3( 8.8965e-02f, 2.8861e-02f, 2.0795e-03f), - vec3( 1.6636e-01f, 8.2952e-02f, 3.8352e-02f), - vec3( 8.7956e-02f, 1.4735e-01f, 1.0193e-01f), - vec3( 1.8432e-02f, 8.5476e-02f, 1.0605e-01f), - vec3(-1.5480e-02f, 2.9659e-02f, 6.8861e-02f), - vec3(-5.6236e-03f, -1.8234e-02f, 2.9642e-02f) + vec4[7]( + vec4(-5.4844e-03f, -1.6043e-02f, -1.6392e-02f, 0.0f), + vec4( 8.8965e-02f, 2.8861e-02f, 2.0795e-03f, 0.0f), + vec4( 1.6636e-01f, 8.2952e-02f, 3.8352e-02f, 0.0f), + vec4( 8.7956e-02f, 1.4735e-01f, 1.0193e-01f, 0.0f), + vec4( 1.8432e-02f, 8.5476e-02f, 1.0605e-01f, 0.0f), + vec4(-1.5480e-02f, 2.9659e-02f, 6.8861e-02f, 0.0f), + vec4(-5.6236e-03f, -1.8234e-02f, 2.9642e-02f, 0.0f) ), - vec3[7]( - vec3(-6.7048e-03f, -3.8673e-03f, -1.0044e-02f), - vec3( 3.9388e-02f, 6.9483e-03f, 1.0357e-02f), - vec3( 1.3296e-02f, 2.3415e-02f, 4.4685e-03f), - vec3( 2.9055e-02f, 1.8918e-02f, 2.0069e-02f), - vec3(-6.1559e-03f, 2.5721e-02f, 2.3438e-02f), - vec3( 7.0322e-03f, 5.6895e-03f, 2.6485e-02f), - vec3(-1.2151e-02f, -4.4146e-03f, 1.3875e-03f) + vec4[7]( + vec4(-6.7048e-03f, -3.8673e-03f, -1.0044e-02f, 0.0f), + vec4( 3.9388e-02f, 6.9483e-03f, 1.0357e-02f, 0.0f), + vec4( 1.3296e-02f, 2.3415e-02f, 4.4685e-03f, 0.0f), + vec4( 2.9055e-02f, 1.8918e-02f, 2.0069e-02f, 0.0f), + vec4(-6.1559e-03f, 2.5721e-02f, 2.3438e-02f, 0.0f), + vec4( 7.0322e-03f, 5.6895e-03f, 2.6485e-02f, 0.0f), + vec4(-1.2151e-02f, -4.4146e-03f, 1.3875e-03f, 0.0f) ), - vec3[7]( - vec3( 7.2912e-03f, -5.3521e-03f, -1.2737e-03f), - vec3(-4.9607e-02f, 1.6422e-03f, -7.8235e-03f), - vec3(-1.1154e-01f, -3.9406e-02f, 1.0291e-02f), - vec3(-3.7869e-02f, -8.5334e-02f, -6.5227e-02f), - vec3( 1.8729e-02f, -4.6851e-02f, -5.4638e-02f), - vec3(-1.2923e-03f, 1.7708e-03f, -4.1082e-02f), - vec3(-2.4411e-04f, -2.8019e-03f, -8.5312e-03f) + vec4[7]( + vec4( 7.2912e-03f, -5.3521e-03f, -1.2737e-03f, 0.0f), + vec4(-4.9607e-02f, 1.6422e-03f, -7.8235e-03f, 0.0f), + vec4(-1.1154e-01f, -3.9406e-02f, 1.0291e-02f, 0.0f), + vec4(-3.7869e-02f, -8.5334e-02f, -6.5227e-02f, 0.0f), + vec4( 1.8729e-02f, -4.6851e-02f, -5.4638e-02f, 0.0f), + vec4(-1.2923e-03f, 1.7708e-03f, -4.1082e-02f, 0.0f), + vec4(-2.4411e-04f, -2.8019e-03f, -8.5312e-03f, 0.0f) ) ); -const vec3 kSubpixelOLEDKernel[5][5] = vec3[5][5]( - vec3[5]( - vec3( 8.8942e-03f, -3.2269e-03f, -4.4989e-04f), - vec3(-3.0907e-03f, 9.8704e-03f, -1.9181e-02f), - vec3(-1.3623e-01f, -1.7137e-02f, -8.9794e-03f), - vec3(-6.1139e-03f, -9.3100e-02f, -6.3452e-02f), - vec3( 1.2717e-02f, -1.0195e-02f, 6.3495e-04f) +const vec4 kSubpixelOLEDKernel[5][5] = vec4[5][5]( + vec4[5]( + vec4( 8.8942e-03f, -3.2269e-03f, -4.4989e-04f, 0.0f), + vec4(-3.0907e-03f, 9.8704e-03f, -1.9181e-02f, 0.0f), + vec4(-1.3623e-01f, -1.7137e-02f, -8.9794e-03f, 0.0f), + vec4(-6.1139e-03f, -9.3100e-02f, -6.3452e-02f, 0.0f), + vec4( 1.2717e-02f, -1.0195e-02f, 6.3495e-04f, 0.0f) ), - vec3[5]( - vec3(-1.4033e-02f, -1.0390e-02f, 5.3642e-03f), - vec3( 7.0923e-02f, -2.0851e-02f, 1.5784e-02f), - vec3( 1.5164e-01f, 9.3378e-02f, -2.6439e-02f), - vec3( 6.2676e-02f, 1.6882e-01f, -9.8268e-03f), - vec3(-1.4588e-02f, 5.1943e-02f, 1.3150e-02f) + vec4[5]( + vec4(-1.4033e-02f, -1.0390e-02f, 5.3642e-03f, 0.0f), + vec4( 7.0923e-02f, -2.0851e-02f, 1.5784e-02f, 0.0f), + vec4( 1.5164e-01f, 9.3378e-02f, -2.6439e-02f, 0.0f), + vec4( 6.2676e-02f, 1.6882e-01f, -9.8268e-03f, 0.0f), + vec4(-1.4588e-02f, 5.1943e-02f, 1.3150e-02f, 0.0f) ), - vec3[5]( - vec3(-1.3383e-01f, -1.8797e-02f, -3.9692e-02f), - vec3( 1.8129e-01f, -6.1458e-02f, 4.2175e-02f), - vec3( 5.6204e-01f, 1.6187e-01f, 1.4268e-01f), - vec3( 1.9378e-01f, 4.4632e-01f, 2.1392e-01f), - vec3(-1.2192e-01f, 8.3973e-02f, -8.6816e-03f) + vec4[5]( + vec4(-1.3383e-01f, -1.8797e-02f, -3.9692e-02f, 0.0f), + vec4( 1.8129e-01f, -6.1458e-02f, 4.2175e-02f, 0.0f), + vec4( 5.6204e-01f, 1.6187e-01f, 1.4268e-01f, 0.0f), + vec4( 1.9378e-01f, 4.4632e-01f, 2.1392e-01f, 0.0f), + vec4(-1.2192e-01f, 8.3973e-02f, -8.6816e-03f, 0.0f) ), - vec3[5]( - vec3(-1.5648e-02f, -1.0165e-02f, -6.0513e-02f), - vec3( 6.6628e-02f, -2.8706e-02f, -1.8292e-02f), - vec3( 1.5260e-01f, 9.4369e-02f, 2.9209e-01f), - vec3( 5.9264e-02f, 1.6689e-01f, 3.2808e-01f), - vec3(-1.6114e-02f, 4.3363e-02f, -8.8848e-02f) + vec4[5]( + vec4(-1.5648e-02f, -1.0165e-02f, -6.0513e-02f, 0.0f), + vec4( 6.6628e-02f, -2.8706e-02f, -1.8292e-02f, 0.0f), + vec4( 1.5260e-01f, 9.4369e-02f, 2.9209e-01f, 0.0f), + vec4( 5.9264e-02f, 1.6689e-01f, 3.2808e-01f, 0.0f), + vec4(-1.6114e-02f, 4.3363e-02f, -8.8848e-02f, 0.0f) ), - vec3[5]( - vec3( 1.1299e-02f, -1.3180e-03f, -3.0147e-02f), - vec3(-3.8403e-04f, 1.3891e-02f, 3.9877e-02f), - vec3(-1.2776e-01f, -1.7060e-02f, 9.1108e-02f), - vec3(-4.5907e-03f, -7.7945e-02f, 1.5202e-01f), - vec3( 1.4749e-02f, -4.2571e-03f, -8.5528e-03f) + vec4[5]( + vec4( 1.1299e-02f, -1.3180e-03f, -3.0147e-02f, 0.0f), + vec4(-3.8403e-04f, 1.3891e-02f, 3.9877e-02f, 0.0f), + vec4(-1.2776e-01f, -1.7060e-02f, 9.1108e-02f, 0.0f), + vec4(-4.5907e-03f, -7.7945e-02f, 1.5202e-01f, 0.0f), + vec4( 1.4749e-02f, -4.2571e-03f, -8.5528e-03f, 0.0f) ) ); -// Explicit sRGB decode for subpixel sampling when source is sRGB/linear. -// Avoids depending on colorspace state for OLED path. -vec3 SubpixelSRGBToLinear(vec3 c) -{ - // If the source is already linear, skip decode. - return c; -} - bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 coord, ivec2 texSize, vec2 scale, uint colorspace, out vec4 outColor) { if (shaderFilter != filter_subpixel_rgb && shaderFilter != filter_subpixel_oled) @@ -152,9 +144,9 @@ bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 for (int kx = 0; kx < 5; kx++) { int sx = clamp(start.x + kx, 0, maxCoord.x); vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); - vec3 linearSample = SubpixelSRGBToLinear(texel.rgb); + vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); + vec3 kernel = kSubpixelOLEDKernel[ky][kx].rgb; - vec3 kernel = kSubpixelOLEDKernel[ky][kx]; accum += linearSample * kernel; alpha += texel.a * alphaWeight; } @@ -164,17 +156,10 @@ bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 int sy = clamp(start.y + ky, 0, maxCoord.y); for (int kx = 0; kx < 7; kx++) { int sx = clamp(start.x + kx, 0, maxCoord.x); - // Shift green footprint by +2 taps horizontally (tuned for RGB path). - int sxG = clamp(start.x + 2 + kx, 0, maxCoord.x); - vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); - vec4 texelG = texelFetch(layerSampler, ivec2(sxG, sy), 0); - vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); - linearSample.g = colorspace_plane_degamma_tf(texelG.rgb, colorspace).g; + vec3 kernel = kSubpixelHorizontalRGBKernel[ky][kx].rgb; - // Apply kernel with channel permutation r->r, g->b, b->g. - vec3 kernel = kSubpixelHorizontalRGBKernel[ky][kx].rbg; accum += linearSample * kernel; alpha += texel.a * alphaWeight; } From 9c5978e5c63a811afc036f38313ca22e712f36a0 Mon Sep 17 00:00:00 2001 From: Daniel Koukola Date: Mon, 24 Nov 2025 06:23:09 +0100 Subject: [PATCH 7/8] shaders: add G/RB and vertical BGR subpixel-aware downscaling filters --- src/Backends/DRMBackend.cpp | 2 +- src/Backends/OpenVRBackend.cpp | 2 +- src/Backends/WaylandBackend.cpp | 2 +- src/main.cpp | 10 +- src/main.hpp | 22 +- src/rendervulkan.cpp | 6 +- src/shaders/composite.h | 2 +- src/shaders/descriptor_set.h | 2 + src/shaders/subpixel_scaler.h | 344 +++++++++++++++++++++++--------- 9 files changed, 277 insertions(+), 115 deletions(-) diff --git a/src/Backends/DRMBackend.cpp b/src/Backends/DRMBackend.cpp index c03195b194..17aa0569e2 100644 --- a/src/Backends/DRMBackend.cpp +++ b/src/Backends/DRMBackend.cpp @@ -3451,7 +3451,7 @@ namespace gamescope bool bLayer0ScreenSize = close_enough(pFrameInfo->layers[0].scale.x, 1.0f) && close_enough(pFrameInfo->layers[0].scale.y, 1.0f); - bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_OLED) && !bLayer0ScreenSize; + bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_OLED || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_VBGR || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_QDOLED) && !bLayer0ScreenSize; bool bNeedsFullComposite = false; bNeedsFullComposite |= cv_composite_force; diff --git a/src/Backends/OpenVRBackend.cpp b/src/Backends/OpenVRBackend.cpp index 23d3075827..7944cf34c4 100644 --- a/src/Backends/OpenVRBackend.cpp +++ b/src/Backends/OpenVRBackend.cpp @@ -1494,7 +1494,7 @@ namespace gamescope // TODO: Dedupe some of this composite check code between us and drm.cpp bool bLayer0ScreenSize = close_enough(pFrameInfo->layers[0].scale.x, 1.0f) && close_enough(pFrameInfo->layers[0].scale.y, 1.0f); - bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_OLED) && !bLayer0ScreenSize; + bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_OLED || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_VBGR || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_QDOLED) && !bLayer0ScreenSize; bNeedsFullComposite |= cv_composite_force; bNeedsFullComposite |= pFrameInfo->useFSRLayer0; diff --git a/src/Backends/WaylandBackend.cpp b/src/Backends/WaylandBackend.cpp index 4f8e5aa166..c5f508b5fd 100644 --- a/src/Backends/WaylandBackend.cpp +++ b/src/Backends/WaylandBackend.cpp @@ -1057,7 +1057,7 @@ namespace gamescope // TODO: Dedupe some of this composite check code between us and drm.cpp bool bLayer0ScreenSize = close_enough(pFrameInfo->layers[0].scale.x, 1.0f) && close_enough(pFrameInfo->layers[0].scale.y, 1.0f); - bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_OLED) && !bLayer0ScreenSize; + bool bNeedsCompositeFromFilter = (g_upscaleFilter == GamescopeUpscaleFilter::NEAREST || g_upscaleFilter == GamescopeUpscaleFilter::PIXEL || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_RGB || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_OLED || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_VBGR || g_upscaleFilter == GamescopeUpscaleFilter::SUBPIXEL_QDOLED) && !bLayer0ScreenSize; bNeedsFullComposite |= cv_composite_force; bNeedsFullComposite |= pFrameInfo->useFSRLayer0; diff --git a/src/main.cpp b/src/main.cpp index b2316ed120..fb781dda91 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -172,9 +172,13 @@ const char usage[] = " -r, --nested-refresh game refresh rate (frames per second)\n" " -m, --max-scale maximum scale factor\n" " -S, --scaler upscaler type (auto, integer, fit, fill, stretch)\n" - " -F, --filter upscaler filter (linear, nearest, fsr, nis, pixel, subpixel_rgb, subpixel_oled)\n" +" -F, --filter upscaler/downscaler filter (linear, nearest, fsr, nis, pixel, subpixel_rgb, subpixel_vbgr, subpixel_oled, subpixel_qdoled)\n" " fsr => AMD FidelityFX™ Super Resolution 1.0\n" " nis => NVIDIA Image Scaling v1.0.3\n" + " subpixel_rgb => horizontal RGB subpixel layout (3:1 downscale)\n" + " subpixel_vbgr => vertical BGR subpixel layout (3:1 downscale)\n" + " subpixel_oled => RG/B subpixel layout (2:1 downscale)\n" + " subpixel_qdoled => G/RB subpixel layout (2:1 downscale)\n" " --sharpness, --fsr-sharpness upscaler sharpness from 0 (max) to 20 (min)\n" " --expose-wayland support wayland clients using xdg-shell\n" " -s, --mouse-sensitivity multiply mouse movement by given decimal number\n" @@ -414,6 +418,10 @@ static enum GamescopeUpscaleFilter parse_upscaler_filter(const char *str) return GamescopeUpscaleFilter::SUBPIXEL_RGB; } else if (strcmp(str, "subpixel_oled") == 0) { return GamescopeUpscaleFilter::SUBPIXEL_OLED; + } else if (strcmp(str, "subpixel_vbgr") == 0) { + return GamescopeUpscaleFilter::SUBPIXEL_VBGR; + } else if (strcmp(str, "subpixel_qdoled") == 0) { + return GamescopeUpscaleFilter::SUBPIXEL_QDOLED; } else { fprintf( stderr, "gamescope: invalid value for --filter\n" ); exit(1); diff --git a/src/main.hpp b/src/main.hpp index 323d80d998..6d47d5d1bb 100644 --- a/src/main.hpp +++ b/src/main.hpp @@ -29,17 +29,19 @@ extern bool g_bGrabbed; extern float g_mouseSensitivity; extern const char *g_sOutputName; - enum class GamescopeUpscaleFilter : uint32_t +enum class GamescopeUpscaleFilter : uint32_t { - LINEAR = 0, - NEAREST, - FSR, - NIS, - PIXEL, - SUBPIXEL_RGB, - SUBPIXEL_OLED, - - FROM_VIEW = 0xF, // internal + LINEAR = 0, + NEAREST, + FSR, + NIS, + PIXEL, + SUBPIXEL_RGB, + SUBPIXEL_OLED, + SUBPIXEL_VBGR, + SUBPIXEL_QDOLED, + + FROM_VIEW = 0xF, // internal }; static constexpr bool DoesHardwareSupportUpscaleFilter( GamescopeUpscaleFilter eFilter ) diff --git a/src/rendervulkan.cpp b/src/rendervulkan.cpp index 09eedcc5ae..a2bcd47a2e 100644 --- a/src/rendervulkan.cpp +++ b/src/rendervulkan.cpp @@ -3642,9 +3642,11 @@ struct SubpixelFilterDefinition const char *pName; }; -static constexpr std::array g_SubpixelFilterDefinitions = {{ +static constexpr std::array g_SubpixelFilterDefinitions = {{ { GamescopeUpscaleFilter::SUBPIXEL_RGB, { 3.0f, 3.0f }, "horizontal RGB" }, { GamescopeUpscaleFilter::SUBPIXEL_OLED, { 2.0f, 2.0f }, "RG/B OLED" }, + { GamescopeUpscaleFilter::SUBPIXEL_VBGR, { 3.0f, 3.0f }, "vertical BGR" }, + { GamescopeUpscaleFilter::SUBPIXEL_QDOLED,{ 2.0f, 2.0f }, "G/RB QD-OLED" }, }}; static const SubpixelFilterDefinition *FindSubpixelFilterDefinition( GamescopeUpscaleFilter eFilter ) @@ -3665,7 +3667,7 @@ static GamescopeUpscaleFilter GetLayerShaderFilter( const FrameInfo_t::Layer_t & if ( const auto *definition = FindSubpixelFilterDefinition( layer.filter ) ) { - static int s_lastState[ g_SubpixelFilterDefinitions.size() ] = { -1, -1 }; + static int s_lastState[ g_SubpixelFilterDefinitions.size() ] = { -1, -1, -1, -1 }; size_t idx = definition - g_SubpixelFilterDefinitions.data(); float dimRatioX = 0.0f; diff --git a/src/shaders/composite.h b/src/shaders/composite.h index 4c317dcfa2..61e44989d6 100644 --- a/src/shaders/composite.h +++ b/src/shaders/composite.h @@ -170,7 +170,7 @@ vec4 sampleLayerEx(sampler2D layerSampler, uint offsetLayerIdx, uint colorspaceL vec4 color; bool sampled = false; uint shaderFilter = get_layer_shaderfilter(offsetLayerIdx); - if ((shaderFilter == filter_subpixel_rgb || shaderFilter == filter_subpixel_oled) && unnormalized) { + if (unnormalized) { sampled = try_sample_subpixel_filter(shaderFilter, layerSampler, unnormalizedCoord, texSizeInt, u_scale[offsetLayerIdx], colorspace, color); } diff --git a/src/shaders/descriptor_set.h b/src/shaders/descriptor_set.h index 148db7aa5d..6a89fe6c09 100644 --- a/src/shaders/descriptor_set.h +++ b/src/shaders/descriptor_set.h @@ -23,6 +23,8 @@ const int filter_nis = 3; const int filter_pixel = 4; const int filter_subpixel_rgb = 5; const int filter_subpixel_oled = 6; +const int filter_subpixel_vbgr = 7; +const int filter_subpixel_qdoled = 8; const int filter_from_view = 255; const int EOTF_Gamma22 = 0; diff --git a/src/shaders/subpixel_scaler.h b/src/shaders/subpixel_scaler.h index 37c3954d58..74eea2a352 100644 --- a/src/shaders/subpixel_scaler.h +++ b/src/shaders/subpixel_scaler.h @@ -3,8 +3,9 @@ // Generic subpixel-aware downscale helpers. // Current implementations: -// - horizontal RGB subpixel-aware downscale filter with a fixed 3:1 ratio. -// - RG/B OLED subpixel-aware downscale filter with a fixed 2:1 ratio. +// - horizontal RGB subpixel-aware downscale filter (3:1 ratio). +// - vertical BGR subpixel-aware downscale kernel (3:1 ratio). +// - RG/B subpixel-aware downscale filter (2:1 ratio). const float kSubpixelRatioTolerance = 0.05f; const vec2 kSubpixelRGBDownscale = vec2(3.0f, 3.0f); @@ -14,119 +15,240 @@ const float kSubpixelRGBAlphaWeight = 1.0f / 49.0f; const vec2 kSubpixelOLEDDownscale = vec2(2.0f, 2.0f); const ivec2 kSubpixelOLEDDownscaleInt = ivec2(2); const float kSubpixelOLEDAlphaWeight = 1.0f / 25.0f; +const vec2 kSubpixelQDOLEDDownscale = vec2(2.0f, 2.0f); +const ivec2 kSubpixelQDOLEDDownscaleInt = ivec2(2); +const float kSubpixelQDOLEDAlphaWeight = 1.0f / 25.0f; const vec4 kSubpixelHorizontalRGBKernel[7][7] = vec4[7][7]( + vec4[7]( + vec4( 7.1068e-03f, -5.4910e-03f, -1.7302e-03f, 0.0f), + vec4(-4.9596e-02f, 1.4541e-03f, -8.1770e-03f, 0.0f), + vec4(-1.1168e-01f, -3.9356e-02f, 1.0656e-02f, 0.0f), + vec4(-3.7548e-02f, -8.4949e-02f, -6.5272e-02f, 0.0f), + vec4( 1.8490e-02f, -4.7630e-02f, -5.5001e-02f, 0.0f), + vec4(-2.2353e-03f, 2.1439e-03f, -4.2040e-02f, 0.0f), + vec4(-9.0465e-04f, -3.0190e-03f, -8.3075e-03f, 0.0f) + ), + vec4[7]( + vec4(-6.2506e-03f, -3.5835e-03f, -8.5086e-03f, 0.0f), + vec4( 3.9232e-02f, 8.1267e-03f, 1.0763e-02f, 0.0f), + vec4( 1.3306e-02f, 2.3506e-02f, 3.9553e-03f, 0.0f), + vec4( 2.7662e-02f, 1.9066e-02f, 2.0329e-02f, 0.0f), + vec4(-5.6325e-03f, 2.6609e-02f, 2.3552e-02f, 0.0f), + vec4( 7.6947e-03f, 5.1926e-03f, 2.7644e-02f, 0.0f), + vec4(-1.1685e-02f, -3.7455e-03f, 2.3616e-03f, 0.0f) + ), + vec4[7]( + vec4(-5.0055e-03f, -1.6617e-02f, -1.8024e-02f, 0.0f), + vec4( 8.8724e-02f, 2.8274e-02f, 2.3960e-04f, 0.0f), + vec4( 1.6728e-01f, 8.3065e-02f, 3.8955e-02f, 0.0f), + vec4( 8.9757e-02f, 1.4706e-01f, 1.0101e-01f, 0.0f), + vec4( 1.7448e-02f, 8.5608e-02f, 1.0663e-01f, 0.0f), + vec4(-1.6968e-02f, 2.8939e-02f, 6.7206e-02f, 0.0f), + vec4(-5.2292e-03f, -1.8133e-02f, 2.9039e-02f, 0.0f) + ), + vec4[7]( + vec4(-6.1220e-02f, -4.9825e-02f, -5.5558e-02f, 0.0f), + vec4( 1.8421e-01f, 1.9944e-02f, -1.7329e-02f, 0.0f), + vec4( 2.6878e-01f, 1.4827e-01f, 3.4439e-02f, 0.0f), + vec4( 1.8127e-01f, 2.5076e-01f, 2.2284e-01f, 0.0f), + vec4(-6.3037e-03f, 1.6033e-01f, 1.7886e-01f, 0.0f), + vec4(-4.9562e-02f, 2.8671e-02f, 1.3686e-01f, 0.0f), + vec4(-1.5705e-02f, -5.4865e-02f, 5.6473e-03f, 0.0f) + ), + vec4[7]( + vec4(-5.4844e-03f, -1.6043e-02f, -1.6392e-02f, 0.0f), + vec4( 8.8965e-02f, 2.8861e-02f, 2.0795e-03f, 0.0f), + vec4( 1.6636e-01f, 8.2952e-02f, 3.8352e-02f, 0.0f), + vec4( 8.7956e-02f, 1.4735e-01f, 1.0193e-01f, 0.0f), + vec4( 1.8432e-02f, 8.5476e-02f, 1.0605e-01f, 0.0f), + vec4(-1.5480e-02f, 2.9659e-02f, 6.8861e-02f, 0.0f), + vec4(-5.6236e-03f, -1.8234e-02f, 2.9642e-02f, 0.0f) + ), + vec4[7]( + vec4(-6.7048e-03f, -3.8673e-03f, -1.0044e-02f, 0.0f), + vec4( 3.9388e-02f, 6.9483e-03f, 1.0357e-02f, 0.0f), + vec4( 1.3296e-02f, 2.3415e-02f, 4.4685e-03f, 0.0f), + vec4( 2.9055e-02f, 1.8918e-02f, 2.0069e-02f, 0.0f), + vec4(-6.1559e-03f, 2.5721e-02f, 2.3438e-02f, 0.0f), + vec4( 7.0322e-03f, 5.6895e-03f, 2.6485e-02f, 0.0f), + vec4(-1.2151e-02f, -4.4146e-03f, 1.3875e-03f, 0.0f) + ), + vec4[7]( + vec4( 7.2912e-03f, -5.3521e-03f, -1.2737e-03f, 0.0f), + vec4(-4.9607e-02f, 1.6422e-03f, -7.8235e-03f, 0.0f), + vec4(-1.1154e-01f, -3.9406e-02f, 1.0291e-02f, 0.0f), + vec4(-3.7869e-02f, -8.5334e-02f, -6.5227e-02f, 0.0f), + vec4( 1.8729e-02f, -4.6851e-02f, -5.4638e-02f, 0.0f), + vec4(-1.2923e-03f, 1.7708e-03f, -4.1082e-02f, 0.0f), + vec4(-2.4411e-04f, -2.8019e-03f, -8.5312e-03f, 0.0f) + ) +); + +const vec4 kSubpixelVerticalBGRKernel[7][7] = vec4[7][7]( vec4[7]( - vec4( 7.1068e-03f, -5.4910e-03f, -1.7302e-03f, 0.0f), - vec4(-4.9596e-02f, 1.4541e-03f, -8.1770e-03f, 0.0f), - vec4(-1.1168e-01f, -3.9356e-02f, 1.0656e-02f, 0.0f), - vec4(-3.7548e-02f, -8.4949e-02f, -6.5272e-02f, 0.0f), - vec4( 1.8490e-02f, -4.7630e-02f, -5.5001e-02f, 0.0f), - vec4(-2.2353e-03f, 2.1439e-03f, -4.2040e-02f, 0.0f), - vec4(-9.0465e-04f, -3.0190e-03f, -8.3075e-03f, 0.0f) - ), - vec4[7]( - vec4(-6.2506e-03f, -3.5835e-03f, -8.5086e-03f, 0.0f), - vec4( 3.9232e-02f, 8.1267e-03f, 1.0763e-02f, 0.0f), - vec4( 1.3306e-02f, 2.3506e-02f, 3.9553e-03f, 0.0f), - vec4( 2.7662e-02f, 1.9066e-02f, 2.0329e-02f, 0.0f), - vec4(-5.6325e-03f, 2.6609e-02f, 2.3552e-02f, 0.0f), - vec4( 7.6947e-03f, 5.1926e-03f, 2.7644e-02f, 0.0f), - vec4(-1.1685e-02f, -3.7455e-03f, 2.3616e-03f, 0.0f) - ), - vec4[7]( - vec4(-5.0055e-03f, -1.6617e-02f, -1.8024e-02f, 0.0f), - vec4( 8.8724e-02f, 2.8274e-02f, 2.3960e-04f, 0.0f), - vec4( 1.6728e-01f, 8.3065e-02f, 3.8955e-02f, 0.0f), - vec4( 8.9757e-02f, 1.4706e-01f, 1.0101e-01f, 0.0f), - vec4( 1.7448e-02f, 8.5608e-02f, 1.0663e-01f, 0.0f), - vec4(-1.6968e-02f, 2.8939e-02f, 6.7206e-02f, 0.0f), - vec4(-5.2292e-03f, -1.8133e-02f, 2.9039e-02f, 0.0f) - ), - vec4[7]( - vec4(-6.1220e-02f, -4.9825e-02f, -5.5558e-02f, 0.0f), - vec4( 1.8421e-01f, 1.9944e-02f, -1.7329e-02f, 0.0f), - vec4( 2.6878e-01f, 1.4827e-01f, 3.4439e-02f, 0.0f), - vec4( 1.8127e-01f, 2.5076e-01f, 2.2284e-01f, 0.0f), - vec4(-6.3037e-03f, 1.6033e-01f, 1.7886e-01f, 0.0f), - vec4(-4.9562e-02f, 2.8671e-02f, 1.3686e-01f, 0.0f), - vec4(-1.5705e-02f, -5.4865e-02f, 5.6473e-03f, 0.0f) - ), - vec4[7]( - vec4(-5.4844e-03f, -1.6043e-02f, -1.6392e-02f, 0.0f), - vec4( 8.8965e-02f, 2.8861e-02f, 2.0795e-03f, 0.0f), - vec4( 1.6636e-01f, 8.2952e-02f, 3.8352e-02f, 0.0f), - vec4( 8.7956e-02f, 1.4735e-01f, 1.0193e-01f, 0.0f), - vec4( 1.8432e-02f, 8.5476e-02f, 1.0605e-01f, 0.0f), - vec4(-1.5480e-02f, 2.9659e-02f, 6.8861e-02f, 0.0f), - vec4(-5.6236e-03f, -1.8234e-02f, 2.9642e-02f, 0.0f) - ), - vec4[7]( - vec4(-6.7048e-03f, -3.8673e-03f, -1.0044e-02f, 0.0f), - vec4( 3.9388e-02f, 6.9483e-03f, 1.0357e-02f, 0.0f), - vec4( 1.3296e-02f, 2.3415e-02f, 4.4685e-03f, 0.0f), - vec4( 2.9055e-02f, 1.8918e-02f, 2.0069e-02f, 0.0f), - vec4(-6.1559e-03f, 2.5721e-02f, 2.3438e-02f, 0.0f), - vec4( 7.0322e-03f, 5.6895e-03f, 2.6485e-02f, 0.0f), - vec4(-1.2151e-02f, -4.4146e-03f, 1.3875e-03f, 0.0f) - ), - vec4[7]( - vec4( 7.2912e-03f, -5.3521e-03f, -1.2737e-03f, 0.0f), - vec4(-4.9607e-02f, 1.6422e-03f, -7.8235e-03f, 0.0f), - vec4(-1.1154e-01f, -3.9406e-02f, 1.0291e-02f, 0.0f), - vec4(-3.7869e-02f, -8.5334e-02f, -6.5227e-02f, 0.0f), - vec4( 1.8729e-02f, -4.6851e-02f, -5.4638e-02f, 0.0f), - vec4(-1.2923e-03f, 1.7708e-03f, -4.1082e-02f, 0.0f), - vec4(-2.4411e-04f, -2.8019e-03f, -8.5312e-03f, 0.0f) + vec4(-1.3703e-03f, -2.7128e-03f, -8.9400e-03f, 0.0f), + vec4(-1.3076e-02f, -2.9120e-03f, 2.4325e-03f, 0.0f), + vec4(-4.6088e-03f, -1.8820e-02f, 2.6776e-02f, 0.0f), + vec4(-1.8680e-02f, -5.5206e-02f, 2.7558e-03f, 0.0f), + vec4(-4.2233e-03f, -1.8265e-02f, 2.8303e-02f, 0.0f), + vec4(-1.3331e-02f, -3.5675e-03f, 1.5966e-03f, 0.0f), + vec4(-7.5229e-04f, -2.3044e-03f, -9.3453e-03f, 0.0f) + ), + vec4[7]( + vec4(-1.7172e-03f, 1.0680e-03f, -4.2052e-02f, 0.0f), + vec4( 9.5862e-03f, 5.8752e-03f, 2.8153e-02f, 0.0f), + vec4(-1.7920e-02f, 2.8416e-02f, 6.7089e-02f, 0.0f), + vec4(-4.6624e-02f, 2.9508e-02f, 1.3613e-01f, 0.0f), + vec4(-1.7250e-02f, 2.9594e-02f, 6.9378e-02f, 0.0f), + vec4( 8.7751e-03f, 5.6507e-03f, 2.6455e-02f, 0.0f), + vec4(-1.9686e-03f, 7.4416e-04f, -4.1384e-02f, 0.0f) + ), + vec4[7]( + vec4( 1.5303e-02f, -4.8473e-02f, -5.8085e-02f, 0.0f), + vec4(-7.1747e-03f, 2.7484e-02f, 2.3522e-02f, 0.0f), + vec4( 1.6983e-02f, 8.5399e-02f, 1.0710e-01f, 0.0f), + vec4(-7.9377e-03f, 1.6152e-01f, 1.8166e-01f, 0.0f), + vec4( 1.7515e-02f, 8.5978e-02f, 1.0779e-01f, 0.0f), + vec4(-6.5856e-03f, 2.6297e-02f, 2.2493e-02f, 0.0f), + vec4( 1.4895e-02f, -4.8141e-02f, -5.8012e-02f, 0.0f) + ), + vec4[7]( + vec4(-3.7719e-02f, -8.3531e-02f, -6.4092e-02f, 0.0f), + vec4( 2.9740e-02f, 1.9817e-02f, 2.1634e-02f, 0.0f), + vec4( 8.6868e-02f, 1.4623e-01f, 1.0123e-01f, 0.0f), + vec4( 1.8146e-01f, 2.4817e-01f, 2.1861e-01f, 0.0f), + vec4( 8.7783e-02f, 1.4662e-01f, 1.0216e-01f, 0.0f), + vec4( 2.9442e-02f, 1.9655e-02f, 1.9912e-02f, 0.0f), + vec4(-3.7865e-02f, -8.3557e-02f, -6.3263e-02f, 0.0f) + ), + vec4[7]( + vec4(-1.1062e-01f, -3.9883e-02f, 1.0886e-02f, 0.0f), + vec4( 1.3182e-02f, 2.3998e-02f, 3.9794e-04f, 0.0f), + vec4( 1.6555e-01f, 8.1207e-02f, 3.6751e-02f, 0.0f), + vec4( 2.6950e-01f, 1.4875e-01f, 3.4342e-02f, 0.0f), + vec4( 1.6829e-01f, 8.2641e-02f, 3.5206e-02f, 0.0f), + vec4( 1.0881e-02f, 2.3315e-02f, 3.9485e-03f, 0.0f), + vec4(-1.0958e-01f, -3.9958e-02f, 9.7981e-03f, 0.0f) + ), + vec4[7]( + vec4(-4.9198e-02f, 9.1998e-04f, -7.3902e-03f, 0.0f), + vec4( 3.9535e-02f, 9.1899e-03f, 1.3642e-02f, 0.0f), + vec4( 8.7285e-02f, 2.7588e-02f, -1.0662e-04f, 0.0f), + vec4( 1.8349e-01f, 2.1068e-02f, -1.6866e-02f, 0.0f), + vec4( 8.8583e-02f, 2.9153e-02f, 1.8183e-03f, 0.0f), + vec4( 3.8491e-02f, 7.7285e-03f, 1.2305e-02f, 0.0f), + vec4(-4.9016e-02f, 1.0721e-03f, -8.0278e-03f, 0.0f) + ), + vec4[7]( + vec4( 6.1241e-03f, -4.8275e-03f, -9.3540e-04f, 0.0f), + vec4(-5.4319e-03f, -2.6706e-03f, -8.6848e-03f, 0.0f), + vec4(-6.0226e-03f, -1.7122e-02f, -1.7736e-02f, 0.0f), + vec4(-6.1247e-02f, -4.9733e-02f, -5.5650e-02f, 0.0f), + vec4(-4.0417e-03f, -1.6621e-02f, -1.7116e-02f, 0.0f), + vec4(-6.7143e-03f, -3.3482e-03f, -8.2083e-03f, 0.0f), + vec4( 6.7830e-03f, -4.3232e-03f, -7.8619e-04f, 0.0f) ) ); -const vec4 kSubpixelOLEDKernel[5][5] = vec4[5][5]( +// G/RB QD-OLED subpixel-aware downscale kernel (2:1 ratio). +const vec4 kSubpixelQDOLEDKernel[5][5] = vec4[5][5]( vec4[5]( - vec4( 8.8942e-03f, -3.2269e-03f, -4.4989e-04f, 0.0f), - vec4(-3.0907e-03f, 9.8704e-03f, -1.9181e-02f, 0.0f), - vec4(-1.3623e-01f, -1.7137e-02f, -8.9794e-03f, 0.0f), - vec4(-6.1139e-03f, -9.3100e-02f, -6.3452e-02f, 0.0f), - vec4( 1.2717e-02f, -1.0195e-02f, 6.3495e-04f, 0.0f) + vec4(-2.2206e-03f, -1.7716e-03f, 2.1434e-03f, 0.0f), + vec4(-1.4392e-02f, 4.7502e-03f, 1.3392e-02f, 0.0f), + vec4(-1.9470e-02f, -6.1152e-02f, -4.5265e-02f, 0.0f), + vec4(-1.2906e-02f, -6.2232e-02f, -4.2550e-02f, 0.0f), + vec4( 2.6931e-03f, -1.4399e-04f, -1.1285e-02f, 0.0f) ), vec4[5]( - vec4(-1.4033e-02f, -1.0390e-02f, 5.3642e-03f, 0.0f), - vec4( 7.0923e-02f, -2.0851e-02f, 1.5784e-02f, 0.0f), - vec4( 1.5164e-01f, 9.3378e-02f, -2.6439e-02f, 0.0f), - vec4( 6.2676e-02f, 1.6882e-01f, -9.8268e-03f, 0.0f), - vec4(-1.4588e-02f, 5.1943e-02f, 1.3150e-02f, 0.0f) + vec4( 1.6583e-02f, -3.0740e-02f, -7.2153e-03f, 0.0f), + vec4(-5.8073e-03f, 3.0652e-02f, 1.6279e-02f, 0.0f), + vec4(-1.1306e-01f, 1.5502e-01f, 1.4186e-02f, 0.0f), + vec4(-3.2931e-03f, 1.4972e-01f, -3.5163e-02f, 0.0f), + vec4( 1.4865e-02f, 8.6307e-03f, 1.2385e-02f, 0.0f) ), vec4[5]( - vec4(-1.3383e-01f, -1.8797e-02f, -3.9692e-02f, 0.0f), - vec4( 1.8129e-01f, -6.1458e-02f, 4.2175e-02f, 0.0f), - vec4( 5.6204e-01f, 1.6187e-01f, 1.4268e-01f, 0.0f), - vec4( 1.9378e-01f, 4.4632e-01f, 2.1392e-01f, 0.0f), - vec4(-1.2192e-01f, 8.3973e-02f, -8.6816e-03f, 0.0f) + vec4(-5.5262e-03f, -7.3926e-02f, -2.5914e-02f, 0.0f), + vec4( 7.8208e-02f, -2.7367e-02f, -4.4307e-02f, 0.0f), + vec4( 1.7748e-01f, 3.8761e-01f, 1.5539e-01f, 0.0f), + vec4( 8.9775e-02f, 3.8712e-01f, 1.8995e-01f, 0.0f), + vec4(-2.2427e-02f, -1.0061e-01f, 5.5693e-02f, 0.0f) ), vec4[5]( - vec4(-1.5648e-02f, -1.0165e-02f, -6.0513e-02f, 0.0f), - vec4( 6.6628e-02f, -2.8706e-02f, -1.8292e-02f, 0.0f), - vec4( 1.5260e-01f, 9.4369e-02f, 2.9209e-01f, 0.0f), - vec4( 5.9264e-02f, 1.6689e-01f, 3.2808e-01f, 0.0f), - vec4(-1.6114e-02f, 4.3363e-02f, -8.8848e-02f, 0.0f) + vec4(-1.4519e-01f, -3.1293e-02f, -3.9569e-03f, 0.0f), + vec4( 1.7445e-01f, 1.9616e-02f, -5.5174e-02f, 0.0f), + vec4( 5.6130e-01f, 1.5526e-01f, 1.4462e-01f, 0.0f), + vec4( 1.7025e-01f, 1.5754e-01f, 3.1903e-01f, 0.0f), + vec4(-1.2829e-01f, -5.1410e-03f, 8.4568e-02f, 0.0f) ), vec4[5]( - vec4( 1.1299e-02f, -1.3180e-03f, -3.0147e-02f, 0.0f), - vec4(-3.8403e-04f, 1.3891e-02f, 3.9877e-02f, 0.0f), - vec4(-1.2776e-01f, -1.7060e-02f, 9.1108e-02f, 0.0f), - vec4(-4.5907e-03f, -7.7945e-02f, 1.5202e-01f, 0.0f), - vec4( 1.4749e-02f, -4.2571e-03f, -8.5528e-03f, 0.0f) + vec4( 5.4248e-03f, 4.0236e-03f, -1.7954e-02f, 0.0f), + vec4( 5.4035e-02f, 3.5357e-03f, -2.3790e-02f, 0.0f), + vec4( 4.6402e-02f, -4.9542e-02f, 1.2279e-01f, 0.0f), + vec4( 6.5886e-02f, -5.4416e-02f, 1.1734e-01f, 0.0f), + vec4(-4.2627e-03f, 5.0940e-03f, 4.4279e-02f, 0.0f) ) ); +const vec4 kSubpixelOLEDKernel[5][5] = vec4[5][5]( + vec4[5]( + vec4( 8.8942e-03f, -3.2269e-03f, -4.4989e-04f, 0.0f), + vec4(-3.0907e-03f, 9.8704e-03f, -1.9181e-02f, 0.0f), + vec4(-1.3623e-01f, -1.7137e-02f, -8.9794e-03f, 0.0f), + vec4(-6.1139e-03f, -9.3100e-02f, -6.3452e-02f, 0.0f), + vec4( 1.2717e-02f, -1.0195e-02f, 6.3495e-04f, 0.0f) + ), + vec4[5]( + vec4(-1.4033e-02f, -1.0390e-02f, 5.3642e-03f, 0.0f), + vec4( 7.0923e-02f, -2.0851e-02f, 1.5784e-02f, 0.0f), + vec4( 1.5164e-01f, 9.3378e-02f, -2.6439e-02f, 0.0f), + vec4( 6.2676e-02f, 1.6882e-01f, -9.8268e-03f, 0.0f), + vec4(-1.4588e-02f, 5.1943e-02f, 1.3150e-02f, 0.0f) + ), + vec4[5]( + vec4(-1.3383e-01f, -1.8797e-02f, -3.9692e-02f, 0.0f), + vec4( 1.8129e-01f, -6.1458e-02f, 4.2175e-02f, 0.0f), + vec4( 5.6204e-01f, 1.6187e-01f, 1.4268e-01f, 0.0f), + vec4( 1.9378e-01f, 4.4632e-01f, 2.1392e-01f, 0.0f), + vec4(-1.2192e-01f, 8.3973e-02f, -8.6816e-03f, 0.0f) + ), + vec4[5]( + vec4(-1.5648e-02f, -1.0165e-02f, -6.0513e-02f, 0.0f), + vec4( 6.6628e-02f, -2.8706e-02f, -1.8292e-02f, 0.0f), + vec4( 1.5260e-01f, 9.4369e-02f, 2.9209e-01f, 0.0f), + vec4( 5.9264e-02f, 1.6689e-01f, 3.2808e-01f, 0.0f), + vec4(-1.6114e-02f, 4.3363e-02f, -8.8848e-02f, 0.0f) + ), + vec4[5]( + vec4( 1.1299e-02f, -1.3180e-03f, -3.0147e-02f, 0.0f), + vec4(-3.8403e-04f, 1.3891e-02f, 3.9877e-02f, 0.0f), + vec4(-1.2776e-01f, -1.7060e-02f, 9.1108e-02f, 0.0f), + vec4(-4.5907e-03f, -7.7945e-02f, 1.5202e-01f, 0.0f), + vec4( 1.4749e-02f, -4.2571e-03f, -8.5528e-03f, 0.0f) + ) +); bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 coord, ivec2 texSize, vec2 scale, uint colorspace, out vec4 outColor) { - if (shaderFilter != filter_subpixel_rgb && shaderFilter != filter_subpixel_oled) + if (shaderFilter != filter_subpixel_rgb && shaderFilter != filter_subpixel_oled && shaderFilter != filter_subpixel_vbgr && shaderFilter != filter_subpixel_qdoled) return false; - vec2 ratio = shaderFilter == filter_subpixel_oled ? kSubpixelOLEDDownscale : kSubpixelRGBDownscale; - ivec2 ratioInt = shaderFilter == filter_subpixel_oled ? kSubpixelOLEDDownscaleInt : kSubpixelRGBDownscaleInt; - float alphaWeight = shaderFilter == filter_subpixel_oled ? kSubpixelOLEDAlphaWeight : kSubpixelRGBAlphaWeight; + // Select ratio/stride/alpha per filter. + vec2 ratio; + ivec2 ratioInt; + float alphaWeight; + if (shaderFilter == filter_subpixel_oled) { + ratio = kSubpixelOLEDDownscale; + ratioInt = kSubpixelOLEDDownscaleInt; + alphaWeight = kSubpixelOLEDAlphaWeight; + } else if (shaderFilter == filter_subpixel_qdoled) { + ratio = kSubpixelQDOLEDDownscale; + ratioInt = kSubpixelQDOLEDDownscaleInt; + alphaWeight = kSubpixelQDOLEDAlphaWeight; + } else { + ratio = kSubpixelRGBDownscale; + ratioInt = kSubpixelRGBDownscaleInt; + alphaWeight = kSubpixelRGBAlphaWeight; + } if (any(greaterThan(abs(scale - ratio), vec2(kSubpixelRatioTolerance)))) return false; @@ -138,7 +260,8 @@ bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 vec3 accum = vec3(0.0f); float alpha = 0.0f; - if (shaderFilter == filter_subpixel_oled) { + switch (shaderFilter) { + case filter_subpixel_oled: for (int ky = 0; ky < 5; ky++) { int sy = clamp(start.y + ky, 0, maxCoord.y); for (int kx = 0; kx < 5; kx++) { @@ -146,12 +269,25 @@ bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); vec3 kernel = kSubpixelOLEDKernel[ky][kx].rgb; - accum += linearSample * kernel; alpha += texel.a * alphaWeight; } } - } else { + break; + case filter_subpixel_qdoled: + for (int ky = 0; ky < 5; ky++) { + int sy = clamp(start.y + ky, 0, maxCoord.y); + for (int kx = 0; kx < 5; kx++) { + int sx = clamp(start.x + kx, 0, maxCoord.x); + vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); + vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); + vec3 kernel = kSubpixelQDOLEDKernel[ky][kx].rgb; + accum += linearSample * kernel; + alpha += texel.a * alphaWeight; + } + } + break; + case filter_subpixel_rgb: for (int ky = 0; ky < 7; ky++) { int sy = clamp(start.y + ky, 0, maxCoord.y); for (int kx = 0; kx < 7; kx++) { @@ -159,15 +295,27 @@ bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); vec3 kernel = kSubpixelHorizontalRGBKernel[ky][kx].rgb; - accum += linearSample * kernel; alpha += texel.a * alphaWeight; } } + break; + case filter_subpixel_vbgr: + for (int ky = 0; ky < 7; ky++) { + int sy = clamp(start.y + ky, 0, maxCoord.y); + for (int kx = 0; kx < 7; kx++) { + int sx = clamp(start.x + kx, 0, maxCoord.x); + vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); + vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); + vec3 kernel = kSubpixelVerticalBGRKernel[ky][kx].rgb; + accum += linearSample * kernel; + alpha += texel.a * alphaWeight; + } + } + break; } outColor = vec4(accum, clamp(alpha, 0.0f, 1.0f)); return true; } - #endif From f5d422bbf06681cc99ef7f025960aa67d8ceb510 Mon Sep 17 00:00:00 2001 From: Daniel Koukola Date: Sat, 13 Dec 2025 23:12:47 +0100 Subject: [PATCH 8/8] shaders: clean up subpixel-aware downscaling filters --- src/rendervulkan.cpp | 8 +- src/shaders/composite.h | 50 +++++-- src/shaders/subpixel_scaler.h | 273 ++++++++++++++++------------------ 3 files changed, 170 insertions(+), 161 deletions(-) diff --git a/src/rendervulkan.cpp b/src/rendervulkan.cpp index a2bcd47a2e..cbde2f6bff 100644 --- a/src/rendervulkan.cpp +++ b/src/rendervulkan.cpp @@ -3690,10 +3690,9 @@ static GamescopeUpscaleFilter GetLayerShaderFilter( const FrameInfo_t::Layer_t & float observedX = dimRatioX > 0.0f ? dimRatioX : scaleRatioX; float observedY = dimRatioY > 0.0f ? dimRatioY : scaleRatioY; - const float tolerance = 0.05f; bool ratioOk = - close_enough( observedX, definition->downscaleRatio.x, tolerance ) && - close_enough( observedY, definition->downscaleRatio.y, tolerance ); + close_enough( observedX, definition->downscaleRatio.x ) && + close_enough( observedY, definition->downscaleRatio.y ); int state = ratioOk ? 1 : 0; if ( state != s_lastState[idx] ) @@ -3718,6 +3717,9 @@ static GamescopeUpscaleFilter GetLayerShaderFilter( const FrameInfo_t::Layer_t & } s_lastState[idx] = state; } + + if ( !ratioOk ) + return GamescopeUpscaleFilter::LINEAR; } return layer.filter; diff --git a/src/shaders/composite.h b/src/shaders/composite.h index 61e44989d6..d04e84688e 100644 --- a/src/shaders/composite.h +++ b/src/shaders/composite.h @@ -168,25 +168,45 @@ vec4 sampleLayerEx(sampler2D layerSampler, uint offsetLayerIdx, uint colorspaceL uint colorspace = get_layer_colorspace(colorspaceLayerIdx); vec4 color; - bool sampled = false; uint shaderFilter = get_layer_shaderfilter(offsetLayerIdx); - if (unnormalized) { - sampled = try_sample_subpixel_filter(shaderFilter, layerSampler, unnormalizedCoord, texSizeInt, u_scale[offsetLayerIdx], colorspace, color); - } - if (!sampled) { - if (shaderFilter == filter_pixel) { - vec2 output_res = texSize / u_scale[offsetLayerIdx]; - vec2 extent = max((texSize / output_res), vec2(1.0 / 256.0)); - color = sampleBandLimited(layerSampler, coord, unnormalized ? vec2(1.0f) : texSize, unnormalized ? vec2(1.0f) : vec2(1.0f) / texSize, extent, colorspace, unnormalized); - } - else if (shaderFilter == filter_linear_emulated) { - color = sampleBilinear(layerSampler, coord, colorspace, unnormalized); - } - else { - color = sampleRegular(layerSampler, coord, colorspace); + if (!unnormalized) { + if (shaderFilter == filter_subpixel_oled || + shaderFilter == filter_subpixel_qdoled || + shaderFilter == filter_subpixel_rgb || + shaderFilter == filter_subpixel_vbgr) + { + shaderFilter = filter_nearest; } } + + switch (shaderFilter) { + case filter_subpixel_oled: + SAMPLE_SUBPIXEL_OLED_FILTER(layerSampler, unnormalizedCoord, texSizeInt, u_scale[offsetLayerIdx], colorspace, color); + break; + case filter_subpixel_qdoled: + SAMPLE_SUBPIXEL_QDOLED_FILTER(layerSampler, unnormalizedCoord, texSizeInt, u_scale[offsetLayerIdx], colorspace, color); + break; + case filter_subpixel_rgb: + SAMPLE_SUBPIXEL_RGB_FILTER(layerSampler, unnormalizedCoord, texSizeInt, u_scale[offsetLayerIdx], colorspace, color); + break; + case filter_subpixel_vbgr: + SAMPLE_SUBPIXEL_VBGR_FILTER(layerSampler, unnormalizedCoord, texSizeInt, u_scale[offsetLayerIdx], colorspace, color); + break; + case filter_pixel: { + vec2 output_res = texSize / u_scale[offsetLayerIdx]; + vec2 extent = max((texSize / output_res), vec2(1.0 / 256.0)); + color = sampleBandLimited(layerSampler, coord, unnormalized ? vec2(1.0f) : texSize, unnormalized ? vec2(1.0f) : vec2(1.0f) / texSize, extent, colorspace, unnormalized); + break; + } + case filter_linear_emulated: + color = sampleBilinear(layerSampler, coord, colorspace, unnormalized); + break; + default: + color = sampleRegular(layerSampler, coord, colorspace); + break; + } + // JoshA: AMDGPU applies 3x4 CTM like this, where A is 1.0, but it only affects .rgb. color.rgb = vec4(color.rgb, 1.0f) * u_ctm[colorspaceLayerIdx]; color.rgb = apply_layer_color_mgmt(color.rgb, offsetLayerIdx, colorspace); diff --git a/src/shaders/subpixel_scaler.h b/src/shaders/subpixel_scaler.h index 74eea2a352..575b778c13 100644 --- a/src/shaders/subpixel_scaler.h +++ b/src/shaders/subpixel_scaler.h @@ -1,24 +1,6 @@ #ifndef SUBPIXEL_SCALER_H #define SUBPIXEL_SCALER_H -// Generic subpixel-aware downscale helpers. -// Current implementations: -// - horizontal RGB subpixel-aware downscale filter (3:1 ratio). -// - vertical BGR subpixel-aware downscale kernel (3:1 ratio). -// - RG/B subpixel-aware downscale filter (2:1 ratio). - -const float kSubpixelRatioTolerance = 0.05f; -const vec2 kSubpixelRGBDownscale = vec2(3.0f, 3.0f); -const ivec2 kSubpixelRGBDownscaleInt = ivec2(3); -const float kSubpixelRGBAlphaWeight = 1.0f / 49.0f; - -const vec2 kSubpixelOLEDDownscale = vec2(2.0f, 2.0f); -const ivec2 kSubpixelOLEDDownscaleInt = ivec2(2); -const float kSubpixelOLEDAlphaWeight = 1.0f / 25.0f; -const vec2 kSubpixelQDOLEDDownscale = vec2(2.0f, 2.0f); -const ivec2 kSubpixelQDOLEDDownscaleInt = ivec2(2); -const float kSubpixelQDOLEDAlphaWeight = 1.0f / 25.0f; - const vec4 kSubpixelHorizontalRGBKernel[7][7] = vec4[7][7]( vec4[7]( vec4( 7.1068e-03f, -5.4910e-03f, -1.7302e-03f, 0.0f), @@ -86,8 +68,8 @@ const vec4 kSubpixelHorizontalRGBKernel[7][7] = vec4[7][7]( ); const vec4 kSubpixelVerticalBGRKernel[7][7] = vec4[7][7]( - vec4[7]( - vec4(-1.3703e-03f, -2.7128e-03f, -8.9400e-03f, 0.0f), + vec4[7]( + vec4(-1.3703e-03f, -2.7128e-03f, -8.9400e-03f, 0.0f), vec4(-1.3076e-02f, -2.9120e-03f, 2.4325e-03f, 0.0f), vec4(-4.6088e-03f, -1.8820e-02f, 2.6776e-02f, 0.0f), vec4(-1.8680e-02f, -5.5206e-02f, 2.7558e-03f, 0.0f), @@ -148,46 +130,46 @@ const vec4 kSubpixelVerticalBGRKernel[7][7] = vec4[7][7]( vec4(-4.0417e-03f, -1.6621e-02f, -1.7116e-02f, 0.0f), vec4(-6.7143e-03f, -3.3482e-03f, -8.2083e-03f, 0.0f), vec4( 6.7830e-03f, -4.3232e-03f, -7.8619e-04f, 0.0f) - ) + ) ); // G/RB QD-OLED subpixel-aware downscale kernel (2:1 ratio). const vec4 kSubpixelQDOLEDKernel[5][5] = vec4[5][5]( - vec4[5]( - vec4(-2.2206e-03f, -1.7716e-03f, 2.1434e-03f, 0.0f), - vec4(-1.4392e-02f, 4.7502e-03f, 1.3392e-02f, 0.0f), - vec4(-1.9470e-02f, -6.1152e-02f, -4.5265e-02f, 0.0f), - vec4(-1.2906e-02f, -6.2232e-02f, -4.2550e-02f, 0.0f), - vec4( 2.6931e-03f, -1.4399e-04f, -1.1285e-02f, 0.0f) - ), - vec4[5]( - vec4( 1.6583e-02f, -3.0740e-02f, -7.2153e-03f, 0.0f), - vec4(-5.8073e-03f, 3.0652e-02f, 1.6279e-02f, 0.0f), - vec4(-1.1306e-01f, 1.5502e-01f, 1.4186e-02f, 0.0f), - vec4(-3.2931e-03f, 1.4972e-01f, -3.5163e-02f, 0.0f), - vec4( 1.4865e-02f, 8.6307e-03f, 1.2385e-02f, 0.0f) - ), - vec4[5]( - vec4(-5.5262e-03f, -7.3926e-02f, -2.5914e-02f, 0.0f), - vec4( 7.8208e-02f, -2.7367e-02f, -4.4307e-02f, 0.0f), - vec4( 1.7748e-01f, 3.8761e-01f, 1.5539e-01f, 0.0f), - vec4( 8.9775e-02f, 3.8712e-01f, 1.8995e-01f, 0.0f), - vec4(-2.2427e-02f, -1.0061e-01f, 5.5693e-02f, 0.0f) - ), - vec4[5]( - vec4(-1.4519e-01f, -3.1293e-02f, -3.9569e-03f, 0.0f), - vec4( 1.7445e-01f, 1.9616e-02f, -5.5174e-02f, 0.0f), - vec4( 5.6130e-01f, 1.5526e-01f, 1.4462e-01f, 0.0f), - vec4( 1.7025e-01f, 1.5754e-01f, 3.1903e-01f, 0.0f), - vec4(-1.2829e-01f, -5.1410e-03f, 8.4568e-02f, 0.0f) - ), - vec4[5]( - vec4( 5.4248e-03f, 4.0236e-03f, -1.7954e-02f, 0.0f), - vec4( 5.4035e-02f, 3.5357e-03f, -2.3790e-02f, 0.0f), - vec4( 4.6402e-02f, -4.9542e-02f, 1.2279e-01f, 0.0f), - vec4( 6.5886e-02f, -5.4416e-02f, 1.1734e-01f, 0.0f), - vec4(-4.2627e-03f, 5.0940e-03f, 4.4279e-02f, 0.0f) - ) + vec4[5]( + vec4(-2.2206e-03f, -1.7716e-03f, 2.1434e-03f, 0.0f), + vec4(-1.4392e-02f, 4.7502e-03f, 1.3392e-02f, 0.0f), + vec4(-1.9470e-02f, -6.1152e-02f, -4.5265e-02f, 0.0f), + vec4(-1.2906e-02f, -6.2232e-02f, -4.2550e-02f, 0.0f), + vec4( 2.6931e-03f, -1.4399e-04f, -1.1285e-02f, 0.0f) + ), + vec4[5]( + vec4( 1.6583e-02f, -3.0740e-02f, -7.2153e-03f, 0.0f), + vec4(-5.8073e-03f, 3.0652e-02f, 1.6279e-02f, 0.0f), + vec4(-1.1306e-01f, 1.5502e-01f, 1.4186e-02f, 0.0f), + vec4(-3.2931e-03f, 1.4972e-01f, -3.5163e-02f, 0.0f), + vec4( 1.4865e-02f, 8.6307e-03f, 1.2385e-02f, 0.0f) + ), + vec4[5]( + vec4(-5.5262e-03f, -7.3926e-02f, -2.5914e-02f, 0.0f), + vec4( 7.8208e-02f, -2.7367e-02f, -4.4307e-02f, 0.0f), + vec4( 1.7748e-01f, 3.8761e-01f, 1.5539e-01f, 0.0f), + vec4( 8.9775e-02f, 3.8712e-01f, 1.8995e-01f, 0.0f), + vec4(-2.2427e-02f, -1.0061e-01f, 5.5693e-02f, 0.0f) + ), + vec4[5]( + vec4(-1.4519e-01f, -3.1293e-02f, -3.9569e-03f, 0.0f), + vec4( 1.7445e-01f, 1.9616e-02f, -5.5174e-02f, 0.0f), + vec4( 5.6130e-01f, 1.5526e-01f, 1.4462e-01f, 0.0f), + vec4( 1.7025e-01f, 1.5754e-01f, 3.1903e-01f, 0.0f), + vec4(-1.2829e-01f, -5.1410e-03f, 8.4568e-02f, 0.0f) + ), + vec4[5]( + vec4( 5.4248e-03f, 4.0236e-03f, -1.7954e-02f, 0.0f), + vec4( 5.4035e-02f, 3.5357e-03f, -2.3790e-02f, 0.0f), + vec4( 4.6402e-02f, -4.9542e-02f, 1.2279e-01f, 0.0f), + vec4( 6.5886e-02f, -5.4416e-02f, 1.1734e-01f, 0.0f), + vec4(-4.2627e-03f, 5.0940e-03f, 4.4279e-02f, 0.0f) + ) ); const vec4 kSubpixelOLEDKernel[5][5] = vec4[5][5]( vec4[5]( @@ -227,95 +209,100 @@ const vec4 kSubpixelOLEDKernel[5][5] = vec4[5][5]( ) ); -bool try_sample_subpixel_filter(uint shaderFilter, sampler2D layerSampler, vec2 coord, ivec2 texSize, vec2 scale, uint colorspace, out vec4 outColor) -{ - if (shaderFilter != filter_subpixel_rgb && shaderFilter != filter_subpixel_oled && shaderFilter != filter_subpixel_vbgr && shaderFilter != filter_subpixel_qdoled) - return false; - - // Select ratio/stride/alpha per filter. - vec2 ratio; - ivec2 ratioInt; - float alphaWeight; - if (shaderFilter == filter_subpixel_oled) { - ratio = kSubpixelOLEDDownscale; - ratioInt = kSubpixelOLEDDownscaleInt; - alphaWeight = kSubpixelOLEDAlphaWeight; - } else if (shaderFilter == filter_subpixel_qdoled) { - ratio = kSubpixelQDOLEDDownscale; - ratioInt = kSubpixelQDOLEDDownscaleInt; - alphaWeight = kSubpixelQDOLEDAlphaWeight; - } else { - ratio = kSubpixelRGBDownscale; - ratioInt = kSubpixelRGBDownscaleInt; - alphaWeight = kSubpixelRGBAlphaWeight; - } - - if (any(greaterThan(abs(scale - ratio), vec2(kSubpixelRatioTolerance)))) - return false; +#define SAMPLE_SUBPIXEL_OLED_FILTER(layerSampler, coord, texSize, scale, colorspace, outColor) \ + do { \ + const vec2 _downscale = vec2(2.0f, 2.0f); \ + const ivec2 _downscaleInt = ivec2(2); \ + const float _alphaWeight = 1.0f / 25.0f; \ + ivec2 _outputIndex = ivec2(floor(((coord) - vec2(0.5f)) / _downscale)); \ + ivec2 _start = _outputIndex * _downscaleInt - ivec2(2); \ + ivec2 _maxCoord = (texSize) - ivec2(1); \ + vec3 _accum = vec3(0.0f); \ + float _alpha = 0.0f; \ + for (int ky = 0; ky < 5; ky++) { \ + int sy = clamp(_start.y + ky, 0, _maxCoord.y); \ + for (int kx = 0; kx < 5; kx++) { \ + int sx = clamp(_start.x + kx, 0, _maxCoord.x); \ + vec4 _texel = texelFetch((layerSampler), ivec2(sx, sy), 0); \ + vec3 _linearSample = colorspace_plane_degamma_tf(_texel.rgb, (colorspace)); \ + vec3 _kernel = kSubpixelOLEDKernel[ky][kx].rgb; \ + _accum += _linearSample * _kernel; \ + _alpha += _texel.a * _alphaWeight; \ + } \ + } \ + (outColor) = vec4(_accum, clamp(_alpha, 0.0f, 1.0f)); \ + } while (false) - ivec2 outputIndex = ivec2(floor((coord - vec2(0.5f)) / ratio)); - ivec2 start = outputIndex * ratioInt - ivec2(2); - ivec2 maxCoord = texSize - ivec2(1); +#define SAMPLE_SUBPIXEL_QDOLED_FILTER(layerSampler, coord, texSize, scale, colorspace, outColor) \ + do { \ + const vec2 _downscale = vec2(2.0f, 2.0f); \ + const ivec2 _downscaleInt = ivec2(2); \ + const float _alphaWeight = 1.0f / 25.0f; \ + ivec2 _outputIndex = ivec2(floor(((coord) - vec2(0.5f)) / _downscale)); \ + ivec2 _start = _outputIndex * _downscaleInt - ivec2(2); \ + ivec2 _maxCoord = (texSize) - ivec2(1); \ + vec3 _accum = vec3(0.0f); \ + float _alpha = 0.0f; \ + for (int ky = 0; ky < 5; ky++) { \ + int sy = clamp(_start.y + ky, 0, _maxCoord.y); \ + for (int kx = 0; kx < 5; kx++) { \ + int sx = clamp(_start.x + kx, 0, _maxCoord.x); \ + vec4 _texel = texelFetch((layerSampler), ivec2(sx, sy), 0); \ + vec3 _linearSample = colorspace_plane_degamma_tf(_texel.rgb, (colorspace)); \ + vec3 _kernel = kSubpixelQDOLEDKernel[ky][kx].rgb; \ + _accum += _linearSample * _kernel; \ + _alpha += _texel.a * _alphaWeight; \ + } \ + } \ + (outColor) = vec4(_accum, clamp(_alpha, 0.0f, 1.0f)); \ + } while (false) - vec3 accum = vec3(0.0f); - float alpha = 0.0f; +#define SAMPLE_SUBPIXEL_RGB_FILTER(layerSampler, coord, texSize, scale, colorspace, outColor) \ + do { \ + const vec2 _downscale = vec2(3.0f, 3.0f); \ + const ivec2 _downscaleInt = ivec2(3); \ + const float _alphaWeight = 1.0f / 49.0f; \ + ivec2 _outputIndex = ivec2(floor(((coord) - vec2(0.5f)) / _downscale)); \ + ivec2 _start = _outputIndex * _downscaleInt - ivec2(2); \ + ivec2 _maxCoord = (texSize) - ivec2(1); \ + vec3 _accum = vec3(0.0f); \ + float _alpha = 0.0f; \ + for (int ky = 0; ky < 7; ky++) { \ + int sy = clamp(_start.y + ky, 0, _maxCoord.y); \ + for (int kx = 0; kx < 7; kx++) { \ + int sx = clamp(_start.x + kx, 0, _maxCoord.x); \ + vec4 _texel = texelFetch((layerSampler), ivec2(sx, sy), 0); \ + vec3 _linearSample = colorspace_plane_degamma_tf(_texel.rgb, (colorspace)); \ + vec3 _kernel = kSubpixelHorizontalRGBKernel[ky][kx].rgb; \ + _accum += _linearSample * _kernel; \ + _alpha += _texel.a * _alphaWeight; \ + } \ + } \ + (outColor) = vec4(_accum, clamp(_alpha, 0.0f, 1.0f)); \ + } while (false) - switch (shaderFilter) { - case filter_subpixel_oled: - for (int ky = 0; ky < 5; ky++) { - int sy = clamp(start.y + ky, 0, maxCoord.y); - for (int kx = 0; kx < 5; kx++) { - int sx = clamp(start.x + kx, 0, maxCoord.x); - vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); - vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); - vec3 kernel = kSubpixelOLEDKernel[ky][kx].rgb; - accum += linearSample * kernel; - alpha += texel.a * alphaWeight; - } - } - break; - case filter_subpixel_qdoled: - for (int ky = 0; ky < 5; ky++) { - int sy = clamp(start.y + ky, 0, maxCoord.y); - for (int kx = 0; kx < 5; kx++) { - int sx = clamp(start.x + kx, 0, maxCoord.x); - vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); - vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); - vec3 kernel = kSubpixelQDOLEDKernel[ky][kx].rgb; - accum += linearSample * kernel; - alpha += texel.a * alphaWeight; - } - } - break; - case filter_subpixel_rgb: - for (int ky = 0; ky < 7; ky++) { - int sy = clamp(start.y + ky, 0, maxCoord.y); - for (int kx = 0; kx < 7; kx++) { - int sx = clamp(start.x + kx, 0, maxCoord.x); - vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); - vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); - vec3 kernel = kSubpixelHorizontalRGBKernel[ky][kx].rgb; - accum += linearSample * kernel; - alpha += texel.a * alphaWeight; - } - } - break; - case filter_subpixel_vbgr: - for (int ky = 0; ky < 7; ky++) { - int sy = clamp(start.y + ky, 0, maxCoord.y); - for (int kx = 0; kx < 7; kx++) { - int sx = clamp(start.x + kx, 0, maxCoord.x); - vec4 texel = texelFetch(layerSampler, ivec2(sx, sy), 0); - vec3 linearSample = colorspace_plane_degamma_tf(texel.rgb, colorspace); - vec3 kernel = kSubpixelVerticalBGRKernel[ky][kx].rgb; - accum += linearSample * kernel; - alpha += texel.a * alphaWeight; - } - } - break; - } +#define SAMPLE_SUBPIXEL_VBGR_FILTER(layerSampler, coord, texSize, scale, colorspace, outColor) \ + do { \ + const vec2 _downscale = vec2(3.0f, 3.0f); \ + const ivec2 _downscaleInt = ivec2(3); \ + const float _alphaWeight = 1.0f / 49.0f; \ + ivec2 _outputIndex = ivec2(floor(((coord) - vec2(0.5f)) / _downscale)); \ + ivec2 _start = _outputIndex * _downscaleInt - ivec2(2); \ + ivec2 _maxCoord = (texSize) - ivec2(1); \ + vec3 _accum = vec3(0.0f); \ + float _alpha = 0.0f; \ + for (int ky = 0; ky < 7; ky++) { \ + int sy = clamp(_start.y + ky, 0, _maxCoord.y); \ + for (int kx = 0; kx < 7; kx++) { \ + int sx = clamp(_start.x + kx, 0, _maxCoord.x); \ + vec4 _texel = texelFetch((layerSampler), ivec2(sx, sy), 0); \ + vec3 _linearSample = colorspace_plane_degamma_tf(_texel.rgb, (colorspace)); \ + vec3 _kernel = kSubpixelVerticalBGRKernel[ky][kx].rgb; \ + _accum += _linearSample * _kernel; \ + _alpha += _texel.a * _alphaWeight; \ + } \ + } \ + (outColor) = vec4(_accum, clamp(_alpha, 0.0f, 1.0f)); \ + } while (false) - outColor = vec4(accum, clamp(alpha, 0.0f, 1.0f)); - return true; -} #endif