duckstation

duckstation, archived at the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

gsvector_yuvtorgb_test.cpp (5716B)


      1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
      2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
      3 
      4 #include "common/bitutils.h"
      5 #include "common/gsvector.h"
      6 
      7 #include <gtest/gtest.h>
      8 
      9 #include <algorithm>
     10 #include <array>
     11 
     12 static void YUVToRGB_Vector(const std::array<s16, 64>& Crblk, const std::array<s16, 64>& Cbblk,
     13                             const std::array<s16, 64>& Yblk, u32* output, bool signed_output)
     14 {
     15   const GSVector4i addval = signed_output ? GSVector4i::cxpr(0) : GSVector4i::cxpr(0x80808080);
     16   for (u32 y = 0; y < 8; y++)
     17   {
     18     const GSVector4i Cr = GSVector4i::loadl(&Crblk[(y / 2) * 8]).s16to32();
     19     const GSVector4i Cb = GSVector4i::loadl(&Cbblk[(y / 2) * 8]).s16to32();
     20     const GSVector4i Y = GSVector4i::load<true>(&Yblk[y * 8]);
     21 
     22     // BT.601 YUV->RGB coefficients, rounding formula from Mednafen.
     23     // r = clamp(sext9(Y + (((359 * Cr) + 0x80) >> 8)), -128, 127) + addval;
     24     // g = clamp(sext9(Y + ((((-88 * Cb) & ~0x1F) + ((-183 * Cr) & ~0x07) + 0x80) >> 8)), -128, 127) + addval
     25     // b = clamp(sext9<9, s32>(Y + (((454 * Cb) + 0x80) >> 8)), -128, 127) + addval
     26 
     27     // Need to do the multiply as 32-bit, since 127 * 359 is greater than INT16_MAX.
     28     // upl16(self) = interleave XYZW0000 -> XXYYZZWW.
     29     const GSVector4i Crmul = Cr.mul32l(GSVector4i::cxpr(359)).add16(GSVector4i::cxpr(0x80)).sra32<8>().ps32();
     30     const GSVector4i Cbmul = Cb.mul32l(GSVector4i::cxpr(454)).add16(GSVector4i::cxpr(0x80)).sra32<8>().ps32();
     31     const GSVector4i CrCbmul = (Cb.mul32l(GSVector4i::cxpr(-88)) & GSVector4i::cxpr(~0x1F))
     32                                  .add32(Cr.mul32l(GSVector4i::cxpr(-183)) & GSVector4i::cxpr(~0x07))
     33                                  .add32(GSVector4i::cxpr(0x80))
     34                                  .sra32<8>()
     35                                  .ps32();
     36     const GSVector4i r = Crmul.upl16(Crmul).add16(Y).sll16<7>().sra16<7>().ps16().add8(addval);
     37     const GSVector4i g = CrCbmul.upl16(CrCbmul).add16(Y).sll16<7>().sra16<7>().ps16().add8(addval);
     38     const GSVector4i b = Cbmul.upl16(Cbmul).add16(Y).sll16<7>().sra16<7>().ps16().add8(addval);
     39     const GSVector4i rg = r.upl8(g);
     40     const GSVector4i b0 = b.upl8();
     41     const GSVector4i rgblow = rg.upl16(b0);
     42     const GSVector4i rgbhigh = rg.uph16(b0);
     43 
     44     GSVector4i::store<false>(&output[y * 8 + 0], rgblow);
     45     GSVector4i::store<false>(&output[y * 8 + 4], rgbhigh);
     46   }
     47 }
     48 
     49 static void YUVToRGB_Scalar(const std::array<s16, 64>& Crblk, const std::array<s16, 64>& Cbblk,
     50                             const std::array<s16, 64>& Yblk, u32* output, bool signed_output)
     51 {
     52   const s32 addval = signed_output ? 0 : 0x80;
     53   for (u32 y = 0; y < 8; y++)
     54   {
     55     for (u32 x = 0; x < 8; x++)
     56     {
     57       const s32 Cr = Crblk[(x / 2) + (y / 2) * 8];
     58       const s32 Cb = Cbblk[(x / 2) + (y / 2) * 8];
     59       const s32 Y = Yblk[x + y * 8];
     60 
     61       // BT.601 YUV->RGB coefficients, rounding from Mednafen.
     62       const s32 r = std::clamp(SignExtendN<9, s32>(Y + (((359 * Cr) + 0x80) >> 8)), -128, 127) + addval;
     63       const s32 g =
     64         std::clamp(SignExtendN<9, s32>(Y + ((((-88 * Cb) & ~0x1F) + ((-183 * Cr) & ~0x07) + 0x80) >> 8)), -128, 127) +
     65         addval;
     66       const s32 b = std::clamp(SignExtendN<9, s32>(Y + (((454 * Cb) + 0x80) >> 8)), -128, 127) + addval;
     67 
     68       output[y * 8 + x] =
     69         static_cast<u32>(Truncate8(r)) | (static_cast<u32>(Truncate8(g)) << 8) | (static_cast<u32>(Truncate8(b)) << 16);
     70     }
     71   }
     72 }
     73 
     74 TEST(GSVector, YUVToRGB)
     75 {
     76   alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk;
     77   alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk;
     78   alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk;
     79   for (s16 i = -128; i < 128; i++)
     80   {
     81     for (u32 j = 0; j < 64; j++)
     82       crblk[j] = i;
     83 
     84     for (s16 k = -128; k < 128; k++)
     85     {
     86       for (u32 j = 0; j < 64; j++)
     87         cbblk[j] = k;
     88 
     89       for (s16 l = -128; l < 128; l++)
     90       {
     91         for (u32 j = 0; j < 64; j++)
     92           yblk[j] = l;
     93 
     94         alignas(VECTOR_ALIGNMENT) u32 rows[64];
     95         YUVToRGB_Scalar(crblk, cbblk, yblk, rows, false);
     96 
     97         alignas(VECTOR_ALIGNMENT) u32 rowv[64];
     98         YUVToRGB_Vector(crblk, cbblk, yblk, rowv, false);
     99         ASSERT_EQ(std::memcmp(rows, rowv, sizeof(rows)), 0);
    100 
    101         YUVToRGB_Scalar(crblk, cbblk, yblk, rows, true);
    102         YUVToRGB_Vector(crblk, cbblk, yblk, rowv, true);
    103         ASSERT_EQ(std::memcmp(rows, rowv, sizeof(rows)), 0);
    104       }
    105     }
    106   }
    107 }
    108 
    109 #if 0
// Performance test
// Output buffer that both timing tests below write their 64 converted pixels into.
// NOTE(review): presumably kept at namespace scope so the conversion results stay observable and the
// calls are not optimised away - confirm before changing its linkage.
alignas(VECTOR_ALIGNMENT) u32 g_gsvector_yuvtorgb_temp[64];
    112 
    113 TEST(GSVector, YUVToRGB_Scalar)
    114 {
    115   alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk;
    116   alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk;
    117   alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk;
    118   for (s16 i = -128; i < 128; i++)
    119   {
    120     for (u32 j = 0; j < 64; j++)
    121       crblk[j] = i;
    122 
    123     for (s16 k = -128; k < 128; k++)
    124     {
    125       for (u32 j = 0; j < 64; j++)
    126         cbblk[j] = k;
    127 
    128       for (s16 l = -128; l < 128; l++)
    129       {
    130         for (u32 j = 0; j < 64; j++)
    131           yblk[j] = l;
    132 
    133         YUVToRGB_Scalar(crblk, cbblk, yblk, g_gsvector_yuvtorgb_temp, false);
    134       }
    135     }
    136   }
    137 }
    138 
    139 TEST(GSVector, YUVToRGB_Vector)
    140 {
    141   alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk;
    142   alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk;
    143   alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk;
    144   for (s16 i = -128; i < 128; i++)
    145   {
    146     for (u32 j = 0; j < 64; j++)
    147       crblk[j] = i;
    148 
    149     for (s16 k = -128; k < 128; k++)
    150     {
    151       for (u32 j = 0; j < 64; j++)
    152         cbblk[j] = k;
    153 
    154       for (s16 l = -128; l < 128; l++)
    155       {
    156         for (u32 j = 0; j < 64; j++)
    157           yblk[j] = l;
    158 
    159         YUVToRGB_Vector(crblk, cbblk, yblk, g_gsvector_yuvtorgb_temp, false);
    160       }
    161     }
    162   }
    163 }
    164 
    165 #endif