gsvector_yuvtorgb_test.cpp (5716B)
1 // SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 #include "common/bitutils.h" 5 #include "common/gsvector.h" 6 7 #include <gtest/gtest.h> 8 9 #include <algorithm> 10 #include <array> 11 12 static void YUVToRGB_Vector(const std::array<s16, 64>& Crblk, const std::array<s16, 64>& Cbblk, 13 const std::array<s16, 64>& Yblk, u32* output, bool signed_output) 14 { 15 const GSVector4i addval = signed_output ? GSVector4i::cxpr(0) : GSVector4i::cxpr(0x80808080); 16 for (u32 y = 0; y < 8; y++) 17 { 18 const GSVector4i Cr = GSVector4i::loadl(&Crblk[(y / 2) * 8]).s16to32(); 19 const GSVector4i Cb = GSVector4i::loadl(&Cbblk[(y / 2) * 8]).s16to32(); 20 const GSVector4i Y = GSVector4i::load<true>(&Yblk[y * 8]); 21 22 // BT.601 YUV->RGB coefficients, rounding formula from Mednafen. 23 // r = clamp(sext9(Y + (((359 * Cr) + 0x80) >> 8)), -128, 127) + addval; 24 // g = clamp(sext9(Y + ((((-88 * Cb) & ~0x1F) + ((-183 * Cr) & ~0x07) + 0x80) >> 8)), -128, 127) + addval 25 // b = clamp(sext9<9, s32>(Y + (((454 * Cb) + 0x80) >> 8)), -128, 127) + addval 26 27 // Need to do the multiply as 32-bit, since 127 * 359 is greater than INT16_MAX. 28 // upl16(self) = interleave XYZW0000 -> XXYYZZWW. 29 const GSVector4i Crmul = Cr.mul32l(GSVector4i::cxpr(359)).add16(GSVector4i::cxpr(0x80)).sra32<8>().ps32(); 30 const GSVector4i Cbmul = Cb.mul32l(GSVector4i::cxpr(454)).add16(GSVector4i::cxpr(0x80)).sra32<8>().ps32(); 31 const GSVector4i CrCbmul = (Cb.mul32l(GSVector4i::cxpr(-88)) & GSVector4i::cxpr(~0x1F)) 32 .add32(Cr.mul32l(GSVector4i::cxpr(-183)) & GSVector4i::cxpr(~0x07)) 33 .add32(GSVector4i::cxpr(0x80)) 34 .sra32<8>() 35 .ps32(); 36 const GSVector4i r = Crmul.upl16(Crmul).add16(Y).sll16<7>().sra16<7>().ps16().add8(addval); 37 const GSVector4i g = CrCbmul.upl16(CrCbmul).add16(Y).sll16<7>().sra16<7>().ps16().add8(addval); 38 const GSVector4i b = Cbmul.upl16(Cbmul).add16(Y).sll16<7>().sra16<7>().ps16().add8(addval); 39 const GSVector4i rg = r.upl8(g); 40 const GSVector4i b0 = b.upl8(); 41 const GSVector4i rgblow = rg.upl16(b0); 42 const GSVector4i rgbhigh = rg.uph16(b0); 43 44 GSVector4i::store<false>(&output[y * 8 + 0], rgblow); 45 GSVector4i::store<false>(&output[y * 8 + 4], rgbhigh); 46 } 47 } 48 49 static void YUVToRGB_Scalar(const std::array<s16, 64>& Crblk, const std::array<s16, 64>& Cbblk, 50 const std::array<s16, 64>& Yblk, u32* output, bool signed_output) 51 { 52 const s32 addval = signed_output ? 0 : 0x80; 53 for (u32 y = 0; y < 8; y++) 54 { 55 for (u32 x = 0; x < 8; x++) 56 { 57 const s32 Cr = Crblk[(x / 2) + (y / 2) * 8]; 58 const s32 Cb = Cbblk[(x / 2) + (y / 2) * 8]; 59 const s32 Y = Yblk[x + y * 8]; 60 61 // BT.601 YUV->RGB coefficients, rounding from Mednafen. 62 const s32 r = std::clamp(SignExtendN<9, s32>(Y + (((359 * Cr) + 0x80) >> 8)), -128, 127) + addval; 63 const s32 g = 64 std::clamp(SignExtendN<9, s32>(Y + ((((-88 * Cb) & ~0x1F) + ((-183 * Cr) & ~0x07) + 0x80) >> 8)), -128, 127) + 65 addval; 66 const s32 b = std::clamp(SignExtendN<9, s32>(Y + (((454 * Cb) + 0x80) >> 8)), -128, 127) + addval; 67 68 output[y * 8 + x] = 69 static_cast<u32>(Truncate8(r)) | (static_cast<u32>(Truncate8(g)) << 8) | (static_cast<u32>(Truncate8(b)) << 16); 70 } 71 } 72 } 73 74 TEST(GSVector, YUVToRGB) 75 { 76 alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk; 77 alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk; 78 alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk; 79 for (s16 i = -128; i < 128; i++) 80 { 81 for (u32 j = 0; j < 64; j++) 82 crblk[j] = i; 83 84 for (s16 k = -128; k < 128; k++) 85 { 86 for (u32 j = 0; j < 64; j++) 87 cbblk[j] = k; 88 89 for (s16 l = -128; l < 128; l++) 90 { 91 for (u32 j = 0; j < 64; j++) 92 yblk[j] = l; 93 94 alignas(VECTOR_ALIGNMENT) u32 rows[64]; 95 YUVToRGB_Scalar(crblk, cbblk, yblk, rows, false); 96 97 alignas(VECTOR_ALIGNMENT) u32 rowv[64]; 98 YUVToRGB_Vector(crblk, cbblk, yblk, rowv, false); 99 ASSERT_EQ(std::memcmp(rows, rowv, sizeof(rows)), 0); 100 101 YUVToRGB_Scalar(crblk, cbblk, yblk, rows, true); 102 YUVToRGB_Vector(crblk, cbblk, yblk, rowv, true); 103 ASSERT_EQ(std::memcmp(rows, rowv, sizeof(rows)), 0); 104 } 105 } 106 } 107 } 108 109 #if 0 110 // Performance test 111 alignas(VECTOR_ALIGNMENT) u32 g_gsvector_yuvtorgb_temp[64]; 112 113 TEST(GSVector, YUVToRGB_Scalar) 114 { 115 alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk; 116 alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk; 117 alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk; 118 for (s16 i = -128; i < 128; i++) 119 { 120 for (u32 j = 0; j < 64; j++) 121 crblk[j] = i; 122 123 for (s16 k = -128; k < 128; k++) 124 { 125 for (u32 j = 0; j < 64; j++) 126 cbblk[j] = k; 127 128 for (s16 l = -128; l < 128; l++) 129 { 130 for (u32 j = 0; j < 64; j++) 131 yblk[j] = l; 132 133 YUVToRGB_Scalar(crblk, cbblk, yblk, g_gsvector_yuvtorgb_temp, false); 134 } 135 } 136 } 137 } 138 139 TEST(GSVector, YUVToRGB_Vector) 140 { 141 alignas(VECTOR_ALIGNMENT) std::array<s16, 64> crblk; 142 alignas(VECTOR_ALIGNMENT) std::array<s16, 64> cbblk; 143 alignas(VECTOR_ALIGNMENT) std::array<s16, 64> yblk; 144 for (s16 i = -128; i < 128; i++) 145 { 146 for (u32 j = 0; j < 64; j++) 147 crblk[j] = i; 148 149 for (s16 k = -128; k < 128; k++) 150 { 151 for (u32 j = 0; j < 64; j++) 152 cbblk[j] = k; 153 154 for (s16 l = -128; l < 128; l++) 155 { 156 for (u32 j = 0; j < 64; j++) 157 yblk[j] = l; 158 159 YUVToRGB_Vector(crblk, cbblk, yblk, g_gsvector_yuvtorgb_temp, false); 160 } 161 } 162 } 163 } 164 165 #endif