gsvector_nosimd.h (78262B)
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: LGPL-3.0+

// Implementation of GSVector4/GSVector4i when the host does not support any form of SIMD.

#pragma once

#include "common/types.h"

#include <algorithm>
#include <cmath>
#include <cstring>

// Capability flags for users of the GSVector API: this scalar fallback provides the
// unsigned compare/saturate helpers and the per-lane variable shift (srlv/sllv) ops.
#define GSVECTOR_HAS_UNSIGNED 1
#define GSVECTOR_HAS_SRLV 1

class GSVector2;
class GSVector2i;
class GSVector4;
class GSVector4i;

// Scalar clamps emulating SIMD saturating arithmetic. clamp<decltype(expr)> keeps the
// comparison in the promoted type, so out-of-range intermediates survive until the cast.
#define SSATURATE8(expr) static_cast<s8>(std::clamp<decltype(expr)>(expr, -128, 127))
#define USATURATE8(expr) static_cast<u8>(std::clamp<decltype(expr)>(expr, 0, 255))
#define SSATURATE16(expr) static_cast<s16>(std::clamp<decltype(expr)>(expr, -32768, 32767))
#define USATURATE16(expr) static_cast<u16>(std::clamp<decltype(expr)>(expr, 0, 65535))

// Evaluate `expr` once per lane of a GSVector2i (lane index in scope as `i`), writing
// into a local `ret` that is returned. Lane counts: 8 bytes / 4 words / 2 dwords.
#define ALL_LANES_8(expr) \
  GSVector2i ret; \
  for (size_t i = 0; i < 8; i++) \
    expr; \
  return ret;
#define ALL_LANES_16(expr) \
  GSVector2i ret; \
  for (size_t i = 0; i < 4; i++) \
    expr; \
  return ret;
#define ALL_LANES_32(expr) \
  GSVector2i ret; \
  for (size_t i = 0; i < 2; i++) \
    expr; \
  return ret;

// 64-bit integer vector (2 x s32 / 4 x s16 / 8 x s8), scalar emulation.
class alignas(16) GSVector2i
{
  // Tag type so the private constexpr constructors don't collide with the runtime ones.
  struct cxpr_init_tag
  {
  };
  static constexpr cxpr_init_tag cxpr_init{};

  constexpr GSVector2i(cxpr_init_tag, s32 x, s32 y) : S32{x, y} {}

  constexpr GSVector2i(cxpr_init_tag, s16 s0, s16 s1, s16 s2, s16 s3) : S16{s0, s1, s2, s3} {}

  constexpr GSVector2i(cxpr_init_tag, s8 b0, s8 b1, s8 b2, s8 b3, s8 b4, s8 b5, s8 b6, s8 b7)
    : S8{b0, b1, b2, b3, b4, b5, b6, b7}
  {
  }

public:
  // Overlapping views of the same 8 bytes of payload.
  union
  {
    struct
    {
      s32 x, y;
    };
    struct
    {
      s32 r, g;
    };
    float F32[2];
    s8 S8[8];
    s16 S16[4];
    s32 S32[2];
    s64 S64[1];
    u8 U8[8];
    u16 U16[4];
    u32 U32[2];
    u64 U64[1];
  };

  GSVector2i() = default;

  // constexpr broadcast/element constructors (usable for compile-time constants).
  ALWAYS_INLINE constexpr static GSVector2i cxpr(s32 x, s32 y) { return GSVector2i(cxpr_init, x, y); }

  ALWAYS_INLINE constexpr static GSVector2i cxpr(s32 x) { return GSVector2i(cxpr_init, x, x); }

  ALWAYS_INLINE constexpr static GSVector2i cxpr16(s16 x) { return GSVector2i(cxpr_init, x, x, x, x); }

  ALWAYS_INLINE constexpr static GSVector2i cxpr16(s16 s0, s16 s1, s16 s2, s16 s3)
  {
    return GSVector2i(cxpr_init, s0, s1, s2, s3);
  }

  ALWAYS_INLINE constexpr static GSVector2i cxpr8(s8 b0, s8 b1, s8 b2, s8 b3, s8 b4, s8 b5, s8 b6, s8 b7)
  {
    return GSVector2i(cxpr_init, b0, b1, b2, b3, b4, b5, b6, b7);
  }

  ALWAYS_INLINE GSVector2i(s32 x, s32 y)
  {
    this->x = x;
    this->y = y;
  }

  ALWAYS_INLINE GSVector2i(s16 s0, s16 s1, s16 s2, s16 s3)
  {
    S16[0] = s0;
    S16[1] = s1;
    S16[2] = s2;
    S16[3] = s3;
  }

  ALWAYS_INLINE constexpr GSVector2i(s8 b0, s8 b1, s8 b2, s8 b3, s8 b4, s8 b5, s8 b6, s8 b7)
    : S8{b0, b1, b2, b3, b4, b5, b6, b7}
  {
  }

  ALWAYS_INLINE GSVector2i(const GSVector2i& v) { std::memcpy(S32, v.S32, sizeof(S32)); }

  // MSVC has bad codegen for the constexpr version when applied to non-constexpr things (https://godbolt.org/z/h8qbn7),
  // so leave the non-constexpr version default
  ALWAYS_INLINE explicit GSVector2i(s32 i) { *this = i; }

  // Conversion from a float vector; defined out of line (presumably alongside GSVector2
  // later in this file - confirm semantics at the definition site).
  ALWAYS_INLINE explicit GSVector2i(const GSVector2& v);

  // Bit-pattern reinterpretation of a float vector; defined out of line.
  ALWAYS_INLINE static GSVector2i cast(const GSVector2& v);

  ALWAYS_INLINE void operator=(const GSVector2i& v) { std::memcpy(S32, v.S32, sizeof(S32)); }
  // Broadcast a scalar to both s32 lanes.
  ALWAYS_INLINE void operator=(s32 i)
  {
    x = i;
    y = i;
  }

  // Signed saturation helpers: clamp each lane to [min, max].
  ALWAYS_INLINE GSVector2i sat_i8(const GSVector2i& min, const GSVector2i& max) const
  {
    return max_i8(min).min_i8(max);
  }
  ALWAYS_INLINE GSVector2i sat_i16(const GSVector2i& min, const GSVector2i& max) const
  {
    return max_i16(min).min_i16(max);
  }
  ALWAYS_INLINE GSVector2i sat_i32(const GSVector2i& min, const GSVector2i& max) const
  {
    return max_i32(min).min_i32(max);
  }

ALWAYS_INLINE GSVector2i sat_u8(const GSVector2i& min, const GSVector2i& max) const 149 { 150 return max_u8(min).min_u8(max); 151 } 152 ALWAYS_INLINE GSVector2i sat_u16(const GSVector2i& min, const GSVector2i& max) const 153 { 154 return max_u16(min).min_u16(max); 155 } 156 ALWAYS_INLINE GSVector2i sat_u32(const GSVector2i& min, const GSVector2i& max) const 157 { 158 return max_u32(min).min_u32(max); 159 } 160 161 GSVector2i min_i8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = std::min(S8[i], v.S8[i])); } 162 GSVector2i max_i8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = std::max(S8[i], v.S8[i])); } 163 GSVector2i min_i16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = std::min(S16[i], v.S16[i])); } 164 GSVector2i max_i16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = std::max(S16[i], v.S16[i])); } 165 GSVector2i min_i32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = std::min(S32[i], v.S32[i])); } 166 GSVector2i max_i32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = std::max(S32[i], v.S32[i])); } 167 168 GSVector2i min_u8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = std::min(U8[i], v.U8[i])); } 169 GSVector2i max_u8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = std::max(U8[i], v.U8[i])); } 170 GSVector2i min_u16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = std::min(U16[i], v.U16[i])); } 171 GSVector2i max_u16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = std::max(U16[i], v.U16[i])); } 172 GSVector2i min_u32(const GSVector2i& v) const { ALL_LANES_32(ret.U32[i] = std::min(U32[i], v.U32[i])); } 173 GSVector2i max_u32(const GSVector2i& v) const { ALL_LANES_32(ret.U32[i] = std::max(U32[i], v.U32[i])); } 174 175 s32 addv_s32() const { return (S32[0] + S32[1]); } 176 177 u8 minv_u8() const 178 { 179 return std::min( 180 U8[0], 181 std::min(U8[1], std::min(U8[2], std::min(U8[3], std::min(U8[4], std::min(U8[5], std::min(U8[6], U8[7]))))))); 182 } 183 184 u16 maxv_u8() const 185 { 186 return 
std::max( 187 U8[0], 188 std::max(U8[1], std::max(U8[2], std::max(U8[3], std::max(U8[4], std::max(U8[5], std::max(U8[6], U8[7]))))))); 189 } 190 191 u16 minv_u16() const { return std::min(U16[0], std::min(U16[1], std::min(U16[2], U16[3]))); } 192 193 u16 maxv_u16() const { return std::max(U16[0], std::max(U16[1], std::max(U16[2], U16[3]))); } 194 195 s32 minv_s32() const { return std::min(x, y); } 196 197 u32 minv_u32() const { return std::min(U32[0], U32[1]); } 198 199 s32 maxv_s32() const { return std::max(x, y); } 200 201 u32 maxv_u32() const { return std::max(U32[0], U32[1]); } 202 203 ALWAYS_INLINE GSVector2i clamp8() const { return pu16().upl8(); } 204 205 GSVector2i blend8(const GSVector2i& v, const GSVector2i& mask) const 206 { 207 GSVector2i ret; 208 for (size_t i = 0; i < 8; i++) 209 ret.U8[i] = (mask.U8[i] & 0x80) ? v.U8[i] : U8[i]; 210 return ret; 211 } 212 213 template<s32 mask> 214 GSVector2i blend16(const GSVector2i& v) const 215 { 216 GSVector2i ret; 217 for (size_t i = 0; i < 4; i++) 218 ret.U16[i] = ((mask & (1 << i)) != 0) ? v.U16[i] : U16[i]; 219 return ret; 220 } 221 222 template<s32 mask> 223 GSVector2i blend32(const GSVector2i& v) const 224 { 225 GSVector2i ret; 226 for (size_t i = 0; i < 2; i++) 227 ret.U32[i] = ((mask & (1 << i)) != 0) ? v.U32[i] : U32[i]; 228 return ret; 229 } 230 231 GSVector2i blend(const GSVector2i& v, const GSVector2i& mask) const 232 { 233 GSVector2i ret; 234 ret.U64[0] = (v.U64[0] & mask.U64[0]); 235 return ret; 236 } 237 238 ALWAYS_INLINE GSVector2i mix16(const GSVector2i& v) const { return blend16<0xa>(v); } 239 240 GSVector2i shuffle8(const GSVector2i& mask) const 241 { 242 ALL_LANES_8(ret.S8[i] = (mask.S8[i] & 0x80) ? 0 : (S8[mask.S8[i] & 0xf])); 243 } 244 245 GSVector2i ps16() const { ALL_LANES_8(ret.S8[i] = SSATURATE8(S16[(i < 4) ? i : (i - 4)])); } 246 GSVector2i pu16() const { ALL_LANES_8(ret.U8[i] = USATURATE8(U16[(i < 4) ? 
i : (i - 4)])); } 247 GSVector2i ps32() const { ALL_LANES_16(ret.S16[i] = SSATURATE16(S32[(i < 2) ? i : (i - 2)])); } 248 GSVector2i pu32() const { ALL_LANES_16(ret.U16[i] = USATURATE16(U32[(i < 2) ? i : (i - 2)])); } 249 250 GSVector2i upl8() const { return GSVector2i(S8[0], 0, S8[1], 0, S8[2], 0, S8[3], 0); } 251 252 GSVector2i upl16() const { return GSVector2i(S16[0], 0, S16[1], 0); } 253 254 GSVector2i upl32() const { return GSVector2i(S32[0], 0); } 255 256 GSVector2i i8to16() const { ALL_LANES_16(ret.S16[i] = S8[i]); } 257 258 template<s32 v> 259 GSVector2i srl() const 260 { 261 GSVector2i ret = {}; 262 if constexpr (v < 8) 263 { 264 for (s32 i = 0; i < (8 - v); i++) 265 ret.U8[i] = U8[v + i]; 266 } 267 return ret; 268 } 269 270 template<s32 v> 271 GSVector2i sll() const 272 { 273 GSVector2i ret = {}; 274 if constexpr (v < 8) 275 { 276 for (s32 i = 0; i < (8 - v); i++) 277 ret.U8[v + i] = U8[i]; 278 } 279 return ret; 280 } 281 282 template<s32 v> 283 GSVector2i sll16() const 284 { 285 ALL_LANES_16(ret.U16[i] = U16[i] << v); 286 } 287 288 GSVector2i sll16(s32 v) const { ALL_LANES_16(ret.U16[i] = U16[i] << v); } 289 290 GSVector2i sllv16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = U16[i] << v.U16[i]); } 291 292 template<s32 v> 293 GSVector2i srl16() const 294 { 295 ALL_LANES_16(ret.U16[i] = U16[i] >> v); 296 } 297 298 GSVector2i srl16(s32 v) const { ALL_LANES_16(ret.U16[i] = U16[i] >> v); } 299 300 GSVector2i srlv16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = U16[i] >> v.U16[i]); } 301 302 template<s32 v> 303 GSVector2i sra16() const 304 { 305 ALL_LANES_16(ret.S16[i] = S16[i] >> v); 306 } 307 308 GSVector2i sra16(s32 v) const { ALL_LANES_16(ret.S16[i] = S16[i] >> v); } 309 310 GSVector2i srav16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] >> v.S16[i]); } 311 312 template<s32 v> 313 GSVector2i sll32() const 314 { 315 ALL_LANES_32(ret.U32[i] = U32[i] << v); 316 } 317 318 GSVector2i sll32(s32 v) const { ALL_LANES_32(ret.U32[i] 
= U32[i] << v); } 319 320 GSVector2i sllv32(const GSVector2i& v) const { ALL_LANES_32(ret.U32[i] = U32[i] << v.U32[i]); } 321 322 template<s32 v> 323 GSVector2i srl32() const 324 { 325 ALL_LANES_32(ret.U32[i] = U32[i] >> v); 326 } 327 328 GSVector2i srl32(s32 v) const { ALL_LANES_32(ret.U32[i] = U32[i] >> v); } 329 330 GSVector2i srlv32(const GSVector2i& v) const { ALL_LANES_32(ret.U32[i] = U32[i] >> v.U32[i]); } 331 332 template<s32 v> 333 GSVector2i sra32() const 334 { 335 ALL_LANES_32(ret.S32[i] = S32[i] >> v); 336 } 337 338 GSVector2i sra32(s32 v) const { ALL_LANES_32(ret.S32[i] = S32[i] >> v); } 339 340 GSVector2i srav32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] >> v.S32[i]); } 341 342 GSVector2i add8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = S8[i] + v.S8[i]); } 343 344 GSVector2i add16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] + v.S16[i]); } 345 346 GSVector2i add32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] + v.S32[i]); } 347 348 GSVector2i adds8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = SSATURATE8(S8[i] + v.S8[i])); } 349 350 GSVector2i adds16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = SSATURATE16(S16[i] + v.S16[i])); } 351 352 GSVector2i addus8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = USATURATE8(U8[i] + v.U8[i])); } 353 354 GSVector2i addus16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = USATURATE16(U16[i] + v.U16[i])); } 355 356 GSVector2i sub8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = S8[i] - v.S8[i]); } 357 358 GSVector2i sub16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] - v.S16[i]); } 359 360 GSVector2i sub32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] - v.S32[i]); } 361 362 GSVector2i subs8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = SSATURATE8(S8[i] - v.S8[i])); } 363 364 GSVector2i subs16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = SSATURATE16(S16[i] - v.S16[i])); } 365 
  // Unsigned saturating subtracts (clamp at zero, psubusb/psubusw-style).
  GSVector2i subus8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = USATURATE8(U8[i] - v.U8[i])); }

  GSVector2i subus16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = USATURATE16(U16[i] - v.U16[i])); }

  // Per-lane unsigned averages.
  // NOTE(review): these truncate ((a + b) >> 1), whereas the SSE pavgb/pavgw that the
  // SIMD backends presumably use round up ((a + b + 1) >> 1) - confirm whether callers
  // depend on backend-identical output before changing either side.
  GSVector2i avg8(const GSVector2i& v) const { ALL_LANES_8(ret.U8[i] = (U8[i] + v.U8[i]) >> 1); }

  GSVector2i avg16(const GSVector2i& v) const { ALL_LANES_16(ret.U16[i] = (U16[i] + v.U16[i]) >> 1); }

  // Per-lane multiplies, keeping the low half of each product.
  GSVector2i mul16l(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] * v.S16[i]); }

  GSVector2i mul32l(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] * v.S32[i]); }

  // Whole-vector bitwise equality (scalar bool, not a per-lane mask).
  ALWAYS_INLINE bool eq(const GSVector2i& v) const { return (std::memcmp(S32, v.S32, sizeof(S32))) == 0; }

  // Per-lane comparisons: matching lanes become all-ones (-1), others zero.
  GSVector2i eq8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] == v.S8[i]) ? -1 : 0); }
  GSVector2i eq16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] == v.S16[i]) ? -1 : 0); }
  GSVector2i eq32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] == v.S32[i]) ? -1 : 0); }

  GSVector2i neq8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] != v.S8[i]) ? -1 : 0); }
  GSVector2i neq16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] != v.S16[i]) ? -1 : 0); }
  GSVector2i neq32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] != v.S32[i]) ? -1 : 0); }

  // Signed per-lane ordering comparisons, same all-ones/zero mask convention.
  GSVector2i gt8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] > v.S8[i]) ? -1 : 0); }
  GSVector2i gt16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] > v.S16[i]) ? -1 : 0); }
  GSVector2i gt32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] > v.S32[i]) ? -1 : 0); }

  GSVector2i ge8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] >= v.S8[i]) ? -1 : 0); }
  GSVector2i ge16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] >= v.S16[i]) ? -1 : 0); }
  GSVector2i ge32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] >= v.S32[i]) ? -1 : 0); }

  GSVector2i lt8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] < v.S8[i]) ? -1 : 0); }
  GSVector2i lt16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] < v.S16[i]) ? -1 : 0); }
  GSVector2i lt32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] < v.S32[i]) ? -1 : 0); }

  GSVector2i le8(const GSVector2i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] <= v.S8[i]) ? -1 : 0); }
  GSVector2i le16(const GSVector2i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] <= v.S16[i]) ? -1 : 0); }
  GSVector2i le32(const GSVector2i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] <= v.S32[i]) ? -1 : 0); }

  // this & ~v - note the argument is the inverted operand, matching x86 pandn order.
  ALWAYS_INLINE GSVector2i andnot(const GSVector2i& v) const
  {
    GSVector2i ret;
    ret.U64[0] = (~v.U64[0]) & U64[0];
    return ret;
  }

  // Gather the MSB of every byte lane into bits 0-7 of the result (pmovmskb-style).
  s32 mask() const
  {
    return static_cast<s32>((static_cast<u32>(U8[0] >> 7) << 0) | (static_cast<u32>(U8[1] >> 7) << 1) |
                            (static_cast<u32>(U8[2] >> 7) << 2) | (static_cast<u32>(U8[3] >> 7) << 3) |
                            (static_cast<u32>(U8[4] >> 7) << 4) | (static_cast<u32>(U8[5] >> 7) << 5) |
                            (static_cast<u32>(U8[6] >> 7) << 6) | (static_cast<u32>(U8[7] >> 7) << 7));
  }

  // All bits set / no bits set - intended for use on comparison-result masks.
  ALWAYS_INLINE bool alltrue() const { return (U64[0] == 0xFFFFFFFFFFFFFFFFULL); }

  ALWAYS_INLINE bool allfalse() const { return (U64[0] == 0); }

  // Copy of this vector with one lane replaced; the lane index is a template parameter.
  template<s32 i>
  ALWAYS_INLINE GSVector2i insert8(s32 a) const
  {
    GSVector2i ret = *this;
    ret.S8[i] = static_cast<s8>(a);
    return ret;
  }

  // Extract a lane, sign-extended to s32.
  template<s32 i>
  ALWAYS_INLINE s32 extract8() const
  {
    return S8[i];
  }

  template<s32 i>
  ALWAYS_INLINE GSVector2i insert16(s32 a) const
  {
    GSVector2i ret = *this;
    ret.S16[i] = static_cast<s16>(a);
    return ret;
  }

  template<s32 i>
  ALWAYS_INLINE s32 extract16() const
  {
    return S16[i];
  }

template<s32 i> 452 ALWAYS_INLINE GSVector2i insert32(s32 a) const 453 { 454 GSVector2i ret = *this; 455 ret.S32[i] = a; 456 return ret; 457 } 458 459 template<s32 i> 460 ALWAYS_INLINE s32 extract32() const 461 { 462 return S32[i]; 463 } 464 465 ALWAYS_INLINE static GSVector2i load32(const void* p) 466 { 467 GSVector2i ret; 468 std::memcpy(&ret.x, p, sizeof(s32)); 469 ret.y = 0; 470 return ret; 471 } 472 473 ALWAYS_INLINE static GSVector2i load(const void* p) 474 { 475 GSVector2i ret; 476 std::memcpy(ret.S32, p, sizeof(ret.S32)); 477 return ret; 478 } 479 480 ALWAYS_INLINE static GSVector2i load(s32 i) 481 { 482 GSVector2i ret; 483 ret.x = i; 484 return ret; 485 } 486 487 ALWAYS_INLINE static void store(void* p, const GSVector2i& v) { std::memcpy(p, v.S32, sizeof(S32)); } 488 489 ALWAYS_INLINE static void store32(void* p, const GSVector2i& v) { std::memcpy(p, &v.x, sizeof(s32)); } 490 491 ALWAYS_INLINE static s32 store(const GSVector2i& v) { return v.x; } 492 493 ALWAYS_INLINE void operator&=(const GSVector2i& v) { U64[0] &= v.U64[0]; } 494 ALWAYS_INLINE void operator|=(const GSVector2i& v) { U64[0] |= v.U64[0]; } 495 ALWAYS_INLINE void operator^=(const GSVector2i& v) { U64[0] ^= v.U64[0]; } 496 497 ALWAYS_INLINE friend GSVector2i operator&(const GSVector2i& v1, const GSVector2i& v2) 498 { 499 GSVector2i ret; 500 ret.U64[0] = v1.U64[0] & v2.U64[0]; 501 return ret; 502 } 503 504 ALWAYS_INLINE friend GSVector2i operator|(const GSVector2i& v1, const GSVector2i& v2) 505 { 506 GSVector2i ret; 507 ret.U64[0] = v1.U64[0] | v2.U64[0]; 508 return ret; 509 } 510 511 ALWAYS_INLINE friend GSVector2i operator^(const GSVector2i& v1, const GSVector2i& v2) 512 { 513 GSVector2i ret; 514 ret.U64[0] = v1.U64[0] ^ v2.U64[0]; 515 return ret; 516 } 517 518 ALWAYS_INLINE friend GSVector2i operator&(const GSVector2i& v, s32 i) { return v & GSVector2i(i); } 519 520 ALWAYS_INLINE friend GSVector2i operator|(const GSVector2i& v, s32 i) { return v | GSVector2i(i); } 521 522 ALWAYS_INLINE 
friend GSVector2i operator^(const GSVector2i& v, s32 i) { return v ^ GSVector2i(i); } 523 524 ALWAYS_INLINE friend GSVector2i operator~(const GSVector2i& v) { return v ^ v.eq32(v); } 525 526 ALWAYS_INLINE static constexpr GSVector2i zero() { return GSVector2i::cxpr(0, 0); } 527 528 ALWAYS_INLINE GSVector2i xy() const { return *this; } 529 ALWAYS_INLINE GSVector2i xx() const { return GSVector2i(x, x); } 530 ALWAYS_INLINE GSVector2i yx() const { return GSVector2i(y, x); } 531 ALWAYS_INLINE GSVector2i yy() const { return GSVector2i(y, y); } 532 }; 533 534 class alignas(16) GSVector2 535 { 536 struct cxpr_init_tag 537 { 538 }; 539 static constexpr cxpr_init_tag cxpr_init{}; 540 541 constexpr GSVector2(cxpr_init_tag, float x, float y) : F32{x, y} {} 542 543 constexpr GSVector2(cxpr_init_tag, int x, int y) : I32{x, y} {} 544 545 public: 546 union 547 { 548 struct 549 { 550 float x, y; 551 }; 552 struct 553 { 554 float r, g; 555 }; 556 float F32[4]; 557 double F64[2]; 558 s8 I8[16]; 559 s16 I16[8]; 560 s32 I32[4]; 561 s64 I64[2]; 562 u8 U8[16]; 563 u16 U16[8]; 564 u32 U32[4]; 565 u64 U64[2]; 566 }; 567 568 GSVector2() = default; 569 570 constexpr static GSVector2 cxpr(float x, float y) { return GSVector2(cxpr_init, x, y); } 571 572 constexpr static GSVector2 cxpr(float x) { return GSVector2(cxpr_init, x, x); } 573 574 constexpr static GSVector2 cxpr(int x, int y) { return GSVector2(cxpr_init, x, y); } 575 576 constexpr static GSVector2 cxpr(int x) { return GSVector2(cxpr_init, x, x); } 577 578 ALWAYS_INLINE GSVector2(float x, float y) 579 { 580 this->x = x; 581 this->y = y; 582 } 583 584 ALWAYS_INLINE GSVector2(int x, int y) 585 { 586 this->x = static_cast<float>(x); 587 this->y = static_cast<float>(y); 588 } 589 590 ALWAYS_INLINE explicit GSVector2(float f) { x = y = f; } 591 592 ALWAYS_INLINE explicit GSVector2(int i) { x = y = static_cast<float>(i); } 593 594 ALWAYS_INLINE explicit GSVector2(const GSVector2i& v); 595 596 ALWAYS_INLINE static GSVector2 cast(const 
GSVector2i& v); 597 598 ALWAYS_INLINE void operator=(float f) { x = y = f; } 599 600 GSVector2 abs() const { return GSVector2(std::fabs(x), std::fabs(y)); } 601 602 GSVector2 neg() const { return GSVector2(-x, -y); } 603 604 GSVector2 rcp() const { return GSVector2(1.0f / x, 1.0f / y); } 605 606 GSVector2 floor() const { return GSVector2(std::floor(x), std::floor(y)); } 607 608 GSVector2 ceil() const { return GSVector2(std::ceil(x), std::ceil(y)); } 609 610 GSVector2 sat(const GSVector2& min, const GSVector2& max) const 611 { 612 return GSVector2(std::clamp(x, min.x, max.x), std::clamp(y, min.y, max.y)); 613 } 614 615 GSVector2 sat(const float scale = 255) const { return sat(zero(), GSVector2(scale)); } 616 617 GSVector2 clamp(const float scale = 255) const { return min(GSVector2(scale)); } 618 619 GSVector2 min(const GSVector2& v) const { return GSVector2(std::min(x, v.x), std::min(y, v.y)); } 620 621 GSVector2 max(const GSVector2& v) const { return GSVector2(std::max(x, v.x), std::max(y, v.y)); } 622 623 template<int mask> 624 GSVector2 blend32(const GSVector2& v) const 625 { 626 return GSVector2(v.F32[mask & 1], v.F32[(mask >> 1) & 1]); 627 } 628 629 ALWAYS_INLINE GSVector2 blend32(const GSVector2& v, const GSVector2& mask) const 630 { 631 return GSVector2((mask.U32[0] & 0x80000000u) ? v.x : x, (mask.U32[1] & 0x80000000u) ? 
v.y : y); 632 } 633 634 ALWAYS_INLINE GSVector2 andnot(const GSVector2& v) const 635 { 636 GSVector2 ret; 637 ret.U32[0] = ((~v.U32[0]) & U32[0]); 638 ret.U32[1] = ((~v.U32[1]) & U32[1]); 639 return ret; 640 } 641 642 ALWAYS_INLINE int mask() const { return (U32[0] >> 31) | ((U32[1] >> 30) & 2); } 643 644 ALWAYS_INLINE bool alltrue() const { return (U64[0] == 0xFFFFFFFFFFFFFFFFULL); } 645 646 ALWAYS_INLINE bool allfalse() const { return (U64[0] == 0); } 647 648 ALWAYS_INLINE GSVector2 replace_nan(const GSVector2& v) const { return v.blend32(*this, *this == *this); } 649 650 template<int src, int dst> 651 ALWAYS_INLINE GSVector2 insert32(const GSVector2& v) const 652 { 653 GSVector2 ret = *this; 654 ret.F32[dst] = v.F32[src]; 655 return ret; 656 } 657 658 template<int i> 659 ALWAYS_INLINE int extract32() const 660 { 661 return I32[i]; 662 } 663 664 ALWAYS_INLINE float dot(const GSVector2& v) const { return (x * v.x + y * v.y); } 665 666 ALWAYS_INLINE static constexpr GSVector2 zero() { return GSVector2::cxpr(0.0f, 0.0f); } 667 668 ALWAYS_INLINE static constexpr GSVector2 xffffffff() 669 { 670 GSVector2 ret = zero(); 671 ret.U64[0] = ~ret.U64[0]; 672 return ret; 673 } 674 675 ALWAYS_INLINE static GSVector2 load(float f) { return GSVector2(f, f); } 676 677 ALWAYS_INLINE static GSVector2 load(const void* p) 678 { 679 GSVector2 ret; 680 std::memcpy(ret.F32, p, sizeof(F32)); 681 return ret; 682 } 683 684 ALWAYS_INLINE static void store(void* p, const GSVector2& v) { std::memcpy(p, &v.F32, sizeof(F32)); } 685 686 ALWAYS_INLINE GSVector2 operator-() const { return neg(); } 687 688 void operator+=(const GSVector2& v_) 689 { 690 x = x + v_.x; 691 y = y + v_.y; 692 } 693 void operator-=(const GSVector2& v_) 694 { 695 x = x - v_.x; 696 y = y - v_.y; 697 } 698 void operator*=(const GSVector2& v_) 699 { 700 x = x * v_.x; 701 y = y * v_.y; 702 } 703 void operator/=(const GSVector2& v_) 704 { 705 x = x / v_.x; 706 y = y / v_.y; 707 } 708 709 void operator+=(const float v_) 710 { 
711 x = x + v_; 712 y = y + v_; 713 } 714 void operator-=(const float v_) 715 { 716 x = x - v_; 717 y = y - v_; 718 } 719 void operator*=(const float v_) 720 { 721 x = x * v_; 722 y = y * v_; 723 } 724 void operator/=(const float v_) 725 { 726 x = x / v_; 727 y = y / v_; 728 } 729 730 void operator&=(const GSVector2& v_) { U64[0] &= v_.U64[0]; } 731 void operator|=(const GSVector2& v_) { U64[0] |= v_.U64[0]; } 732 void operator^=(const GSVector2& v_) { U64[0] ^= v_.U64[0]; } 733 734 friend GSVector2 operator+(const GSVector2& v1, const GSVector2& v2) { return GSVector2(v1.x + v2.x, v1.y + v2.y); } 735 736 friend GSVector2 operator-(const GSVector2& v1, const GSVector2& v2) { return GSVector2(v1.x - v2.x, v1.y - v2.y); } 737 738 friend GSVector2 operator*(const GSVector2& v1, const GSVector2& v2) { return GSVector2(v1.x * v2.x, v1.y * v2.y); } 739 740 friend GSVector2 operator/(const GSVector2& v1, const GSVector2& v2) { return GSVector2(v1.x / v2.x, v1.y / v2.y); } 741 742 friend GSVector2 operator+(const GSVector2& v, float f) { return GSVector2(v.x + f, v.y + f); } 743 744 friend GSVector2 operator-(const GSVector2& v, float f) { return GSVector2(v.x - f, v.y - f); } 745 746 friend GSVector2 operator*(const GSVector2& v, float f) { return GSVector2(v.x * f, v.y * f); } 747 748 friend GSVector2 operator/(const GSVector2& v, float f) { return GSVector2(v.x / f, v.y / f); } 749 750 friend GSVector2 operator&(const GSVector2& v1, const GSVector2& v2) 751 { 752 GSVector2 ret; 753 ret.U64[0] = v1.U64[0] & v2.U64[0]; 754 return ret; 755 } 756 757 ALWAYS_INLINE friend GSVector2 operator|(const GSVector2& v1, const GSVector2& v2) 758 { 759 GSVector2 ret; 760 ret.U64[0] = v1.U64[0] | v2.U64[0]; 761 return ret; 762 } 763 764 ALWAYS_INLINE friend GSVector2 operator^(const GSVector2& v1, const GSVector2& v2) 765 { 766 GSVector2 ret; 767 ret.U64[0] = v1.U64[0] ^ v2.U64[0]; 768 return ret; 769 } 770 771 ALWAYS_INLINE friend GSVector2 operator==(const GSVector2& v1, const 
GSVector2& v2) 772 { 773 GSVector2 ret; 774 ret.I32[0] = (v1.x == v2.x) ? -1 : 0; 775 ret.I32[1] = (v1.y == v2.y) ? -1 : 0; 776 return ret; 777 } 778 779 ALWAYS_INLINE friend GSVector2 operator!=(const GSVector2& v1, const GSVector2& v2) 780 { 781 GSVector2 ret; 782 ret.I32[0] = (v1.x != v2.x) ? -1 : 0; 783 ret.I32[1] = (v1.y != v2.y) ? -1 : 0; 784 return ret; 785 } 786 787 ALWAYS_INLINE friend GSVector2 operator>(const GSVector2& v1, const GSVector2& v2) 788 { 789 GSVector2 ret; 790 ret.I32[0] = (v1.x > v2.x) ? -1 : 0; 791 ret.I32[1] = (v1.y > v2.y) ? -1 : 0; 792 return ret; 793 } 794 795 ALWAYS_INLINE friend GSVector2 operator<(const GSVector2& v1, const GSVector2& v2) 796 { 797 GSVector2 ret; 798 ret.I32[0] = (v1.x < v2.x) ? -1 : 0; 799 ret.I32[1] = (v1.y < v2.y) ? -1 : 0; 800 return ret; 801 } 802 803 ALWAYS_INLINE friend GSVector2 operator>=(const GSVector2& v1, const GSVector2& v2) 804 { 805 GSVector2 ret; 806 ret.I32[0] = (v1.x >= v2.x) ? -1 : 0; 807 ret.I32[1] = (v1.y >= v2.y) ? -1 : 0; 808 return ret; 809 } 810 811 ALWAYS_INLINE friend GSVector2 operator<=(const GSVector2& v1, const GSVector2& v2) 812 { 813 GSVector2 ret; 814 ret.I32[0] = (v1.x <= v2.x) ? -1 : 0; 815 ret.I32[1] = (v1.y <= v2.y) ? 
-1 : 0; 816 return ret; 817 } 818 819 ALWAYS_INLINE GSVector2 xy() const { return *this; } 820 ALWAYS_INLINE GSVector2 xx() const { return GSVector2(x, x); } 821 ALWAYS_INLINE GSVector2 yx() const { return GSVector2(y, x); } 822 ALWAYS_INLINE GSVector2 yy() const { return GSVector2(y, y); } 823 }; 824 825 #undef ALL_LANES_8 826 #undef ALL_LANES_16 827 #undef ALL_LANES_32 828 829 #define ALL_LANES_8(expr) \ 830 GSVector4i ret; \ 831 for (size_t i = 0; i < 16; i++) \ 832 expr; \ 833 return ret; 834 #define ALL_LANES_16(expr) \ 835 GSVector4i ret; \ 836 for (size_t i = 0; i < 8; i++) \ 837 expr; \ 838 return ret; 839 #define ALL_LANES_32(expr) \ 840 GSVector4i ret; \ 841 for (size_t i = 0; i < 4; i++) \ 842 expr; \ 843 return ret; 844 #define ALL_LANES_64(expr) \ 845 GSVector4i ret; \ 846 for (size_t i = 0; i < 2; i++) \ 847 expr; \ 848 return ret; 849 850 class alignas(16) GSVector4i 851 { 852 struct cxpr_init_tag 853 { 854 }; 855 static constexpr cxpr_init_tag cxpr_init{}; 856 857 constexpr GSVector4i(cxpr_init_tag, s32 x, s32 y, s32 z, s32 w) : S32{x, y, z, w} {} 858 859 constexpr GSVector4i(cxpr_init_tag, s16 s0, s16 s1, s16 s2, s16 s3, s16 s4, s16 s5, s16 s6, s16 s7) 860 : S16{s0, s1, s2, s3, s4, s5, s6, s7} 861 { 862 } 863 864 constexpr GSVector4i(cxpr_init_tag, s8 b0, s8 b1, s8 b2, s8 b3, s8 b4, s8 b5, s8 b6, s8 b7, s8 b8, s8 b9, s8 b10, 865 s8 b11, s8 b12, s8 b13, s8 b14, s8 b15) 866 : S8{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15} 867 { 868 } 869 870 public: 871 union 872 { 873 struct 874 { 875 s32 x, y, z, w; 876 }; 877 struct 878 { 879 s32 r, g, b, a; 880 }; 881 struct 882 { 883 s32 left, top, right, bottom; 884 }; 885 float F32[4]; 886 s8 S8[16]; 887 s16 S16[8]; 888 s32 S32[4]; 889 s64 S64[2]; 890 u8 U8[16]; 891 u16 U16[8]; 892 u32 U32[4]; 893 u64 U64[2]; 894 }; 895 896 GSVector4i() = default; 897 898 ALWAYS_INLINE constexpr static GSVector4i cxpr(s32 x, s32 y, s32 z, s32 w) 899 { 900 return GSVector4i(cxpr_init, x, y, z, w); 901 
} 902 903 ALWAYS_INLINE constexpr static GSVector4i cxpr(s32 x) { return GSVector4i(cxpr_init, x, x, x, x); } 904 905 ALWAYS_INLINE constexpr static GSVector4i cxpr16(s16 x) { return GSVector4i(cxpr_init, x, x, x, x, x, x, x, x); } 906 907 ALWAYS_INLINE constexpr static GSVector4i cxpr16(s16 s0, s16 s1, s16 s2, s16 s3, s16 s4, s16 s5, s16 s6, s16 s7) 908 { 909 return GSVector4i(cxpr_init, s0, s1, s2, s3, s4, s5, s6, s7); 910 } 911 912 ALWAYS_INLINE constexpr static GSVector4i cxpr8(s8 b0, s8 b1, s8 b2, s8 b3, s8 b4, s8 b5, s8 b6, s8 b7, s8 b8, s8 b9, 913 s8 b10, s8 b11, s8 b12, s8 b13, s8 b14, s8 b15) 914 { 915 return GSVector4i(cxpr_init, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15); 916 } 917 918 ALWAYS_INLINE GSVector4i(s32 x, s32 y, s32 z, s32 w) 919 { 920 this->x = x; 921 this->y = y; 922 this->z = z; 923 this->w = w; 924 } 925 926 ALWAYS_INLINE GSVector4i(s32 x, s32 y) { *this = load(x).upl32(load(y)); } 927 928 ALWAYS_INLINE GSVector4i(s16 s0, s16 s1, s16 s2, s16 s3, s16 s4, s16 s5, s16 s6, s16 s7) 929 { 930 S16[0] = s0; 931 S16[1] = s1; 932 S16[2] = s2; 933 S16[3] = s3; 934 S16[4] = s4; 935 S16[5] = s5; 936 S16[6] = s6; 937 S16[7] = s7; 938 } 939 940 ALWAYS_INLINE constexpr GSVector4i(s8 b0, s8 b1, s8 b2, s8 b3, s8 b4, s8 b5, s8 b6, s8 b7, s8 b8, s8 b9, s8 b10, 941 s8 b11, s8 b12, s8 b13, s8 b14, s8 b15) 942 : S8{b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15} 943 { 944 } 945 946 ALWAYS_INLINE GSVector4i(const GSVector4i& v) { std::memcpy(S32, v.S32, sizeof(S32)); } 947 ALWAYS_INLINE explicit GSVector4i(const GSVector2i& v) : S32{v.S32[0], v.S32[1], 0, 0} {} 948 949 // MSVC has bad codegen for the constexpr version when applied to non-constexpr things (https://godbolt.org/z/h8qbn7), 950 // so leave the non-constexpr version default 951 ALWAYS_INLINE explicit GSVector4i(s32 i) { *this = i; } 952 953 ALWAYS_INLINE explicit GSVector4i(const GSVector4& v); 954 955 ALWAYS_INLINE static GSVector4i cast(const 
GSVector4& v); 956 957 ALWAYS_INLINE void operator=(const GSVector4i& v) { std::memcpy(S32, v.S32, sizeof(S32)); } 958 ALWAYS_INLINE void operator=(s32 i) 959 { 960 x = i; 961 y = i; 962 z = i; 963 w = i; 964 } 965 966 // rect 967 968 ALWAYS_INLINE s32 width() const { return right - left; } 969 970 ALWAYS_INLINE s32 height() const { return bottom - top; } 971 972 ALWAYS_INLINE GSVector4i rsize() const 973 { 974 return sub32(xyxy()); // same as GSVector4i(0, 0, width(), height()); 975 } 976 977 ALWAYS_INLINE s32 rarea() const { return width() * height(); } 978 979 ALWAYS_INLINE bool rempty() const { return lt32(zwzw()).mask() != 0x00ff; } 980 981 // TODO: Optimize for no-simd, this generates crap code. 982 ALWAYS_INLINE GSVector4i runion(const GSVector4i& v) const { return min_i32(v).upl64(max_i32(v).srl<8>()); } 983 984 ALWAYS_INLINE GSVector4i rintersect(const GSVector4i& v) const { return sat_i32(v); } 985 ALWAYS_INLINE bool rintersects(const GSVector4i& v) const { return !rintersect(v).rempty(); } 986 ALWAYS_INLINE bool rcontains(const GSVector4i& v) const { return rintersect(v).eq(v); } 987 988 ALWAYS_INLINE u32 rgba32() const 989 { 990 GSVector4i v = *this; 991 992 v = v.ps32(v); 993 v = v.pu16(v); 994 995 return (u32)store(v); 996 } 997 998 ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& min, const GSVector4i& max) const 999 { 1000 return max_i8(min).min_i8(max); 1001 } 1002 ALWAYS_INLINE GSVector4i sat_i8(const GSVector4i& minmax) const 1003 { 1004 return max_i8(minmax.xyxy()).min_i8(minmax.zwzw()); 1005 } 1006 ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& min, const GSVector4i& max) const 1007 { 1008 return max_i16(min).min_i16(max); 1009 } 1010 ALWAYS_INLINE GSVector4i sat_i16(const GSVector4i& minmax) const 1011 { 1012 return max_i16(minmax.xyxy()).min_i16(minmax.zwzw()); 1013 } 1014 ALWAYS_INLINE GSVector4i sat_i32(const GSVector4i& min, const GSVector4i& max) const 1015 { 1016 return max_i32(min).min_i32(max); 1017 } 1018 ALWAYS_INLINE GSVector4i 
sat_i32(const GSVector4i& minmax) const 1019 { 1020 return max_i32(minmax.xyxy()).min_i32(minmax.zwzw()); 1021 } 1022 1023 ALWAYS_INLINE GSVector4i sat_u8(const GSVector4i& min, const GSVector4i& max) const 1024 { 1025 return max_u8(min).min_u8(max); 1026 } 1027 ALWAYS_INLINE GSVector4i sat_u8(const GSVector4i& minmax) const 1028 { 1029 return max_u8(minmax.xyxy()).min_u8(minmax.zwzw()); 1030 } 1031 ALWAYS_INLINE GSVector4i sat_u16(const GSVector4i& min, const GSVector4i& max) const 1032 { 1033 return max_u16(min).min_u16(max); 1034 } 1035 ALWAYS_INLINE GSVector4i sat_u16(const GSVector4i& minmax) const 1036 { 1037 return max_u16(minmax.xyxy()).min_u16(minmax.zwzw()); 1038 } 1039 ALWAYS_INLINE GSVector4i sat_u32(const GSVector4i& min, const GSVector4i& max) const 1040 { 1041 return max_u32(min).min_u32(max); 1042 } 1043 ALWAYS_INLINE GSVector4i sat_u32(const GSVector4i& minmax) const 1044 { 1045 return max_u32(minmax.xyxy()).min_u32(minmax.zwzw()); 1046 } 1047 1048 GSVector4i min_i8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = std::min(S8[i], v.S8[i])); } 1049 GSVector4i max_i8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = std::max(S8[i], v.S8[i])); } 1050 GSVector4i min_i16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = std::min(S16[i], v.S16[i])); } 1051 GSVector4i max_i16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = std::max(S16[i], v.S16[i])); } 1052 GSVector4i min_i32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = std::min(S32[i], v.S32[i])); } 1053 GSVector4i max_i32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = std::max(S32[i], v.S32[i])); } 1054 1055 GSVector4i min_u8(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = std::min(U8[i], v.U8[i])); } 1056 GSVector4i max_u8(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = std::max(U8[i], v.U8[i])); } 1057 GSVector4i min_u16(const GSVector4i& v) const { ALL_LANES_16(ret.U16[i] = std::min(U16[i], v.U16[i])); } 1058 GSVector4i max_u16(const GSVector4i& v) 
const { ALL_LANES_16(ret.U16[i] = std::max(U16[i], v.U16[i])); } 1059 GSVector4i min_u32(const GSVector4i& v) const { ALL_LANES_32(ret.U32[i] = std::min(U32[i], v.U32[i])); } 1060 GSVector4i max_u32(const GSVector4i& v) const { ALL_LANES_32(ret.U32[i] = std::max(U32[i], v.U32[i])); } 1061 1062 GSVector4i madd_s16(const GSVector4i& v) const 1063 { 1064 ALL_LANES_32(ret.S32[i] = (S16[i * 2] * v.S16[i * 2]) + (S16[i * 2 + 1] * v.S16[i * 2 + 1])); 1065 } 1066 1067 GSVector4i addp_s32() const { return GSVector4i(x + y, z + w, 0, 0); } 1068 1069 s32 addv_s32() const { return (S32[0] + S32[1] + S32[2] + S32[3]); } 1070 1071 u8 minv_u8() const 1072 { 1073 return std::min( 1074 U8[0], 1075 std::min( 1076 U8[1], 1077 std::min( 1078 U8[2], 1079 std::min( 1080 U8[3], 1081 std::min( 1082 U8[4], 1083 std::min( 1084 U8[5], 1085 std::min( 1086 U8[6], 1087 std::min( 1088 U8[7], 1089 std::min( 1090 U8[9], 1091 std::min(U8[10], 1092 std::min(U8[11], std::min(U8[12], std::min(U8[13], std::min(U8[14], U8[15])))))))))))))); 1093 } 1094 1095 u16 maxv_u8() const 1096 { 1097 return std::max( 1098 U8[0], 1099 std::max( 1100 U8[1], 1101 std::max( 1102 U8[2], 1103 std::max( 1104 U8[3], 1105 std::max( 1106 U8[4], 1107 std::max( 1108 U8[5], 1109 std::max( 1110 U8[6], 1111 std::max( 1112 U8[7], 1113 std::max( 1114 U8[9], 1115 std::max(U8[10], 1116 std::max(U8[11], std::max(U8[12], std::max(U8[13], std::max(U8[14], U8[15])))))))))))))); 1117 } 1118 1119 u16 minv_u16() const 1120 { 1121 return std::min( 1122 U16[0], 1123 std::min(U16[1], 1124 std::min(U16[2], std::min(U16[3], std::min(U16[4], std::min(U16[5], std::min(U16[6], U16[7]))))))); 1125 } 1126 1127 u16 maxv_u16() const 1128 { 1129 return std::max( 1130 U16[0], 1131 std::max(U16[1], 1132 std::max(U16[2], std::max(U16[3], std::max(U16[4], std::max(U16[5], std::max(U16[6], U16[7]))))))); 1133 } 1134 1135 s32 minv_s32() const { return std::min(x, std::min(y, std::min(z, w))); } 1136 1137 u32 minv_u32() const { return std::min(U32[0], 
std::min(U32[1], std::min(U32[2], U32[3]))); } 1138 1139 s32 maxv_s32() const { return std::max(x, std::max(y, std::max(z, w))); } 1140 1141 u32 maxv_u32() const { return std::max(U32[0], std::max(U32[1], std::max(U32[2], U32[3]))); } 1142 1143 static s32 min_i16(s32 a, s32 b) { return store(load(a).min_i16(load(b))); } 1144 1145 ALWAYS_INLINE GSVector4i clamp8() const { return pu16().upl8(); } 1146 1147 GSVector4i blend8(const GSVector4i& v, const GSVector4i& mask) const 1148 { 1149 GSVector4i ret; 1150 for (size_t i = 0; i < 16; i++) 1151 ret.U8[i] = (mask.U8[i] & 0x80) ? v.U8[i] : U8[i]; 1152 return ret; 1153 } 1154 1155 template<s32 mask> 1156 GSVector4i blend16(const GSVector4i& v) const 1157 { 1158 GSVector4i ret; 1159 for (size_t i = 0; i < 8; i++) 1160 ret.U16[i] = ((mask & (1 << i)) != 0) ? v.U16[i] : U16[i]; 1161 return ret; 1162 } 1163 1164 template<s32 mask> 1165 GSVector4i blend32(const GSVector4i& v) const 1166 { 1167 GSVector4i ret; 1168 for (size_t i = 0; i < 4; i++) 1169 ret.U32[i] = ((mask & (1 << i)) != 0) ? v.U32[i] : U32[i]; 1170 return ret; 1171 } 1172 1173 GSVector4i blend(const GSVector4i& v, const GSVector4i& mask) const 1174 { 1175 GSVector4i ret; 1176 for (size_t i = 0; i < 2; i++) 1177 ret.U64[i] = (v.U64[i] & mask.U64[i]) | (U64[i] & ~mask.U64[i]); 1178 return ret; 1179 } 1180 1181 ALWAYS_INLINE GSVector4i mix16(const GSVector4i& v) const { return blend16<0xaa>(v); } 1182 1183 GSVector4i shuffle8(const GSVector4i& mask) const 1184 { 1185 ALL_LANES_8(ret.S8[i] = (mask.S8[i] & 0x80) ? 0 : (S8[mask.S8[i] & 0xf])); 1186 } 1187 1188 GSVector4i ps16(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = SSATURATE8((i < 8) ? S16[i] : v.S16[i - 8])); } 1189 GSVector4i ps16() const { ALL_LANES_8(ret.S8[i] = SSATURATE8(S16[(i < 8) ? i : (i - 8)])); } 1190 GSVector4i pu16(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = USATURATE8((i < 8) ? U16[i] : v.U16[i - 8])); } 1191 GSVector4i pu16() const { ALL_LANES_8(ret.U8[i] = USATURATE8(U16[(i < 8) ? 
i : (i - 8)])); } 1192 GSVector4i ps32(const GSVector4i& v) const 1193 { 1194 ALL_LANES_16(ret.U16[i] = SSATURATE16((i < 4) ? S32[i] : v.S32[i - 4])); 1195 } 1196 GSVector4i ps32() const { ALL_LANES_16(ret.S16[i] = SSATURATE16(S32[(i < 4) ? i : (i - 4)])); } 1197 GSVector4i pu32(const GSVector4i& v) const 1198 { 1199 ALL_LANES_16(ret.U16[i] = USATURATE16((i < 4) ? U32[i] : v.U32[i - 4])); 1200 } 1201 GSVector4i pu32() const { ALL_LANES_16(ret.U16[i] = USATURATE16(U32[(i < 4) ? i : (i - 4)])); } 1202 1203 GSVector4i upl8(const GSVector4i& v) const 1204 { 1205 return GSVector4i(S8[0], v.S8[0], S8[1], v.S8[1], S8[2], v.S8[2], S8[3], v.S8[3], S8[4], v.S8[4], S8[5], v.S8[5], 1206 S8[6], v.S8[6], S8[7], v.S8[7]); 1207 } 1208 GSVector4i uph8(const GSVector4i& v) const 1209 { 1210 return GSVector4i(S8[8], v.S8[8], S8[9], v.S8[9], S8[10], v.S8[10], S8[11], v.S8[11], S8[12], v.S8[12], S8[13], 1211 v.S8[13], S8[14], v.S8[14], S8[15], v.S8[15]); 1212 } 1213 GSVector4i upl16(const GSVector4i& v) const 1214 { 1215 return GSVector4i(S16[0], v.S16[0], S16[1], v.S16[1], S16[2], v.S16[2], S16[3], v.S16[3]); 1216 } 1217 GSVector4i uph16(const GSVector4i& v) const 1218 { 1219 return GSVector4i(S16[4], v.S16[4], S16[5], v.S16[5], S16[6], v.S16[6], S16[7], v.S16[7]); 1220 } 1221 GSVector4i upl32(const GSVector4i& v) const { return GSVector4i(S32[0], v.S32[0], S32[1], v.S32[1]); } 1222 GSVector4i uph32(const GSVector4i& v) const { return GSVector4i(S32[2], v.S32[2], S32[3], v.S32[3]); } 1223 GSVector4i upl64(const GSVector4i& v) const 1224 { 1225 GSVector4i ret; 1226 ret.S64[0] = S64[0]; 1227 ret.S64[1] = v.S64[0]; 1228 return ret; 1229 } 1230 GSVector4i uph64(const GSVector4i& v) const 1231 { 1232 GSVector4i ret; 1233 ret.S64[0] = S64[1]; 1234 ret.S64[1] = v.S64[1]; 1235 return ret; 1236 } 1237 1238 GSVector4i upl8() const 1239 { 1240 return GSVector4i(S8[0], 0, S8[1], 0, S8[2], 0, S8[3], 0, S8[4], 0, S8[5], 0, S8[6], 0, S8[7], 0); 1241 } 1242 GSVector4i uph8() const 1243 { 1244 return 
GSVector4i(S8[8], 0, S8[9], 0, S8[10], 0, S8[11], 0, S8[12], 0, S8[13], 0, S8[14], 0, S8[15], 0); 1245 } 1246 1247 GSVector4i upl16() const { return GSVector4i(S16[0], 0, S16[1], 0, S16[2], 0, S16[3], 0); } 1248 GSVector4i uph16() const { return GSVector4i(S16[4], 0, S16[5], 0, S16[6], 0, S16[7], 0); } 1249 1250 GSVector4i upl32() const { return GSVector4i(S32[0], 0, S32[1], 0); } 1251 GSVector4i uph32() const { return GSVector4i(S32[2], 0, S32[3], 0); } 1252 GSVector4i upl64() const 1253 { 1254 GSVector4i ret; 1255 ret.S64[0] = S64[0]; 1256 ret.S64[1] = 0; 1257 return ret; 1258 } 1259 GSVector4i uph64() const 1260 { 1261 GSVector4i ret; 1262 ret.S64[0] = S64[1]; 1263 ret.S64[1] = 0; 1264 return ret; 1265 } 1266 1267 GSVector4i s8to16() const { ALL_LANES_16(ret.S16[i] = S8[i]); } 1268 GSVector4i s8to32() const { ALL_LANES_32(ret.S32[i] = S8[i]); } 1269 GSVector4i s8to64() const { ALL_LANES_64(ret.S64[i] = S8[i]); } 1270 1271 GSVector4i s16to32() const { ALL_LANES_32(ret.S32[i] = S16[i]); } 1272 GSVector4i s16to64() const { ALL_LANES_64(ret.S64[i] = S16[i]); } 1273 GSVector4i s32to64() const { ALL_LANES_64(ret.S64[i] = S32[i]); } 1274 GSVector4i u8to16() const { ALL_LANES_64(ret.U16[i] = U8[i]); } 1275 GSVector4i u8to32() const { ALL_LANES_32(ret.U32[i] = U8[i]); } 1276 GSVector4i u8to64() const { ALL_LANES_64(ret.U64[i] = U8[i]); } 1277 GSVector4i u16to32() const { ALL_LANES_32(ret.U32[i] = U16[i]); } 1278 GSVector4i u16to64() const { ALL_LANES_64(ret.U64[i] = U16[i]); } 1279 GSVector4i u32to64() const { ALL_LANES_64(ret.U64[i] = U32[i]); } 1280 1281 template<s32 v> 1282 GSVector4i srl() const 1283 { 1284 GSVector4i ret = {}; 1285 if constexpr (v < 16) 1286 { 1287 for (s32 i = 0; i < (16 - v); i++) 1288 ret.U8[i] = U8[v + i]; 1289 } 1290 return ret; 1291 } 1292 1293 template<s32 v> 1294 GSVector4i srl(const GSVector4i& r) 1295 { 1296 // This sucks. Hopefully it's never used. 
1297 u8 concat[32]; 1298 std::memcpy(concat, U8, sizeof(u8) * 16); 1299 std::memcpy(concat + 16, r.U8, sizeof(u8) * 16); 1300 1301 GSVector4i ret; 1302 std::memcpy(ret.U8, &concat[v], sizeof(u8) * 16); 1303 return ret; 1304 } 1305 1306 template<s32 v> 1307 GSVector4i sll() const 1308 { 1309 GSVector4i ret = {}; 1310 if constexpr (v < 16) 1311 { 1312 for (s32 i = 0; i < (16 - v); i++) 1313 ret.U8[v + i] = U8[i]; 1314 } 1315 return ret; 1316 } 1317 1318 template<s32 v> 1319 GSVector4i sll16() const 1320 { 1321 ALL_LANES_16(ret.U16[i] = U16[i] << v); 1322 } 1323 1324 GSVector4i sll16(s32 v) const { ALL_LANES_16(ret.U16[i] = U16[i] << v); } 1325 1326 GSVector4i sllv16(const GSVector4i& v) const { ALL_LANES_16(ret.U16[i] = U16[i] << v.U16[i]); } 1327 1328 template<s32 v> 1329 GSVector4i srl16() const 1330 { 1331 ALL_LANES_16(ret.U16[i] = U16[i] >> v); 1332 } 1333 1334 GSVector4i srl16(s32 v) const { ALL_LANES_16(ret.U16[i] = U16[i] >> v); } 1335 1336 GSVector4i srlv16(const GSVector4i& v) const { ALL_LANES_16(ret.U16[i] = U16[i] >> v.U16[i]); } 1337 1338 template<s32 v> 1339 GSVector4i sra16() const 1340 { 1341 ALL_LANES_16(ret.S16[i] = S16[i] >> v); 1342 } 1343 1344 GSVector4i sra16(s32 v) const { ALL_LANES_16(ret.S16[i] = S16[i] >> v); } 1345 1346 GSVector4i srav16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] >> v.S16[i]); } 1347 1348 template<s32 v> 1349 GSVector4i sll32() const 1350 { 1351 ALL_LANES_32(ret.U32[i] = U32[i] << v); 1352 } 1353 1354 GSVector4i sll32(s32 v) const { ALL_LANES_32(ret.U32[i] = U32[i] << v); } 1355 1356 GSVector4i sllv32(const GSVector4i& v) const { ALL_LANES_32(ret.U32[i] = U32[i] << v.U32[i]); } 1357 1358 template<s32 v> 1359 GSVector4i srl32() const 1360 { 1361 ALL_LANES_32(ret.U32[i] = U32[i] >> v); 1362 } 1363 1364 GSVector4i srl32(s32 v) const { ALL_LANES_32(ret.U32[i] = U32[i] >> v); } 1365 1366 GSVector4i srlv32(const GSVector4i& v) const { ALL_LANES_32(ret.U32[i] = U32[i] >> v.U32[i]); } 1367 1368 template<s32 v> 
1369 GSVector4i sra32() const 1370 { 1371 ALL_LANES_32(ret.S32[i] = S32[i] >> v); 1372 } 1373 1374 GSVector4i sra32(s32 v) const { ALL_LANES_32(ret.S32[i] = S32[i] >> v); } 1375 1376 GSVector4i srav32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] >> v.S32[i]); } 1377 1378 template<s64 v> 1379 GSVector4i sll64() const 1380 { 1381 ALL_LANES_64(ret.U64[i] = U64[i] << v); 1382 } 1383 1384 GSVector4i sll64(s32 v) const { ALL_LANES_64(ret.U64[i] = U64[i] << v); } 1385 1386 GSVector4i sllv64(const GSVector4i& v) const { ALL_LANES_64(ret.U64[i] = U64[i] << v.U64[i]); } 1387 1388 template<s64 v> 1389 GSVector4i srl64() const 1390 { 1391 ALL_LANES_64(ret.U64[i] = U64[i] >> v); 1392 } 1393 1394 GSVector4i srl64(s32 v) const { ALL_LANES_64(ret.U64[i] = U64[i] >> v); } 1395 1396 GSVector4i srlv64(const GSVector4i& v) const { ALL_LANES_64(ret.U64[i] = U64[i] >> v.U64[i]); } 1397 1398 template<s64 v> 1399 GSVector4i sra64() const 1400 { 1401 ALL_LANES_64(ret.S64[i] = S64[i] >> v); 1402 } 1403 1404 GSVector4i sra64(s32 v) const { ALL_LANES_64(ret.S64[i] = S64[i] >> v); } 1405 1406 GSVector4i srav64(const GSVector4i& v) const { ALL_LANES_64(ret.S64[i] = S64[i] >> v.S64[i]); } 1407 1408 GSVector4i add8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = S8[i] + v.S8[i]); } 1409 1410 GSVector4i add16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] + v.S16[i]); } 1411 1412 GSVector4i add32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] + v.S32[i]); } 1413 1414 GSVector4i adds8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = SSATURATE8(S8[i] + v.S8[i])); } 1415 1416 GSVector4i adds16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = SSATURATE16(S16[i] + v.S16[i])); } 1417 1418 GSVector4i hadds16(const GSVector4i& v) const 1419 { 1420 return GSVector4i(SSATURATE16(S16[0] + S16[1]), SSATURATE16(S16[2] + S16[3]), SSATURATE16(S16[4] + S16[5]), 1421 SSATURATE16(S16[6] + S16[7]), SSATURATE16(v.S16[0] + v.S16[1]), SSATURATE16(v.S16[2] + 
v.S16[3]), 1422 SSATURATE16(v.S16[4] + v.S16[5]), SSATURATE16(v.S16[6] + v.S16[7])); 1423 } 1424 1425 GSVector4i addus8(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = USATURATE8(U8[i] + v.U8[i])); } 1426 1427 GSVector4i addus16(const GSVector4i& v) const { ALL_LANES_16(ret.U16[i] = USATURATE16(U16[i] + v.U16[i])); } 1428 1429 GSVector4i sub8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = S8[i] - v.S8[i]); } 1430 1431 GSVector4i sub16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] - v.S16[i]); } 1432 1433 GSVector4i sub32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] - v.S32[i]); } 1434 1435 GSVector4i subs8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = SSATURATE8(S8[i] - v.S8[i])); } 1436 1437 GSVector4i subs16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = SSATURATE16(S16[i] - v.S16[i])); } 1438 1439 GSVector4i subus8(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = USATURATE8(U8[i] - v.U8[i])); } 1440 1441 GSVector4i subus16(const GSVector4i& v) const { ALL_LANES_16(ret.U16[i] = USATURATE16(U16[i] - v.U16[i])); } 1442 1443 GSVector4i avg8(const GSVector4i& v) const { ALL_LANES_8(ret.U8[i] = (U8[i] + v.U8[i]) >> 1); } 1444 1445 GSVector4i avg16(const GSVector4i& v) const { ALL_LANES_16(ret.U16[i] = (U16[i] + v.U16[i]) >> 1); } 1446 1447 GSVector4i mul16hs(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] * v.S16[i]) >> 16); } 1448 1449 GSVector4i mul16hu(const GSVector4i& v) const { ALL_LANES_16(ret.U16[i] = (U16[i] * v.U16[i]) >> 16); } 1450 1451 GSVector4i mul16l(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = S16[i] * v.S16[i]); } 1452 1453 GSVector4i mul16hrs(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = ((S16[i] * v.S16[i]) >> 14) + 1); } 1454 1455 GSVector4i mul32l(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = S32[i] * v.S32[i]); } 1456 1457 ALWAYS_INLINE bool eq(const GSVector4i& v) const { return (std::memcmp(S32, v.S32, sizeof(S32))) == 0; } 1458 1459 
GSVector4i eq8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] == v.S8[i]) ? -1 : 0); } 1460 GSVector4i eq16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] == v.S16[i]) ? -1 : 0); } 1461 GSVector4i eq32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] == v.S32[i]) ? -1 : 0); } 1462 GSVector4i eq64(const GSVector4i& v) const { ALL_LANES_64(ret.S64[i] = (S64[i] == v.S64[i]) ? -1 : 0); } 1463 1464 GSVector4i neq8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] != v.S8[i]) ? -1 : 0); } 1465 GSVector4i neq16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] != v.S16[i]) ? -1 : 0); } 1466 GSVector4i neq32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] != v.S32[i]) ? -1 : 0); } 1467 1468 GSVector4i gt8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] > v.S8[i]) ? -1 : 0); } 1469 GSVector4i gt16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] > v.S16[i]) ? -1 : 0); } 1470 GSVector4i gt32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] > v.S32[i]) ? -1 : 0); } 1471 1472 GSVector4i ge8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] >= v.S8[i]) ? -1 : 0); } 1473 GSVector4i ge16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] >= v.S16[i]) ? -1 : 0); } 1474 GSVector4i ge32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] >= v.S32[i]) ? -1 : 0); } 1475 1476 GSVector4i lt8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] < v.S8[i]) ? -1 : 0); } 1477 GSVector4i lt16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] < v.S16[i]) ? -1 : 0); } 1478 GSVector4i lt32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] < v.S32[i]) ? -1 : 0); } 1479 1480 GSVector4i le8(const GSVector4i& v) const { ALL_LANES_8(ret.S8[i] = (S8[i] <= v.S8[i]) ? -1 : 0); } 1481 GSVector4i le16(const GSVector4i& v) const { ALL_LANES_16(ret.S16[i] = (S16[i] <= v.S16[i]) ? 
-1 : 0); } 1482 GSVector4i le32(const GSVector4i& v) const { ALL_LANES_32(ret.S32[i] = (S32[i] <= v.S32[i]) ? -1 : 0); } 1483 1484 ALWAYS_INLINE GSVector4i andnot(const GSVector4i& v) const { ALL_LANES_64(ret.U64[i] = (~v.U64[i]) & U64[i]); } 1485 1486 s32 mask() const 1487 { 1488 return static_cast<s32>((static_cast<u32>(U8[0] >> 7) << 0) | (static_cast<u32>(U8[1] >> 7) << 1) | 1489 (static_cast<u32>(U8[2] >> 7) << 2) | (static_cast<u32>(U8[3] >> 7) << 3) | 1490 (static_cast<u32>(U8[4] >> 7) << 4) | (static_cast<u32>(U8[5] >> 7) << 5) | 1491 (static_cast<u32>(U8[6] >> 7) << 6) | (static_cast<u32>(U8[7] >> 7) << 7) | 1492 (static_cast<u32>(U8[8] >> 7) << 8) | (static_cast<u32>(U8[9] >> 7) << 9) | 1493 (static_cast<u32>(U8[10] >> 7) << 10) | (static_cast<u32>(U8[11] >> 7) << 11) | 1494 (static_cast<u32>(U8[12] >> 7) << 12) | (static_cast<u32>(U8[13] >> 7) << 13) | 1495 (static_cast<u32>(U8[14] >> 7) << 14) | (static_cast<u32>(U8[15] >> 7) << 15)); 1496 } 1497 1498 ALWAYS_INLINE bool alltrue() const { return ((U64[0] & U64[1]) == 0xFFFFFFFFFFFFFFFFULL); } 1499 1500 ALWAYS_INLINE bool allfalse() const { return ((U64[0] | U64[1]) == 0); } 1501 1502 template<s32 i> 1503 ALWAYS_INLINE GSVector4i insert8(s32 a) const 1504 { 1505 GSVector4i ret = *this; 1506 ret.S8[i] = static_cast<s8>(a); 1507 return ret; 1508 } 1509 1510 template<s32 i> 1511 ALWAYS_INLINE s32 extract8() const 1512 { 1513 return S8[i]; 1514 } 1515 1516 template<s32 i> 1517 ALWAYS_INLINE GSVector4i insert16(s32 a) const 1518 { 1519 GSVector4i ret = *this; 1520 ret.S16[i] = static_cast<s16>(a); 1521 return ret; 1522 } 1523 1524 template<s32 i> 1525 ALWAYS_INLINE s32 extract16() const 1526 { 1527 return S16[i]; 1528 } 1529 1530 template<s32 i> 1531 ALWAYS_INLINE GSVector4i insert32(s32 a) const 1532 { 1533 GSVector4i ret = *this; 1534 ret.S32[i] = a; 1535 return ret; 1536 } 1537 1538 template<s32 i> 1539 ALWAYS_INLINE s32 extract32() const 1540 { 1541 return S32[i]; 1542 } 1543 1544 template<s32 i> 1545 
ALWAYS_INLINE GSVector4i insert64(s64 a) const 1546 { 1547 GSVector4i ret = *this; 1548 ret.S64[i] = a; 1549 return ret; 1550 } 1551 1552 template<s32 i> 1553 ALWAYS_INLINE s64 extract64() const 1554 { 1555 return S64[i]; 1556 } 1557 1558 ALWAYS_INLINE static GSVector4i loadnt(const void* p) 1559 { 1560 GSVector4i ret; 1561 std::memcpy(&ret, p, sizeof(ret.S32)); 1562 return ret; 1563 } 1564 1565 ALWAYS_INLINE static GSVector4i load32(const void* p) 1566 { 1567 GSVector4i ret; 1568 std::memcpy(&ret.x, p, sizeof(s32)); 1569 ret.y = 0; 1570 ret.z = 0; 1571 ret.w = 0; 1572 return ret; 1573 } 1574 1575 ALWAYS_INLINE static GSVector4i loadl(const void* p) 1576 { 1577 GSVector4i ret; 1578 std::memcpy(&ret.U64[0], p, sizeof(ret.U64[0])); 1579 ret.U64[1] = 0; 1580 return ret; 1581 } 1582 1583 ALWAYS_INLINE static GSVector4i loadh(const void* p) 1584 { 1585 GSVector4i ret; 1586 ret.U64[0] = 0; 1587 std::memcpy(&ret.U64[1], p, sizeof(ret.U64[1])); 1588 return ret; 1589 } 1590 1591 ALWAYS_INLINE static GSVector4i loadh(const GSVector2i& v) { return loadh(&v); } 1592 1593 template<bool aligned> 1594 ALWAYS_INLINE static GSVector4i load(const void* p) 1595 { 1596 GSVector4i ret; 1597 std::memcpy(ret.S32, p, sizeof(ret.S32)); 1598 return ret; 1599 } 1600 1601 ALWAYS_INLINE static GSVector4i load(s32 i) 1602 { 1603 GSVector4i ret; 1604 ret.x = i; 1605 ret.y = 0; 1606 ret.z = 0; 1607 ret.w = 0; 1608 return ret; 1609 } 1610 1611 ALWAYS_INLINE static GSVector4i loadq(s64 i) 1612 { 1613 GSVector4i ret; 1614 ret.S64[0] = i; 1615 ret.S64[1] = 0; 1616 return ret; 1617 } 1618 1619 ALWAYS_INLINE static void storent(void* p, const GSVector4i& v) { std::memcpy(p, v.S32, sizeof(v.S32)); } 1620 1621 ALWAYS_INLINE static void storel(void* p, const GSVector4i& v) { std::memcpy(p, &v.S32[0], sizeof(s32) * 2); } 1622 1623 ALWAYS_INLINE static void storeh(void* p, const GSVector4i& v) { std::memcpy(p, &v.S32[2], sizeof(s32) * 2); } 1624 1625 ALWAYS_INLINE static void store(void* pl, void* ph, const 
GSVector4i& v) 1626 { 1627 GSVector4i::storel(pl, v); 1628 GSVector4i::storeh(ph, v); 1629 } 1630 1631 template<bool aligned> 1632 ALWAYS_INLINE static void store(void* p, const GSVector4i& v) 1633 { 1634 std::memcpy(p, v.S32, sizeof(S32)); 1635 } 1636 1637 ALWAYS_INLINE static void store32(void* p, const GSVector4i& v) { std::memcpy(p, &v.x, sizeof(s32)); } 1638 1639 ALWAYS_INLINE static s32 store(const GSVector4i& v) { return v.x; } 1640 1641 ALWAYS_INLINE static s64 storeq(const GSVector4i& v) { return v.S64[0]; } 1642 1643 ALWAYS_INLINE void operator&=(const GSVector4i& v) 1644 { 1645 U64[0] &= v.U64[0]; 1646 U64[1] &= v.U64[1]; 1647 } 1648 ALWAYS_INLINE void operator|=(const GSVector4i& v) 1649 { 1650 U64[0] |= v.U64[0]; 1651 U64[1] |= v.U64[1]; 1652 } 1653 ALWAYS_INLINE void operator^=(const GSVector4i& v) 1654 { 1655 U64[0] ^= v.U64[0]; 1656 U64[1] ^= v.U64[1]; 1657 } 1658 1659 ALWAYS_INLINE friend GSVector4i operator&(const GSVector4i& v1, const GSVector4i& v2) 1660 { 1661 GSVector4i ret; 1662 ret.U64[0] = v1.U64[0] & v2.U64[0]; 1663 ret.U64[1] = v1.U64[1] & v2.U64[1]; 1664 return ret; 1665 } 1666 1667 ALWAYS_INLINE friend GSVector4i operator|(const GSVector4i& v1, const GSVector4i& v2) 1668 { 1669 GSVector4i ret; 1670 ret.U64[0] = v1.U64[0] | v2.U64[0]; 1671 ret.U64[1] = v1.U64[1] | v2.U64[1]; 1672 return ret; 1673 } 1674 1675 ALWAYS_INLINE friend GSVector4i operator^(const GSVector4i& v1, const GSVector4i& v2) 1676 { 1677 GSVector4i ret; 1678 ret.U64[0] = v1.U64[0] ^ v2.U64[0]; 1679 ret.U64[1] = v1.U64[1] ^ v2.U64[1]; 1680 return ret; 1681 } 1682 1683 ALWAYS_INLINE friend GSVector4i operator&(const GSVector4i& v, s32 i) { return v & GSVector4i(i); } 1684 1685 ALWAYS_INLINE friend GSVector4i operator|(const GSVector4i& v, s32 i) { return v | GSVector4i(i); } 1686 1687 ALWAYS_INLINE friend GSVector4i operator^(const GSVector4i& v, s32 i) { return v ^ GSVector4i(i); } 1688 1689 ALWAYS_INLINE friend GSVector4i operator~(const GSVector4i& v) { return v ^ 
v.eq32(v); } 1690 1691 ALWAYS_INLINE static constexpr GSVector4i zero() { return GSVector4i::cxpr(0, 0, 0, 0); } 1692 1693 ALWAYS_INLINE GSVector4i xyxy(const GSVector4i& v) const { return upl64(v); } 1694 1695 ALWAYS_INLINE GSVector2i xy() const { return GSVector2i(x, y); } 1696 ALWAYS_INLINE GSVector2i zw() const { return GSVector2i(z, w); } 1697 1698 // clang-format off 1699 // l/h/lh not implemented until needed 1700 1701 #define VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \ 1702 ALWAYS_INLINE GSVector4i xs##ys##zs##ws() const {return GSVector4i(S32[xn], S32[yn], S32[zn], S32[wn]);} 1703 1704 #define VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \ 1705 VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \ 1706 VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, y, 1) \ 1707 VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, z, 2) \ 1708 VECTOR4i_SHUFFLE_4(xs, xn, ys, yn, zs, zn, w, 3) \ 1709 1710 #define VECTOR4i_SHUFFLE_2(xs, xn, ys, yn) \ 1711 VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, x, 0) \ 1712 VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, y, 1) \ 1713 VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, z, 2) \ 1714 VECTOR4i_SHUFFLE_3(xs, xn, ys, yn, w, 3) \ 1715 1716 #define VECTOR4i_SHUFFLE_1(xs, xn) \ 1717 VECTOR4i_SHUFFLE_2(xs, xn, x, 0) \ 1718 VECTOR4i_SHUFFLE_2(xs, xn, y, 1) \ 1719 VECTOR4i_SHUFFLE_2(xs, xn, z, 2) \ 1720 VECTOR4i_SHUFFLE_2(xs, xn, w, 3) \ 1721 1722 VECTOR4i_SHUFFLE_1(x, 0) 1723 VECTOR4i_SHUFFLE_1(y, 1) 1724 VECTOR4i_SHUFFLE_1(z, 2) 1725 VECTOR4i_SHUFFLE_1(w, 3) 1726 1727 // clang-format on 1728 }; 1729 1730 class alignas(16) GSVector4 1731 { 1732 struct cxpr_init_tag 1733 { 1734 }; 1735 static constexpr cxpr_init_tag cxpr_init{}; 1736 1737 constexpr GSVector4(cxpr_init_tag, float x, float y, float z, float w) : F32{x, y, z, w} {} 1738 1739 constexpr GSVector4(cxpr_init_tag, int x, int y, int z, int w) : I32{x, y, z, w} {} 1740 1741 constexpr GSVector4(cxpr_init_tag, u64 x, u64 y) : U64{x, y} {} 1742 1743 constexpr GSVector4(cxpr_init_tag, double x, double y) : F64{x, y} {} 1744 1745 
public: 1746 union 1747 { 1748 struct 1749 { 1750 float x, y, z, w; 1751 }; 1752 struct 1753 { 1754 float r, g, b, a; 1755 }; 1756 struct 1757 { 1758 float left, top, right, bottom; 1759 }; 1760 float F32[4]; 1761 double F64[2]; 1762 s8 I8[16]; 1763 s16 I16[8]; 1764 s32 I32[4]; 1765 s64 I64[2]; 1766 u8 U8[16]; 1767 u16 U16[8]; 1768 u32 U32[4]; 1769 u64 U64[2]; 1770 }; 1771 1772 GSVector4() = default; 1773 1774 constexpr static GSVector4 cxpr(float x, float y, float z, float w) { return GSVector4(cxpr_init, x, y, z, w); } 1775 1776 constexpr static GSVector4 cxpr(float x) { return GSVector4(cxpr_init, x, x, x, x); } 1777 1778 constexpr static GSVector4 cxpr(int x, int y, int z, int w) { return GSVector4(cxpr_init, x, y, z, w); } 1779 1780 constexpr static GSVector4 cxpr(int x) { return GSVector4(cxpr_init, x, x, x, x); } 1781 1782 constexpr static GSVector4 cxpr64(u64 x, u64 y) { return GSVector4(cxpr_init, x, y); } 1783 1784 constexpr static GSVector4 cxpr64(u64 x) { return GSVector4(cxpr_init, x, x); } 1785 1786 constexpr static GSVector4 cxpr64(double x, double y) { return GSVector4(cxpr_init, x, y); } 1787 1788 constexpr static GSVector4 cxpr64(double x) { return GSVector4(cxpr_init, x, x); } 1789 1790 ALWAYS_INLINE GSVector4(float x, float y, float z, float w) 1791 { 1792 this->x = x; 1793 this->y = y; 1794 this->z = z; 1795 this->w = w; 1796 } 1797 1798 ALWAYS_INLINE GSVector4(float x, float y) 1799 { 1800 this->x = x; 1801 this->y = y; 1802 this->z = 0.0f; 1803 this->w = 0.0f; 1804 } 1805 1806 ALWAYS_INLINE GSVector4(int x, int y, int z, int w) 1807 { 1808 this->x = static_cast<float>(x); 1809 this->y = static_cast<float>(y); 1810 this->z = static_cast<float>(z); 1811 this->w = static_cast<float>(w); 1812 } 1813 1814 ALWAYS_INLINE GSVector4(int x, int y) 1815 { 1816 this->x = static_cast<float>(x); 1817 this->y = static_cast<float>(y); 1818 this->z = 0.0f; 1819 this->w = 0.0f; 1820 } 1821 1822 ALWAYS_INLINE explicit GSVector4(float f) { x = y = z = w = f; } 
1823 1824 ALWAYS_INLINE explicit GSVector4(int i) { x = y = z = w = static_cast<float>(i); } 1825 1826 ALWAYS_INLINE explicit GSVector4(const GSVector2& v) : x(v.x), y(v.y), z(0.0f), w(0.0f) {} 1827 ALWAYS_INLINE explicit GSVector4(const GSVector4i& v); 1828 1829 ALWAYS_INLINE static GSVector4 cast(const GSVector4i& v); 1830 1831 ALWAYS_INLINE static GSVector4 f64(double x, double y) 1832 { 1833 GSVector4 ret; 1834 ret.F64[0] = x; 1835 ret.F64[1] = y; 1836 return ret; 1837 } 1838 1839 ALWAYS_INLINE static GSVector4 f64(double x) 1840 { 1841 GSVector4 ret; 1842 ret.F64[0] = ret.F64[1] = x; 1843 return ret; 1844 } 1845 1846 ALWAYS_INLINE void operator=(float f) { x = y = z = w = f; } 1847 1848 u32 rgba32() const { return GSVector4i(*this).rgba32(); } 1849 1850 ALWAYS_INLINE static GSVector4 rgba32(u32 rgba) { return GSVector4(GSVector4i::load((int)rgba).u8to32()); } 1851 1852 ALWAYS_INLINE static GSVector4 unorm8(u32 rgba) { return rgba32(rgba) * GSVector4::cxpr(1.0f / 255.0f); } 1853 1854 GSVector4 abs() const { return GSVector4(std::fabs(x), std::fabs(y), std::fabs(z), std::fabs(w)); } 1855 1856 GSVector4 neg() const { return GSVector4(-x, -y, -z, -w); } 1857 1858 GSVector4 rcp() const { return GSVector4(1.0f / x, 1.0f / y, 1.0f / z, 1.0f / w); } 1859 1860 GSVector4 rcpnr() const 1861 { 1862 GSVector4 v_ = rcp(); 1863 1864 return (v_ + v_) - (v_ * v_) * *this; 1865 } 1866 1867 GSVector4 floor() const { return GSVector4(std::floor(x), std::floor(y), std::floor(z), std::floor(w)); } 1868 1869 GSVector4 ceil() const { return GSVector4(std::ceil(x), std::ceil(y), std::ceil(z), std::ceil(w)); } 1870 1871 GSVector4 madd(const GSVector4& a_, const GSVector4& b_) const { return *this * a_ + b_; } 1872 1873 GSVector4 msub(const GSVector4& a_, const GSVector4& b_) const { return *this * a_ - b_; } 1874 1875 GSVector4 nmadd(const GSVector4& a_, const GSVector4& b_) const { return b_ - *this * a_; } 1876 1877 GSVector4 nmsub(const GSVector4& a_, const GSVector4& b_) const { 
return -b_ - *this * a_; } 1878 1879 GSVector4 addm(const GSVector4& a_, const GSVector4& b_) const 1880 { 1881 return a_.madd(b_, *this); // *this + a * b 1882 } 1883 1884 GSVector4 subm(const GSVector4& a_, const GSVector4& b_) const 1885 { 1886 return a_.nmadd(b_, *this); // *this - a * b 1887 } 1888 1889 GSVector4 hadd() const { return GSVector4(x + y, z + w, x + y, z + w); } 1890 1891 GSVector4 hadd(const GSVector4& v) const { return GSVector4(x + y, z + w, v.x + v.y, v.z + v.w); } 1892 1893 GSVector4 hsub() const { return GSVector4(x - y, z - w, x - y, z - w); } 1894 1895 GSVector4 hsub(const GSVector4& v) const { return GSVector4(x - y, z - w, v.x - v.y, v.z - v.w); } 1896 1897 template<int i> 1898 GSVector4 dp(const GSVector4& v) const 1899 { 1900 float res = 0.0f; 1901 if constexpr (i & 0x10) 1902 res += x * v.x; 1903 if constexpr (i & 0x20) 1904 res += y * v.y; 1905 if constexpr (i & 0x40) 1906 res += z * v.z; 1907 if constexpr (i & 0x80) 1908 res += w * v.w; 1909 return GSVector4((i & 0x01) ? res : 0.0f, (i & 0x02) ? res : 0.0f, (i & 0x04) ? res : 0.0f, 1910 (i & 0x08) ? 
res : 0.0f); 1911 } 1912 1913 GSVector4 sat(const GSVector4& min, const GSVector4& max) const 1914 { 1915 return GSVector4(std::clamp(x, min.x, max.x), std::clamp(y, min.y, max.y), std::clamp(z, min.z, max.z), 1916 std::clamp(w, min.w, max.w)); 1917 } 1918 1919 GSVector4 sat(const GSVector4& v) const 1920 { 1921 return GSVector4(std::clamp(x, v.x, v.z), std::clamp(y, v.y, v.w), std::clamp(z, v.x, v.z), 1922 std::clamp(w, v.y, v.w)); 1923 } 1924 1925 GSVector4 sat(const float scale = 255) const { return sat(zero(), GSVector4(scale)); } 1926 1927 GSVector4 clamp(const float scale = 255) const { return min(GSVector4(scale)); } 1928 1929 GSVector4 min(const GSVector4& v) const 1930 { 1931 return GSVector4(std::min(x, v.x), std::min(y, v.y), std::min(z, v.z), std::min(w, v.w)); 1932 } 1933 1934 GSVector4 max(const GSVector4& v) const 1935 { 1936 return GSVector4(std::max(x, v.x), std::max(y, v.y), std::max(z, v.z), std::max(w, v.w)); 1937 } 1938 1939 template<int mask> 1940 GSVector4 blend32(const GSVector4& v) const 1941 { 1942 return GSVector4(v.F32[mask & 1], v.F32[(mask >> 1) & 1], v.F32[(mask >> 2) & 1], v.F32[(mask >> 3) & 1]); 1943 } 1944 1945 ALWAYS_INLINE GSVector4 blend32(const GSVector4& v, const GSVector4& mask) const 1946 { 1947 return GSVector4((mask.U32[0] & 0x80000000u) ? v.x : x, (mask.U32[1] & 0x80000000u) ? v.y : y, 1948 (mask.U32[2] & 0x80000000u) ? v.z : z, (mask.U32[3] & 0x80000000u) ? 
v.w : w); 1949 } 1950 1951 GSVector4 upl(const GSVector4& v) const { return GSVector4(x, y, v.x, v.y); } 1952 1953 GSVector4 uph(const GSVector4& v) const { return GSVector4(z, w, v.z, v.w); } 1954 1955 GSVector4 upld(const GSVector4& v) const 1956 { 1957 GSVector4 ret; 1958 ret.U64[0] = U64[0]; 1959 ret.U64[1] = v.U64[0]; 1960 return ret; 1961 } 1962 1963 GSVector4 uphd(const GSVector4& v) const 1964 { 1965 GSVector4 ret; 1966 ret.U64[0] = U64[1]; 1967 ret.U64[1] = v.U64[1]; 1968 return ret; 1969 } 1970 1971 ALWAYS_INLINE GSVector4 l2h(const GSVector4& v) const { return GSVector4(x, y, v.x, v.y); } 1972 1973 ALWAYS_INLINE GSVector4 h2l(const GSVector4& v) const { return GSVector4(v.z, v.w, z, w); } 1974 1975 ALWAYS_INLINE GSVector4 andnot(const GSVector4& v) const 1976 { 1977 GSVector4 ret; 1978 ret.U32[0] = ((~v.U32[0]) & U32[0]); 1979 ret.U32[1] = ((~v.U32[1]) & U32[1]); 1980 ret.U32[2] = ((~v.U32[2]) & U32[2]); 1981 ret.U32[3] = ((~v.U32[3]) & U32[3]); 1982 return ret; 1983 } 1984 1985 ALWAYS_INLINE int mask() const 1986 { 1987 return (U32[0] >> 31) | ((U32[1] >> 30) & 2) | ((U32[2] >> 29) & 4) | ((U32[3] >> 28) & 8); 1988 } 1989 1990 ALWAYS_INLINE bool alltrue() const { return ((U64[0] & U64[1]) == 0xFFFFFFFFFFFFFFFFULL); } 1991 1992 ALWAYS_INLINE bool allfalse() const { return ((U64[0] | U64[1]) == 0); } 1993 1994 ALWAYS_INLINE GSVector4 replace_nan(const GSVector4& v) const { return v.blend32(*this, *this == *this); } 1995 1996 template<int src, int dst> 1997 ALWAYS_INLINE GSVector4 insert32(const GSVector4& v) const 1998 { 1999 GSVector4 ret = *this; 2000 ret.F32[dst] = v.F32[src]; 2001 return ret; 2002 } 2003 2004 template<int i> 2005 ALWAYS_INLINE int extract32() const 2006 { 2007 return I32[i]; 2008 } 2009 2010 template<int dst> 2011 ALWAYS_INLINE GSVector4 insert64(double v) const 2012 { 2013 GSVector4 ret; 2014 ret.F64[dst] = v; 2015 return ret; 2016 } 2017 2018 template<int src> 2019 ALWAYS_INLINE double extract64() const 2020 { 2021 return F64[src]; 
2022 } 2023 2024 ALWAYS_INLINE static constexpr GSVector4 zero() { return GSVector4::cxpr(0.0f, 0.0f, 0.0f, 0.0f); } 2025 2026 ALWAYS_INLINE static constexpr GSVector4 xffffffff() 2027 { 2028 GSVector4 ret = zero(); 2029 ret.U64[0] = ~ret.U64[0]; 2030 ret.U64[1] = ~ret.U64[1]; 2031 return ret; 2032 } 2033 2034 ALWAYS_INLINE static GSVector4 loadl(const void* p) 2035 { 2036 GSVector4 ret; 2037 std::memcpy(&ret.x, p, sizeof(float) * 2); 2038 ret.z = 0.0f; 2039 ret.w = 0.0f; 2040 return ret; 2041 } 2042 2043 ALWAYS_INLINE static GSVector4 load(float f) { return GSVector4(f, f, f, f); } 2044 2045 template<bool aligned> 2046 ALWAYS_INLINE static GSVector4 load(const void* p) 2047 { 2048 GSVector4 ret; 2049 std::memcpy(&ret.x, p, sizeof(float) * 4); 2050 return ret; 2051 } 2052 2053 ALWAYS_INLINE static void storent(void* p, const GSVector4& v) { std::memcpy(p, &v, sizeof(v)); } 2054 2055 ALWAYS_INLINE static void storel(void* p, const GSVector4& v) { std::memcpy(p, &v.x, sizeof(float) * 2); } 2056 2057 ALWAYS_INLINE static void storeh(void* p, const GSVector4& v) { std::memcpy(p, &v.z, sizeof(float) * 2); } 2058 2059 template<bool aligned> 2060 ALWAYS_INLINE static void store(void* p, const GSVector4& v) 2061 { 2062 std::memcpy(p, v.F32, sizeof(F32)); 2063 } 2064 2065 ALWAYS_INLINE static void store(float* p, const GSVector4& v) { *p = v.x; } 2066 2067 ALWAYS_INLINE GSVector4 operator-() const { return neg(); } 2068 2069 void operator+=(const GSVector4& v_) 2070 { 2071 x = x + v_.x; 2072 y = y + v_.y; 2073 z = z + v_.z; 2074 w = w + v_.w; 2075 } 2076 void operator-=(const GSVector4& v_) 2077 { 2078 x = x - v_.x; 2079 y = y - v_.y; 2080 z = z - v_.z; 2081 w = w - v_.w; 2082 } 2083 void operator*=(const GSVector4& v_) 2084 { 2085 x = x * v_.x; 2086 y = y * v_.y; 2087 z = z * v_.z; 2088 w = w * v_.w; 2089 } 2090 void operator/=(const GSVector4& v_) 2091 { 2092 x = x / v_.x; 2093 y = y / v_.y; 2094 z = z / v_.z; 2095 w = w / v_.w; 2096 } 2097 2098 void operator+=(const 
float v_) 2099 { 2100 x = x + v_; 2101 y = y + v_; 2102 z = z + v_; 2103 w = w + v_; 2104 } 2105 void operator-=(const float v_) 2106 { 2107 x = x - v_; 2108 y = y - v_; 2109 z = z - v_; 2110 w = w - v_; 2111 } 2112 void operator*=(const float v_) 2113 { 2114 x = x * v_; 2115 y = y * v_; 2116 z = z * v_; 2117 w = w * v_; 2118 } 2119 void operator/=(const float v_) 2120 { 2121 x = x / v_; 2122 y = y / v_; 2123 z = z / v_; 2124 w = w / v_; 2125 } 2126 2127 void operator&=(const GSVector4& v_) 2128 { 2129 U64[0] &= v_.U64[0]; 2130 U64[1] &= v_.U64[1]; 2131 } 2132 void operator|=(const GSVector4& v_) 2133 { 2134 U64[0] |= v_.U64[0]; 2135 U64[1] |= v_.U64[1]; 2136 } 2137 void operator^=(const GSVector4& v_) 2138 { 2139 U64[0] ^= v_.U64[0]; 2140 U64[1] ^= v_.U64[1]; 2141 } 2142 2143 friend GSVector4 operator+(const GSVector4& v1, const GSVector4& v2) 2144 { 2145 return GSVector4(v1.x + v2.x, v1.y + v2.y, v1.z + v2.z, v1.w + v2.w); 2146 } 2147 2148 friend GSVector4 operator-(const GSVector4& v1, const GSVector4& v2) 2149 { 2150 return GSVector4(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.w - v2.w); 2151 } 2152 2153 friend GSVector4 operator*(const GSVector4& v1, const GSVector4& v2) 2154 { 2155 return GSVector4(v1.x * v2.x, v1.y * v2.y, v1.z * v2.z, v1.w * v2.w); 2156 } 2157 2158 friend GSVector4 operator/(const GSVector4& v1, const GSVector4& v2) 2159 { 2160 return GSVector4(v1.x / v2.x, v1.y / v2.y, v1.z / v2.z, v1.w / v2.w); 2161 } 2162 2163 friend GSVector4 operator+(const GSVector4& v, float f) { return GSVector4(v.x + f, v.y + f, v.z + f, v.w + f); } 2164 2165 friend GSVector4 operator-(const GSVector4& v, float f) { return GSVector4(v.x - f, v.y - f, v.z - f, v.w - f); } 2166 2167 friend GSVector4 operator*(const GSVector4& v, float f) { return GSVector4(v.x * f, v.y * f, v.z * f, v.w * f); } 2168 2169 friend GSVector4 operator/(const GSVector4& v, float f) { return GSVector4(v.x / f, v.y / f, v.z / f, v.w / f); } 2170 2171 friend GSVector4 operator&(const GSVector4& 
v1, const GSVector4& v2) 2172 { 2173 GSVector4 ret; 2174 ret.U64[0] = v1.U64[0] & v2.U64[0]; 2175 ret.U64[1] = v1.U64[1] & v2.U64[1]; 2176 return ret; 2177 } 2178 2179 ALWAYS_INLINE friend GSVector4 operator|(const GSVector4& v1, const GSVector4& v2) 2180 { 2181 GSVector4 ret; 2182 ret.U64[0] = v1.U64[0] | v2.U64[0]; 2183 ret.U64[1] = v1.U64[1] | v2.U64[1]; 2184 return ret; 2185 } 2186 2187 ALWAYS_INLINE friend GSVector4 operator^(const GSVector4& v1, const GSVector4& v2) 2188 { 2189 GSVector4 ret; 2190 ret.U64[0] = v1.U64[0] ^ v2.U64[0]; 2191 ret.U64[1] = v1.U64[1] ^ v2.U64[1]; 2192 return ret; 2193 } 2194 2195 ALWAYS_INLINE friend GSVector4 operator==(const GSVector4& v1, const GSVector4& v2) 2196 { 2197 GSVector4 ret; 2198 ret.I32[0] = (v1.x == v2.x) ? -1 : 0; 2199 ret.I32[1] = (v1.y == v2.y) ? -1 : 0; 2200 ret.I32[2] = (v1.z == v2.z) ? -1 : 0; 2201 ret.I32[3] = (v1.w == v2.w) ? -1 : 0; 2202 return ret; 2203 } 2204 2205 ALWAYS_INLINE friend GSVector4 operator!=(const GSVector4& v1, const GSVector4& v2) 2206 { 2207 GSVector4 ret; 2208 ret.I32[0] = (v1.x != v2.x) ? -1 : 0; 2209 ret.I32[1] = (v1.y != v2.y) ? -1 : 0; 2210 ret.I32[2] = (v1.z != v2.z) ? -1 : 0; 2211 ret.I32[3] = (v1.w != v2.w) ? -1 : 0; 2212 return ret; 2213 } 2214 2215 ALWAYS_INLINE friend GSVector4 operator>(const GSVector4& v1, const GSVector4& v2) 2216 { 2217 GSVector4 ret; 2218 ret.I32[0] = (v1.x > v2.x) ? -1 : 0; 2219 ret.I32[1] = (v1.y > v2.y) ? -1 : 0; 2220 ret.I32[2] = (v1.z > v2.z) ? -1 : 0; 2221 ret.I32[3] = (v1.w > v2.w) ? -1 : 0; 2222 return ret; 2223 } 2224 2225 ALWAYS_INLINE friend GSVector4 operator<(const GSVector4& v1, const GSVector4& v2) 2226 { 2227 GSVector4 ret; 2228 ret.I32[0] = (v1.x < v2.x) ? -1 : 0; 2229 ret.I32[1] = (v1.y < v2.y) ? -1 : 0; 2230 ret.I32[2] = (v1.z < v2.z) ? -1 : 0; 2231 ret.I32[3] = (v1.w < v2.w) ? 
-1 : 0; 2232 return ret; 2233 } 2234 2235 ALWAYS_INLINE friend GSVector4 operator>=(const GSVector4& v1, const GSVector4& v2) 2236 { 2237 GSVector4 ret; 2238 ret.I32[0] = (v1.x >= v2.x) ? -1 : 0; 2239 ret.I32[1] = (v1.y >= v2.y) ? -1 : 0; 2240 ret.I32[2] = (v1.z >= v2.z) ? -1 : 0; 2241 ret.I32[3] = (v1.w >= v2.w) ? -1 : 0; 2242 return ret; 2243 } 2244 2245 ALWAYS_INLINE friend GSVector4 operator<=(const GSVector4& v1, const GSVector4& v2) 2246 { 2247 GSVector4 ret; 2248 ret.I32[0] = (v1.x <= v2.x) ? -1 : 0; 2249 ret.I32[1] = (v1.y <= v2.y) ? -1 : 0; 2250 ret.I32[2] = (v1.z <= v2.z) ? -1 : 0; 2251 ret.I32[3] = (v1.w <= v2.w) ? -1 : 0; 2252 return ret; 2253 } 2254 2255 ALWAYS_INLINE GSVector4 mul64(const GSVector4& v_) const 2256 { 2257 GSVector4 ret; 2258 ret.F64[0] = F64[0] * v_.F64[0]; 2259 ret.F64[1] = F64[1] * v_.F64[1]; 2260 return ret; 2261 } 2262 2263 ALWAYS_INLINE GSVector4 add64(const GSVector4& v_) const 2264 { 2265 GSVector4 ret; 2266 ret.F64[0] = F64[0] + v_.F64[0]; 2267 ret.F64[1] = F64[1] + v_.F64[1]; 2268 return ret; 2269 } 2270 2271 ALWAYS_INLINE GSVector4 sub64(const GSVector4& v_) const 2272 { 2273 GSVector4 ret; 2274 ret.F64[0] = F64[0] - v_.F64[0]; 2275 ret.F64[1] = F64[1] - v_.F64[1]; 2276 return ret; 2277 } 2278 2279 ALWAYS_INLINE GSVector4 div64(const GSVector4& v) const 2280 { 2281 return GSVector4::f64(F64[0] / v.F64[0], F64[1] / v.F64[1]); 2282 } 2283 2284 ALWAYS_INLINE GSVector4 gt64(const GSVector4& v) const 2285 { 2286 GSVector4 ret; 2287 ret.U64[0] = (F64[0] > v.F64[0]) ? 0xFFFFFFFFFFFFFFFFULL : 0; 2288 ret.U64[1] = (F64[1] > v.F64[1]) ? 0xFFFFFFFFFFFFFFFFULL : 0; 2289 return ret; 2290 } 2291 2292 ALWAYS_INLINE GSVector4 eq64(const GSVector4& v) const 2293 { 2294 GSVector4 ret; 2295 ret.U64[0] = (F64[0] == v.F64[0]) ? 0xFFFFFFFFFFFFFFFFULL : 0; 2296 ret.U64[1] = (F64[1] == v.F64[1]) ? 
0xFFFFFFFFFFFFFFFFULL : 0; 2297 return ret; 2298 } 2299 2300 ALWAYS_INLINE GSVector4 lt64(const GSVector4& v) const 2301 { 2302 GSVector4 ret; 2303 ret.U64[0] = (F64[0] < v.F64[0]) ? 0xFFFFFFFFFFFFFFFFULL : 0; 2304 ret.U64[1] = (F64[1] < v.F64[1]) ? 0xFFFFFFFFFFFFFFFFULL : 0; 2305 return ret; 2306 } 2307 2308 ALWAYS_INLINE GSVector4 ge64(const GSVector4& v) const 2309 { 2310 GSVector4 ret; 2311 ret.U64[0] = (F64[0] >= v.F64[0]) ? 0xFFFFFFFFFFFFFFFFULL : 0; 2312 ret.U64[1] = (F64[1] >= v.F64[1]) ? 0xFFFFFFFFFFFFFFFFULL : 0; 2313 return ret; 2314 } 2315 2316 ALWAYS_INLINE GSVector4 le64(const GSVector4& v) const 2317 { 2318 GSVector4 ret; 2319 ret.U64[0] = (F64[0] <= v.F64[0]) ? 0xFFFFFFFFFFFFFFFFULL : 0; 2320 ret.U64[1] = (F64[1] <= v.F64[1]) ? 0xFFFFFFFFFFFFFFFFULL : 0; 2321 return ret; 2322 } 2323 2324 ALWAYS_INLINE GSVector4 min64(const GSVector4& v) const 2325 { 2326 return GSVector4::f64(std::min(F64[0], v.F64[0]), std::min(F64[1], v.F64[1])); 2327 } 2328 2329 ALWAYS_INLINE GSVector4 max64(const GSVector4& v) const 2330 { 2331 return GSVector4::f64(std::max(F64[0], v.F64[0]), std::max(F64[1], v.F64[1])); 2332 } 2333 2334 ALWAYS_INLINE GSVector4 abs64() const { return *this & GSVector4::cxpr64(static_cast<u64>(0x7FFFFFFFFFFFFFFFULL)); } 2335 2336 ALWAYS_INLINE GSVector4 neg64() const { return *this ^ GSVector4::cxpr64(static_cast<u64>(0x8000000000000000ULL(); } 2337 2338 ALWAYS_INLINE GSVector4 sqrt64() const { return GSVector4::f64(std::sqrt(F64[0]), std::sqrt(F64[1])); } 2339 2340 ALWAYS_INLINE GSVector4 sqr64() const { return GSVector4::f64(F64[0] * F64[0], F64[1] * F64[1]); } 2341 2342 ALWAYS_INLINE GSVector4 floor64() const { return GSVector4::f64(std::floor(F64[0]), std::floor(F64[1])); } 2343 2344 ALWAYS_INLINE static GSVector4 f32to64(const GSVector4& v_) 2345 { 2346 GSVector4 ret; 2347 ret.F64[0] = v_.x; 2348 ret.F64[1] = v_.y; 2349 return ret; 2350 } 2351 2352 ALWAYS_INLINE static GSVector4 f32to64(const void* p) 2353 { 2354 float f[2]; 2355 
    std::memcpy(f, p, sizeof(f));
    GSVector4 ret;
    ret.F64[0] = f[0];
    ret.F64[1] = f[1];
    return ret;
  }

  // Converts the two f64 lanes to s32 (C++ truncation toward zero), placing
  // them in x/y and zeroing z/w.
  ALWAYS_INLINE GSVector4i f64toi32() const
  {
    return GSVector4i(static_cast<s32>(F64[0]), static_cast<s32>(F64[1]), 0, 0);
  }

  // clang-format off

  // The macros below generate the full set of 4-component swizzle accessors
  // (xxxx(), xyzw(), wzyx(), ... 256 in total), each in two forms:
  //   v.xyzw()   - permutes this vector's lanes.
  //   v.xyzw(o)  - takes the first two lanes from this vector and the last
  //                two from o (matching the two-operand SSE shufps form).
#define VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, ws, wn) \
  ALWAYS_INLINE GSVector4 xs##ys##zs##ws() const { return GSVector4(F32[xn], F32[yn], F32[zn], F32[wn]); } \
  ALWAYS_INLINE GSVector4 xs##ys##zs##ws(const GSVector4& v_) const { return GSVector4(F32[xn], F32[yn], v_.F32[zn], v_.F32[wn]); }

  // Expands the fourth component over x/y/z/w.
#define VECTOR4_SHUFFLE_3(xs, xn, ys, yn, zs, zn) \
  VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, x, 0) \
  VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, y, 1) \
  VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, z, 2) \
  VECTOR4_SHUFFLE_4(xs, xn, ys, yn, zs, zn, w, 3) \

  // Expands the third component over x/y/z/w.
#define VECTOR4_SHUFFLE_2(xs, xn, ys, yn) \
  VECTOR4_SHUFFLE_3(xs, xn, ys, yn, x, 0) \
  VECTOR4_SHUFFLE_3(xs, xn, ys, yn, y, 1) \
  VECTOR4_SHUFFLE_3(xs, xn, ys, yn, z, 2) \
  VECTOR4_SHUFFLE_3(xs, xn, ys, yn, w, 3) \

  // Expands the second component over x/y/z/w.
#define VECTOR4_SHUFFLE_1(xs, xn) \
  VECTOR4_SHUFFLE_2(xs, xn, x, 0) \
  VECTOR4_SHUFFLE_2(xs, xn, y, 1) \
  VECTOR4_SHUFFLE_2(xs, xn, z, 2) \
  VECTOR4_SHUFFLE_2(xs, xn, w, 3) \

  // Instantiate for each possible first component.
  VECTOR4_SHUFFLE_1(x, 0)
  VECTOR4_SHUFFLE_1(y, 1)
  VECTOR4_SHUFFLE_1(z, 2)
  VECTOR4_SHUFFLE_1(w, 3)

  // clang-format on

  // Broadcasts this vector's x lane to all four lanes.
  ALWAYS_INLINE GSVector4 broadcast32() const { return GSVector4(x, x, x, x); }

  // Broadcasts v's x lane to all four lanes.
  ALWAYS_INLINE static GSVector4 broadcast32(const GSVector4& v) { return GSVector4(v.x, v.x, v.x, v.x); }

  // Broadcasts a float loaded from memory to all four lanes.
  ALWAYS_INLINE static GSVector4 broadcast32(const void* f)
  {
    float ff;
    std::memcpy(&ff, f, sizeof(ff));
    return GSVector4(ff, ff, ff, ff);
  }

  // Broadcasts a double loaded from memory to both f64 lanes.
  ALWAYS_INLINE static GSVector4 broadcast64(const void* d)
  {
    GSVector4 ret;
    std::memcpy(&ret.F64[0], d,
                sizeof(ret.F64[0]));
    ret.F64[1] = ret.F64[0];
    return ret;
  }
};

// Out-of-line conversion definitions: these can only be written once both the
// integer and float vector classes are complete.

// Value conversion: truncates each float toward zero (C++ float->int cast).
ALWAYS_INLINE GSVector2i::GSVector2i(const GSVector2& v)
{
  x = static_cast<s32>(v.x);
  y = static_cast<s32>(v.y);
}

// Value conversion: each integer converted to float.
ALWAYS_INLINE GSVector2::GSVector2(const GSVector2i& v)
{
  x = static_cast<float>(v.x);
  y = static_cast<float>(v.y);
}

// Bit cast: reinterprets the float bit patterns as integers (no value
// conversion); memcpy avoids strict-aliasing violations.
ALWAYS_INLINE GSVector2i GSVector2i::cast(const GSVector2& v)
{
  GSVector2i ret;
  std::memcpy(&ret, &v, sizeof(ret));
  return ret;
}

// Bit cast: reinterprets the integer bit patterns as floats.
ALWAYS_INLINE GSVector2 GSVector2::cast(const GSVector2i& v)
{
  GSVector2 ret;
  std::memcpy(&ret, &v, sizeof(ret));
  return ret;
}

// Value conversion: truncates each float lane toward zero.
ALWAYS_INLINE GSVector4i::GSVector4i(const GSVector4& v)
{
  x = static_cast<s32>(v.x);
  y = static_cast<s32>(v.y);
  z = static_cast<s32>(v.z);
  w = static_cast<s32>(v.w);
}

// Value conversion: each integer lane converted to float.
ALWAYS_INLINE GSVector4::GSVector4(const GSVector4i& v)
{
  x = static_cast<float>(v.x);
  y = static_cast<float>(v.y);
  z = static_cast<float>(v.z);
  w = static_cast<float>(v.w);
}

// Bit cast: reinterprets the 128-bit float payload as integers.
ALWAYS_INLINE GSVector4i GSVector4i::cast(const GSVector4& v)
{
  GSVector4i ret;
  std::memcpy(&ret, &v, sizeof(ret));
  return ret;
}

// Bit cast: reinterprets the 128-bit integer payload as floats.
ALWAYS_INLINE GSVector4 GSVector4::cast(const GSVector4i& v)
{
  GSVector4 ret;
  std::memcpy(&ret, &v, sizeof(ret));
  return ret;
}

// The saturation/lane-loop helpers are implementation details of this header;
// undefine them so they do not leak into including translation units.
#undef SSATURATE8
#undef USATURATE8
#undef SSATURATE16
#undef USATURATE16
#undef ALL_LANES_8
#undef ALL_LANES_16
#undef ALL_LANES_32
#undef ALL_LANES_64