logic-aarch64.cc (268439B)
1 // Copyright 2015, VIXL authors 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are met: 6 // 7 // * Redistributions of source code must retain the above copyright notice, 8 // this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above copyright notice, 10 // this list of conditions and the following disclaimer in the documentation 11 // and/or other materials provided with the distribution. 12 // * Neither the name of ARM Limited nor the names of its contributors may be 13 // used to endorse or promote products derived from this software without 14 // specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64

#include <cmath>

#include "simulator-aarch64.h"

namespace vixl {
namespace aarch64 {

using vixl::internal::SimFloat16;

// Type predicates: each primary template returns false and is specialised to
// return true for the matching floating-point type.
template <typename T>
bool IsFloat64() {
  return false;
}
template <>
bool IsFloat64<double>() {
  return true;
}

template <typename T>
bool IsFloat32() {
  return false;
}
template <>
bool IsFloat32<float>() {
  return true;
}

// Both Float16 and SimFloat16 count as half-precision types.
template <typename T>
bool IsFloat16() {
  return false;
}
template <>
bool IsFloat16<Float16>() {
  return true;
}
template <>
bool IsFloat16<SimFloat16>() {
  return true;
}

// FPDefaultNaN<T>() returns the default NaN constant for the type T.
template <>
double Simulator::FPDefaultNaN<double>() {
  return kFP64DefaultNaN;
}


template <>
float Simulator::FPDefaultNaN<float>() {
  return kFP32DefaultNaN;
}


template <>
SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
  return SimFloat16(kFP16DefaultNaN);
}


// Convert a signed fixed-point value with `fbits` fractional bits to double,
// by converting the magnitude with UFixedToDouble and negating if needed.
double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
  if (src >= 0) {
    return UFixedToDouble(src, fbits, round);
  } else if (src == INT64_MIN) {
    // -INT64_MIN is not representable as int64_t. Converting src to the
    // uint64_t parameter already gives its magnitude (2^63).
    return -UFixedToDouble(src, fbits, round);
  } else {
    return -UFixedToDouble(-src, fbits, round);
  }
}


// Convert an unsigned fixed-point value with `fbits` fractional bits to
// double, rounding with FPRoundToDouble according to `round`.
double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
  // An input of 0 is a special case because the result is effectively
  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
  if (src == 0) {
    return 0.0;
  }

  // Calculate the exponent. The highest significant bit will have the value
  // 2^exponent.
  const int highest_significant_bit = 63 - CountLeadingZeros(src);
  const int64_t exponent = highest_significant_bit - fbits;

  return FPRoundToDouble(0, exponent, src, round);
}


// Convert a signed fixed-point value with `fbits` fractional bits to float,
// by converting the magnitude with UFixedToFloat and negating if needed.
float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
  if (src >= 0) {
    return UFixedToFloat(src, fbits, round);
  } else if (src == INT64_MIN) {
    // -INT64_MIN is not representable as int64_t. Converting src to the
    // uint64_t parameter already gives its magnitude (2^63).
    return -UFixedToFloat(src, fbits, round);
  } else {
    return -UFixedToFloat(-src, fbits, round);
  }
}


// Convert an unsigned fixed-point value with `fbits` fractional bits to
// float, rounding with FPRoundToFloat according to `round`.
float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
  // An input of 0 is a special case because the result is effectively
  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
  if (src == 0) {
    return 0.0f;
  }

  // Calculate the exponent. The highest significant bit will have the value
  // 2^exponent.
  const int highest_significant_bit = 63 - CountLeadingZeros(src);
  const int32_t exponent = highest_significant_bit - fbits;

  return FPRoundToFloat(0, exponent, src, round);
}


// Convert a signed fixed-point value with `fbits` fractional bits to
// SimFloat16, by converting the magnitude with UFixedToFloat16 and negating
// if needed.
SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
  if (src >= 0) {
    return UFixedToFloat16(src, fbits, round);
  } else if (src == INT64_MIN) {
    // -INT64_MIN is not representable as int64_t. Converting src to the
    // uint64_t parameter already gives its magnitude (2^63).
    return -UFixedToFloat16(src, fbits, round);
  } else {
    return -UFixedToFloat16(-src, fbits, round);
  }
}


// Convert an unsigned fixed-point value with `fbits` fractional bits to
// SimFloat16, rounding with FPRoundToFloat16 according to `round`.
SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
                                      int fbits,
                                      FPRounding round) {
  // An input of 0 is a special case because the result is effectively
  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
  if (src == 0) {
    return 0.0f;
  }

  // Calculate the exponent. The highest significant bit will have the value
  // 2^exponent.
  const int highest_significant_bit = 63 - CountLeadingZeros(src);
  const int16_t exponent = highest_significant_bit - fbits;

  return FPRoundToFloat16(0, exponent, src, round);
}


// Pick a tag using the simulator's rand_state_ and ChooseNonExcludedTag. If
// `exclude` is zero, a random exclusion value is generated instead.
uint64_t Simulator::GenerateRandomTag(uint16_t exclude) {
  // NOTE(review): nrand48() is specified to return values in [0, 2^31), so
  // this shift can only produce 0-7 although a 4-bit value is asserted below
  // -- confirm this is intended.
  uint64_t rtag = nrand48(rand_state_) >> 28;
  VIXL_ASSERT(IsUint4(rtag));

  if (exclude == 0) {
    exclude = nrand48(rand_state_) >> 27;
  }

  // TODO: implement this to better match the specification, which calls for a
  // true random mode, and a pseudo-random mode with state (EL1.TAG) modified by
  // PRNG.
  return ChooseNonExcludedTag(rtag, 0, exclude);
}


// Load every lane of dst from consecutive memory starting at addr. Returns
// false as soon as a LoadLane call fails, true otherwise.
bool Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    if (!LoadLane(dst, vform, i, addr)) {
      return false;
    }
    addr += LaneSizeInBytesFromFormat(vform);
  }
  return true;
}


// Load the single lane `index` of dst from addr. Returns the LoadLane result.
bool Simulator::ld1(VectorFormat vform,
                    LogicVRegister dst,
                    int index,
                    uint64_t addr) {
  dst.ClearForWrite(vform);
  return LoadLane(dst, vform, index, addr);
}


// Load one element of the size given by unpack_vform from addr into every
// lane of dst, using LoadIntToLane when is_signed is set and LoadUintToLane
// otherwise. addr is not advanced, so all lanes read the same location.
// Returns false as soon as a load fails.
bool Simulator::ld1r(VectorFormat vform,
                     VectorFormat unpack_vform,
                     LogicVRegister dst,
                     uint64_t addr,
                     bool is_signed) {
  unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    if (is_signed) {
      if (!LoadIntToLane(dst, vform, unpack_size, i, addr)) {
        return false;
      }
    } else {
      if (!LoadUintToLane(dst, vform, unpack_size, i, addr)) {
        return false;
      }
    }
  }
  return true;
}


// As above, with the memory element size equal to the lane size. is_signed is
// not passed here, so it takes whatever default the declaration provides (the
// declaration is not visible in this file).
bool Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
  return ld1r(vform, vform, dst, addr);
}


// Load two-element structures: for each lane i, dst1 gets the first element
// and dst2 the second element of the i-th consecutive pair in memory.
// Returns false as soon as a load fails.
bool Simulator::ld2(VectorFormat vform,
                    LogicVRegister dst1,
                    LogicVRegister dst2,
                    uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr1 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2)) {
      return false;
    }
    addr1 += 2 * esize;
    addr2 += 2 * esize;
  }
  return true;
}


// Load one two-element structure from addr1 into lane `index` of dst1 and
// dst2. Returns false if either load fails.
bool Simulator::ld2(VectorFormat vform,
                    LogicVRegister dst1,
                    LogicVRegister dst2,
                    int index,
                    uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
  return (LoadLane(dst1, vform, index, addr1) &&
          LoadLane(dst2, vform, index, addr2));
}


// Load one two-element structure and replicate it: every lane of dst1 is
// loaded from addr and every lane of dst2 from the following element.
// Returns false as soon as a load fails.
bool Simulator::ld2r(VectorFormat vform,
                     LogicVRegister dst1,
                     LogicVRegister dst2,
                     uint64_t addr) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2)) {
      return false;
    }
  }
  return true;
}


// Load three-element structures: for each lane i, dst1/dst2/dst3 get the
// first/second/third element of the i-th consecutive triple in memory.
// Returns false as soon as a load fails.
bool Simulator::ld3(VectorFormat vform,
                    LogicVRegister dst1,
                    LogicVRegister dst2,
                    LogicVRegister dst3,
                    uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr1 + esize;
  uint64_t addr3 = addr2 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||
        !LoadLane(dst3, vform, i, addr3)) {
      return false;
    }
    addr1 += 3 * esize;
    addr2 += 3 * esize;
    addr3 += 3 * esize;
  }
  return true;
}


// Load one three-element structure from addr1 into lane `index` of dst1,
// dst2 and dst3. Returns false if any load fails.
bool Simulator::ld3(VectorFormat vform,
                    LogicVRegister dst1,
                    LogicVRegister dst2,
                    LogicVRegister dst3,
                    int index,
                    uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
  return (LoadLane(dst1, vform, index, addr1) &&
          LoadLane(dst2, vform, index, addr2) &&
          LoadLane(dst3, vform, index, addr3));
}


// Load one three-element structure and replicate it: every lane of dst1,
// dst2 and dst3 is loaded from the first, second and third element
// respectively. Returns false as soon as a load fails.
bool Simulator::ld3r(VectorFormat vform,
                     LogicVRegister dst1,
                     LogicVRegister dst2,
                     LogicVRegister dst3,
                     uint64_t addr) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||
        !LoadLane(dst3, vform, i, addr3)) {
      return false;
    }
  }
  return true;
}


// Load four-element structures: for each lane i, dst1..dst4 get the first to
// fourth element of the i-th consecutive quadruple in memory. Returns false
// as soon as a load fails.
bool Simulator::ld4(VectorFormat vform,
                    LogicVRegister dst1,
                    LogicVRegister dst2,
                    LogicVRegister dst3,
                    LogicVRegister dst4,
                    uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  dst4.ClearForWrite(vform);
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr1 + esize;
  uint64_t addr3 = addr2 + esize;
  uint64_t addr4 = addr3 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    if (!LoadLane(dst1, vform, i, addr1) || !LoadLane(dst2, vform, i, addr2) ||
        !LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {
      return false;
    }
    addr1 += 4 * esize;
    addr2 += 4 * esize;
    addr3 += 4 * esize;
    addr4 += 4 * esize;
  }
  return true;
}


// Load one four-element structure from addr1 into lane `index` of dst1..dst4.
// Returns false if any load fails.
bool Simulator::ld4(VectorFormat vform,
                    LogicVRegister dst1,
                    LogicVRegister dst2,
                    LogicVRegister dst3,
                    LogicVRegister dst4,
                    int index,
                    uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  dst4.ClearForWrite(vform);
  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
  return (LoadLane(dst1, vform, index, addr1) &&
          LoadLane(dst2, vform, index, addr2) &&
          LoadLane(dst3, vform, index, addr3) &&
          LoadLane(dst4, vform, index, addr4));
}


// Load one four-element structure and replicate it: every lane of dst1..dst4
// is loaded from the first to fourth element respectively. Returns false as
// soon as a load fails.
bool Simulator::ld4r(VectorFormat vform,
                     LogicVRegister dst1,
                     LogicVRegister dst2,
                     LogicVRegister dst3,
                     LogicVRegister dst4,
                     uint64_t addr) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  dst4.ClearForWrite(vform);
  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    if (!LoadLane(dst1, vform, i, addr) || !LoadLane(dst2, vform, i, addr2) ||
        !LoadLane(dst3, vform, i, addr3) || !LoadLane(dst4, vform, i, addr4)) {
      return false;
    }
  }
  return true;
}


// Store every lane of src to consecutive memory starting at addr. Returns
// false as soon as a StoreLane call fails, true otherwise.
bool Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    if (!StoreLane(src, vform, i, addr)) return false;
    addr += LaneSizeInBytesFromFormat(vform);
  }
  return true;
}


// Store the single lane `index` of src to addr. Returns the StoreLane result.
bool Simulator::st1(VectorFormat vform,
                    LogicVRegister src,
                    int index,
                    uint64_t addr) {
  return StoreLane(src, vform, index, addr);
}


// Store two-element structures: lane i of src and src2 is written as the
// i-th consecutive pair in memory. Returns false as soon as a store fails.
bool Simulator::st2(VectorFormat vform,
                    LogicVRegister src,
                    LogicVRegister src2,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2)) {
      return false;
    }
    addr += 2 * esize;
    addr2 += 2 * esize;
  }
  return true;
}


// Store lane `index` of src and src2 as one two-element structure at addr.
// Returns false if either store fails.
bool Simulator::st2(VectorFormat vform,
                    LogicVRegister src,
                    LogicVRegister src2,
                    int index,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  return (StoreLane(src, vform, index, addr) &&
          StoreLane(src2, vform, index, addr + 1 * esize));
}


// Store three-element structures: lane i of src, src2 and src3 is written as
// the i-th consecutive triple in memory. Returns false as soon as a store
// fails.
bool Simulator::st3(VectorFormat vform,
                    LogicVRegister src,
                    LogicVRegister src2,
                    LogicVRegister src3,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr + esize;
  uint64_t addr3 = addr2 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
        !StoreLane(src3, vform, i, addr3)) {
      return false;
    }
    addr += 3 * esize;
    addr2 += 3 * esize;
    addr3 += 3 * esize;
  }
  return true;
}


// Store lane `index` of src, src2 and src3 as one three-element structure at
// addr. Returns false if any store fails.
bool Simulator::st3(VectorFormat vform,
                    LogicVRegister src,
                    LogicVRegister src2,
                    LogicVRegister src3,
                    int index,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  return (StoreLane(src, vform, index, addr) &&
          StoreLane(src2, vform, index, addr + 1 * esize) &&
          StoreLane(src3, vform, index, addr + 2 * esize));
}


// Store four-element structures: lane i of src..src4 is written as the i-th
// consecutive quadruple in memory. Returns false as soon as a store fails.
bool Simulator::st4(VectorFormat vform,
                    LogicVRegister src,
                    LogicVRegister src2,
                    LogicVRegister src3,
                    LogicVRegister src4,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr + esize;
  uint64_t addr3 = addr2 + esize;
  uint64_t addr4 = addr3 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    if (!StoreLane(src, vform, i, addr) || !StoreLane(src2, vform, i, addr2) ||
        !StoreLane(src3, vform, i, addr3) ||
        !StoreLane(src4, vform, i, addr4)) {
      return false;
    }
    addr += 4 * esize;
    addr2 += 4 * esize;
    addr3 += 4 * esize;
    addr4 += 4 * esize;
  }
  return true;
}


// Store lane `index` of src..src4 as one four-element structure at addr.
// Returns false if any store fails.
bool Simulator::st4(VectorFormat vform,
                    LogicVRegister src,
                    LogicVRegister src2,
                    LogicVRegister src3,
                    LogicVRegister src4,
                    int index,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  return (StoreLane(src, vform, index, addr) &&
          StoreLane(src2, vform, index, addr + 1 * esize) &&
          StoreLane(src3, vform, index, addr + 2 * esize) &&
          StoreLane(src4, vform, index, addr + 3 * esize));
}


// Lane-wise integer comparison of src1 against src2. Each dst lane is set to
// MaxUintFromFormat(vform) when `cond` holds and to 0 otherwise. eq, hi and
// hs compare the unsigned lane values; ge, gt, lt and le compare the signed
// lane values. Any other condition is treated as unreachable.
LogicVRegister Simulator::cmp(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              Condition cond) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int64_t sa = src1.Int(vform, i);
    int64_t sb = src2.Int(vform, i);
    uint64_t ua = src1.Uint(vform, i);
    uint64_t ub = src2.Uint(vform, i);
    bool result = false;
    switch (cond) {
      case eq:
        result = (ua == ub);
        break;
      case ge:
        result = (sa >= sb);
        break;
      case gt:
        result = (sa > sb);
        break;
      case hi:
        result = (ua > ub);
        break;
      case hs:
        result = (ua >= ub);
        break;
      case lt:
        result = (sa < sb);
        break;
      case le:
        result = (sa <= sb);
        break;
      default:
        VIXL_UNREACHABLE();
        break;
    }
    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
  }
  return dst;
}


// Compare src1 against an immediate: build a vector operand from `imm` with
// dup_immediate, then defer to the register form of cmp.
LogicVRegister Simulator::cmp(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              int imm,
                              Condition cond) {
  SimVRegister temp;
  LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
  return cmp(vform, dst, src1, imm_reg, cond);
}


// Lane-wise bit test: each dst lane is set to MaxUintFromFormat(vform) when
// src1 and src2 have at least one set bit in common, and to 0 otherwise.
LogicVRegister Simulator::cmptst(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    uint64_t ua = src1.Uint(vform, i);
    uint64_t ub = src2.Uint(vform, i);
    dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
  }
  return dst;
}


// Lane-wise addition of src1 and src2. The operands are read left-justified
// in 64 bits so that lane overflow shows up as 64-bit overflow; unsigned and
// signed overflow are recorded on dst with SetUnsignedSat/SetSignedSat, and
// the wrapped sum is shifted back down into the lane.
LogicVRegister Simulator::add(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  int lane_size = LaneSizeInBitsFromFormat(vform);
  dst.ClearForWrite(vform);

  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Test for unsigned saturation.
    uint64_t ua = src1.UintLeftJustified(vform, i);
    uint64_t ub = src2.UintLeftJustified(vform, i);
    uint64_t ur = ua + ub;
    if (ur < ua) {
      dst.SetUnsignedSat(i, true);
    }

    // Test for signed saturation.
    bool pos_a = (ua >> 63) == 0;
    bool pos_b = (ub >> 63) == 0;
    bool pos_r = (ur >> 63) == 0;
    // If the signs of the operands are the same, but different from the result,
    // there was an overflow.
    if ((pos_a == pos_b) && (pos_a != pos_r)) {
      dst.SetSignedSat(i, pos_a);
    }
    dst.SetInt(vform, i, ur >> (64 - lane_size));
  }
  return dst;
}

// Add the unsigned scalar `value` to every lane of src1, recording unsigned
// and signed overflow on dst in the same way as add(). `value` must fit in
// the lane size (asserted).
LogicVRegister Simulator::add_uint(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   uint64_t value) {
  int lane_size = LaneSizeInBitsFromFormat(vform);
  VIXL_ASSERT(IsUintN(lane_size, value));
  dst.ClearForWrite(vform);
  // Left-justify `value`.
  uint64_t ub = value << (64 - lane_size);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Test for unsigned saturation.
    uint64_t ua = src1.UintLeftJustified(vform, i);
    uint64_t ur = ua + ub;
    if (ur < ua) {
      dst.SetUnsignedSat(i, true);
    }

    // Test for signed saturation.
    // `value` is always positive, so we have an overflow if the (signed) result
    // is smaller than the first operand.
    if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
      dst.SetSignedSat(i, true);
    }

    dst.SetInt(vform, i, ur >> (64 - lane_size));
  }
  return dst;
}

// Pairwise add, built from uzp1/uzp2 (into temporaries) followed by add().
// For SVE formats the result is then passed through interleave_top_bottom.
LogicVRegister Simulator::addp(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister temp1, temp2;
  uzp1(vform, temp1, src1, src2);
  uzp2(vform, temp2, src1, src2);
  add(vform, dst, temp1, temp2);
  if (IsSVEFormat(vform)) {
    interleave_top_bottom(vform, dst, dst);
  }
  return dst;
}

// Lane-wise signed division for kFormatVnS and kFormatVnD only (asserted).
// A zero divisor gives a quotient of 0, and (minimum value / -1) gives the
// minimum value rather than being evaluated with `/`.
LogicVRegister Simulator::sdiv(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));

  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int64_t val1 = src1.Int(vform, i);
    int64_t val2 = src2.Int(vform, i);
    int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
    int64_t quotient = 0;
    if ((val1 == min_int) && (val2 == -1)) {
      quotient = min_int;
    } else if (val2 != 0) {
      quotient = val1 / val2;
    }
    dst.SetInt(vform, i, quotient);
  }

  return dst;
}

// Lane-wise unsigned division for kFormatVnS and kFormatVnD only (asserted).
// A zero divisor gives a quotient of 0.
LogicVRegister Simulator::udiv(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));

  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    uint64_t val1 = src1.Uint(vform, i);
    uint64_t val2 = src2.Uint(vform, i);
    uint64_t quotient = 0;
    if (val2 != 0) {
      quotient = val1 / val2;
    }
    dst.SetUint(vform, i, quotient);
  }

  return dst;
}


// Multiply-accumulate: dst = srca + (src1 * src2), lane-wise. The product is
// formed in a temporary before the add.
LogicVRegister Simulator::mla(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& srca,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  SimVRegister temp;
  mul(vform, temp, src1, src2);
  add(vform, dst, srca, temp);
  return dst;
}


// Multiply-subtract: dst = srca - (src1 * src2), lane-wise. The product is
// formed in a temporary before the subtract.
LogicVRegister Simulator::mls(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& srca,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  SimVRegister temp;
  mul(vform, temp, src1, src2);
  sub(vform, dst, srca, temp);
  return dst;
}


// Lane-wise multiply of the unsigned lane values of src1 and src2; the
// product is written with SetUint.
LogicVRegister Simulator::mul(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);

  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
  }
  return dst;
}


// Multiply by element: src2[index] is duplicated into a temporary with
// dup_element (using the Q-filled form of vform), then the vector form of mul
// is applied.
LogicVRegister Simulator::mul(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// Lane-wise signed multiply-high, dispatching on the lane size to
// internal::MultiplyHigh<N>. Lane sizes other than 8, 16, 32 and 64 bits are
// treated as unreachable.
LogicVRegister Simulator::smulh(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Placeholder value; every handled lane size overwrites it.
    int64_t dst_val = 0xbadbeef;
    int64_t val1 = src1.Int(vform, i);
    int64_t val2 = src2.Int(vform, i);
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        dst_val = internal::MultiplyHigh<8>(val1, val2);
        break;
      case 16:
        dst_val = internal::MultiplyHigh<16>(val1, val2);
        break;
      case 32:
        dst_val = internal::MultiplyHigh<32>(val1, val2);
        break;
      case 64:
        dst_val = internal::MultiplyHigh<64>(val1, val2);
        break;
      default:
        VIXL_UNREACHABLE();
        break;
    }
    dst.SetInt(vform, i, dst_val);
  }
  return dst;
}


// Lane-wise unsigned multiply-high, dispatching on the lane size to
// internal::MultiplyHigh<N>. Lane sizes other than 8, 16, 32 and 64 bits are
// treated as unreachable.
LogicVRegister Simulator::umulh(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Placeholder value; every handled lane size overwrites it.
    uint64_t dst_val = 0xbadbeef;
    uint64_t val1 = src1.Uint(vform, i);
    uint64_t val2 = src2.Uint(vform, i);
    switch (LaneSizeInBitsFromFormat(vform)) {
      case 8:
        dst_val = internal::MultiplyHigh<8>(val1, val2);
        break;
      case 16:
        dst_val = internal::MultiplyHigh<16>(val1, val2);
        break;
      case 32:
        dst_val = internal::MultiplyHigh<32>(val1, val2);
        break;
      case 64:
        dst_val = internal::MultiplyHigh<64>(val1, val2);
        break;
      default:
        VIXL_UNREACHABLE();
        break;
    }
    dst.SetUint(vform, i, dst_val);
  }
  return dst;
}


// Multiply-accumulate by element: dst is used as the accumulator and
// src2[index] is duplicated into a temporary with dup_element.
LogicVRegister Simulator::mla(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
}


// Multiply-subtract by element: dst is used as the accumulator and
// src2[index] is duplicated into a temporary with dup_element.
LogicVRegister Simulator::mls(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of sqdmull: duplicates src2[index] into a temporary and
// defers to the vector form. The element format is the half-width,
// double-lane form of the Q-filled vform.
LogicVRegister Simulator::sqdmull(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of sqdmlal: duplicates src2[index] into a temporary and
// defers to the vector form. The element format is the half-width,
// double-lane form of the Q-filled vform.
LogicVRegister Simulator::sqdmlal(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of sqdmlsl: duplicates src2[index] into a temporary and
// defers to the vector form. The element format is the half-width,
// double-lane form of the Q-filled vform.
LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of sqdmulh: duplicates src2[index] into a temporary (using
// the Q-filled form of vform) and defers to the vector form.
LogicVRegister Simulator::sqdmulh(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element form of sqrdmulh: duplicates src2[index] into a temporary (using
// the Q-filled form of vform) and defers to the vector form.
LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element form of sqrdmlah: duplicates src2[index] into a temporary (using
// the Q-filled form of vform) and defers to the vector form.
LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element form of sqrdmlsh: duplicates src2[index] into a temporary (using
// the Q-filled form of vform) and defers to the vector form.
LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// Polynomial multiply returning only the `.second` member of the
// PolynomialMult128 result.
uint64_t Simulator::PolynomialMult(uint64_t op1,
                                   uint64_t op2,
                                   int lane_size_in_bits) const {
  return PolynomialMult128(op1, op2, lane_size_in_bits).second;
}

// Lane-wise polynomial multiply of src1 and src2, using PolynomialMult with
// the lane size of vform.
LogicVRegister Simulator::pmul(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform,
                i,
                PolynomialMult(src1.Uint(vform, i),
                               src2.Uint(vform, i),
                               LaneSizeInBitsFromFormat(vform)));
  }
  return dst;
}


// Widening polynomial multiply: dst lane i (format vform) receives the
// PolynomialMult128 result for lane i of src1 and src2 read in the half-width
// format.
LogicVRegister Simulator::pmull(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  VectorFormat vform_src = VectorFormatHalfWidth(vform);

  // Process the elements in reverse to avoid problems when the destination
  // register is the same as a source.
  for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
    dst.SetUint(vform,
                i,
                PolynomialMult128(src1.Uint(vform_src, i),
                                  src2.Uint(vform_src, i),
                                  LaneSizeInBitsFromFormat(vform_src)));
  }

  return dst;
}


// As pmull, but the sources are read in the half-width, double-lane format
// and source lanes [lane_count, 2 * lane_count) are used.
LogicVRegister Simulator::pmull2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);

  int lane_count = LaneCountFromFormat(vform);
  for (int i = 0; i < lane_count; i++) {
    dst.SetUint(vform,
                i,
                PolynomialMult128(src1.Uint(vform_src, lane_count + i),
                                  src2.Uint(vform_src, lane_count + i),
                                  LaneSizeInBitsFromFormat(vform_src)));
  }

  return dst;
}


// Lane-wise subtraction, src1 - src2. The operands are read left-justified in
// 64 bits so that lane overflow shows up as 64-bit overflow; unsigned and
// signed overflow are recorded on dst with SetUnsignedSat/SetSignedSat, and
// the wrapped difference is shifted back down into the lane.
LogicVRegister Simulator::sub(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  int lane_size = LaneSizeInBitsFromFormat(vform);
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Test for unsigned saturation.
    uint64_t ua = src1.UintLeftJustified(vform, i);
    uint64_t ub = src2.UintLeftJustified(vform, i);
    uint64_t ur = ua - ub;
    if (ub > ua) {
      dst.SetUnsignedSat(i, false);
    }

    // Test for signed saturation.
    bool pos_a = (ua >> 63) == 0;
    bool pos_b = (ub >> 63) == 0;
    bool pos_r = (ur >> 63) == 0;
    // If the signs of the operands are different, and the sign of the first
    // operand doesn't match the result, there was an overflow.
    if ((pos_a != pos_b) && (pos_a != pos_r)) {
      dst.SetSignedSat(i, pos_a);
    }

    dst.SetInt(vform, i, ur >> (64 - lane_size));
  }
  return dst;
}

// Subtract the unsigned scalar `value` from every lane of src1, recording
// unsigned and signed overflow on dst in the same way as sub(). `value` must
// fit in the lane size (asserted).
LogicVRegister Simulator::sub_uint(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   uint64_t value) {
  int lane_size = LaneSizeInBitsFromFormat(vform);
  VIXL_ASSERT(IsUintN(lane_size, value));
  dst.ClearForWrite(vform);
  // Left-justify `value`.
  uint64_t ub = value << (64 - lane_size);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Test for unsigned saturation.
    uint64_t ua = src1.UintLeftJustified(vform, i);
    uint64_t ur = ua - ub;
    if (ub > ua) {
      dst.SetUnsignedSat(i, false);
    }

    // Test for signed saturation.
    // `value` is always positive, so we have an overflow if the (signed) result
    // is greater than the first operand.
    if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
      dst.SetSignedSat(i, false);
    }

    dst.SetInt(vform, i, ur >> (64 - lane_size));
  }
  return dst;
}

// Lane-wise bitwise AND: dst = src1 & src2.
LogicVRegister Simulator::and_(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
  }
  return dst;
}


// Lane-wise bitwise OR: dst = src1 | src2.
LogicVRegister Simulator::orr(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
  }
  return dst;
}


// Lane-wise bitwise OR-NOT: dst = src1 | ~src2.
LogicVRegister Simulator::orn(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
  }
  return dst;
}


// Lane-wise bitwise exclusive OR: dst = src1 ^ src2.
LogicVRegister Simulator::eor(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
  }
  return dst;
}


// Lane-wise bit clear: dst = src1 & ~src2.
LogicVRegister Simulator::bic(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
  }
  return dst;
}


// Bit clear with an immediate: every lane becomes src & ~imm. All results are
// staged in a local array before dst is cleared and written (presumably so
// that dst may alias src -- confirm). The staging array holds 16 entries, so
// this assumes a lane count of at most 16.
LogicVRegister Simulator::bic(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              uint64_t imm) {
  uint64_t result[16];
  int lane_count = LaneCountFromFormat(vform);
  for (int i = 0; i < lane_count; ++i) {
    result[i] = src.Uint(vform, i) & ~imm;
  }
  dst.ClearForWrite(vform);
  for (int i = 0; i < lane_count; ++i) {
    dst.SetUint(vform, i, result[i]);
  }
  return dst;
}


// Bitwise insert if false: for each bit, the result takes the src1 bit where
// the corresponding src2 bit is clear, and keeps the dst bit otherwise.
LogicVRegister Simulator::bif(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    uint64_t operand1 = dst.Uint(vform, i);
    uint64_t operand2 = ~src2.Uint(vform, i);
    uint64_t operand3 = src1.Uint(vform, i);
    // Selects operand3 where operand2 is set, operand1 elsewhere.
    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
    dst.SetUint(vform, i, result);
  }
  return dst;
}


// Bitwise insert if true: for each bit, the result takes the src1 bit where
// the corresponding src2 bit is set, and keeps the dst bit otherwise.
LogicVRegister Simulator::bit(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    uint64_t operand1 = dst.Uint(vform, i);
    uint64_t operand2 = src2.Uint(vform, i);
    uint64_t operand3 = src1.Uint(vform, i);
    // Selects operand3 where operand2 is set, operand1 elsewhere.
    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
    dst.SetUint(vform, i, result);
  }
  return dst;
}


// Bitwise select: for each bit, the result takes the src1 bit where the
// corresponding src_mask bit is set, and the src2 bit otherwise.
LogicVRegister Simulator::bsl(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src_mask,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    uint64_t operand1 = src2.Uint(vform, i);
    uint64_t operand2 = src_mask.Uint(vform, i);
    uint64_t operand3 = src1.Uint(vform, i);
    // Selects operand3 where operand2 is set, operand1 elsewhere.
    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
    dst.SetUint(vform, i, result);
  }
  return dst;
}


// Lane-wise signed maximum (max == true) or minimum (max == false) of src1
// and src2.
LogicVRegister Simulator::sminmax(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool max) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int64_t src1_val = src1.Int(vform, i);
    int64_t src2_val = src2.Int(vform, i);
    int64_t dst_val;
    if (max) {
      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
    } else {
      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
    }
    dst.SetInt(vform, i, dst_val);
  }
  return dst;
}


// Lane-wise signed maximum of src1 and src2.
LogicVRegister Simulator::smax(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  return sminmax(vform, dst, src1, src2, true);
}


// Lane-wise signed minimum of src1 and src2.
LogicVRegister Simulator::smin(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  return sminmax(vform, dst, src1, src2, false);
}


LogicVRegister Simulator::sminmaxp(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool max) {
  unsigned lanes = LaneCountFromFormat(vform);
  int64_t result[kZRegMaxSizeInBytes];
  const LogicVRegister* src = &src1;
  for (unsigned j = 0; j < 2; j++) {
    for (unsigned i = 0; i < lanes; i += 2) {
      int64_t first_val = src->Int(vform, i);
      int64_t second_val = src->Int(vform, i + 1);
      int64_t dst_val;
      if (max) {
        dst_val = (first_val > second_val) ? first_val : second_val;
      } else {
        dst_val = (first_val < second_val) ?
first_val : second_val; 1226 } 1227 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result)); 1228 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1229 } 1230 src = &src2; 1231 } 1232 dst.SetIntArray(vform, result); 1233 if (IsSVEFormat(vform)) { 1234 interleave_top_bottom(vform, dst, dst); 1235 } 1236 return dst; 1237 } 1238 1239 1240 LogicVRegister Simulator::smaxp(VectorFormat vform, 1241 LogicVRegister dst, 1242 const LogicVRegister& src1, 1243 const LogicVRegister& src2) { 1244 return sminmaxp(vform, dst, src1, src2, true); 1245 } 1246 1247 1248 LogicVRegister Simulator::sminp(VectorFormat vform, 1249 LogicVRegister dst, 1250 const LogicVRegister& src1, 1251 const LogicVRegister& src2) { 1252 return sminmaxp(vform, dst, src1, src2, false); 1253 } 1254 1255 1256 LogicVRegister Simulator::addp(VectorFormat vform, 1257 LogicVRegister dst, 1258 const LogicVRegister& src) { 1259 VIXL_ASSERT(vform == kFormatD); 1260 1261 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1); 1262 dst.ClearForWrite(vform); 1263 dst.SetUint(vform, 0, dst_val); 1264 return dst; 1265 } 1266 1267 1268 LogicVRegister Simulator::addv(VectorFormat vform, 1269 LogicVRegister dst, 1270 const LogicVRegister& src) { 1271 VectorFormat vform_dst = 1272 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 1273 1274 1275 int64_t dst_val = 0; 1276 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1277 dst_val += src.Int(vform, i); 1278 } 1279 1280 dst.ClearForWrite(vform_dst); 1281 dst.SetInt(vform_dst, 0, dst_val); 1282 return dst; 1283 } 1284 1285 1286 LogicVRegister Simulator::saddlv(VectorFormat vform, 1287 LogicVRegister dst, 1288 const LogicVRegister& src) { 1289 VectorFormat vform_dst = 1290 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1291 1292 int64_t dst_val = 0; 1293 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1294 dst_val += src.Int(vform, i); 1295 } 1296 1297 dst.ClearForWrite(vform_dst); 1298 dst.SetInt(vform_dst, 0, dst_val); 
1299 return dst; 1300 } 1301 1302 1303 LogicVRegister Simulator::uaddlv(VectorFormat vform, 1304 LogicVRegister dst, 1305 const LogicVRegister& src) { 1306 VectorFormat vform_dst = 1307 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1308 1309 uint64_t dst_val = 0; 1310 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1311 dst_val += src.Uint(vform, i); 1312 } 1313 1314 dst.ClearForWrite(vform_dst); 1315 dst.SetUint(vform_dst, 0, dst_val); 1316 return dst; 1317 } 1318 1319 1320 LogicVRegister Simulator::sminmaxv(VectorFormat vform, 1321 LogicVRegister dst, 1322 const LogicPRegister& pg, 1323 const LogicVRegister& src, 1324 bool max) { 1325 int64_t dst_val = max ? INT64_MIN : INT64_MAX; 1326 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1327 if (!pg.IsActive(vform, i)) continue; 1328 1329 int64_t src_val = src.Int(vform, i); 1330 if (max) { 1331 dst_val = (src_val > dst_val) ? src_val : dst_val; 1332 } else { 1333 dst_val = (src_val < dst_val) ? src_val : dst_val; 1334 } 1335 } 1336 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 1337 dst.SetInt(vform, 0, dst_val); 1338 return dst; 1339 } 1340 1341 1342 LogicVRegister Simulator::smaxv(VectorFormat vform, 1343 LogicVRegister dst, 1344 const LogicVRegister& src) { 1345 sminmaxv(vform, dst, GetPTrue(), src, true); 1346 return dst; 1347 } 1348 1349 1350 LogicVRegister Simulator::sminv(VectorFormat vform, 1351 LogicVRegister dst, 1352 const LogicVRegister& src) { 1353 sminmaxv(vform, dst, GetPTrue(), src, false); 1354 return dst; 1355 } 1356 1357 1358 LogicVRegister Simulator::smaxv(VectorFormat vform, 1359 LogicVRegister dst, 1360 const LogicPRegister& pg, 1361 const LogicVRegister& src) { 1362 VIXL_ASSERT(IsSVEFormat(vform)); 1363 sminmaxv(vform, dst, pg, src, true); 1364 return dst; 1365 } 1366 1367 1368 LogicVRegister Simulator::sminv(VectorFormat vform, 1369 LogicVRegister dst, 1370 const LogicPRegister& pg, 1371 const LogicVRegister& src) { 1372 VIXL_ASSERT(IsSVEFormat(vform)); 1373 
sminmaxv(vform, dst, pg, src, false); 1374 return dst; 1375 } 1376 1377 1378 LogicVRegister Simulator::uminmax(VectorFormat vform, 1379 LogicVRegister dst, 1380 const LogicVRegister& src1, 1381 const LogicVRegister& src2, 1382 bool max) { 1383 dst.ClearForWrite(vform); 1384 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1385 uint64_t src1_val = src1.Uint(vform, i); 1386 uint64_t src2_val = src2.Uint(vform, i); 1387 uint64_t dst_val; 1388 if (max) { 1389 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1390 } else { 1391 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1392 } 1393 dst.SetUint(vform, i, dst_val); 1394 } 1395 return dst; 1396 } 1397 1398 1399 LogicVRegister Simulator::umax(VectorFormat vform, 1400 LogicVRegister dst, 1401 const LogicVRegister& src1, 1402 const LogicVRegister& src2) { 1403 return uminmax(vform, dst, src1, src2, true); 1404 } 1405 1406 1407 LogicVRegister Simulator::umin(VectorFormat vform, 1408 LogicVRegister dst, 1409 const LogicVRegister& src1, 1410 const LogicVRegister& src2) { 1411 return uminmax(vform, dst, src1, src2, false); 1412 } 1413 1414 1415 LogicVRegister Simulator::uminmaxp(VectorFormat vform, 1416 LogicVRegister dst, 1417 const LogicVRegister& src1, 1418 const LogicVRegister& src2, 1419 bool max) { 1420 unsigned lanes = LaneCountFromFormat(vform); 1421 uint64_t result[kZRegMaxSizeInBytes]; 1422 const LogicVRegister* src = &src1; 1423 for (unsigned j = 0; j < 2; j++) { 1424 for (unsigned i = 0; i < lanes; i += 2) { 1425 uint64_t first_val = src->Uint(vform, i); 1426 uint64_t second_val = src->Uint(vform, i + 1); 1427 uint64_t dst_val; 1428 if (max) { 1429 dst_val = (first_val > second_val) ? first_val : second_val; 1430 } else { 1431 dst_val = (first_val < second_val) ? 
first_val : second_val; 1432 } 1433 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result)); 1434 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1435 } 1436 src = &src2; 1437 } 1438 dst.SetUintArray(vform, result); 1439 if (IsSVEFormat(vform)) { 1440 interleave_top_bottom(vform, dst, dst); 1441 } 1442 return dst; 1443 } 1444 1445 1446 LogicVRegister Simulator::umaxp(VectorFormat vform, 1447 LogicVRegister dst, 1448 const LogicVRegister& src1, 1449 const LogicVRegister& src2) { 1450 return uminmaxp(vform, dst, src1, src2, true); 1451 } 1452 1453 1454 LogicVRegister Simulator::uminp(VectorFormat vform, 1455 LogicVRegister dst, 1456 const LogicVRegister& src1, 1457 const LogicVRegister& src2) { 1458 return uminmaxp(vform, dst, src1, src2, false); 1459 } 1460 1461 1462 LogicVRegister Simulator::uminmaxv(VectorFormat vform, 1463 LogicVRegister dst, 1464 const LogicPRegister& pg, 1465 const LogicVRegister& src, 1466 bool max) { 1467 uint64_t dst_val = max ? 0 : UINT64_MAX; 1468 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1469 if (!pg.IsActive(vform, i)) continue; 1470 1471 uint64_t src_val = src.Uint(vform, i); 1472 if (max) { 1473 dst_val = (src_val > dst_val) ? src_val : dst_val; 1474 } else { 1475 dst_val = (src_val < dst_val) ? 
src_val : dst_val; 1476 } 1477 } 1478 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 1479 dst.SetUint(vform, 0, dst_val); 1480 return dst; 1481 } 1482 1483 1484 LogicVRegister Simulator::umaxv(VectorFormat vform, 1485 LogicVRegister dst, 1486 const LogicVRegister& src) { 1487 uminmaxv(vform, dst, GetPTrue(), src, true); 1488 return dst; 1489 } 1490 1491 1492 LogicVRegister Simulator::uminv(VectorFormat vform, 1493 LogicVRegister dst, 1494 const LogicVRegister& src) { 1495 uminmaxv(vform, dst, GetPTrue(), src, false); 1496 return dst; 1497 } 1498 1499 1500 LogicVRegister Simulator::umaxv(VectorFormat vform, 1501 LogicVRegister dst, 1502 const LogicPRegister& pg, 1503 const LogicVRegister& src) { 1504 VIXL_ASSERT(IsSVEFormat(vform)); 1505 uminmaxv(vform, dst, pg, src, true); 1506 return dst; 1507 } 1508 1509 1510 LogicVRegister Simulator::uminv(VectorFormat vform, 1511 LogicVRegister dst, 1512 const LogicPRegister& pg, 1513 const LogicVRegister& src) { 1514 VIXL_ASSERT(IsSVEFormat(vform)); 1515 uminmaxv(vform, dst, pg, src, false); 1516 return dst; 1517 } 1518 1519 1520 LogicVRegister Simulator::shl(VectorFormat vform, 1521 LogicVRegister dst, 1522 const LogicVRegister& src, 1523 int shift) { 1524 VIXL_ASSERT(shift >= 0); 1525 SimVRegister temp; 1526 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1527 return ushl(vform, dst, src, shiftreg); 1528 } 1529 1530 1531 LogicVRegister Simulator::sshll(VectorFormat vform, 1532 LogicVRegister dst, 1533 const LogicVRegister& src, 1534 int shift) { 1535 VIXL_ASSERT(shift >= 0); 1536 SimVRegister temp1, temp2; 1537 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1538 LogicVRegister extendedreg = sxtl(vform, temp2, src); 1539 return sshl(vform, dst, extendedreg, shiftreg); 1540 } 1541 1542 1543 LogicVRegister Simulator::sshll2(VectorFormat vform, 1544 LogicVRegister dst, 1545 const LogicVRegister& src, 1546 int shift) { 1547 VIXL_ASSERT(shift >= 0); 1548 SimVRegister temp1, temp2; 1549 LogicVRegister 
shiftreg = dup_immediate(vform, temp1, shift); 1550 LogicVRegister extendedreg = sxtl2(vform, temp2, src); 1551 return sshl(vform, dst, extendedreg, shiftreg); 1552 } 1553 1554 1555 LogicVRegister Simulator::shll(VectorFormat vform, 1556 LogicVRegister dst, 1557 const LogicVRegister& src) { 1558 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1559 return sshll(vform, dst, src, shift); 1560 } 1561 1562 1563 LogicVRegister Simulator::shll2(VectorFormat vform, 1564 LogicVRegister dst, 1565 const LogicVRegister& src) { 1566 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1567 return sshll2(vform, dst, src, shift); 1568 } 1569 1570 1571 LogicVRegister Simulator::ushll(VectorFormat vform, 1572 LogicVRegister dst, 1573 const LogicVRegister& src, 1574 int shift) { 1575 VIXL_ASSERT(shift >= 0); 1576 SimVRegister temp1, temp2; 1577 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1578 LogicVRegister extendedreg = uxtl(vform, temp2, src); 1579 return ushl(vform, dst, extendedreg, shiftreg); 1580 } 1581 1582 1583 LogicVRegister Simulator::ushll2(VectorFormat vform, 1584 LogicVRegister dst, 1585 const LogicVRegister& src, 1586 int shift) { 1587 VIXL_ASSERT(shift >= 0); 1588 SimVRegister temp1, temp2; 1589 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1590 LogicVRegister extendedreg = uxtl2(vform, temp2, src); 1591 return ushl(vform, dst, extendedreg, shiftreg); 1592 } 1593 1594 std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform, 1595 const LogicPRegister& pg, 1596 const LogicVRegister& src, 1597 int offset_from_last_active) { 1598 // Untested for any other values. 
1599 VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1)); 1600 1601 int last_active = GetLastActive(vform, pg); 1602 int lane_count = LaneCountFromFormat(vform); 1603 int index = 1604 ((last_active + offset_from_last_active) + lane_count) % lane_count; 1605 return std::make_pair(last_active >= 0, src.Uint(vform, index)); 1606 } 1607 1608 LogicVRegister Simulator::compact(VectorFormat vform, 1609 LogicVRegister dst, 1610 const LogicPRegister& pg, 1611 const LogicVRegister& src) { 1612 int j = 0; 1613 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1614 if (pg.IsActive(vform, i)) { 1615 dst.SetUint(vform, j++, src.Uint(vform, i)); 1616 } 1617 } 1618 for (; j < LaneCountFromFormat(vform); j++) { 1619 dst.SetUint(vform, j, 0); 1620 } 1621 return dst; 1622 } 1623 1624 LogicVRegister Simulator::splice(VectorFormat vform, 1625 LogicVRegister dst, 1626 const LogicPRegister& pg, 1627 const LogicVRegister& src1, 1628 const LogicVRegister& src2) { 1629 int lane_count = LaneCountFromFormat(vform); 1630 int first_active = GetFirstActive(vform, pg); 1631 int last_active = GetLastActive(vform, pg); 1632 int dst_idx = 0; 1633 uint64_t result[kZRegMaxSizeInBytes]; 1634 1635 if (first_active >= 0) { 1636 VIXL_ASSERT(last_active >= first_active); 1637 VIXL_ASSERT(last_active < lane_count); 1638 for (int i = first_active; i <= last_active; i++) { 1639 result[dst_idx++] = src1.Uint(vform, i); 1640 } 1641 } 1642 1643 VIXL_ASSERT(dst_idx <= lane_count); 1644 for (int i = dst_idx; i < lane_count; i++) { 1645 result[i] = src2.Uint(vform, i - dst_idx); 1646 } 1647 1648 dst.SetUintArray(vform, result); 1649 1650 return dst; 1651 } 1652 1653 LogicVRegister Simulator::sel(VectorFormat vform, 1654 LogicVRegister dst, 1655 const SimPRegister& pg, 1656 const LogicVRegister& src1, 1657 const LogicVRegister& src2) { 1658 int p_reg_bits_per_lane = 1659 LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit; 1660 for (int lane = 0; lane < LaneCountFromFormat(vform); 
lane++) { 1661 uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane) 1662 ? src1.Uint(vform, lane) 1663 : src2.Uint(vform, lane); 1664 dst.SetUint(vform, lane, lane_value); 1665 } 1666 return dst; 1667 } 1668 1669 1670 LogicPRegister Simulator::sel(LogicPRegister dst, 1671 const LogicPRegister& pg, 1672 const LogicPRegister& src1, 1673 const LogicPRegister& src2) { 1674 for (int i = 0; i < dst.GetChunkCount(); i++) { 1675 LogicPRegister::ChunkType mask = pg.GetChunk(i); 1676 LogicPRegister::ChunkType result = 1677 (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i)); 1678 dst.SetChunk(i, result); 1679 } 1680 return dst; 1681 } 1682 1683 1684 LogicVRegister Simulator::sli(VectorFormat vform, 1685 LogicVRegister dst, 1686 const LogicVRegister& src, 1687 int shift) { 1688 dst.ClearForWrite(vform); 1689 int lane_count = LaneCountFromFormat(vform); 1690 for (int i = 0; i < lane_count; i++) { 1691 uint64_t src_lane = src.Uint(vform, i); 1692 uint64_t dst_lane = dst.Uint(vform, i); 1693 uint64_t shifted = src_lane << shift; 1694 uint64_t mask = MaxUintFromFormat(vform) << shift; 1695 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1696 } 1697 return dst; 1698 } 1699 1700 1701 LogicVRegister Simulator::sqshl(VectorFormat vform, 1702 LogicVRegister dst, 1703 const LogicVRegister& src, 1704 int shift) { 1705 VIXL_ASSERT(shift >= 0); 1706 SimVRegister temp; 1707 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1708 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); 1709 } 1710 1711 1712 LogicVRegister Simulator::uqshl(VectorFormat vform, 1713 LogicVRegister dst, 1714 const LogicVRegister& src, 1715 int shift) { 1716 VIXL_ASSERT(shift >= 0); 1717 SimVRegister temp; 1718 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1719 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1720 } 1721 1722 1723 LogicVRegister Simulator::sqshlu(VectorFormat vform, 1724 LogicVRegister dst, 1725 const LogicVRegister& src, 1726 int 
shift) { 1727 VIXL_ASSERT(shift >= 0); 1728 SimVRegister temp; 1729 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1730 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1731 } 1732 1733 1734 LogicVRegister Simulator::sri(VectorFormat vform, 1735 LogicVRegister dst, 1736 const LogicVRegister& src, 1737 int shift) { 1738 dst.ClearForWrite(vform); 1739 int lane_count = LaneCountFromFormat(vform); 1740 VIXL_ASSERT((shift > 0) && 1741 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); 1742 for (int i = 0; i < lane_count; i++) { 1743 uint64_t src_lane = src.Uint(vform, i); 1744 uint64_t dst_lane = dst.Uint(vform, i); 1745 uint64_t shifted; 1746 uint64_t mask; 1747 if (shift == 64) { 1748 shifted = 0; 1749 mask = 0; 1750 } else { 1751 shifted = src_lane >> shift; 1752 mask = MaxUintFromFormat(vform) >> shift; 1753 } 1754 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1755 } 1756 return dst; 1757 } 1758 1759 1760 LogicVRegister Simulator::ushr(VectorFormat vform, 1761 LogicVRegister dst, 1762 const LogicVRegister& src, 1763 int shift) { 1764 VIXL_ASSERT(shift >= 0); 1765 SimVRegister temp; 1766 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1767 return ushl(vform, dst, src, shiftreg); 1768 } 1769 1770 1771 LogicVRegister Simulator::sshr(VectorFormat vform, 1772 LogicVRegister dst, 1773 const LogicVRegister& src, 1774 int shift) { 1775 VIXL_ASSERT(shift >= 0); 1776 SimVRegister temp; 1777 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1778 return sshl(vform, dst, src, shiftreg); 1779 } 1780 1781 1782 LogicVRegister Simulator::ssra(VectorFormat vform, 1783 LogicVRegister dst, 1784 const LogicVRegister& src, 1785 int shift) { 1786 SimVRegister temp; 1787 LogicVRegister shifted_reg = sshr(vform, temp, src, shift); 1788 return add(vform, dst, dst, shifted_reg); 1789 } 1790 1791 1792 LogicVRegister Simulator::usra(VectorFormat vform, 1793 LogicVRegister dst, 1794 const LogicVRegister& src, 1795 int 
shift) { 1796 SimVRegister temp; 1797 LogicVRegister shifted_reg = ushr(vform, temp, src, shift); 1798 return add(vform, dst, dst, shifted_reg); 1799 } 1800 1801 1802 LogicVRegister Simulator::srsra(VectorFormat vform, 1803 LogicVRegister dst, 1804 const LogicVRegister& src, 1805 int shift) { 1806 SimVRegister temp; 1807 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); 1808 return add(vform, dst, dst, shifted_reg); 1809 } 1810 1811 1812 LogicVRegister Simulator::ursra(VectorFormat vform, 1813 LogicVRegister dst, 1814 const LogicVRegister& src, 1815 int shift) { 1816 SimVRegister temp; 1817 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); 1818 return add(vform, dst, dst, shifted_reg); 1819 } 1820 1821 1822 LogicVRegister Simulator::cls(VectorFormat vform, 1823 LogicVRegister dst, 1824 const LogicVRegister& src) { 1825 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); 1826 int lane_count = LaneCountFromFormat(vform); 1827 1828 // Ensure that we can store one result per lane. 1829 int result[kZRegMaxSizeInBytes]; 1830 1831 for (int i = 0; i < lane_count; i++) { 1832 result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits); 1833 } 1834 1835 dst.ClearForWrite(vform); 1836 for (int i = 0; i < lane_count; ++i) { 1837 dst.SetUint(vform, i, result[i]); 1838 } 1839 return dst; 1840 } 1841 1842 1843 LogicVRegister Simulator::clz(VectorFormat vform, 1844 LogicVRegister dst, 1845 const LogicVRegister& src) { 1846 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); 1847 int lane_count = LaneCountFromFormat(vform); 1848 1849 // Ensure that we can store one result per lane. 
1850 int result[kZRegMaxSizeInBytes]; 1851 1852 for (int i = 0; i < lane_count; i++) { 1853 result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits); 1854 } 1855 1856 dst.ClearForWrite(vform); 1857 for (int i = 0; i < lane_count; ++i) { 1858 dst.SetUint(vform, i, result[i]); 1859 } 1860 return dst; 1861 } 1862 1863 1864 LogicVRegister Simulator::cnot(VectorFormat vform, 1865 LogicVRegister dst, 1866 const LogicVRegister& src) { 1867 dst.ClearForWrite(vform); 1868 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1869 uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0; 1870 dst.SetUint(vform, i, value); 1871 } 1872 return dst; 1873 } 1874 1875 1876 LogicVRegister Simulator::cnt(VectorFormat vform, 1877 LogicVRegister dst, 1878 const LogicVRegister& src) { 1879 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); 1880 int lane_count = LaneCountFromFormat(vform); 1881 1882 // Ensure that we can store one result per lane. 1883 int result[kZRegMaxSizeInBytes]; 1884 1885 for (int i = 0; i < lane_count; i++) { 1886 result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits); 1887 } 1888 1889 dst.ClearForWrite(vform); 1890 for (int i = 0; i < lane_count; ++i) { 1891 dst.SetUint(vform, i, result[i]); 1892 } 1893 return dst; 1894 } 1895 1896 static int64_t CalculateSignedShiftDistance(int64_t shift_val, 1897 int esize, 1898 bool shift_in_ls_byte) { 1899 if (shift_in_ls_byte) { 1900 // Neon uses the least-significant byte of the lane as the shift distance. 1901 shift_val = ExtractSignedBitfield64(7, 0, shift_val); 1902 } else { 1903 // SVE uses a saturated shift distance in the range 1904 // -(esize + 1) ... (esize + 1). 
1905 if (shift_val > (esize + 1)) shift_val = esize + 1; 1906 if (shift_val < -(esize + 1)) shift_val = -(esize + 1); 1907 } 1908 return shift_val; 1909 } 1910 1911 LogicVRegister Simulator::sshl(VectorFormat vform, 1912 LogicVRegister dst, 1913 const LogicVRegister& src1, 1914 const LogicVRegister& src2, 1915 bool shift_in_ls_byte) { 1916 dst.ClearForWrite(vform); 1917 int esize = LaneSizeInBitsFromFormat(vform); 1918 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1919 int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i), 1920 esize, 1921 shift_in_ls_byte); 1922 1923 int64_t lj_src_val = src1.IntLeftJustified(vform, i); 1924 1925 // Set signed saturation state. 1926 if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) { 1927 dst.SetSignedSat(i, lj_src_val >= 0); 1928 } 1929 1930 // Set unsigned saturation state. 1931 if (lj_src_val < 0) { 1932 dst.SetUnsignedSat(i, false); 1933 } else if ((shift_val > CountLeadingZeros(lj_src_val)) && 1934 (lj_src_val != 0)) { 1935 dst.SetUnsignedSat(i, true); 1936 } 1937 1938 int64_t src_val = src1.Int(vform, i); 1939 bool src_is_negative = src_val < 0; 1940 if (shift_val > 63) { 1941 dst.SetInt(vform, i, 0); 1942 } else if (shift_val < -63) { 1943 dst.SetRounding(i, src_is_negative); 1944 dst.SetInt(vform, i, src_is_negative ? -1 : 0); 1945 } else { 1946 // Use unsigned types for shifts, as behaviour is undefined for signed 1947 // lhs. 1948 uint64_t usrc_val = static_cast<uint64_t>(src_val); 1949 1950 if (shift_val < 0) { 1951 // Convert to right shift. 1952 shift_val = -shift_val; 1953 1954 // Set rounding state by testing most-significant bit shifted out. 1955 // Rounding only needed on right shifts. 1956 if (((usrc_val >> (shift_val - 1)) & 1) == 1) { 1957 dst.SetRounding(i, true); 1958 } 1959 1960 usrc_val >>= shift_val; 1961 1962 if (src_is_negative) { 1963 // Simulate sign-extension. 
1964 usrc_val |= (~UINT64_C(0) << (64 - shift_val)); 1965 } 1966 } else { 1967 usrc_val <<= shift_val; 1968 } 1969 dst.SetUint(vform, i, usrc_val); 1970 } 1971 } 1972 return dst; 1973 } 1974 1975 1976 LogicVRegister Simulator::ushl(VectorFormat vform, 1977 LogicVRegister dst, 1978 const LogicVRegister& src1, 1979 const LogicVRegister& src2, 1980 bool shift_in_ls_byte) { 1981 dst.ClearForWrite(vform); 1982 int esize = LaneSizeInBitsFromFormat(vform); 1983 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1984 int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i), 1985 esize, 1986 shift_in_ls_byte); 1987 1988 uint64_t lj_src_val = src1.UintLeftJustified(vform, i); 1989 1990 // Set saturation state. 1991 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) { 1992 dst.SetUnsignedSat(i, true); 1993 } 1994 1995 uint64_t src_val = src1.Uint(vform, i); 1996 if ((shift_val > 63) || (shift_val < -64)) { 1997 dst.SetUint(vform, i, 0); 1998 } else { 1999 if (shift_val < 0) { 2000 // Set rounding state. Rounding only needed on right shifts. 2001 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 2002 dst.SetRounding(i, true); 2003 } 2004 2005 if (shift_val == -64) { 2006 src_val = 0; 2007 } else { 2008 src_val >>= -shift_val; 2009 } 2010 } else { 2011 src_val <<= shift_val; 2012 } 2013 dst.SetUint(vform, i, src_val); 2014 } 2015 } 2016 return dst; 2017 } 2018 2019 LogicVRegister Simulator::sshr(VectorFormat vform, 2020 LogicVRegister dst, 2021 const LogicVRegister& src1, 2022 const LogicVRegister& src2) { 2023 SimVRegister temp; 2024 // Saturate to sidestep the min-int problem. 2025 neg(vform, temp, src2).SignedSaturate(vform); 2026 sshl(vform, dst, src1, temp, false); 2027 return dst; 2028 } 2029 2030 LogicVRegister Simulator::ushr(VectorFormat vform, 2031 LogicVRegister dst, 2032 const LogicVRegister& src1, 2033 const LogicVRegister& src2) { 2034 SimVRegister temp; 2035 // Saturate to sidestep the min-int problem. 
2036 neg(vform, temp, src2).SignedSaturate(vform); 2037 ushl(vform, dst, src1, temp, false); 2038 return dst; 2039 } 2040 2041 LogicVRegister Simulator::neg(VectorFormat vform, 2042 LogicVRegister dst, 2043 const LogicVRegister& src) { 2044 dst.ClearForWrite(vform); 2045 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2046 // Test for signed saturation. 2047 int64_t sa = src.Int(vform, i); 2048 if (sa == MinIntFromFormat(vform)) { 2049 dst.SetSignedSat(i, true); 2050 } 2051 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa); 2052 } 2053 return dst; 2054 } 2055 2056 2057 LogicVRegister Simulator::suqadd(VectorFormat vform, 2058 LogicVRegister dst, 2059 const LogicVRegister& src1, 2060 const LogicVRegister& src2) { 2061 dst.ClearForWrite(vform); 2062 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2063 int64_t sa = src1.IntLeftJustified(vform, i); 2064 uint64_t ub = src2.UintLeftJustified(vform, i); 2065 uint64_t ur = sa + ub; 2066 2067 int64_t sr; 2068 memcpy(&sr, &ur, sizeof(sr)); 2069 if (sr < sa) { // Test for signed positive saturation. 2070 dst.SetInt(vform, i, MaxIntFromFormat(vform)); 2071 } else { 2072 dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i)); 2073 } 2074 } 2075 return dst; 2076 } 2077 2078 2079 LogicVRegister Simulator::usqadd(VectorFormat vform, 2080 LogicVRegister dst, 2081 const LogicVRegister& src1, 2082 const LogicVRegister& src2) { 2083 dst.ClearForWrite(vform); 2084 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2085 uint64_t ua = src1.UintLeftJustified(vform, i); 2086 int64_t sb = src2.IntLeftJustified(vform, i); 2087 uint64_t ur = ua + sb; 2088 2089 if ((sb > 0) && (ur <= ua)) { 2090 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. 2091 } else if ((sb < 0) && (ur >= ua)) { 2092 dst.SetUint(vform, i, 0); // Negative saturation. 
2093 } else { 2094 dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i)); 2095 } 2096 } 2097 return dst; 2098 } 2099 2100 2101 LogicVRegister Simulator::abs(VectorFormat vform, 2102 LogicVRegister dst, 2103 const LogicVRegister& src) { 2104 dst.ClearForWrite(vform); 2105 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2106 // Test for signed saturation. 2107 int64_t sa = src.Int(vform, i); 2108 if (sa == MinIntFromFormat(vform)) { 2109 dst.SetSignedSat(i, true); 2110 } 2111 if (sa < 0) { 2112 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa); 2113 } else { 2114 dst.SetInt(vform, i, sa); 2115 } 2116 } 2117 return dst; 2118 } 2119 2120 2121 LogicVRegister Simulator::andv(VectorFormat vform, 2122 LogicVRegister dst, 2123 const LogicPRegister& pg, 2124 const LogicVRegister& src) { 2125 VIXL_ASSERT(IsSVEFormat(vform)); 2126 uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform)); 2127 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2128 if (!pg.IsActive(vform, i)) continue; 2129 2130 result &= src.Uint(vform, i); 2131 } 2132 VectorFormat vform_dst = 2133 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 2134 dst.ClearForWrite(vform_dst); 2135 dst.SetUint(vform_dst, 0, result); 2136 return dst; 2137 } 2138 2139 2140 LogicVRegister Simulator::eorv(VectorFormat vform, 2141 LogicVRegister dst, 2142 const LogicPRegister& pg, 2143 const LogicVRegister& src) { 2144 VIXL_ASSERT(IsSVEFormat(vform)); 2145 uint64_t result = 0; 2146 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2147 if (!pg.IsActive(vform, i)) continue; 2148 2149 result ^= src.Uint(vform, i); 2150 } 2151 VectorFormat vform_dst = 2152 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 2153 dst.ClearForWrite(vform_dst); 2154 dst.SetUint(vform_dst, 0, result); 2155 return dst; 2156 } 2157 2158 2159 LogicVRegister Simulator::orv(VectorFormat vform, 2160 LogicVRegister dst, 2161 const LogicPRegister& pg, 2162 const LogicVRegister& src) { 2163 
VIXL_ASSERT(IsSVEFormat(vform)); 2164 uint64_t result = 0; 2165 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2166 if (!pg.IsActive(vform, i)) continue; 2167 2168 result |= src.Uint(vform, i); 2169 } 2170 VectorFormat vform_dst = 2171 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 2172 dst.ClearForWrite(vform_dst); 2173 dst.SetUint(vform_dst, 0, result); 2174 return dst; 2175 } 2176 2177 2178 LogicVRegister Simulator::saddv(VectorFormat vform, 2179 LogicVRegister dst, 2180 const LogicPRegister& pg, 2181 const LogicVRegister& src) { 2182 VIXL_ASSERT(IsSVEFormat(vform)); 2183 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize); 2184 int64_t result = 0; 2185 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2186 if (!pg.IsActive(vform, i)) continue; 2187 2188 // The destination register always has D-lane sizes and the source register 2189 // always has S-lanes or smaller, so signed integer overflow -- undefined 2190 // behaviour -- can't occur. 2191 result += src.Int(vform, i); 2192 } 2193 2194 dst.ClearForWrite(kFormatD); 2195 dst.SetInt(kFormatD, 0, result); 2196 return dst; 2197 } 2198 2199 2200 LogicVRegister Simulator::uaddv(VectorFormat vform, 2201 LogicVRegister dst, 2202 const LogicPRegister& pg, 2203 const LogicVRegister& src) { 2204 VIXL_ASSERT(IsSVEFormat(vform)); 2205 uint64_t result = 0; 2206 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2207 if (!pg.IsActive(vform, i)) continue; 2208 2209 result += src.Uint(vform, i); 2210 } 2211 2212 dst.ClearForWrite(kFormatD); 2213 dst.SetUint(kFormatD, 0, result); 2214 return dst; 2215 } 2216 2217 2218 LogicVRegister Simulator::extractnarrow(VectorFormat dstform, 2219 LogicVRegister dst, 2220 bool dst_is_signed, 2221 const LogicVRegister& src, 2222 bool src_is_signed) { 2223 bool upperhalf = false; 2224 VectorFormat srcform = dstform; 2225 if ((dstform == kFormat16B) || (dstform == kFormat8H) || 2226 (dstform == kFormat4S)) { 2227 upperhalf = true; 2228 srcform = 
VectorFormatHalfLanes(srcform); 2229 } 2230 srcform = VectorFormatDoubleWidth(srcform); 2231 2232 LogicVRegister src_copy = src; 2233 2234 int offset; 2235 if (upperhalf) { 2236 offset = LaneCountFromFormat(dstform) / 2; 2237 } else { 2238 offset = 0; 2239 } 2240 2241 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 2242 int64_t ssrc = src_copy.Int(srcform, i); 2243 uint64_t usrc = src_copy.Uint(srcform, i); 2244 2245 // Test for signed saturation 2246 if (ssrc > MaxIntFromFormat(dstform)) { 2247 dst.SetSignedSat(offset + i, true); 2248 } else if (ssrc < MinIntFromFormat(dstform)) { 2249 dst.SetSignedSat(offset + i, false); 2250 } 2251 2252 // Test for unsigned saturation 2253 if (src_is_signed) { 2254 if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) { 2255 dst.SetUnsignedSat(offset + i, true); 2256 } else if (ssrc < 0) { 2257 dst.SetUnsignedSat(offset + i, false); 2258 } 2259 } else { 2260 if (usrc > MaxUintFromFormat(dstform)) { 2261 dst.SetUnsignedSat(offset + i, true); 2262 } 2263 } 2264 2265 int64_t result; 2266 if (src_is_signed) { 2267 result = ssrc & MaxUintFromFormat(dstform); 2268 } else { 2269 result = usrc & MaxUintFromFormat(dstform); 2270 } 2271 2272 if (dst_is_signed) { 2273 dst.SetInt(dstform, offset + i, result); 2274 } else { 2275 dst.SetUint(dstform, offset + i, result); 2276 } 2277 } 2278 2279 if (upperhalf) { 2280 // Clear any bits beyond a Q register. 
2281 dst.ClearForWrite(kFormat16B); 2282 } else { 2283 dst.ClearForWrite(dstform); 2284 } 2285 return dst; 2286 } 2287 2288 2289 LogicVRegister Simulator::xtn(VectorFormat vform, 2290 LogicVRegister dst, 2291 const LogicVRegister& src) { 2292 return extractnarrow(vform, dst, true, src, true); 2293 } 2294 2295 2296 LogicVRegister Simulator::sqxtn(VectorFormat vform, 2297 LogicVRegister dst, 2298 const LogicVRegister& src) { 2299 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform); 2300 } 2301 2302 2303 LogicVRegister Simulator::sqxtun(VectorFormat vform, 2304 LogicVRegister dst, 2305 const LogicVRegister& src) { 2306 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform); 2307 } 2308 2309 2310 LogicVRegister Simulator::uqxtn(VectorFormat vform, 2311 LogicVRegister dst, 2312 const LogicVRegister& src) { 2313 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform); 2314 } 2315 2316 2317 LogicVRegister Simulator::absdiff(VectorFormat vform, 2318 LogicVRegister dst, 2319 const LogicVRegister& src1, 2320 const LogicVRegister& src2, 2321 bool is_signed) { 2322 dst.ClearForWrite(vform); 2323 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2324 bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i)) 2325 : (src1.Uint(vform, i) > src2.Uint(vform, i)); 2326 // Always calculate the answer using unsigned arithmetic, to avoid 2327 // implementation-defined signed overflow. 
2328 if (src1_gt_src2) { 2329 dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i)); 2330 } else { 2331 dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i)); 2332 } 2333 } 2334 return dst; 2335 } 2336 2337 2338 LogicVRegister Simulator::saba(VectorFormat vform, 2339 LogicVRegister dst, 2340 const LogicVRegister& src1, 2341 const LogicVRegister& src2) { 2342 SimVRegister temp; 2343 dst.ClearForWrite(vform); 2344 absdiff(vform, temp, src1, src2, true); 2345 add(vform, dst, dst, temp); 2346 return dst; 2347 } 2348 2349 2350 LogicVRegister Simulator::uaba(VectorFormat vform, 2351 LogicVRegister dst, 2352 const LogicVRegister& src1, 2353 const LogicVRegister& src2) { 2354 SimVRegister temp; 2355 dst.ClearForWrite(vform); 2356 absdiff(vform, temp, src1, src2, false); 2357 add(vform, dst, dst, temp); 2358 return dst; 2359 } 2360 2361 2362 LogicVRegister Simulator::not_(VectorFormat vform, 2363 LogicVRegister dst, 2364 const LogicVRegister& src) { 2365 dst.ClearForWrite(vform); 2366 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2367 dst.SetUint(vform, i, ~src.Uint(vform, i)); 2368 } 2369 return dst; 2370 } 2371 2372 2373 LogicVRegister Simulator::rbit(VectorFormat vform, 2374 LogicVRegister dst, 2375 const LogicVRegister& src) { 2376 uint64_t result[kZRegMaxSizeInBytes]; 2377 int lane_count = LaneCountFromFormat(vform); 2378 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform); 2379 uint64_t reversed_value; 2380 uint64_t value; 2381 for (int i = 0; i < lane_count; i++) { 2382 value = src.Uint(vform, i); 2383 reversed_value = 0; 2384 for (int j = 0; j < lane_size_in_bits; j++) { 2385 reversed_value = (reversed_value << 1) | (value & 1); 2386 value >>= 1; 2387 } 2388 result[i] = reversed_value; 2389 } 2390 2391 dst.ClearForWrite(vform); 2392 for (int i = 0; i < lane_count; ++i) { 2393 dst.SetUint(vform, i, result[i]); 2394 } 2395 return dst; 2396 } 2397 2398 2399 LogicVRegister Simulator::rev(VectorFormat vform, 2400 LogicVRegister dst, 
2401 const LogicVRegister& src) { 2402 VIXL_ASSERT(IsSVEFormat(vform)); 2403 int lane_count = LaneCountFromFormat(vform); 2404 for (int i = 0; i < lane_count / 2; i++) { 2405 uint64_t t = src.Uint(vform, i); 2406 dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1)); 2407 dst.SetUint(vform, lane_count - i - 1, t); 2408 } 2409 return dst; 2410 } 2411 2412 2413 LogicVRegister Simulator::rev_byte(VectorFormat vform, 2414 LogicVRegister dst, 2415 const LogicVRegister& src, 2416 int rev_size) { 2417 uint64_t result[kZRegMaxSizeInBytes] = {}; 2418 int lane_count = LaneCountFromFormat(vform); 2419 int lane_size = LaneSizeInBytesFromFormat(vform); 2420 int lanes_per_loop = rev_size / lane_size; 2421 for (int i = 0; i < lane_count; i += lanes_per_loop) { 2422 for (int j = 0; j < lanes_per_loop; j++) { 2423 result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j); 2424 } 2425 } 2426 dst.ClearForWrite(vform); 2427 for (int i = 0; i < lane_count; ++i) { 2428 dst.SetUint(vform, i, result[i]); 2429 } 2430 return dst; 2431 } 2432 2433 2434 LogicVRegister Simulator::rev16(VectorFormat vform, 2435 LogicVRegister dst, 2436 const LogicVRegister& src) { 2437 return rev_byte(vform, dst, src, 2); 2438 } 2439 2440 2441 LogicVRegister Simulator::rev32(VectorFormat vform, 2442 LogicVRegister dst, 2443 const LogicVRegister& src) { 2444 return rev_byte(vform, dst, src, 4); 2445 } 2446 2447 2448 LogicVRegister Simulator::rev64(VectorFormat vform, 2449 LogicVRegister dst, 2450 const LogicVRegister& src) { 2451 return rev_byte(vform, dst, src, 8); 2452 } 2453 2454 LogicVRegister Simulator::addlp(VectorFormat vform, 2455 LogicVRegister dst, 2456 const LogicVRegister& src, 2457 bool is_signed, 2458 bool do_accumulate) { 2459 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); 2460 VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize); 2461 2462 uint64_t result[kZRegMaxSizeInBytes]; 2463 int lane_count = LaneCountFromFormat(vform); 2464 for (int i = 0; i < lane_count; 
i++) { 2465 if (is_signed) { 2466 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) + 2467 src.Int(vformsrc, 2 * i + 1)); 2468 } else { 2469 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); 2470 } 2471 } 2472 2473 dst.ClearForWrite(vform); 2474 for (int i = 0; i < lane_count; ++i) { 2475 if (do_accumulate) { 2476 result[i] += dst.Uint(vform, i); 2477 } 2478 dst.SetUint(vform, i, result[i]); 2479 } 2480 2481 return dst; 2482 } 2483 2484 2485 LogicVRegister Simulator::saddlp(VectorFormat vform, 2486 LogicVRegister dst, 2487 const LogicVRegister& src) { 2488 return addlp(vform, dst, src, true, false); 2489 } 2490 2491 2492 LogicVRegister Simulator::uaddlp(VectorFormat vform, 2493 LogicVRegister dst, 2494 const LogicVRegister& src) { 2495 return addlp(vform, dst, src, false, false); 2496 } 2497 2498 2499 LogicVRegister Simulator::sadalp(VectorFormat vform, 2500 LogicVRegister dst, 2501 const LogicVRegister& src) { 2502 return addlp(vform, dst, src, true, true); 2503 } 2504 2505 2506 LogicVRegister Simulator::uadalp(VectorFormat vform, 2507 LogicVRegister dst, 2508 const LogicVRegister& src) { 2509 return addlp(vform, dst, src, false, true); 2510 } 2511 2512 LogicVRegister Simulator::ror(VectorFormat vform, 2513 LogicVRegister dst, 2514 const LogicVRegister& src, 2515 int rotation) { 2516 int width = LaneSizeInBitsFromFormat(vform); 2517 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2518 uint64_t value = src.Uint(vform, i); 2519 dst.SetUint(vform, i, RotateRight(value, rotation, width)); 2520 } 2521 return dst; 2522 } 2523 2524 LogicVRegister Simulator::ext(VectorFormat vform, 2525 LogicVRegister dst, 2526 const LogicVRegister& src1, 2527 const LogicVRegister& src2, 2528 int index) { 2529 uint8_t result[kZRegMaxSizeInBytes] = {}; 2530 int lane_count = LaneCountFromFormat(vform); 2531 for (int i = 0; i < lane_count - index; ++i) { 2532 result[i] = src1.Uint(vform, i + index); 2533 } 2534 for (int i = 0; i < index; ++i) { 2535 
result[lane_count - index + i] = src2.Uint(vform, i); 2536 } 2537 dst.ClearForWrite(vform); 2538 for (int i = 0; i < lane_count; ++i) { 2539 dst.SetUint(vform, i, result[i]); 2540 } 2541 return dst; 2542 } 2543 2544 LogicVRegister Simulator::rotate_elements_right(VectorFormat vform, 2545 LogicVRegister dst, 2546 const LogicVRegister& src, 2547 int index) { 2548 if (index < 0) index += LaneCountFromFormat(vform); 2549 VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform))); 2550 index *= LaneSizeInBytesFromFormat(vform); 2551 return ext(kFormatVnB, dst, src, src, index); 2552 } 2553 2554 2555 template <typename T> 2556 LogicVRegister Simulator::fadda(VectorFormat vform, 2557 LogicVRegister acc, 2558 const LogicPRegister& pg, 2559 const LogicVRegister& src) { 2560 T result = acc.Float<T>(0); 2561 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2562 if (!pg.IsActive(vform, i)) continue; 2563 2564 result = FPAdd(result, src.Float<T>(i)); 2565 } 2566 VectorFormat vform_dst = 2567 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 2568 acc.ClearForWrite(vform_dst); 2569 acc.SetFloat(0, result); 2570 return acc; 2571 } 2572 2573 LogicVRegister Simulator::fadda(VectorFormat vform, 2574 LogicVRegister acc, 2575 const LogicPRegister& pg, 2576 const LogicVRegister& src) { 2577 switch (LaneSizeInBitsFromFormat(vform)) { 2578 case kHRegSize: 2579 fadda<SimFloat16>(vform, acc, pg, src); 2580 break; 2581 case kSRegSize: 2582 fadda<float>(vform, acc, pg, src); 2583 break; 2584 case kDRegSize: 2585 fadda<double>(vform, acc, pg, src); 2586 break; 2587 default: 2588 VIXL_UNREACHABLE(); 2589 } 2590 return acc; 2591 } 2592 2593 template <typename T> 2594 LogicVRegister Simulator::fcadd(VectorFormat vform, 2595 LogicVRegister dst, // d 2596 const LogicVRegister& src1, // n 2597 const LogicVRegister& src2, // m 2598 int rot) { 2599 int elements = LaneCountFromFormat(vform); 2600 2601 T element1, element3; 2602 rot = (rot == 1) ? 
270 : 90; 2603 2604 // Loop example: 2605 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i) 2606 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i) 2607 2608 for (int e = 0; e <= (elements / 2) - 1; e++) { 2609 switch (rot) { 2610 case 90: 2611 element1 = FPNeg(src2.Float<T>(e * 2 + 1)); 2612 element3 = src2.Float<T>(e * 2); 2613 break; 2614 case 270: 2615 element1 = src2.Float<T>(e * 2 + 1); 2616 element3 = FPNeg(src2.Float<T>(e * 2)); 2617 break; 2618 default: 2619 VIXL_UNREACHABLE(); 2620 return dst; // prevents "element(n) may be unintialized" errors 2621 } 2622 dst.ClearForWrite(vform); 2623 dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1)); 2624 dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3)); 2625 } 2626 return dst; 2627 } 2628 2629 2630 LogicVRegister Simulator::fcadd(VectorFormat vform, 2631 LogicVRegister dst, // d 2632 const LogicVRegister& src1, // n 2633 const LogicVRegister& src2, // m 2634 int rot) { 2635 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 2636 fcadd<SimFloat16>(vform, dst, src1, src2, rot); 2637 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 2638 fcadd<float>(vform, dst, src1, src2, rot); 2639 } else { 2640 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 2641 fcadd<double>(vform, dst, src1, src2, rot); 2642 } 2643 return dst; 2644 } 2645 2646 template <typename T> 2647 LogicVRegister Simulator::fcmla(VectorFormat vform, 2648 LogicVRegister dst, 2649 const LogicVRegister& src1, 2650 const LogicVRegister& src2, 2651 const LogicVRegister& acc, 2652 int index, 2653 int rot) { 2654 int elements = LaneCountFromFormat(vform); 2655 2656 T element1, element2, element3, element4; 2657 rot *= 90; 2658 2659 // Loop example: 2660 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i) 2661 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i) 2662 2663 for (int e = 0; e <= (elements / 2) - 1; e++) { 2664 // 
Index == -1 indicates a vector/vector rather than vector/indexed-element 2665 // operation. 2666 int f = (index < 0) ? e : index; 2667 2668 switch (rot) { 2669 case 0: 2670 element1 = src2.Float<T>(f * 2); 2671 element2 = src1.Float<T>(e * 2); 2672 element3 = src2.Float<T>(f * 2 + 1); 2673 element4 = src1.Float<T>(e * 2); 2674 break; 2675 case 90: 2676 element1 = FPNeg(src2.Float<T>(f * 2 + 1)); 2677 element2 = src1.Float<T>(e * 2 + 1); 2678 element3 = src2.Float<T>(f * 2); 2679 element4 = src1.Float<T>(e * 2 + 1); 2680 break; 2681 case 180: 2682 element1 = FPNeg(src2.Float<T>(f * 2)); 2683 element2 = src1.Float<T>(e * 2); 2684 element3 = FPNeg(src2.Float<T>(f * 2 + 1)); 2685 element4 = src1.Float<T>(e * 2); 2686 break; 2687 case 270: 2688 element1 = src2.Float<T>(f * 2 + 1); 2689 element2 = src1.Float<T>(e * 2 + 1); 2690 element3 = FPNeg(src2.Float<T>(f * 2)); 2691 element4 = src1.Float<T>(e * 2 + 1); 2692 break; 2693 default: 2694 VIXL_UNREACHABLE(); 2695 return dst; // prevents "element(n) may be unintialized" errors 2696 } 2697 dst.ClearForWrite(vform); 2698 dst.SetFloat<T>(vform, 2699 e * 2, 2700 FPMulAdd(acc.Float<T>(e * 2), element2, element1)); 2701 dst.SetFloat<T>(vform, 2702 e * 2 + 1, 2703 FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3)); 2704 } 2705 return dst; 2706 } 2707 2708 LogicVRegister Simulator::fcmla(VectorFormat vform, 2709 LogicVRegister dst, 2710 const LogicVRegister& src1, 2711 const LogicVRegister& src2, 2712 const LogicVRegister& acc, 2713 int rot) { 2714 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 2715 fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot); 2716 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 2717 fcmla<float>(vform, dst, src1, src2, acc, -1, rot); 2718 } else { 2719 fcmla<double>(vform, dst, src1, src2, acc, -1, rot); 2720 } 2721 return dst; 2722 } 2723 2724 2725 LogicVRegister Simulator::fcmla(VectorFormat vform, 2726 LogicVRegister dst, // d 2727 const LogicVRegister& src1, // n 2728 const 
LogicVRegister& src2,  // m
                                int index,
                                int rot) {
  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
    fcmla<SimFloat16>(vform, dst, src1, src2, dst, index, rot);
  } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    fcmla<float>(vform, dst, src1, src2, dst, index, rot);
  } else {
    fcmla<double>(vform, dst, src1, src2, dst, index, rot);
  }
  return dst;
}

// Integer complex add with rotation. The sources are de-interleaved with
// uzp1/uzp2 into even-lane (real) and odd-lane (imaginary) vectors, combined
// according to `rot` (90 or 270), optionally saturated, then re-interleaved
// with zip1.
LogicVRegister Simulator::cadd(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int rot,
                               bool saturate) {
  SimVRegister src1_r, src1_i;
  SimVRegister src2_r, src2_i;
  SimVRegister zero;
  zero.Clear();
  uzp1(vform, src1_r, src1, zero);
  uzp2(vform, src1_i, src1, zero);
  uzp1(vform, src2_r, src2, zero);
  uzp2(vform, src2_i, src2, zero);

  if (rot == 90) {
    // real = real1 - imag2, imag = imag1 + real2.
    LogicVRegister real = sub(vform, src1_r, src1_r, src2_i);
    if (saturate) real.SignedSaturate(vform);
    LogicVRegister imag = add(vform, src1_i, src1_i, src2_r);
    if (saturate) imag.SignedSaturate(vform);
  } else {
    VIXL_ASSERT(rot == 270);
    // real = real1 + imag2, imag = imag1 - real2.
    LogicVRegister real = add(vform, src1_r, src1_r, src2_i);
    if (saturate) real.SignedSaturate(vform);
    LogicVRegister imag = sub(vform, src1_i, src1_i, src2_r);
    if (saturate) imag.SignedSaturate(vform);
  }

  zip1(vform, dst, src1_r, src1_i);
  return dst;
}

LogicVRegister Simulator::cmla(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& srca,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               int rot) {
  SimVRegister src1_a;
  SimVRegister src2_a, src2_b;
  SimVRegister srca_i, srca_r;
  SimVRegister zero, temp;
  zero.Clear();

  if ((rot == 0) || (rot == 180)) {
    uzp1(vform, src1_a, src1, zero);
    uzp1(vform, src2_a, src2, zero);
    uzp2(vform, src2_b,
src2, zero); 2795 } else { 2796 uzp2(vform, src1_a, src1, zero); 2797 uzp2(vform, src2_a, src2, zero); 2798 uzp1(vform, src2_b, src2, zero); 2799 } 2800 2801 uzp1(vform, srca_r, srca, zero); 2802 uzp2(vform, srca_i, srca, zero); 2803 2804 bool sub_r = (rot == 90) || (rot == 180); 2805 bool sub_i = (rot == 180) || (rot == 270); 2806 2807 mul(vform, temp, src1_a, src2_a); 2808 if (sub_r) { 2809 sub(vform, srca_r, srca_r, temp); 2810 } else { 2811 add(vform, srca_r, srca_r, temp); 2812 } 2813 2814 mul(vform, temp, src1_a, src2_b); 2815 if (sub_i) { 2816 sub(vform, srca_i, srca_i, temp); 2817 } else { 2818 add(vform, srca_i, srca_i, temp); 2819 } 2820 2821 zip1(vform, dst, srca_r, srca_i); 2822 return dst; 2823 } 2824 2825 LogicVRegister Simulator::cmla(VectorFormat vform, 2826 LogicVRegister dst, 2827 const LogicVRegister& srca, 2828 const LogicVRegister& src1, 2829 const LogicVRegister& src2, 2830 int index, 2831 int rot) { 2832 SimVRegister temp; 2833 dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index); 2834 return cmla(vform, dst, srca, src1, temp, rot); 2835 } 2836 2837 LogicVRegister Simulator::bgrp(VectorFormat vform, 2838 LogicVRegister dst, 2839 const LogicVRegister& src1, 2840 const LogicVRegister& src2, 2841 bool do_bext) { 2842 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2843 uint64_t value = src1.Uint(vform, i); 2844 uint64_t mask = src2.Uint(vform, i); 2845 int high_pos = 0; 2846 int low_pos = 0; 2847 uint64_t result_high = 0; 2848 uint64_t result_low = 0; 2849 for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) { 2850 if ((mask & 1) == 0) { 2851 result_high |= (value & 1) << high_pos; 2852 high_pos++; 2853 } else { 2854 result_low |= (value & 1) << low_pos; 2855 low_pos++; 2856 } 2857 mask >>= 1; 2858 value >>= 1; 2859 } 2860 2861 if (!do_bext) { 2862 result_low |= result_high << low_pos; 2863 } 2864 2865 dst.SetUint(vform, i, result_low); 2866 } 2867 return dst; 2868 } 2869 2870 LogicVRegister 
Simulator::bdep(VectorFormat vform, 2871 LogicVRegister dst, 2872 const LogicVRegister& src1, 2873 const LogicVRegister& src2) { 2874 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2875 uint64_t value = src1.Uint(vform, i); 2876 uint64_t mask = src2.Uint(vform, i); 2877 uint64_t result = 0; 2878 for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) { 2879 if ((mask & 1) == 1) { 2880 result |= (value & 1) << j; 2881 value >>= 1; 2882 } 2883 mask >>= 1; 2884 } 2885 dst.SetUint(vform, i, result); 2886 } 2887 return dst; 2888 } 2889 2890 LogicVRegister Simulator::histogram(VectorFormat vform, 2891 LogicVRegister dst, 2892 const LogicPRegister& pg, 2893 const LogicVRegister& src1, 2894 const LogicVRegister& src2, 2895 bool do_segmented) { 2896 int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform); 2897 uint64_t result[kZRegMaxSizeInBytes]; 2898 2899 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2900 uint64_t count = 0; 2901 uint64_t value = src1.Uint(vform, i); 2902 2903 int segment = do_segmented ? (i / elements_per_segment) : 0; 2904 int segment_offset = segment * elements_per_segment; 2905 int hist_limit = do_segmented ? elements_per_segment : (i + 1); 2906 for (int j = 0; j < hist_limit; j++) { 2907 if (pg.IsActive(vform, j) && 2908 (value == src2.Uint(vform, j + segment_offset))) { 2909 count++; 2910 } 2911 } 2912 result[i] = count; 2913 } 2914 dst.SetUintArray(vform, result); 2915 return dst; 2916 } 2917 2918 LogicVRegister Simulator::dup_element(VectorFormat vform, 2919 LogicVRegister dst, 2920 const LogicVRegister& src, 2921 int src_index) { 2922 if ((vform == kFormatVnQ) || (vform == kFormatVnO)) { 2923 // When duplicating an element larger than 64 bits, split the element into 2924 // 64-bit parts, and duplicate the parts across the destination. 2925 uint64_t d[4]; 2926 int count = (vform == kFormatVnQ) ? 
2 : 4; 2927 for (int i = 0; i < count; i++) { 2928 d[i] = src.Uint(kFormatVnD, (src_index * count) + i); 2929 } 2930 dst.Clear(); 2931 for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) { 2932 dst.SetUint(kFormatVnD, i, d[i % count]); 2933 } 2934 } else { 2935 int lane_count = LaneCountFromFormat(vform); 2936 uint64_t value = src.Uint(vform, src_index); 2937 dst.ClearForWrite(vform); 2938 for (int i = 0; i < lane_count; ++i) { 2939 dst.SetUint(vform, i, value); 2940 } 2941 } 2942 return dst; 2943 } 2944 2945 LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform, 2946 LogicVRegister dst, 2947 const LogicVRegister& src, 2948 int src_index) { 2949 // In SVE, a segment is a 128-bit portion of a vector, like a Q register, 2950 // whereas in NEON, the size of segment is equal to the size of register 2951 // itself. 2952 int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform)); 2953 VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform))); 2954 int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform); 2955 2956 VIXL_ASSERT(src_index >= 0); 2957 VIXL_ASSERT(src_index < lanes_per_segment); 2958 2959 dst.ClearForWrite(vform); 2960 for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) { 2961 uint64_t value = src.Uint(vform, j + src_index); 2962 for (int i = 0; i < lanes_per_segment; i++) { 2963 dst.SetUint(vform, j + i, value); 2964 } 2965 } 2966 return dst; 2967 } 2968 2969 LogicVRegister Simulator::dup_elements_to_segments( 2970 VectorFormat vform, 2971 LogicVRegister dst, 2972 const std::pair<int, int>& src_and_index) { 2973 return dup_elements_to_segments(vform, 2974 dst, 2975 ReadVRegister(src_and_index.first), 2976 src_and_index.second); 2977 } 2978 2979 LogicVRegister Simulator::dup_immediate(VectorFormat vform, 2980 LogicVRegister dst, 2981 uint64_t imm) { 2982 int lane_count = LaneCountFromFormat(vform); 2983 uint64_t value = imm & MaxUintFromFormat(vform); 2984 
dst.ClearForWrite(vform); 2985 for (int i = 0; i < lane_count; ++i) { 2986 dst.SetUint(vform, i, value); 2987 } 2988 return dst; 2989 } 2990 2991 2992 LogicVRegister Simulator::ins_element(VectorFormat vform, 2993 LogicVRegister dst, 2994 int dst_index, 2995 const LogicVRegister& src, 2996 int src_index) { 2997 dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); 2998 return dst; 2999 } 3000 3001 3002 LogicVRegister Simulator::ins_immediate(VectorFormat vform, 3003 LogicVRegister dst, 3004 int dst_index, 3005 uint64_t imm) { 3006 uint64_t value = imm & MaxUintFromFormat(vform); 3007 dst.SetUint(vform, dst_index, value); 3008 return dst; 3009 } 3010 3011 3012 LogicVRegister Simulator::index(VectorFormat vform, 3013 LogicVRegister dst, 3014 uint64_t start, 3015 uint64_t step) { 3016 VIXL_ASSERT(IsSVEFormat(vform)); 3017 uint64_t value = start; 3018 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3019 dst.SetUint(vform, i, value); 3020 value += step; 3021 } 3022 return dst; 3023 } 3024 3025 3026 LogicVRegister Simulator::insr(VectorFormat vform, 3027 LogicVRegister dst, 3028 uint64_t imm) { 3029 VIXL_ASSERT(IsSVEFormat(vform)); 3030 for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) { 3031 dst.SetUint(vform, i, dst.Uint(vform, i - 1)); 3032 } 3033 dst.SetUint(vform, 0, imm); 3034 return dst; 3035 } 3036 3037 3038 LogicVRegister Simulator::mov(VectorFormat vform, 3039 LogicVRegister dst, 3040 const LogicVRegister& src) { 3041 dst.ClearForWrite(vform); 3042 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { 3043 dst.SetUint(vform, lane, src.Uint(vform, lane)); 3044 } 3045 return dst; 3046 } 3047 3048 3049 LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) { 3050 // Avoid a copy if the registers already alias. 
3051 if (dst.Aliases(src)) return dst; 3052 3053 for (int i = 0; i < dst.GetChunkCount(); i++) { 3054 dst.SetChunk(i, src.GetChunk(i)); 3055 } 3056 return dst; 3057 } 3058 3059 3060 LogicVRegister Simulator::mov_merging(VectorFormat vform, 3061 LogicVRegister dst, 3062 const SimPRegister& pg, 3063 const LogicVRegister& src) { 3064 return sel(vform, dst, pg, src, dst); 3065 } 3066 3067 LogicVRegister Simulator::mov_zeroing(VectorFormat vform, 3068 LogicVRegister dst, 3069 const SimPRegister& pg, 3070 const LogicVRegister& src) { 3071 SimVRegister zero; 3072 dup_immediate(vform, zero, 0); 3073 return sel(vform, dst, pg, src, zero); 3074 } 3075 3076 LogicVRegister Simulator::mov_alternating(VectorFormat vform, 3077 LogicVRegister dst, 3078 const LogicVRegister& src, 3079 int start_at) { 3080 VIXL_ASSERT((start_at == 0) || (start_at == 1)); 3081 for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) { 3082 dst.SetUint(vform, i, src.Uint(vform, i)); 3083 } 3084 return dst; 3085 } 3086 3087 LogicPRegister Simulator::mov_merging(LogicPRegister dst, 3088 const LogicPRegister& pg, 3089 const LogicPRegister& src) { 3090 return sel(dst, pg, src, dst); 3091 } 3092 3093 LogicPRegister Simulator::mov_zeroing(LogicPRegister dst, 3094 const LogicPRegister& pg, 3095 const LogicPRegister& src) { 3096 SimPRegister all_false; 3097 return sel(dst, pg, src, pfalse(all_false)); 3098 } 3099 3100 LogicVRegister Simulator::movi(VectorFormat vform, 3101 LogicVRegister dst, 3102 uint64_t imm) { 3103 int lane_count = LaneCountFromFormat(vform); 3104 dst.ClearForWrite(vform); 3105 for (int i = 0; i < lane_count; ++i) { 3106 dst.SetUint(vform, i, imm); 3107 } 3108 return dst; 3109 } 3110 3111 3112 LogicVRegister Simulator::mvni(VectorFormat vform, 3113 LogicVRegister dst, 3114 uint64_t imm) { 3115 int lane_count = LaneCountFromFormat(vform); 3116 dst.ClearForWrite(vform); 3117 for (int i = 0; i < lane_count; ++i) { 3118 dst.SetUint(vform, i, ~imm); 3119 } 3120 return dst; 3121 } 3122 
3123 3124 LogicVRegister Simulator::orr(VectorFormat vform, 3125 LogicVRegister dst, 3126 const LogicVRegister& src, 3127 uint64_t imm) { 3128 uint64_t result[16]; 3129 int lane_count = LaneCountFromFormat(vform); 3130 for (int i = 0; i < lane_count; ++i) { 3131 result[i] = src.Uint(vform, i) | imm; 3132 } 3133 dst.ClearForWrite(vform); 3134 for (int i = 0; i < lane_count; ++i) { 3135 dst.SetUint(vform, i, result[i]); 3136 } 3137 return dst; 3138 } 3139 3140 3141 LogicVRegister Simulator::uxtl(VectorFormat vform, 3142 LogicVRegister dst, 3143 const LogicVRegister& src, 3144 bool is_2) { 3145 VectorFormat vform_half = VectorFormatHalfWidth(vform); 3146 int lane_count = LaneCountFromFormat(vform); 3147 int src_offset = is_2 ? lane_count : 0; 3148 3149 dst.ClearForWrite(vform); 3150 for (int i = 0; i < lane_count; i++) { 3151 dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i)); 3152 } 3153 return dst; 3154 } 3155 3156 3157 LogicVRegister Simulator::sxtl(VectorFormat vform, 3158 LogicVRegister dst, 3159 const LogicVRegister& src, 3160 bool is_2) { 3161 VectorFormat vform_half = VectorFormatHalfWidth(vform); 3162 int lane_count = LaneCountFromFormat(vform); 3163 int src_offset = is_2 ? 
lane_count : 0; 3164 3165 dst.ClearForWrite(vform); 3166 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3167 dst.SetInt(vform, i, src.Int(vform_half, src_offset + i)); 3168 } 3169 return dst; 3170 } 3171 3172 3173 LogicVRegister Simulator::uxtl2(VectorFormat vform, 3174 LogicVRegister dst, 3175 const LogicVRegister& src) { 3176 return uxtl(vform, dst, src, /* is_2 = */ true); 3177 } 3178 3179 3180 LogicVRegister Simulator::sxtl2(VectorFormat vform, 3181 LogicVRegister dst, 3182 const LogicVRegister& src) { 3183 return sxtl(vform, dst, src, /* is_2 = */ true); 3184 } 3185 3186 3187 LogicVRegister Simulator::uxt(VectorFormat vform, 3188 LogicVRegister dst, 3189 const LogicVRegister& src, 3190 unsigned from_size_in_bits) { 3191 int lane_count = LaneCountFromFormat(vform); 3192 uint64_t mask = GetUintMask(from_size_in_bits); 3193 3194 dst.ClearForWrite(vform); 3195 for (int i = 0; i < lane_count; i++) { 3196 dst.SetInt(vform, i, src.Uint(vform, i) & mask); 3197 } 3198 return dst; 3199 } 3200 3201 3202 LogicVRegister Simulator::sxt(VectorFormat vform, 3203 LogicVRegister dst, 3204 const LogicVRegister& src, 3205 unsigned from_size_in_bits) { 3206 int lane_count = LaneCountFromFormat(vform); 3207 3208 dst.ClearForWrite(vform); 3209 for (int i = 0; i < lane_count; i++) { 3210 uint64_t value = 3211 ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i)); 3212 dst.SetInt(vform, i, value); 3213 } 3214 return dst; 3215 } 3216 3217 3218 LogicVRegister Simulator::shrn(VectorFormat vform, 3219 LogicVRegister dst, 3220 const LogicVRegister& src, 3221 int shift) { 3222 SimVRegister temp; 3223 VectorFormat vform_src = VectorFormatDoubleWidth(vform); 3224 VectorFormat vform_dst = vform; 3225 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); 3226 return extractnarrow(vform_dst, dst, false, shifted_src, false); 3227 } 3228 3229 3230 LogicVRegister Simulator::shrn2(VectorFormat vform, 3231 LogicVRegister dst, 3232 const LogicVRegister& src, 3233 int 
shift) { 3234 SimVRegister temp; 3235 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 3236 VectorFormat vformdst = vform; 3237 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); 3238 return extractnarrow(vformdst, dst, false, shifted_src, false); 3239 } 3240 3241 3242 LogicVRegister Simulator::rshrn(VectorFormat vform, 3243 LogicVRegister dst, 3244 const LogicVRegister& src, 3245 int shift) { 3246 SimVRegister temp; 3247 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 3248 VectorFormat vformdst = vform; 3249 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 3250 return extractnarrow(vformdst, dst, false, shifted_src, false); 3251 } 3252 3253 3254 LogicVRegister Simulator::rshrn2(VectorFormat vform, 3255 LogicVRegister dst, 3256 const LogicVRegister& src, 3257 int shift) { 3258 SimVRegister temp; 3259 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 3260 VectorFormat vformdst = vform; 3261 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 3262 return extractnarrow(vformdst, dst, false, shifted_src, false); 3263 } 3264 3265 LogicVRegister Simulator::Table(VectorFormat vform, 3266 LogicVRegister dst, 3267 const LogicVRegister& ind, 3268 bool zero_out_of_bounds, 3269 const LogicVRegister* tab1, 3270 const LogicVRegister* tab2, 3271 const LogicVRegister* tab3, 3272 const LogicVRegister* tab4) { 3273 VIXL_ASSERT(tab1 != NULL); 3274 int lane_count = LaneCountFromFormat(vform); 3275 VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16)); 3276 uint64_t table[kZRegMaxSizeInBytes * 2]; 3277 uint64_t result[kZRegMaxSizeInBytes]; 3278 3279 // For Neon, the table source registers are always 16B, and Neon allows only 3280 // 8B or 16B vform for the destination, so infer the table format from the 3281 // destination. 3282 VectorFormat vform_tab = (vform == kFormat8B) ? 
kFormat16B : vform; 3283 3284 uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]); 3285 if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]); 3286 if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]); 3287 if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]); 3288 3289 for (int i = 0; i < lane_count; i++) { 3290 uint64_t index = ind.Uint(vform, i); 3291 result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i); 3292 if (index < tab_size) result[i] = table[index]; 3293 } 3294 dst.SetUintArray(vform, result); 3295 return dst; 3296 } 3297 3298 LogicVRegister Simulator::tbl(VectorFormat vform, 3299 LogicVRegister dst, 3300 const LogicVRegister& tab, 3301 const LogicVRegister& ind) { 3302 return Table(vform, dst, ind, true, &tab); 3303 } 3304 3305 3306 LogicVRegister Simulator::tbl(VectorFormat vform, 3307 LogicVRegister dst, 3308 const LogicVRegister& tab, 3309 const LogicVRegister& tab2, 3310 const LogicVRegister& ind) { 3311 return Table(vform, dst, ind, true, &tab, &tab2); 3312 } 3313 3314 3315 LogicVRegister Simulator::tbl(VectorFormat vform, 3316 LogicVRegister dst, 3317 const LogicVRegister& tab, 3318 const LogicVRegister& tab2, 3319 const LogicVRegister& tab3, 3320 const LogicVRegister& ind) { 3321 return Table(vform, dst, ind, true, &tab, &tab2, &tab3); 3322 } 3323 3324 3325 LogicVRegister Simulator::tbl(VectorFormat vform, 3326 LogicVRegister dst, 3327 const LogicVRegister& tab, 3328 const LogicVRegister& tab2, 3329 const LogicVRegister& tab3, 3330 const LogicVRegister& tab4, 3331 const LogicVRegister& ind) { 3332 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4); 3333 } 3334 3335 3336 LogicVRegister Simulator::tbx(VectorFormat vform, 3337 LogicVRegister dst, 3338 const LogicVRegister& tab, 3339 const LogicVRegister& ind) { 3340 return Table(vform, dst, ind, false, &tab); 3341 } 3342 3343 3344 LogicVRegister Simulator::tbx(VectorFormat vform, 3345 LogicVRegister dst, 3346 const 
LogicVRegister& tab, 3347 const LogicVRegister& tab2, 3348 const LogicVRegister& ind) { 3349 return Table(vform, dst, ind, false, &tab, &tab2); 3350 } 3351 3352 3353 LogicVRegister Simulator::tbx(VectorFormat vform, 3354 LogicVRegister dst, 3355 const LogicVRegister& tab, 3356 const LogicVRegister& tab2, 3357 const LogicVRegister& tab3, 3358 const LogicVRegister& ind) { 3359 return Table(vform, dst, ind, false, &tab, &tab2, &tab3); 3360 } 3361 3362 3363 LogicVRegister Simulator::tbx(VectorFormat vform, 3364 LogicVRegister dst, 3365 const LogicVRegister& tab, 3366 const LogicVRegister& tab2, 3367 const LogicVRegister& tab3, 3368 const LogicVRegister& tab4, 3369 const LogicVRegister& ind) { 3370 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4); 3371 } 3372 3373 3374 LogicVRegister Simulator::uqshrn(VectorFormat vform, 3375 LogicVRegister dst, 3376 const LogicVRegister& src, 3377 int shift) { 3378 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); 3379 } 3380 3381 3382 LogicVRegister Simulator::uqshrn2(VectorFormat vform, 3383 LogicVRegister dst, 3384 const LogicVRegister& src, 3385 int shift) { 3386 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); 3387 } 3388 3389 3390 LogicVRegister Simulator::uqrshrn(VectorFormat vform, 3391 LogicVRegister dst, 3392 const LogicVRegister& src, 3393 int shift) { 3394 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); 3395 } 3396 3397 3398 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, 3399 LogicVRegister dst, 3400 const LogicVRegister& src, 3401 int shift) { 3402 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); 3403 } 3404 3405 3406 LogicVRegister Simulator::sqshrn(VectorFormat vform, 3407 LogicVRegister dst, 3408 const LogicVRegister& src, 3409 int shift) { 3410 SimVRegister temp; 3411 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 3412 VectorFormat vformdst = vform; 3413 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 3414 return 
sqxtn(vformdst, dst, shifted_src); 3415 } 3416 3417 3418 LogicVRegister Simulator::sqshrn2(VectorFormat vform, 3419 LogicVRegister dst, 3420 const LogicVRegister& src, 3421 int shift) { 3422 SimVRegister temp; 3423 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 3424 VectorFormat vformdst = vform; 3425 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 3426 return sqxtn(vformdst, dst, shifted_src); 3427 } 3428 3429 3430 LogicVRegister Simulator::sqrshrn(VectorFormat vform, 3431 LogicVRegister dst, 3432 const LogicVRegister& src, 3433 int shift) { 3434 SimVRegister temp; 3435 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 3436 VectorFormat vformdst = vform; 3437 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 3438 return sqxtn(vformdst, dst, shifted_src); 3439 } 3440 3441 3442 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, 3443 LogicVRegister dst, 3444 const LogicVRegister& src, 3445 int shift) { 3446 SimVRegister temp; 3447 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 3448 VectorFormat vformdst = vform; 3449 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 3450 return sqxtn(vformdst, dst, shifted_src); 3451 } 3452 3453 3454 LogicVRegister Simulator::sqshrun(VectorFormat vform, 3455 LogicVRegister dst, 3456 const LogicVRegister& src, 3457 int shift) { 3458 SimVRegister temp; 3459 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 3460 VectorFormat vformdst = vform; 3461 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 3462 return sqxtun(vformdst, dst, shifted_src); 3463 } 3464 3465 3466 LogicVRegister Simulator::sqshrun2(VectorFormat vform, 3467 LogicVRegister dst, 3468 const LogicVRegister& src, 3469 int shift) { 3470 SimVRegister temp; 3471 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 3472 VectorFormat vformdst = vform; 3473 LogicVRegister shifted_src = 
sshr(vformsrc, temp, src, shift); 3474 return sqxtun(vformdst, dst, shifted_src); 3475 } 3476 3477 3478 LogicVRegister Simulator::sqrshrun(VectorFormat vform, 3479 LogicVRegister dst, 3480 const LogicVRegister& src, 3481 int shift) { 3482 SimVRegister temp; 3483 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 3484 VectorFormat vformdst = vform; 3485 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 3486 return sqxtun(vformdst, dst, shifted_src); 3487 } 3488 3489 3490 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, 3491 LogicVRegister dst, 3492 const LogicVRegister& src, 3493 int shift) { 3494 SimVRegister temp; 3495 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 3496 VectorFormat vformdst = vform; 3497 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 3498 return sqxtun(vformdst, dst, shifted_src); 3499 } 3500 3501 3502 LogicVRegister Simulator::uaddl(VectorFormat vform, 3503 LogicVRegister dst, 3504 const LogicVRegister& src1, 3505 const LogicVRegister& src2) { 3506 SimVRegister temp1, temp2; 3507 uxtl(vform, temp1, src1); 3508 uxtl(vform, temp2, src2); 3509 add(vform, dst, temp1, temp2); 3510 return dst; 3511 } 3512 3513 3514 LogicVRegister Simulator::uaddl2(VectorFormat vform, 3515 LogicVRegister dst, 3516 const LogicVRegister& src1, 3517 const LogicVRegister& src2) { 3518 SimVRegister temp1, temp2; 3519 uxtl2(vform, temp1, src1); 3520 uxtl2(vform, temp2, src2); 3521 add(vform, dst, temp1, temp2); 3522 return dst; 3523 } 3524 3525 3526 LogicVRegister Simulator::uaddw(VectorFormat vform, 3527 LogicVRegister dst, 3528 const LogicVRegister& src1, 3529 const LogicVRegister& src2) { 3530 SimVRegister temp; 3531 uxtl(vform, temp, src2); 3532 add(vform, dst, src1, temp); 3533 return dst; 3534 } 3535 3536 3537 LogicVRegister Simulator::uaddw2(VectorFormat vform, 3538 LogicVRegister dst, 3539 const LogicVRegister& src1, 3540 const LogicVRegister& src2) { 3541 
SimVRegister temp; 3542 uxtl2(vform, temp, src2); 3543 add(vform, dst, src1, temp); 3544 return dst; 3545 } 3546 3547 3548 LogicVRegister Simulator::saddl(VectorFormat vform, 3549 LogicVRegister dst, 3550 const LogicVRegister& src1, 3551 const LogicVRegister& src2) { 3552 SimVRegister temp1, temp2; 3553 sxtl(vform, temp1, src1); 3554 sxtl(vform, temp2, src2); 3555 add(vform, dst, temp1, temp2); 3556 return dst; 3557 } 3558 3559 3560 LogicVRegister Simulator::saddl2(VectorFormat vform, 3561 LogicVRegister dst, 3562 const LogicVRegister& src1, 3563 const LogicVRegister& src2) { 3564 SimVRegister temp1, temp2; 3565 sxtl2(vform, temp1, src1); 3566 sxtl2(vform, temp2, src2); 3567 add(vform, dst, temp1, temp2); 3568 return dst; 3569 } 3570 3571 3572 LogicVRegister Simulator::saddw(VectorFormat vform, 3573 LogicVRegister dst, 3574 const LogicVRegister& src1, 3575 const LogicVRegister& src2) { 3576 SimVRegister temp; 3577 sxtl(vform, temp, src2); 3578 add(vform, dst, src1, temp); 3579 return dst; 3580 } 3581 3582 3583 LogicVRegister Simulator::saddw2(VectorFormat vform, 3584 LogicVRegister dst, 3585 const LogicVRegister& src1, 3586 const LogicVRegister& src2) { 3587 SimVRegister temp; 3588 sxtl2(vform, temp, src2); 3589 add(vform, dst, src1, temp); 3590 return dst; 3591 } 3592 3593 3594 LogicVRegister Simulator::usubl(VectorFormat vform, 3595 LogicVRegister dst, 3596 const LogicVRegister& src1, 3597 const LogicVRegister& src2) { 3598 SimVRegister temp1, temp2; 3599 uxtl(vform, temp1, src1); 3600 uxtl(vform, temp2, src2); 3601 sub(vform, dst, temp1, temp2); 3602 return dst; 3603 } 3604 3605 3606 LogicVRegister Simulator::usubl2(VectorFormat vform, 3607 LogicVRegister dst, 3608 const LogicVRegister& src1, 3609 const LogicVRegister& src2) { 3610 SimVRegister temp1, temp2; 3611 uxtl2(vform, temp1, src1); 3612 uxtl2(vform, temp2, src2); 3613 sub(vform, dst, temp1, temp2); 3614 return dst; 3615 } 3616 3617 3618 LogicVRegister Simulator::usubw(VectorFormat vform, 3619 
LogicVRegister dst, 3620 const LogicVRegister& src1, 3621 const LogicVRegister& src2) { 3622 SimVRegister temp; 3623 uxtl(vform, temp, src2); 3624 sub(vform, dst, src1, temp); 3625 return dst; 3626 } 3627 3628 3629 LogicVRegister Simulator::usubw2(VectorFormat vform, 3630 LogicVRegister dst, 3631 const LogicVRegister& src1, 3632 const LogicVRegister& src2) { 3633 SimVRegister temp; 3634 uxtl2(vform, temp, src2); 3635 sub(vform, dst, src1, temp); 3636 return dst; 3637 } 3638 3639 3640 LogicVRegister Simulator::ssubl(VectorFormat vform, 3641 LogicVRegister dst, 3642 const LogicVRegister& src1, 3643 const LogicVRegister& src2) { 3644 SimVRegister temp1, temp2; 3645 sxtl(vform, temp1, src1); 3646 sxtl(vform, temp2, src2); 3647 sub(vform, dst, temp1, temp2); 3648 return dst; 3649 } 3650 3651 3652 LogicVRegister Simulator::ssubl2(VectorFormat vform, 3653 LogicVRegister dst, 3654 const LogicVRegister& src1, 3655 const LogicVRegister& src2) { 3656 SimVRegister temp1, temp2; 3657 sxtl2(vform, temp1, src1); 3658 sxtl2(vform, temp2, src2); 3659 sub(vform, dst, temp1, temp2); 3660 return dst; 3661 } 3662 3663 3664 LogicVRegister Simulator::ssubw(VectorFormat vform, 3665 LogicVRegister dst, 3666 const LogicVRegister& src1, 3667 const LogicVRegister& src2) { 3668 SimVRegister temp; 3669 sxtl(vform, temp, src2); 3670 sub(vform, dst, src1, temp); 3671 return dst; 3672 } 3673 3674 3675 LogicVRegister Simulator::ssubw2(VectorFormat vform, 3676 LogicVRegister dst, 3677 const LogicVRegister& src1, 3678 const LogicVRegister& src2) { 3679 SimVRegister temp; 3680 sxtl2(vform, temp, src2); 3681 sub(vform, dst, src1, temp); 3682 return dst; 3683 } 3684 3685 3686 LogicVRegister Simulator::uabal(VectorFormat vform, 3687 LogicVRegister dst, 3688 const LogicVRegister& src1, 3689 const LogicVRegister& src2) { 3690 SimVRegister temp1, temp2; 3691 uxtl(vform, temp1, src1); 3692 uxtl(vform, temp2, src2); 3693 uaba(vform, dst, temp1, temp2); 3694 return dst; 3695 } 3696 3697 3698 LogicVRegister 
Simulator::uabal2(VectorFormat vform, 3699 LogicVRegister dst, 3700 const LogicVRegister& src1, 3701 const LogicVRegister& src2) { 3702 SimVRegister temp1, temp2; 3703 uxtl2(vform, temp1, src1); 3704 uxtl2(vform, temp2, src2); 3705 uaba(vform, dst, temp1, temp2); 3706 return dst; 3707 } 3708 3709 3710 LogicVRegister Simulator::sabal(VectorFormat vform, 3711 LogicVRegister dst, 3712 const LogicVRegister& src1, 3713 const LogicVRegister& src2) { 3714 SimVRegister temp1, temp2; 3715 sxtl(vform, temp1, src1); 3716 sxtl(vform, temp2, src2); 3717 saba(vform, dst, temp1, temp2); 3718 return dst; 3719 } 3720 3721 3722 LogicVRegister Simulator::sabal2(VectorFormat vform, 3723 LogicVRegister dst, 3724 const LogicVRegister& src1, 3725 const LogicVRegister& src2) { 3726 SimVRegister temp1, temp2; 3727 sxtl2(vform, temp1, src1); 3728 sxtl2(vform, temp2, src2); 3729 saba(vform, dst, temp1, temp2); 3730 return dst; 3731 } 3732 3733 3734 LogicVRegister Simulator::uabdl(VectorFormat vform, 3735 LogicVRegister dst, 3736 const LogicVRegister& src1, 3737 const LogicVRegister& src2) { 3738 SimVRegister temp1, temp2; 3739 uxtl(vform, temp1, src1); 3740 uxtl(vform, temp2, src2); 3741 absdiff(vform, dst, temp1, temp2, false); 3742 return dst; 3743 } 3744 3745 3746 LogicVRegister Simulator::uabdl2(VectorFormat vform, 3747 LogicVRegister dst, 3748 const LogicVRegister& src1, 3749 const LogicVRegister& src2) { 3750 SimVRegister temp1, temp2; 3751 uxtl2(vform, temp1, src1); 3752 uxtl2(vform, temp2, src2); 3753 absdiff(vform, dst, temp1, temp2, false); 3754 return dst; 3755 } 3756 3757 3758 LogicVRegister Simulator::sabdl(VectorFormat vform, 3759 LogicVRegister dst, 3760 const LogicVRegister& src1, 3761 const LogicVRegister& src2) { 3762 SimVRegister temp1, temp2; 3763 sxtl(vform, temp1, src1); 3764 sxtl(vform, temp2, src2); 3765 absdiff(vform, dst, temp1, temp2, true); 3766 return dst; 3767 } 3768 3769 3770 LogicVRegister Simulator::sabdl2(VectorFormat vform, 3771 LogicVRegister dst, 3772 
const LogicVRegister& src1, 3773 const LogicVRegister& src2) { 3774 SimVRegister temp1, temp2; 3775 sxtl2(vform, temp1, src1); 3776 sxtl2(vform, temp2, src2); 3777 absdiff(vform, dst, temp1, temp2, true); 3778 return dst; 3779 } 3780 3781 3782 LogicVRegister Simulator::umull(VectorFormat vform, 3783 LogicVRegister dst, 3784 const LogicVRegister& src1, 3785 const LogicVRegister& src2, 3786 bool is_2) { 3787 SimVRegister temp1, temp2; 3788 uxtl(vform, temp1, src1, is_2); 3789 uxtl(vform, temp2, src2, is_2); 3790 mul(vform, dst, temp1, temp2); 3791 return dst; 3792 } 3793 3794 3795 LogicVRegister Simulator::umull2(VectorFormat vform, 3796 LogicVRegister dst, 3797 const LogicVRegister& src1, 3798 const LogicVRegister& src2) { 3799 return umull(vform, dst, src1, src2, /* is_2 = */ true); 3800 } 3801 3802 3803 LogicVRegister Simulator::smull(VectorFormat vform, 3804 LogicVRegister dst, 3805 const LogicVRegister& src1, 3806 const LogicVRegister& src2, 3807 bool is_2) { 3808 SimVRegister temp1, temp2; 3809 sxtl(vform, temp1, src1, is_2); 3810 sxtl(vform, temp2, src2, is_2); 3811 mul(vform, dst, temp1, temp2); 3812 return dst; 3813 } 3814 3815 3816 LogicVRegister Simulator::smull2(VectorFormat vform, 3817 LogicVRegister dst, 3818 const LogicVRegister& src1, 3819 const LogicVRegister& src2) { 3820 return smull(vform, dst, src1, src2, /* is_2 = */ true); 3821 } 3822 3823 3824 LogicVRegister Simulator::umlsl(VectorFormat vform, 3825 LogicVRegister dst, 3826 const LogicVRegister& src1, 3827 const LogicVRegister& src2, 3828 bool is_2) { 3829 SimVRegister temp1, temp2; 3830 uxtl(vform, temp1, src1, is_2); 3831 uxtl(vform, temp2, src2, is_2); 3832 mls(vform, dst, dst, temp1, temp2); 3833 return dst; 3834 } 3835 3836 3837 LogicVRegister Simulator::umlsl2(VectorFormat vform, 3838 LogicVRegister dst, 3839 const LogicVRegister& src1, 3840 const LogicVRegister& src2) { 3841 return umlsl(vform, dst, src1, src2, /* is_2 = */ true); 3842 } 3843 3844 3845 LogicVRegister 
Simulator::smlsl(VectorFormat vform, 3846 LogicVRegister dst, 3847 const LogicVRegister& src1, 3848 const LogicVRegister& src2, 3849 bool is_2) { 3850 SimVRegister temp1, temp2; 3851 sxtl(vform, temp1, src1, is_2); 3852 sxtl(vform, temp2, src2, is_2); 3853 mls(vform, dst, dst, temp1, temp2); 3854 return dst; 3855 } 3856 3857 3858 LogicVRegister Simulator::smlsl2(VectorFormat vform, 3859 LogicVRegister dst, 3860 const LogicVRegister& src1, 3861 const LogicVRegister& src2) { 3862 return smlsl(vform, dst, src1, src2, /* is_2 = */ true); 3863 } 3864 3865 3866 LogicVRegister Simulator::umlal(VectorFormat vform, 3867 LogicVRegister dst, 3868 const LogicVRegister& src1, 3869 const LogicVRegister& src2, 3870 bool is_2) { 3871 SimVRegister temp1, temp2; 3872 uxtl(vform, temp1, src1, is_2); 3873 uxtl(vform, temp2, src2, is_2); 3874 mla(vform, dst, dst, temp1, temp2); 3875 return dst; 3876 } 3877 3878 3879 LogicVRegister Simulator::umlal2(VectorFormat vform, 3880 LogicVRegister dst, 3881 const LogicVRegister& src1, 3882 const LogicVRegister& src2) { 3883 return umlal(vform, dst, src1, src2, /* is_2 = */ true); 3884 } 3885 3886 3887 LogicVRegister Simulator::smlal(VectorFormat vform, 3888 LogicVRegister dst, 3889 const LogicVRegister& src1, 3890 const LogicVRegister& src2, 3891 bool is_2) { 3892 SimVRegister temp1, temp2; 3893 sxtl(vform, temp1, src1, is_2); 3894 sxtl(vform, temp2, src2, is_2); 3895 mla(vform, dst, dst, temp1, temp2); 3896 return dst; 3897 } 3898 3899 3900 LogicVRegister Simulator::smlal2(VectorFormat vform, 3901 LogicVRegister dst, 3902 const LogicVRegister& src1, 3903 const LogicVRegister& src2) { 3904 return smlal(vform, dst, src1, src2, /* is_2 = */ true); 3905 } 3906 3907 3908 LogicVRegister Simulator::sqdmlal(VectorFormat vform, 3909 LogicVRegister dst, 3910 const LogicVRegister& src1, 3911 const LogicVRegister& src2, 3912 bool is_2) { 3913 SimVRegister temp; 3914 LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2); 3915 return add(vform, 
dst, dst, product).SignedSaturate(vform); 3916 } 3917 3918 3919 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 3920 LogicVRegister dst, 3921 const LogicVRegister& src1, 3922 const LogicVRegister& src2) { 3923 return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true); 3924 } 3925 3926 3927 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 3928 LogicVRegister dst, 3929 const LogicVRegister& src1, 3930 const LogicVRegister& src2, 3931 bool is_2) { 3932 SimVRegister temp; 3933 LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2); 3934 return sub(vform, dst, dst, product).SignedSaturate(vform); 3935 } 3936 3937 3938 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 3939 LogicVRegister dst, 3940 const LogicVRegister& src1, 3941 const LogicVRegister& src2) { 3942 return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ true); 3943 } 3944 3945 3946 LogicVRegister Simulator::sqdmull(VectorFormat vform, 3947 LogicVRegister dst, 3948 const LogicVRegister& src1, 3949 const LogicVRegister& src2, 3950 bool is_2) { 3951 SimVRegister temp; 3952 LogicVRegister product = smull(vform, temp, src1, src2, is_2); 3953 return add(vform, dst, product, product).SignedSaturate(vform); 3954 } 3955 3956 3957 LogicVRegister Simulator::sqdmull2(VectorFormat vform, 3958 LogicVRegister dst, 3959 const LogicVRegister& src1, 3960 const LogicVRegister& src2) { 3961 return sqdmull(vform, dst, src1, src2, /* is_2 = */ true); 3962 } 3963 3964 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 3965 LogicVRegister dst, 3966 const LogicVRegister& src1, 3967 const LogicVRegister& src2, 3968 bool round) { 3969 int esize = LaneSizeInBitsFromFormat(vform); 3970 3971 SimVRegister temp_lo, temp_hi; 3972 3973 // Compute low and high multiplication results. 3974 mul(vform, temp_lo, src1, src2); 3975 smulh(vform, temp_hi, src1, src2); 3976 3977 // Double by shifting high half, and adding in most-significant bit of low 3978 // half. 
3979 shl(vform, temp_hi, temp_hi, 1); 3980 usra(vform, temp_hi, temp_lo, esize - 1); 3981 3982 if (round) { 3983 // Add the second (due to doubling) most-significant bit of the low half 3984 // into the result. 3985 shl(vform, temp_lo, temp_lo, 1); 3986 usra(vform, temp_hi, temp_lo, esize - 1); 3987 } 3988 3989 SimPRegister not_sat; 3990 LogicPRegister ptemp(not_sat); 3991 dst.ClearForWrite(vform); 3992 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3993 // Saturation only occurs when src1 = src2 = minimum representable value. 3994 // Check this as a special case. 3995 ptemp.SetActive(vform, i, true); 3996 if ((src1.Int(vform, i) == MinIntFromFormat(vform)) && 3997 (src2.Int(vform, i) == MinIntFromFormat(vform))) { 3998 ptemp.SetActive(vform, i, false); 3999 } 4000 dst.SetInt(vform, i, MaxIntFromFormat(vform)); 4001 } 4002 4003 mov_merging(vform, dst, not_sat, temp_hi); 4004 return dst; 4005 } 4006 4007 4008 LogicVRegister Simulator::dot(VectorFormat vform, 4009 LogicVRegister dst, 4010 const LogicVRegister& src1, 4011 const LogicVRegister& src2, 4012 bool is_src1_signed, 4013 bool is_src2_signed) { 4014 VectorFormat quarter_vform = 4015 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform)); 4016 4017 dst.ClearForWrite(vform); 4018 for (int e = 0; e < LaneCountFromFormat(vform); e++) { 4019 uint64_t result = 0; 4020 int64_t element1, element2; 4021 for (int i = 0; i < 4; i++) { 4022 int index = 4 * e + i; 4023 if (is_src1_signed) { 4024 element1 = src1.Int(quarter_vform, index); 4025 } else { 4026 element1 = src1.Uint(quarter_vform, index); 4027 } 4028 if (is_src2_signed) { 4029 element2 = src2.Int(quarter_vform, index); 4030 } else { 4031 element2 = src2.Uint(quarter_vform, index); 4032 } 4033 result += element1 * element2; 4034 } 4035 dst.SetUint(vform, e, result + dst.Uint(vform, e)); 4036 } 4037 return dst; 4038 } 4039 4040 4041 LogicVRegister Simulator::sdot(VectorFormat vform, 4042 LogicVRegister dst, 4043 const LogicVRegister& 
src1, 4044 const LogicVRegister& src2) { 4045 return dot(vform, dst, src1, src2, true, true); 4046 } 4047 4048 4049 LogicVRegister Simulator::udot(VectorFormat vform, 4050 LogicVRegister dst, 4051 const LogicVRegister& src1, 4052 const LogicVRegister& src2) { 4053 return dot(vform, dst, src1, src2, false, false); 4054 } 4055 4056 LogicVRegister Simulator::usdot(VectorFormat vform, 4057 LogicVRegister dst, 4058 const LogicVRegister& src1, 4059 const LogicVRegister& src2) { 4060 return dot(vform, dst, src1, src2, false, true); 4061 } 4062 4063 LogicVRegister Simulator::cdot(VectorFormat vform, 4064 LogicVRegister dst, 4065 const LogicVRegister& acc, 4066 const LogicVRegister& src1, 4067 const LogicVRegister& src2, 4068 int rot) { 4069 VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270)); 4070 VectorFormat quarter_vform = 4071 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform)); 4072 4073 int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1; 4074 int sel_b = 1 - sel_a; 4075 int sub_i = ((rot == 90) || (rot == 180)) ? 
1 : -1; 4076 4077 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4078 int64_t result = acc.Int(vform, i); 4079 for (int j = 0; j < 2; j++) { 4080 int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0); 4081 int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1); 4082 int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a); 4083 int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b); 4084 result += (r1 * r2) + (sub_i * i1 * i2); 4085 } 4086 dst.SetInt(vform, i, result); 4087 } 4088 return dst; 4089 } 4090 4091 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform, 4092 LogicVRegister dst, 4093 const LogicVRegister& srca, 4094 const LogicVRegister& src1, 4095 const LogicVRegister& src2, 4096 int rot) { 4097 SimVRegister src1_a, src1_b; 4098 SimVRegister src2_a, src2_b; 4099 SimVRegister srca_i, srca_r; 4100 SimVRegister zero, temp; 4101 zero.Clear(); 4102 4103 if ((rot == 0) || (rot == 180)) { 4104 uzp1(vform, src1_a, src1, zero); 4105 uzp1(vform, src2_a, src2, zero); 4106 uzp2(vform, src2_b, src2, zero); 4107 } else { 4108 uzp2(vform, src1_a, src1, zero); 4109 uzp2(vform, src2_a, src2, zero); 4110 uzp1(vform, src2_b, src2, zero); 4111 } 4112 4113 uzp1(vform, srca_r, srca, zero); 4114 uzp2(vform, srca_i, srca, zero); 4115 4116 bool sub_r = (rot == 90) || (rot == 180); 4117 bool sub_i = (rot == 180) || (rot == 270); 4118 4119 const bool round = true; 4120 sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r); 4121 sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i); 4122 zip1(vform, dst, srca_r, srca_i); 4123 return dst; 4124 } 4125 4126 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform, 4127 LogicVRegister dst, 4128 const LogicVRegister& srca, 4129 const LogicVRegister& src1, 4130 const LogicVRegister& src2, 4131 int index, 4132 int rot) { 4133 SimVRegister temp; 4134 dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index); 4135 return sqrdcmlah(vform, dst, srca, src1, temp, rot); 4136 } 4137 4138 
LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform, 4139 LogicVRegister dst, 4140 const LogicVRegister& src1, 4141 const LogicVRegister& src2, 4142 bool round, 4143 bool sub_op) { 4144 // 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow. 4145 // To avoid this, we use: 4146 // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1) 4147 // which is same as: 4148 // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize. 4149 4150 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4151 int esize = kDRegSize; 4152 vixl_uint128_t round_const, accum; 4153 round_const.first = 0; 4154 if (round) { 4155 round_const.second = UINT64_C(1) << (esize - 2); 4156 } else { 4157 round_const.second = 0; 4158 } 4159 4160 dst.ClearForWrite(vform); 4161 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4162 // Shift the whole value left by `esize - 1` bits. 4163 accum.first = dst.Int(vform, i) >> 1; 4164 accum.second = dst.Int(vform, i) << (esize - 1); 4165 4166 vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i)); 4167 4168 if (sub_op) { 4169 product = Neg128(product); 4170 } 4171 accum = Add128(accum, product); 4172 4173 // Perform rounding. 4174 accum = Add128(accum, round_const); 4175 4176 // Arithmetic shift the whole value right by `esize - 1` bits. 4177 accum.second = (accum.first << 1) | (accum.second >> (esize - 1)); 4178 accum.first = -(accum.first >> (esize - 1)); 4179 4180 // Perform saturation. 4181 bool is_pos = (accum.first == 0) ? 
true : false; 4182 if (is_pos && 4183 (accum.second > static_cast<uint64_t>(MaxIntFromFormat(vform)))) { 4184 accum.second = MaxIntFromFormat(vform); 4185 } else if (!is_pos && (accum.second < 4186 static_cast<uint64_t>(MinIntFromFormat(vform)))) { 4187 accum.second = MinIntFromFormat(vform); 4188 } 4189 4190 dst.SetInt(vform, i, accum.second); 4191 } 4192 4193 return dst; 4194 } 4195 4196 LogicVRegister Simulator::sqrdmlash(VectorFormat vform, 4197 LogicVRegister dst, 4198 const LogicVRegister& src1, 4199 const LogicVRegister& src2, 4200 bool round, 4201 bool sub_op) { 4202 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. 4203 // To avoid this, we use: 4204 // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1) 4205 // which is same as: 4206 // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize. 4207 4208 if (vform == kFormatVnD) { 4209 return sqrdmlash_d(vform, dst, src1, src2, round, sub_op); 4210 } 4211 4212 int esize = LaneSizeInBitsFromFormat(vform); 4213 int round_const = round ? 
(1 << (esize - 2)) : 0; 4214 int64_t accum; 4215 4216 dst.ClearForWrite(vform); 4217 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4218 accum = dst.Int(vform, i) << (esize - 1); 4219 if (sub_op) { 4220 accum -= src1.Int(vform, i) * src2.Int(vform, i); 4221 } else { 4222 accum += src1.Int(vform, i) * src2.Int(vform, i); 4223 } 4224 accum += round_const; 4225 accum = accum >> (esize - 1); 4226 4227 if (accum > MaxIntFromFormat(vform)) { 4228 accum = MaxIntFromFormat(vform); 4229 } else if (accum < MinIntFromFormat(vform)) { 4230 accum = MinIntFromFormat(vform); 4231 } 4232 dst.SetInt(vform, i, accum); 4233 } 4234 return dst; 4235 } 4236 4237 4238 LogicVRegister Simulator::sqrdmlah(VectorFormat vform, 4239 LogicVRegister dst, 4240 const LogicVRegister& src1, 4241 const LogicVRegister& src2, 4242 bool round) { 4243 return sqrdmlash(vform, dst, src1, src2, round, false); 4244 } 4245 4246 4247 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform, 4248 LogicVRegister dst, 4249 const LogicVRegister& src1, 4250 const LogicVRegister& src2, 4251 bool round) { 4252 return sqrdmlash(vform, dst, src1, src2, round, true); 4253 } 4254 4255 4256 LogicVRegister Simulator::sqdmulh(VectorFormat vform, 4257 LogicVRegister dst, 4258 const LogicVRegister& src1, 4259 const LogicVRegister& src2) { 4260 return sqrdmulh(vform, dst, src1, src2, false); 4261 } 4262 4263 4264 LogicVRegister Simulator::addhn(VectorFormat vform, 4265 LogicVRegister dst, 4266 const LogicVRegister& src1, 4267 const LogicVRegister& src2) { 4268 SimVRegister temp; 4269 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 4270 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 4271 return dst; 4272 } 4273 4274 4275 LogicVRegister Simulator::addhn2(VectorFormat vform, 4276 LogicVRegister dst, 4277 const LogicVRegister& src1, 4278 const LogicVRegister& src2) { 4279 SimVRegister temp; 4280 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 4281 shrn2(vform, dst, temp, 
LaneSizeInBitsFromFormat(vform)); 4282 return dst; 4283 } 4284 4285 4286 LogicVRegister Simulator::raddhn(VectorFormat vform, 4287 LogicVRegister dst, 4288 const LogicVRegister& src1, 4289 const LogicVRegister& src2) { 4290 SimVRegister temp; 4291 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 4292 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 4293 return dst; 4294 } 4295 4296 4297 LogicVRegister Simulator::raddhn2(VectorFormat vform, 4298 LogicVRegister dst, 4299 const LogicVRegister& src1, 4300 const LogicVRegister& src2) { 4301 SimVRegister temp; 4302 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 4303 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 4304 return dst; 4305 } 4306 4307 4308 LogicVRegister Simulator::subhn(VectorFormat vform, 4309 LogicVRegister dst, 4310 const LogicVRegister& src1, 4311 const LogicVRegister& src2) { 4312 SimVRegister temp; 4313 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 4314 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 4315 return dst; 4316 } 4317 4318 4319 LogicVRegister Simulator::subhn2(VectorFormat vform, 4320 LogicVRegister dst, 4321 const LogicVRegister& src1, 4322 const LogicVRegister& src2) { 4323 SimVRegister temp; 4324 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 4325 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 4326 return dst; 4327 } 4328 4329 4330 LogicVRegister Simulator::rsubhn(VectorFormat vform, 4331 LogicVRegister dst, 4332 const LogicVRegister& src1, 4333 const LogicVRegister& src2) { 4334 SimVRegister temp; 4335 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 4336 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 4337 return dst; 4338 } 4339 4340 4341 LogicVRegister Simulator::rsubhn2(VectorFormat vform, 4342 LogicVRegister dst, 4343 const LogicVRegister& src1, 4344 const LogicVRegister& src2) { 4345 SimVRegister temp; 4346 
sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 4347 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 4348 return dst; 4349 } 4350 4351 4352 LogicVRegister Simulator::trn1(VectorFormat vform, 4353 LogicVRegister dst, 4354 const LogicVRegister& src1, 4355 const LogicVRegister& src2) { 4356 uint64_t result[kZRegMaxSizeInBytes] = {}; 4357 int lane_count = LaneCountFromFormat(vform); 4358 int pairs = lane_count / 2; 4359 for (int i = 0; i < pairs; ++i) { 4360 result[2 * i] = src1.Uint(vform, 2 * i); 4361 result[(2 * i) + 1] = src2.Uint(vform, 2 * i); 4362 } 4363 4364 dst.ClearForWrite(vform); 4365 for (int i = 0; i < lane_count; ++i) { 4366 dst.SetUint(vform, i, result[i]); 4367 } 4368 return dst; 4369 } 4370 4371 4372 LogicVRegister Simulator::trn2(VectorFormat vform, 4373 LogicVRegister dst, 4374 const LogicVRegister& src1, 4375 const LogicVRegister& src2) { 4376 uint64_t result[kZRegMaxSizeInBytes] = {}; 4377 int lane_count = LaneCountFromFormat(vform); 4378 int pairs = lane_count / 2; 4379 for (int i = 0; i < pairs; ++i) { 4380 result[2 * i] = src1.Uint(vform, (2 * i) + 1); 4381 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); 4382 } 4383 4384 dst.ClearForWrite(vform); 4385 for (int i = 0; i < lane_count; ++i) { 4386 dst.SetUint(vform, i, result[i]); 4387 } 4388 return dst; 4389 } 4390 4391 4392 LogicVRegister Simulator::zip1(VectorFormat vform, 4393 LogicVRegister dst, 4394 const LogicVRegister& src1, 4395 const LogicVRegister& src2) { 4396 uint64_t result[kZRegMaxSizeInBytes] = {}; 4397 int lane_count = LaneCountFromFormat(vform); 4398 int pairs = lane_count / 2; 4399 for (int i = 0; i < pairs; ++i) { 4400 result[2 * i] = src1.Uint(vform, i); 4401 result[(2 * i) + 1] = src2.Uint(vform, i); 4402 } 4403 4404 dst.ClearForWrite(vform); 4405 for (int i = 0; i < lane_count; ++i) { 4406 dst.SetUint(vform, i, result[i]); 4407 } 4408 return dst; 4409 } 4410 4411 4412 LogicVRegister Simulator::zip2(VectorFormat vform, 4413 
LogicVRegister dst, 4414 const LogicVRegister& src1, 4415 const LogicVRegister& src2) { 4416 uint64_t result[kZRegMaxSizeInBytes] = {}; 4417 int lane_count = LaneCountFromFormat(vform); 4418 int pairs = lane_count / 2; 4419 for (int i = 0; i < pairs; ++i) { 4420 result[2 * i] = src1.Uint(vform, pairs + i); 4421 result[(2 * i) + 1] = src2.Uint(vform, pairs + i); 4422 } 4423 4424 dst.ClearForWrite(vform); 4425 for (int i = 0; i < lane_count; ++i) { 4426 dst.SetUint(vform, i, result[i]); 4427 } 4428 return dst; 4429 } 4430 4431 4432 LogicVRegister Simulator::uzp1(VectorFormat vform, 4433 LogicVRegister dst, 4434 const LogicVRegister& src1, 4435 const LogicVRegister& src2) { 4436 uint64_t result[kZRegMaxSizeInBytes * 2]; 4437 int lane_count = LaneCountFromFormat(vform); 4438 for (int i = 0; i < lane_count; ++i) { 4439 result[i] = src1.Uint(vform, i); 4440 result[lane_count + i] = src2.Uint(vform, i); 4441 } 4442 4443 dst.ClearForWrite(vform); 4444 for (int i = 0; i < lane_count; ++i) { 4445 dst.SetUint(vform, i, result[2 * i]); 4446 } 4447 return dst; 4448 } 4449 4450 4451 LogicVRegister Simulator::uzp2(VectorFormat vform, 4452 LogicVRegister dst, 4453 const LogicVRegister& src1, 4454 const LogicVRegister& src2) { 4455 uint64_t result[kZRegMaxSizeInBytes * 2]; 4456 int lane_count = LaneCountFromFormat(vform); 4457 for (int i = 0; i < lane_count; ++i) { 4458 result[i] = src1.Uint(vform, i); 4459 result[lane_count + i] = src2.Uint(vform, i); 4460 } 4461 4462 dst.ClearForWrite(vform); 4463 for (int i = 0; i < lane_count; ++i) { 4464 dst.SetUint(vform, i, result[(2 * i) + 1]); 4465 } 4466 return dst; 4467 } 4468 4469 LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform, 4470 LogicVRegister dst, 4471 const LogicVRegister& src) { 4472 // Interleave the top and bottom half of a vector, ie. for a vector: 4473 // 4474 // [ ... | F | D | B | ... 
| E | C | A ] 4475 // 4476 // where B is the first element in the top half of the vector, produce a 4477 // result vector: 4478 // 4479 // [ ... | ... | F | E | D | C | B | A ] 4480 4481 uint64_t result[kZRegMaxSizeInBytes] = {}; 4482 int lane_count = LaneCountFromFormat(vform); 4483 for (int i = 0; i < lane_count; i += 2) { 4484 result[i] = src.Uint(vform, i / 2); 4485 result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2)); 4486 } 4487 dst.SetUintArray(vform, result); 4488 return dst; 4489 } 4490 4491 template <typename T> 4492 T Simulator::FPNeg(T op) { 4493 return -op; 4494 } 4495 4496 template <typename T> 4497 T Simulator::FPAdd(T op1, T op2) { 4498 T result = FPProcessNaNs(op1, op2); 4499 if (IsNaN(result)) { 4500 return result; 4501 } 4502 4503 if (IsInf(op1) && IsInf(op2) && (op1 != op2)) { 4504 // inf + -inf returns the default NaN. 4505 FPProcessException(); 4506 return FPDefaultNaN<T>(); 4507 } else { 4508 // Other cases should be handled by standard arithmetic. 4509 return op1 + op2; 4510 } 4511 } 4512 4513 4514 template <typename T> 4515 T Simulator::FPSub(T op1, T op2) { 4516 // NaNs should be handled elsewhere. 4517 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2)); 4518 4519 if (IsInf(op1) && IsInf(op2) && (op1 == op2)) { 4520 // inf - inf returns the default NaN. 4521 FPProcessException(); 4522 return FPDefaultNaN<T>(); 4523 } else { 4524 // Other cases should be handled by standard arithmetic. 4525 return op1 - op2; 4526 } 4527 } 4528 4529 template <typename T> 4530 T Simulator::FPMulNaNs(T op1, T op2) { 4531 T result = FPProcessNaNs(op1, op2); 4532 return IsNaN(result) ? result : FPMul(op1, op2); 4533 } 4534 4535 template <typename T> 4536 T Simulator::FPMul(T op1, T op2) { 4537 // NaNs should be handled elsewhere. 4538 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2)); 4539 4540 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) { 4541 // inf * 0.0 returns the default NaN. 
4542 FPProcessException(); 4543 return FPDefaultNaN<T>(); 4544 } else { 4545 // Other cases should be handled by standard arithmetic. 4546 return op1 * op2; 4547 } 4548 } 4549 4550 4551 template <typename T> 4552 T Simulator::FPMulx(T op1, T op2) { 4553 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) { 4554 // inf * 0.0 returns +/-2.0. 4555 T two = 2.0; 4556 return copysign(1.0, op1) * copysign(1.0, op2) * two; 4557 } 4558 return FPMul(op1, op2); 4559 } 4560 4561 4562 template <typename T> 4563 T Simulator::FPMulAdd(T a, T op1, T op2) { 4564 T result = FPProcessNaNs3(a, op1, op2); 4565 4566 T sign_a = copysign(1.0, a); 4567 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); 4568 bool isinf_prod = IsInf(op1) || IsInf(op2); 4569 bool operation_generates_nan = 4570 (IsInf(op1) && (op2 == 0.0)) || // inf * 0.0 4571 (IsInf(op2) && (op1 == 0.0)) || // 0.0 * inf 4572 (IsInf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf 4573 4574 if (IsNaN(result)) { 4575 // Generated NaNs override quiet NaNs propagated from a. 4576 if (operation_generates_nan && IsQuietNaN(a)) { 4577 FPProcessException(); 4578 return FPDefaultNaN<T>(); 4579 } else { 4580 return result; 4581 } 4582 } 4583 4584 // If the operation would produce a NaN, return the default NaN. 4585 if (operation_generates_nan) { 4586 FPProcessException(); 4587 return FPDefaultNaN<T>(); 4588 } 4589 4590 // Work around broken fma implementations for exact zero results: The sign of 4591 // exact 0.0 results is positive unless both a and op1 * op2 are negative. 4592 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { 4593 return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0; 4594 } 4595 4596 result = FusedMultiplyAdd(op1, op2, a); 4597 VIXL_ASSERT(!IsNaN(result)); 4598 4599 // Work around broken fma implementations for rounded zero results: If a is 4600 // 0.0, the sign of the result is the sign of op1 * op2 before rounding. 
4601 if ((a == 0.0) && (result == 0.0)) { 4602 return copysign(0.0, sign_prod); 4603 } 4604 4605 return result; 4606 } 4607 4608 4609 template <typename T> 4610 T Simulator::FPDiv(T op1, T op2) { 4611 // NaNs should be handled elsewhere. 4612 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2)); 4613 4614 if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { 4615 // inf / inf and 0.0 / 0.0 return the default NaN. 4616 FPProcessException(); 4617 return FPDefaultNaN<T>(); 4618 } else { 4619 if (op2 == 0.0) { 4620 FPProcessException(); 4621 if (!IsNaN(op1)) { 4622 double op1_sign = copysign(1.0, op1); 4623 double op2_sign = copysign(1.0, op2); 4624 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity); 4625 } 4626 } 4627 4628 // Other cases should be handled by standard arithmetic. 4629 return op1 / op2; 4630 } 4631 } 4632 4633 4634 template <typename T> 4635 T Simulator::FPSqrt(T op) { 4636 if (IsNaN(op)) { 4637 return FPProcessNaN(op); 4638 } else if (op < T(0.0)) { 4639 FPProcessException(); 4640 return FPDefaultNaN<T>(); 4641 } else { 4642 return sqrt(op); 4643 } 4644 } 4645 4646 4647 template <typename T> 4648 T Simulator::FPMax(T a, T b) { 4649 T result = FPProcessNaNs(a, b); 4650 if (IsNaN(result)) return result; 4651 4652 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { 4653 // a and b are zero, and the sign differs: return +0.0. 4654 return 0.0; 4655 } else { 4656 return (a > b) ? a : b; 4657 } 4658 } 4659 4660 4661 template <typename T> 4662 T Simulator::FPMaxNM(T a, T b) { 4663 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 4664 a = kFP64NegativeInfinity; 4665 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 4666 b = kFP64NegativeInfinity; 4667 } 4668 4669 T result = FPProcessNaNs(a, b); 4670 return IsNaN(result) ? 
result : FPMax(a, b); 4671 } 4672 4673 4674 template <typename T> 4675 T Simulator::FPMin(T a, T b) { 4676 T result = FPProcessNaNs(a, b); 4677 if (IsNaN(result)) return result; 4678 4679 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { 4680 // a and b are zero, and the sign differs: return -0.0. 4681 return -0.0; 4682 } else { 4683 return (a < b) ? a : b; 4684 } 4685 } 4686 4687 4688 template <typename T> 4689 T Simulator::FPMinNM(T a, T b) { 4690 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 4691 a = kFP64PositiveInfinity; 4692 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 4693 b = kFP64PositiveInfinity; 4694 } 4695 4696 T result = FPProcessNaNs(a, b); 4697 return IsNaN(result) ? result : FPMin(a, b); 4698 } 4699 4700 4701 template <typename T> 4702 T Simulator::FPRecipStepFused(T op1, T op2) { 4703 const T two = 2.0; 4704 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) { 4705 return two; 4706 } else if (IsInf(op1) || IsInf(op2)) { 4707 // Return +inf if signs match, otherwise -inf. 4708 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 4709 : kFP64NegativeInfinity; 4710 } else { 4711 return FusedMultiplyAdd(op1, op2, two); 4712 } 4713 } 4714 4715 template <typename T> 4716 bool IsNormal(T value) { 4717 return std::isnormal(value); 4718 } 4719 4720 template <> 4721 bool IsNormal(SimFloat16 value) { 4722 uint16_t rawbits = Float16ToRawbits(value); 4723 uint16_t exp_mask = 0x7c00; 4724 // Check that the exponent is neither all zeroes or all ones. 4725 return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0); 4726 } 4727 4728 4729 template <typename T> 4730 T Simulator::FPRSqrtStepFused(T op1, T op2) { 4731 const T one_point_five = 1.5; 4732 const T two = 2.0; 4733 4734 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) { 4735 return one_point_five; 4736 } else if (IsInf(op1) || IsInf(op2)) { 4737 // Return +inf if signs match, otherwise -inf. 4738 return ((op1 >= 0.0) == (op2 >= 0.0)) ? 
kFP64PositiveInfinity 4739 : kFP64NegativeInfinity; 4740 } else { 4741 // The multiply-add-halve operation must be fully fused, so avoid interim 4742 // rounding by checking which operand can be losslessly divided by two 4743 // before doing the multiply-add. 4744 if (IsNormal(op1 / two)) { 4745 return FusedMultiplyAdd(op1 / two, op2, one_point_five); 4746 } else if (IsNormal(op2 / two)) { 4747 return FusedMultiplyAdd(op1, op2 / two, one_point_five); 4748 } else { 4749 // Neither operand is normal after halving: the result is dominated by 4750 // the addition term, so just return that. 4751 return one_point_five; 4752 } 4753 } 4754 } 4755 4756 int32_t Simulator::FPToFixedJS(double value) { 4757 // The Z-flag is set when the conversion from double precision floating-point 4758 // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN, 4759 // outside the bounds of a 32-bit integer, or isn't an exact integer then the 4760 // Z-flag is unset. 4761 int Z = 1; 4762 int32_t result; 4763 4764 if ((value == 0.0) || (value == kFP64PositiveInfinity) || 4765 (value == kFP64NegativeInfinity)) { 4766 // +/- zero and infinity all return zero, however -0 and +/- Infinity also 4767 // unset the Z-flag. 4768 result = 0.0; 4769 if ((value != 0.0) || std::signbit(value)) { 4770 Z = 0; 4771 } 4772 } else if (std::isnan(value)) { 4773 // NaN values unset the Z-flag and set the result to 0. 4774 FPProcessNaN(value); 4775 result = 0; 4776 Z = 0; 4777 } else { 4778 // All other values are converted to an integer representation, rounded 4779 // toward zero. 4780 double int_result = std::floor(value); 4781 double error = value - int_result; 4782 4783 if ((error != 0.0) && (int_result < 0.0)) { 4784 int_result++; 4785 } 4786 4787 // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost 4788 // write a one-liner with std::round, but the behaviour on ties is incorrect 4789 // for our purposes. 
4790 double mod_const = static_cast<double>(UINT64_C(1) << 32); 4791 double mod_error = 4792 (int_result / mod_const) - std::floor(int_result / mod_const); 4793 double constrained; 4794 if (mod_error == 0.5) { 4795 constrained = INT32_MIN; 4796 } else { 4797 constrained = int_result - mod_const * round(int_result / mod_const); 4798 } 4799 4800 VIXL_ASSERT(std::floor(constrained) == constrained); 4801 VIXL_ASSERT(constrained >= INT32_MIN); 4802 VIXL_ASSERT(constrained <= INT32_MAX); 4803 4804 // Take the bottom 32 bits of the result as a 32-bit integer. 4805 result = static_cast<int32_t>(constrained); 4806 4807 if ((int_result < INT32_MIN) || (int_result > INT32_MAX) || 4808 (error != 0.0)) { 4809 // If the integer result is out of range or the conversion isn't exact, 4810 // take exception and unset the Z-flag. 4811 FPProcessException(); 4812 Z = 0; 4813 } 4814 } 4815 4816 ReadNzcv().SetN(0); 4817 ReadNzcv().SetZ(Z); 4818 ReadNzcv().SetC(0); 4819 ReadNzcv().SetV(0); 4820 4821 return result; 4822 } 4823 4824 double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) { 4825 VIXL_ASSERT((value != kFP64PositiveInfinity) && 4826 (value != kFP64NegativeInfinity)); 4827 VIXL_ASSERT(!IsNaN(value)); 4828 4829 double int_result = std::floor(value); 4830 double error = value - int_result; 4831 switch (round_mode) { 4832 case FPTieAway: { 4833 // Take care of correctly handling the range ]-0.5, -0.0], which must 4834 // yield -0.0. 4835 if ((-0.5 < value) && (value < 0.0)) { 4836 int_result = -0.0; 4837 4838 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { 4839 // If the error is greater than 0.5, or is equal to 0.5 and the integer 4840 // result is positive, round up. 4841 int_result++; 4842 } 4843 break; 4844 } 4845 case FPTieEven: { 4846 // Take care of correctly handling the range [-0.5, -0.0], which must 4847 // yield -0.0. 
4848 if ((-0.5 <= value) && (value < 0.0)) { 4849 int_result = -0.0; 4850 4851 // If the error is greater than 0.5, or is equal to 0.5 and the integer 4852 // result is odd, round up. 4853 } else if ((error > 0.5) || 4854 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { 4855 int_result++; 4856 } 4857 break; 4858 } 4859 case FPZero: { 4860 // If value>0 then we take floor(value) 4861 // otherwise, ceil(value). 4862 if (value < 0) { 4863 int_result = ceil(value); 4864 } 4865 break; 4866 } 4867 case FPNegativeInfinity: { 4868 // We always use floor(value). 4869 break; 4870 } 4871 case FPPositiveInfinity: { 4872 // Take care of correctly handling the range ]-1.0, -0.0], which must 4873 // yield -0.0. 4874 if ((-1.0 < value) && (value < 0.0)) { 4875 int_result = -0.0; 4876 4877 // If the error is non-zero, round up. 4878 } else if (error > 0.0) { 4879 int_result++; 4880 } 4881 break; 4882 } 4883 default: 4884 VIXL_UNIMPLEMENTED(); 4885 } 4886 return int_result; 4887 } 4888 4889 double Simulator::FPRoundInt(double value, FPRounding round_mode) { 4890 if ((value == 0.0) || (value == kFP64PositiveInfinity) || 4891 (value == kFP64NegativeInfinity)) { 4892 return value; 4893 } else if (IsNaN(value)) { 4894 return FPProcessNaN(value); 4895 } 4896 return FPRoundIntCommon(value, round_mode); 4897 } 4898 4899 double Simulator::FPRoundInt(double value, 4900 FPRounding round_mode, 4901 FrintMode frint_mode) { 4902 if (frint_mode == kFrintToInteger) { 4903 return FPRoundInt(value, round_mode); 4904 } 4905 4906 VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64)); 4907 4908 if (value == 0.0) { 4909 return value; 4910 } 4911 4912 if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) || 4913 IsNaN(value)) { 4914 if (frint_mode == kFrintToInt32) { 4915 return INT32_MIN; 4916 } else { 4917 return INT64_MIN; 4918 } 4919 } 4920 4921 double result = FPRoundIntCommon(value, round_mode); 4922 4923 // We want to compare `result > INT64_MAX` 
below, but INT64_MAX isn't exactly 4924 // representable as a double, and is rounded to (INT64_MAX + 1) when 4925 // converted. To avoid this, we compare `result >= int64_max_plus_one` 4926 // instead; this is safe because `result` is known to be integral, and 4927 // `int64_max_plus_one` is exactly representable as a double. 4928 constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1; 4929 VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>( 4930 int64_max_plus_one)) == int64_max_plus_one); 4931 4932 if (frint_mode == kFrintToInt32) { 4933 if ((result > INT32_MAX) || (result < INT32_MIN)) { 4934 return INT32_MIN; 4935 } 4936 } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) { 4937 return INT64_MIN; 4938 } 4939 4940 return result; 4941 } 4942 4943 int16_t Simulator::FPToInt16(double value, FPRounding rmode) { 4944 value = FPRoundInt(value, rmode); 4945 if (value >= kHMaxInt) { 4946 return kHMaxInt; 4947 } else if (value < kHMinInt) { 4948 return kHMinInt; 4949 } 4950 return IsNaN(value) ? 0 : static_cast<int16_t>(value); 4951 } 4952 4953 4954 int32_t Simulator::FPToInt32(double value, FPRounding rmode) { 4955 value = FPRoundInt(value, rmode); 4956 if (value >= kWMaxInt) { 4957 return kWMaxInt; 4958 } else if (value < kWMinInt) { 4959 return kWMinInt; 4960 } 4961 return IsNaN(value) ? 0 : static_cast<int32_t>(value); 4962 } 4963 4964 4965 int64_t Simulator::FPToInt64(double value, FPRounding rmode) { 4966 value = FPRoundInt(value, rmode); 4967 // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues 4968 // as a result of kMaxInt not being representable as a double. 4969 if (value >= 9223372036854775808.) { 4970 return kXMaxInt; 4971 } else if (value < kXMinInt) { 4972 return kXMinInt; 4973 } 4974 return IsNaN(value) ? 
0 : static_cast<int64_t>(value); 4975 } 4976 4977 4978 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) { 4979 value = FPRoundInt(value, rmode); 4980 if (value >= kHMaxUInt) { 4981 return kHMaxUInt; 4982 } else if (value < 0.0) { 4983 return 0; 4984 } 4985 return IsNaN(value) ? 0 : static_cast<uint16_t>(value); 4986 } 4987 4988 4989 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { 4990 value = FPRoundInt(value, rmode); 4991 if (value >= kWMaxUInt) { 4992 return kWMaxUInt; 4993 } else if (value < 0.0) { 4994 return 0; 4995 } 4996 return IsNaN(value) ? 0 : static_cast<uint32_t>(value); 4997 } 4998 4999 5000 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { 5001 value = FPRoundInt(value, rmode); 5002 // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues 5003 // as a result of kMaxUInt not being representable as a double. 5004 if (value >= 18446744073709551616.) { 5005 return kXMaxUInt; 5006 } else if (value < 0.0) { 5007 return 0; 5008 } 5009 return IsNaN(value) ? 
0 : static_cast<uint64_t>(value); 5010 } 5011 5012 5013 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ 5014 template <typename T> \ 5015 LogicVRegister Simulator::FN(VectorFormat vform, \ 5016 LogicVRegister dst, \ 5017 const LogicVRegister& src1, \ 5018 const LogicVRegister& src2) { \ 5019 dst.ClearForWrite(vform); \ 5020 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ 5021 T op1 = src1.Float<T>(i); \ 5022 T op2 = src2.Float<T>(i); \ 5023 T result; \ 5024 if (PROCNAN) { \ 5025 result = FPProcessNaNs(op1, op2); \ 5026 if (!IsNaN(result)) { \ 5027 result = OP(op1, op2); \ 5028 } \ 5029 } else { \ 5030 result = OP(op1, op2); \ 5031 } \ 5032 dst.SetFloat(vform, i, result); \ 5033 } \ 5034 return dst; \ 5035 } \ 5036 \ 5037 LogicVRegister Simulator::FN(VectorFormat vform, \ 5038 LogicVRegister dst, \ 5039 const LogicVRegister& src1, \ 5040 const LogicVRegister& src2) { \ 5041 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { \ 5042 FN<SimFloat16>(vform, dst, src1, src2); \ 5043 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \ 5044 FN<float>(vform, dst, src1, src2); \ 5045 } else { \ 5046 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \ 5047 FN<double>(vform, dst, src1, src2); \ 5048 } \ 5049 return dst; \ 5050 } 5051 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) 5052 #undef DEFINE_NEON_FP_VECTOR_OP 5053 5054 5055 LogicVRegister Simulator::fnmul(VectorFormat vform, 5056 LogicVRegister dst, 5057 const LogicVRegister& src1, 5058 const LogicVRegister& src2) { 5059 SimVRegister temp; 5060 LogicVRegister product = fmul(vform, temp, src1, src2); 5061 return fneg(vform, dst, product); 5062 } 5063 5064 5065 template <typename T> 5066 LogicVRegister Simulator::frecps(VectorFormat vform, 5067 LogicVRegister dst, 5068 const LogicVRegister& src1, 5069 const LogicVRegister& src2) { 5070 dst.ClearForWrite(vform); 5071 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5072 T op1 = -src1.Float<T>(i); 5073 T op2 = src2.Float<T>(i); 5074 T 
result = FPProcessNaNs(op1, op2); 5075 dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2)); 5076 } 5077 return dst; 5078 } 5079 5080 5081 LogicVRegister Simulator::frecps(VectorFormat vform, 5082 LogicVRegister dst, 5083 const LogicVRegister& src1, 5084 const LogicVRegister& src2) { 5085 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5086 frecps<SimFloat16>(vform, dst, src1, src2); 5087 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5088 frecps<float>(vform, dst, src1, src2); 5089 } else { 5090 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5091 frecps<double>(vform, dst, src1, src2); 5092 } 5093 return dst; 5094 } 5095 5096 5097 template <typename T> 5098 LogicVRegister Simulator::frsqrts(VectorFormat vform, 5099 LogicVRegister dst, 5100 const LogicVRegister& src1, 5101 const LogicVRegister& src2) { 5102 dst.ClearForWrite(vform); 5103 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5104 T op1 = -src1.Float<T>(i); 5105 T op2 = src2.Float<T>(i); 5106 T result = FPProcessNaNs(op1, op2); 5107 dst.SetFloat(vform, i, IsNaN(result) ? 
result : FPRSqrtStepFused(op1, op2)); 5108 } 5109 return dst; 5110 } 5111 5112 5113 LogicVRegister Simulator::frsqrts(VectorFormat vform, 5114 LogicVRegister dst, 5115 const LogicVRegister& src1, 5116 const LogicVRegister& src2) { 5117 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5118 frsqrts<SimFloat16>(vform, dst, src1, src2); 5119 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5120 frsqrts<float>(vform, dst, src1, src2); 5121 } else { 5122 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5123 frsqrts<double>(vform, dst, src1, src2); 5124 } 5125 return dst; 5126 } 5127 5128 5129 template <typename T> 5130 LogicVRegister Simulator::fcmp(VectorFormat vform, 5131 LogicVRegister dst, 5132 const LogicVRegister& src1, 5133 const LogicVRegister& src2, 5134 Condition cond) { 5135 dst.ClearForWrite(vform); 5136 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5137 bool result = false; 5138 T op1 = src1.Float<T>(i); 5139 T op2 = src2.Float<T>(i); 5140 bool unordered = IsNaN(FPProcessNaNs(op1, op2)); 5141 5142 switch (cond) { 5143 case eq: 5144 result = (op1 == op2); 5145 break; 5146 case ge: 5147 result = (op1 >= op2); 5148 break; 5149 case gt: 5150 result = (op1 > op2); 5151 break; 5152 case le: 5153 result = (op1 <= op2); 5154 break; 5155 case lt: 5156 result = (op1 < op2); 5157 break; 5158 case ne: 5159 result = (op1 != op2); 5160 break; 5161 case uo: 5162 result = unordered; 5163 break; 5164 default: 5165 // Other conditions are defined in terms of those above. 5166 VIXL_UNREACHABLE(); 5167 break; 5168 } 5169 5170 if (result && unordered) { 5171 // Only `uo` and `ne` can be true for unordered comparisons. 5172 VIXL_ASSERT((cond == uo) || (cond == ne)); 5173 } 5174 5175 dst.SetUint(vform, i, result ? 
MaxUintFromFormat(vform) : 0); 5176 } 5177 return dst; 5178 } 5179 5180 5181 LogicVRegister Simulator::fcmp(VectorFormat vform, 5182 LogicVRegister dst, 5183 const LogicVRegister& src1, 5184 const LogicVRegister& src2, 5185 Condition cond) { 5186 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5187 fcmp<SimFloat16>(vform, dst, src1, src2, cond); 5188 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5189 fcmp<float>(vform, dst, src1, src2, cond); 5190 } else { 5191 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5192 fcmp<double>(vform, dst, src1, src2, cond); 5193 } 5194 return dst; 5195 } 5196 5197 5198 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, 5199 LogicVRegister dst, 5200 const LogicVRegister& src, 5201 Condition cond) { 5202 SimVRegister temp; 5203 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5204 LogicVRegister zero_reg = 5205 dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0))); 5206 fcmp<SimFloat16>(vform, dst, src, zero_reg, cond); 5207 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5208 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0)); 5209 fcmp<float>(vform, dst, src, zero_reg, cond); 5210 } else { 5211 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5212 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0)); 5213 fcmp<double>(vform, dst, src, zero_reg, cond); 5214 } 5215 return dst; 5216 } 5217 5218 5219 LogicVRegister Simulator::fabscmp(VectorFormat vform, 5220 LogicVRegister dst, 5221 const LogicVRegister& src1, 5222 const LogicVRegister& src2, 5223 Condition cond) { 5224 SimVRegister temp1, temp2; 5225 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5226 LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1); 5227 LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2); 5228 fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond); 5229 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5230 
LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); 5231 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); 5232 fcmp<float>(vform, dst, abs_src1, abs_src2, cond); 5233 } else { 5234 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5235 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1); 5236 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2); 5237 fcmp<double>(vform, dst, abs_src1, abs_src2, cond); 5238 } 5239 return dst; 5240 } 5241 5242 5243 template <typename T> 5244 LogicVRegister Simulator::fmla(VectorFormat vform, 5245 LogicVRegister dst, 5246 const LogicVRegister& srca, 5247 const LogicVRegister& src1, 5248 const LogicVRegister& src2) { 5249 dst.ClearForWrite(vform); 5250 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5251 T op1 = src1.Float<T>(i); 5252 T op2 = src2.Float<T>(i); 5253 T acc = srca.Float<T>(i); 5254 T result = FPMulAdd(acc, op1, op2); 5255 dst.SetFloat(vform, i, result); 5256 } 5257 return dst; 5258 } 5259 5260 5261 LogicVRegister Simulator::fmla(VectorFormat vform, 5262 LogicVRegister dst, 5263 const LogicVRegister& srca, 5264 const LogicVRegister& src1, 5265 const LogicVRegister& src2) { 5266 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5267 fmla<SimFloat16>(vform, dst, srca, src1, src2); 5268 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5269 fmla<float>(vform, dst, srca, src1, src2); 5270 } else { 5271 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5272 fmla<double>(vform, dst, srca, src1, src2); 5273 } 5274 return dst; 5275 } 5276 5277 5278 template <typename T> 5279 LogicVRegister Simulator::fmls(VectorFormat vform, 5280 LogicVRegister dst, 5281 const LogicVRegister& srca, 5282 const LogicVRegister& src1, 5283 const LogicVRegister& src2) { 5284 dst.ClearForWrite(vform); 5285 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5286 T op1 = -src1.Float<T>(i); 5287 T op2 = src2.Float<T>(i); 5288 T acc = srca.Float<T>(i); 5289 T result = FPMulAdd(acc, 
op1, op2); 5290 dst.SetFloat(i, result); 5291 } 5292 return dst; 5293 } 5294 5295 5296 LogicVRegister Simulator::fmls(VectorFormat vform, 5297 LogicVRegister dst, 5298 const LogicVRegister& srca, 5299 const LogicVRegister& src1, 5300 const LogicVRegister& src2) { 5301 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5302 fmls<SimFloat16>(vform, dst, srca, src1, src2); 5303 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5304 fmls<float>(vform, dst, srca, src1, src2); 5305 } else { 5306 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5307 fmls<double>(vform, dst, srca, src1, src2); 5308 } 5309 return dst; 5310 } 5311 5312 5313 LogicVRegister Simulator::fmlal(VectorFormat vform, 5314 LogicVRegister dst, 5315 const LogicVRegister& src1, 5316 const LogicVRegister& src2) { 5317 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 5318 dst.ClearForWrite(vform); 5319 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5320 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN); 5321 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN); 5322 float acc = dst.Float<float>(i); 5323 float result = FPMulAdd(acc, op1, op2); 5324 dst.SetFloat(i, result); 5325 } 5326 return dst; 5327 } 5328 5329 5330 LogicVRegister Simulator::fmlal2(VectorFormat vform, 5331 LogicVRegister dst, 5332 const LogicVRegister& src1, 5333 const LogicVRegister& src2) { 5334 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 5335 dst.ClearForWrite(vform); 5336 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5337 int src = i + LaneCountFromFormat(vform); 5338 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN); 5339 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN); 5340 float acc = dst.Float<float>(i); 5341 float result = FPMulAdd(acc, op1, op2); 5342 dst.SetFloat(i, result); 5343 } 5344 return dst; 5345 } 5346 5347 5348 LogicVRegister Simulator::fmlsl(VectorFormat vform, 5349 LogicVRegister 
dst, 5350 const LogicVRegister& src1, 5351 const LogicVRegister& src2) { 5352 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 5353 dst.ClearForWrite(vform); 5354 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5355 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN); 5356 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN); 5357 float acc = dst.Float<float>(i); 5358 float result = FPMulAdd(acc, op1, op2); 5359 dst.SetFloat(i, result); 5360 } 5361 return dst; 5362 } 5363 5364 5365 LogicVRegister Simulator::fmlsl2(VectorFormat vform, 5366 LogicVRegister dst, 5367 const LogicVRegister& src1, 5368 const LogicVRegister& src2) { 5369 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 5370 dst.ClearForWrite(vform); 5371 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5372 int src = i + LaneCountFromFormat(vform); 5373 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN); 5374 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN); 5375 float acc = dst.Float<float>(i); 5376 float result = FPMulAdd(acc, op1, op2); 5377 dst.SetFloat(i, result); 5378 } 5379 return dst; 5380 } 5381 5382 5383 LogicVRegister Simulator::fmlal(VectorFormat vform, 5384 LogicVRegister dst, 5385 const LogicVRegister& src1, 5386 const LogicVRegister& src2, 5387 int index) { 5388 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 5389 dst.ClearForWrite(vform); 5390 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN); 5391 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5392 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN); 5393 float acc = dst.Float<float>(i); 5394 float result = FPMulAdd(acc, op1, op2); 5395 dst.SetFloat(i, result); 5396 } 5397 return dst; 5398 } 5399 5400 5401 LogicVRegister Simulator::fmlal2(VectorFormat vform, 5402 LogicVRegister dst, 5403 const LogicVRegister& src1, 5404 const LogicVRegister& src2, 5405 int index) { 5406 
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 5407 dst.ClearForWrite(vform); 5408 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN); 5409 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5410 int src = i + LaneCountFromFormat(vform); 5411 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN); 5412 float acc = dst.Float<float>(i); 5413 float result = FPMulAdd(acc, op1, op2); 5414 dst.SetFloat(i, result); 5415 } 5416 return dst; 5417 } 5418 5419 5420 LogicVRegister Simulator::fmlsl(VectorFormat vform, 5421 LogicVRegister dst, 5422 const LogicVRegister& src1, 5423 const LogicVRegister& src2, 5424 int index) { 5425 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 5426 dst.ClearForWrite(vform); 5427 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN); 5428 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5429 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN); 5430 float acc = dst.Float<float>(i); 5431 float result = FPMulAdd(acc, op1, op2); 5432 dst.SetFloat(i, result); 5433 } 5434 return dst; 5435 } 5436 5437 5438 LogicVRegister Simulator::fmlsl2(VectorFormat vform, 5439 LogicVRegister dst, 5440 const LogicVRegister& src1, 5441 const LogicVRegister& src2, 5442 int index) { 5443 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 5444 dst.ClearForWrite(vform); 5445 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN); 5446 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5447 int src = i + LaneCountFromFormat(vform); 5448 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN); 5449 float acc = dst.Float<float>(i); 5450 float result = FPMulAdd(acc, op1, op2); 5451 dst.SetFloat(i, result); 5452 } 5453 return dst; 5454 } 5455 5456 5457 template <typename T> 5458 LogicVRegister Simulator::fneg(VectorFormat vform, 5459 LogicVRegister dst, 5460 const LogicVRegister& src) { 5461 dst.ClearForWrite(vform); 5462 for (int i 
= 0; i < LaneCountFromFormat(vform); i++) { 5463 T op = src.Float<T>(i); 5464 op = -op; 5465 dst.SetFloat(i, op); 5466 } 5467 return dst; 5468 } 5469 5470 5471 LogicVRegister Simulator::fneg(VectorFormat vform, 5472 LogicVRegister dst, 5473 const LogicVRegister& src) { 5474 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5475 fneg<SimFloat16>(vform, dst, src); 5476 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5477 fneg<float>(vform, dst, src); 5478 } else { 5479 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5480 fneg<double>(vform, dst, src); 5481 } 5482 return dst; 5483 } 5484 5485 5486 template <typename T> 5487 LogicVRegister Simulator::fabs_(VectorFormat vform, 5488 LogicVRegister dst, 5489 const LogicVRegister& src) { 5490 dst.ClearForWrite(vform); 5491 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5492 T op = src.Float<T>(i); 5493 if (copysign(1.0, op) < 0.0) { 5494 op = -op; 5495 } 5496 dst.SetFloat(i, op); 5497 } 5498 return dst; 5499 } 5500 5501 5502 LogicVRegister Simulator::fabs_(VectorFormat vform, 5503 LogicVRegister dst, 5504 const LogicVRegister& src) { 5505 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5506 fabs_<SimFloat16>(vform, dst, src); 5507 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5508 fabs_<float>(vform, dst, src); 5509 } else { 5510 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5511 fabs_<double>(vform, dst, src); 5512 } 5513 return dst; 5514 } 5515 5516 5517 LogicVRegister Simulator::fabd(VectorFormat vform, 5518 LogicVRegister dst, 5519 const LogicVRegister& src1, 5520 const LogicVRegister& src2) { 5521 SimVRegister temp; 5522 fsub(vform, temp, src1, src2); 5523 fabs_(vform, dst, temp); 5524 return dst; 5525 } 5526 5527 5528 LogicVRegister Simulator::fsqrt(VectorFormat vform, 5529 LogicVRegister dst, 5530 const LogicVRegister& src) { 5531 dst.ClearForWrite(vform); 5532 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5533 for (int i = 0; i < 
LaneCountFromFormat(vform); i++) { 5534 SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i)); 5535 dst.SetFloat(i, result); 5536 } 5537 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5538 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5539 float result = FPSqrt(src.Float<float>(i)); 5540 dst.SetFloat(i, result); 5541 } 5542 } else { 5543 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5544 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5545 double result = FPSqrt(src.Float<double>(i)); 5546 dst.SetFloat(i, result); 5547 } 5548 } 5549 return dst; 5550 } 5551 5552 5553 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ 5554 LogicVRegister Simulator::FNP(VectorFormat vform, \ 5555 LogicVRegister dst, \ 5556 const LogicVRegister& src1, \ 5557 const LogicVRegister& src2) { \ 5558 SimVRegister temp1, temp2; \ 5559 uzp1(vform, temp1, src1, src2); \ 5560 uzp2(vform, temp2, src1, src2); \ 5561 FN(vform, dst, temp1, temp2); \ 5562 if (IsSVEFormat(vform)) { \ 5563 interleave_top_bottom(vform, dst, dst); \ 5564 } \ 5565 return dst; \ 5566 } \ 5567 \ 5568 LogicVRegister Simulator::FNP(VectorFormat vform, \ 5569 LogicVRegister dst, \ 5570 const LogicVRegister& src) { \ 5571 if (vform == kFormatH) { \ 5572 SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))), \ 5573 SimFloat16(RawbitsToFloat16(src.Uint(vform, 1))))); \ 5574 dst.SetUint(vform, 0, Float16ToRawbits(result)); \ 5575 } else if (vform == kFormatS) { \ 5576 float result = OP(src.Float<float>(0), src.Float<float>(1)); \ 5577 dst.SetFloat(0, result); \ 5578 } else { \ 5579 VIXL_ASSERT(vform == kFormatD); \ 5580 double result = OP(src.Float<double>(0), src.Float<double>(1)); \ 5581 dst.SetFloat(0, result); \ 5582 } \ 5583 dst.ClearForWrite(vform); \ 5584 return dst; \ 5585 } 5586 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) 5587 #undef DEFINE_NEON_FP_PAIR_OP 5588 5589 template <typename T> 5590 LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform, 5591 
LogicVRegister dst, 5592 const LogicVRegister& src, 5593 typename TFPPairOp<T>::type fn, 5594 uint64_t inactive_value) { 5595 int lane_count = LaneCountFromFormat(vform); 5596 T result[kZRegMaxSizeInBytes / sizeof(T)]; 5597 // Copy the source vector into a working array. Initialise the unused elements 5598 // at the end of the array to the same value that a false predicate would set. 5599 for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) { 5600 result[i] = (i < lane_count) 5601 ? src.Float<T>(i) 5602 : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value); 5603 } 5604 5605 // Pairwise reduce the elements to a single value, using the pair op function 5606 // argument. 5607 for (int step = 1; step < lane_count; step *= 2) { 5608 for (int i = 0; i < lane_count; i += step * 2) { 5609 result[i] = (this->*fn)(result[i], result[i + step]); 5610 } 5611 } 5612 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 5613 dst.SetFloat<T>(0, result[0]); 5614 return dst; 5615 } 5616 5617 LogicVRegister Simulator::FPPairedAcrossHelper( 5618 VectorFormat vform, 5619 LogicVRegister dst, 5620 const LogicVRegister& src, 5621 typename TFPPairOp<SimFloat16>::type fn16, 5622 typename TFPPairOp<float>::type fn32, 5623 typename TFPPairOp<double>::type fn64, 5624 uint64_t inactive_value) { 5625 switch (LaneSizeInBitsFromFormat(vform)) { 5626 case kHRegSize: 5627 return FPPairedAcrossHelper<SimFloat16>(vform, 5628 dst, 5629 src, 5630 fn16, 5631 inactive_value); 5632 case kSRegSize: 5633 return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value); 5634 default: 5635 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5636 return FPPairedAcrossHelper<double>(vform, 5637 dst, 5638 src, 5639 fn64, 5640 inactive_value); 5641 } 5642 } 5643 5644 LogicVRegister Simulator::faddv(VectorFormat vform, 5645 LogicVRegister dst, 5646 const LogicVRegister& src) { 5647 return FPPairedAcrossHelper(vform, 5648 dst, 5649 src, 5650 &Simulator::FPAdd<SimFloat16>, 5651 
&Simulator::FPAdd<float>, 5652 &Simulator::FPAdd<double>, 5653 0); 5654 } 5655 5656 LogicVRegister Simulator::fmaxv(VectorFormat vform, 5657 LogicVRegister dst, 5658 const LogicVRegister& src) { 5659 int lane_size = LaneSizeInBitsFromFormat(vform); 5660 uint64_t inactive_value = 5661 FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity); 5662 return FPPairedAcrossHelper(vform, 5663 dst, 5664 src, 5665 &Simulator::FPMax<SimFloat16>, 5666 &Simulator::FPMax<float>, 5667 &Simulator::FPMax<double>, 5668 inactive_value); 5669 } 5670 5671 5672 LogicVRegister Simulator::fminv(VectorFormat vform, 5673 LogicVRegister dst, 5674 const LogicVRegister& src) { 5675 int lane_size = LaneSizeInBitsFromFormat(vform); 5676 uint64_t inactive_value = 5677 FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity); 5678 return FPPairedAcrossHelper(vform, 5679 dst, 5680 src, 5681 &Simulator::FPMin<SimFloat16>, 5682 &Simulator::FPMin<float>, 5683 &Simulator::FPMin<double>, 5684 inactive_value); 5685 } 5686 5687 5688 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, 5689 LogicVRegister dst, 5690 const LogicVRegister& src) { 5691 int lane_size = LaneSizeInBitsFromFormat(vform); 5692 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); 5693 return FPPairedAcrossHelper(vform, 5694 dst, 5695 src, 5696 &Simulator::FPMaxNM<SimFloat16>, 5697 &Simulator::FPMaxNM<float>, 5698 &Simulator::FPMaxNM<double>, 5699 inactive_value); 5700 } 5701 5702 5703 LogicVRegister Simulator::fminnmv(VectorFormat vform, 5704 LogicVRegister dst, 5705 const LogicVRegister& src) { 5706 int lane_size = LaneSizeInBitsFromFormat(vform); 5707 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN); 5708 return FPPairedAcrossHelper(vform, 5709 dst, 5710 src, 5711 &Simulator::FPMinNM<SimFloat16>, 5712 &Simulator::FPMinNM<float>, 5713 &Simulator::FPMinNM<double>, 5714 inactive_value); 5715 } 5716 5717 5718 LogicVRegister Simulator::fmul(VectorFormat vform, 5719 LogicVRegister dst, 5720 
const LogicVRegister& src1, 5721 const LogicVRegister& src2, 5722 int index) { 5723 dst.ClearForWrite(vform); 5724 SimVRegister temp; 5725 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5726 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); 5727 fmul<SimFloat16>(vform, dst, src1, index_reg); 5728 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5729 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 5730 fmul<float>(vform, dst, src1, index_reg); 5731 } else { 5732 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5733 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 5734 fmul<double>(vform, dst, src1, index_reg); 5735 } 5736 return dst; 5737 } 5738 5739 5740 LogicVRegister Simulator::fmla(VectorFormat vform, 5741 LogicVRegister dst, 5742 const LogicVRegister& src1, 5743 const LogicVRegister& src2, 5744 int index) { 5745 dst.ClearForWrite(vform); 5746 SimVRegister temp; 5747 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5748 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); 5749 fmla<SimFloat16>(vform, dst, dst, src1, index_reg); 5750 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5751 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 5752 fmla<float>(vform, dst, dst, src1, index_reg); 5753 } else { 5754 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5755 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 5756 fmla<double>(vform, dst, dst, src1, index_reg); 5757 } 5758 return dst; 5759 } 5760 5761 5762 LogicVRegister Simulator::fmls(VectorFormat vform, 5763 LogicVRegister dst, 5764 const LogicVRegister& src1, 5765 const LogicVRegister& src2, 5766 int index) { 5767 dst.ClearForWrite(vform); 5768 SimVRegister temp; 5769 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5770 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); 5771 fmls<SimFloat16>(vform, dst, dst, src1, 
index_reg); 5772 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5773 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 5774 fmls<float>(vform, dst, dst, src1, index_reg); 5775 } else { 5776 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5777 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 5778 fmls<double>(vform, dst, dst, src1, index_reg); 5779 } 5780 return dst; 5781 } 5782 5783 5784 LogicVRegister Simulator::fmulx(VectorFormat vform, 5785 LogicVRegister dst, 5786 const LogicVRegister& src1, 5787 const LogicVRegister& src2, 5788 int index) { 5789 dst.ClearForWrite(vform); 5790 SimVRegister temp; 5791 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5792 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); 5793 fmulx<SimFloat16>(vform, dst, src1, index_reg); 5794 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5795 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 5796 fmulx<float>(vform, dst, src1, index_reg); 5797 } else { 5798 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5799 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 5800 fmulx<double>(vform, dst, src1, index_reg); 5801 } 5802 return dst; 5803 } 5804 5805 5806 LogicVRegister Simulator::frint(VectorFormat vform, 5807 LogicVRegister dst, 5808 const LogicVRegister& src, 5809 FPRounding rounding_mode, 5810 bool inexact_exception, 5811 FrintMode frint_mode) { 5812 dst.ClearForWrite(vform); 5813 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5814 VIXL_ASSERT(frint_mode == kFrintToInteger); 5815 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5816 SimFloat16 input = src.Float<SimFloat16>(i); 5817 SimFloat16 rounded = FPRoundInt(input, rounding_mode); 5818 if (inexact_exception && !IsNaN(input) && (input != rounded)) { 5819 FPProcessException(); 5820 } 5821 dst.SetFloat<SimFloat16>(i, rounded); 5822 } 5823 } else if (LaneSizeInBitsFromFormat(vform) == 
kSRegSize) { 5824 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5825 float input = src.Float<float>(i); 5826 float rounded = FPRoundInt(input, rounding_mode, frint_mode); 5827 5828 if (inexact_exception && !IsNaN(input) && (input != rounded)) { 5829 FPProcessException(); 5830 } 5831 dst.SetFloat<float>(i, rounded); 5832 } 5833 } else { 5834 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5835 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5836 double input = src.Float<double>(i); 5837 double rounded = FPRoundInt(input, rounding_mode, frint_mode); 5838 if (inexact_exception && !IsNaN(input) && (input != rounded)) { 5839 FPProcessException(); 5840 } 5841 dst.SetFloat<double>(i, rounded); 5842 } 5843 } 5844 return dst; 5845 } 5846 5847 LogicVRegister Simulator::fcvt(VectorFormat dst_vform, 5848 VectorFormat src_vform, 5849 LogicVRegister dst, 5850 const LogicPRegister& pg, 5851 const LogicVRegister& src) { 5852 unsigned dst_data_size_in_bits = LaneSizeInBitsFromFormat(dst_vform); 5853 unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform); 5854 VectorFormat vform = SVEFormatFromLaneSizeInBits( 5855 std::max(dst_data_size_in_bits, src_data_size_in_bits)); 5856 5857 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5858 if (!pg.IsActive(vform, i)) continue; 5859 5860 uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, 5861 0, 5862 src.Uint(vform, i)); 5863 double dst_value = 5864 RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits); 5865 5866 uint64_t dst_raw_bits = 5867 FPToRawbitsWithSize(dst_data_size_in_bits, dst_value); 5868 5869 dst.SetUint(vform, i, dst_raw_bits); 5870 } 5871 5872 return dst; 5873 } 5874 5875 LogicVRegister Simulator::fcvts(VectorFormat vform, 5876 unsigned dst_data_size_in_bits, 5877 unsigned src_data_size_in_bits, 5878 LogicVRegister dst, 5879 const LogicPRegister& pg, 5880 const LogicVRegister& src, 5881 FPRounding round, 5882 int fbits) { 5883 
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); 5884 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); 5885 5886 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5887 if (!pg.IsActive(vform, i)) continue; 5888 5889 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, 5890 0, 5891 src.Uint(vform, i)); 5892 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) * 5893 std::pow(2.0, fbits); 5894 5895 switch (dst_data_size_in_bits) { 5896 case kHRegSize: 5897 dst.SetInt(vform, i, FPToInt16(result, round)); 5898 break; 5899 case kSRegSize: 5900 dst.SetInt(vform, i, FPToInt32(result, round)); 5901 break; 5902 case kDRegSize: 5903 dst.SetInt(vform, i, FPToInt64(result, round)); 5904 break; 5905 default: 5906 VIXL_UNIMPLEMENTED(); 5907 break; 5908 } 5909 } 5910 5911 return dst; 5912 } 5913 5914 LogicVRegister Simulator::fcvts(VectorFormat vform, 5915 LogicVRegister dst, 5916 const LogicVRegister& src, 5917 FPRounding round, 5918 int fbits) { 5919 dst.ClearForWrite(vform); 5920 return fcvts(vform, 5921 LaneSizeInBitsFromFormat(vform), 5922 LaneSizeInBitsFromFormat(vform), 5923 dst, 5924 GetPTrue(), 5925 src, 5926 round, 5927 fbits); 5928 } 5929 5930 LogicVRegister Simulator::fcvtu(VectorFormat vform, 5931 unsigned dst_data_size_in_bits, 5932 unsigned src_data_size_in_bits, 5933 LogicVRegister dst, 5934 const LogicPRegister& pg, 5935 const LogicVRegister& src, 5936 FPRounding round, 5937 int fbits) { 5938 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); 5939 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); 5940 5941 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5942 if (!pg.IsActive(vform, i)) continue; 5943 5944 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, 5945 0, 5946 src.Uint(vform, i)); 5947 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) * 5948 std::pow(2.0, fbits); 5949 5950 switch 
(dst_data_size_in_bits) { 5951 case kHRegSize: 5952 dst.SetUint(vform, i, FPToUInt16(result, round)); 5953 break; 5954 case kSRegSize: 5955 dst.SetUint(vform, i, FPToUInt32(result, round)); 5956 break; 5957 case kDRegSize: 5958 dst.SetUint(vform, i, FPToUInt64(result, round)); 5959 break; 5960 default: 5961 VIXL_UNIMPLEMENTED(); 5962 break; 5963 } 5964 } 5965 5966 return dst; 5967 } 5968 5969 LogicVRegister Simulator::fcvtu(VectorFormat vform, 5970 LogicVRegister dst, 5971 const LogicVRegister& src, 5972 FPRounding round, 5973 int fbits) { 5974 dst.ClearForWrite(vform); 5975 return fcvtu(vform, 5976 LaneSizeInBitsFromFormat(vform), 5977 LaneSizeInBitsFromFormat(vform), 5978 dst, 5979 GetPTrue(), 5980 src, 5981 round, 5982 fbits); 5983 } 5984 5985 LogicVRegister Simulator::fcvtl(VectorFormat vform, 5986 LogicVRegister dst, 5987 const LogicVRegister& src) { 5988 dst.ClearForWrite(vform); 5989 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5990 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 5991 // TODO: Full support for SimFloat16 in SimRegister(s). 5992 dst.SetFloat(i, 5993 FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)), 5994 ReadDN())); 5995 } 5996 } else { 5997 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5998 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 5999 dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN())); 6000 } 6001 } 6002 return dst; 6003 } 6004 6005 6006 LogicVRegister Simulator::fcvtl2(VectorFormat vform, 6007 LogicVRegister dst, 6008 const LogicVRegister& src) { 6009 dst.ClearForWrite(vform); 6010 int lane_count = LaneCountFromFormat(vform); 6011 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 6012 for (int i = 0; i < lane_count; i++) { 6013 // TODO: Full support for SimFloat16 in SimRegister(s). 
6014 dst.SetFloat(i, 6015 FPToFloat(RawbitsToFloat16( 6016 src.Float<uint16_t>(i + lane_count)), 6017 ReadDN())); 6018 } 6019 } else { 6020 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 6021 for (int i = 0; i < lane_count; i++) { 6022 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN())); 6023 } 6024 } 6025 return dst; 6026 } 6027 6028 6029 LogicVRegister Simulator::fcvtn(VectorFormat vform, 6030 LogicVRegister dst, 6031 const LogicVRegister& src) { 6032 SimVRegister tmp; 6033 LogicVRegister srctmp = mov(kFormat2D, tmp, src); 6034 dst.ClearForWrite(vform); 6035 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 6036 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6037 dst.SetFloat(i, 6038 Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i), 6039 FPTieEven, 6040 ReadDN()))); 6041 } 6042 } else { 6043 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 6044 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6045 dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN())); 6046 } 6047 } 6048 return dst; 6049 } 6050 6051 6052 LogicVRegister Simulator::fcvtn2(VectorFormat vform, 6053 LogicVRegister dst, 6054 const LogicVRegister& src) { 6055 dst.ClearForWrite(vform); 6056 int lane_count = LaneCountFromFormat(vform) / 2; 6057 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 6058 for (int i = lane_count - 1; i >= 0; i--) { 6059 dst.SetFloat(i + lane_count, 6060 Float16ToRawbits( 6061 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN()))); 6062 } 6063 } else { 6064 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 6065 for (int i = lane_count - 1; i >= 0; i--) { 6066 dst.SetFloat(i + lane_count, 6067 FPToFloat(src.Float<double>(i), FPTieEven, ReadDN())); 6068 } 6069 } 6070 return dst; 6071 } 6072 6073 6074 LogicVRegister Simulator::fcvtxn(VectorFormat vform, 6075 LogicVRegister dst, 6076 const LogicVRegister& src) { 6077 SimVRegister tmp; 6078 LogicVRegister srctmp = mov(kFormat2D, tmp, src); 6079 
// FCVTXN2: narrows double-precision lanes of src to single precision using
// FPRoundOdd and writes them to the upper half of the lanes of dst. Only
// single-precision destination lanes are supported.
LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src) {
  VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
  dst.ClearForWrite(vform);
  int lane_count = LaneCountFromFormat(vform) / 2;
  // Walk from the highest lane downwards.
  // NOTE(review): presumably so that dst may alias src - confirm.
  for (int i = lane_count - 1; i >= 0; i--) {
    dst.SetFloat(i + lane_count,
                 FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
  }
  return dst;
}


// Based on reference C function recip_sqrt_estimate from ARM ARM.
//
// Returns an estimate of 1/sqrt(a), quantised to a multiple of 1/256. Inputs
// below 0.5 are sampled on a grid of 1/512, other inputs on a grid of 1/256.
// NOTE(review): callers in this file pass values scaled into [0.25, 1.0);
// behaviour outside that range is not checked here.
double Simulator::recip_sqrt_estimate(double a) {
  int quot0, quot1, s;
  double r;
  if (a < 0.5) {
    quot0 = static_cast<int>(a * 512.0);
    r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);
  } else {
    quot1 = static_cast<int>(a * 256.0);
    r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);
  }
  // Round to the nearest multiple of 1/256.
  s = static_cast<int>(256.0 * r + 0.5);
  return static_cast<double>(s) / 256.0;
}


// Shorthand for ExtractUnsignedBitfield64 with the value as the first argument:
// extracts the bitfield of `val` delimited by `start_bit` and `end_bit`. Call
// sites in this file pass the more significant bit index first.
static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
  return ExtractUnsignedBitfield64(start_bit, end_bit, val);
}


// Computes the reciprocal square root estimate of `op` for T in
// {SimFloat16, float, double}.
//
// Special cases, in the order they are tested:
//  - NaN: the result of FPProcessNaN(op).
//  - Zero: infinity with the sign of the input.
//  - Any other negative value: FPProcessException() is called and the default
//    NaN is returned.
//  - Positive infinity: zero.
// Otherwise the input is decomposed into exponent and fraction, rescaled into
// a double accepted by recip_sqrt_estimate, and the estimate is repacked at
// the precision of T.
template <typename T>
T Simulator::FPRecipSqrtEstimate(T op) {
  if (IsNaN(op)) {
    return FPProcessNaN(op);
  } else if (op == 0.0) {
    if (copysign(1.0, op) < 0.0) {
      return kFP64NegativeInfinity;
    } else {
      return kFP64PositiveInfinity;
    }
  } else if (copysign(1.0, op) < 0.0) {
    FPProcessException();
    return FPDefaultNaN<T>();
  } else if (IsInf(op)) {
    return 0.0;
  } else {
    uint64_t fraction;
    int exp, result_exp;

    // Extract the biased exponent, and left-align the fraction to the 52-bit
    // fraction width of a double (10 + 42 bits for half, 23 + 29 for single).
    if (IsFloat16<T>()) {
      exp = Float16Exp(op);
      fraction = Float16Mantissa(op);
      fraction <<= 42;
    } else if (IsFloat32<T>()) {
      exp = FloatExp(op);
      fraction = FloatMantissa(op);
      fraction <<= 29;
    } else {
      VIXL_ASSERT(IsFloat64<T>());
      exp = DoubleExp(op);
      fraction = DoubleMantissa(op);
    }

    // Subnormal input: shift the fraction left until its top bit is set,
    // decrementing the exponent for each shift, then shift that leading bit
    // out. The loop terminates because zero was handled above.
    if (exp == 0) {
      while (Bits(fraction, 51, 51) == 0) {
        fraction = Bits(fraction, 50, 0) << 1;
        exp -= 1;
      }
      fraction = Bits(fraction, 50, 0) << 1;
    }

    // Build the double passed to recip_sqrt_estimate from the top eight
    // fraction bits. The parity of the exponent selects the double's exponent
    // field (1022 for even, 1021 for odd).
    double scaled;
    if (Bits(exp, 0, 0) == 0) {
      scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
    } else {
      scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
    }

    // Result exponent, computed with a per-format constant.
    if (IsFloat16<T>()) {
      result_exp = (44 - exp) / 2;
    } else if (IsFloat32<T>()) {
      result_exp = (380 - exp) / 2;
    } else {
      VIXL_ASSERT(IsFloat64<T>());
      result_exp = (3068 - exp) / 2;
    }

    uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));

    // Repack as a positive value, keeping as many of the estimate's top
    // fraction bits as the destination format holds.
    if (IsFloat16<T>()) {
      uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
      uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
      return Float16Pack(0, exp_bits, est_bits);
    } else if (IsFloat32<T>()) {
      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
      uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
      return FloatPack(0, exp_bits, est_bits);
    } else {
      VIXL_ASSERT(IsFloat64<T>());
      return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
    }
  }
}
src.Float<SimFloat16>(i); 6212 dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input)); 6213 } 6214 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 6215 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6216 float input = src.Float<float>(i); 6217 dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input)); 6218 } 6219 } else { 6220 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 6221 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6222 double input = src.Float<double>(i); 6223 dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input)); 6224 } 6225 } 6226 return dst; 6227 } 6228 6229 template <typename T> 6230 T Simulator::FPRecipEstimate(T op, FPRounding rounding) { 6231 uint32_t sign; 6232 6233 if (IsFloat16<T>()) { 6234 sign = Float16Sign(op); 6235 } else if (IsFloat32<T>()) { 6236 sign = FloatSign(op); 6237 } else { 6238 VIXL_ASSERT(IsFloat64<T>()); 6239 sign = DoubleSign(op); 6240 } 6241 6242 if (IsNaN(op)) { 6243 return FPProcessNaN(op); 6244 } else if (IsInf(op)) { 6245 return (sign == 1) ? -0.0 : 0.0; 6246 } else if (op == 0.0) { 6247 FPProcessException(); // FPExc_DivideByZero exception. 6248 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 6249 } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) || 6250 (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) || 6251 (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) { 6252 bool overflow_to_inf = false; 6253 switch (rounding) { 6254 case FPTieEven: 6255 overflow_to_inf = true; 6256 break; 6257 case FPPositiveInfinity: 6258 overflow_to_inf = (sign == 0); 6259 break; 6260 case FPNegativeInfinity: 6261 overflow_to_inf = (sign == 1); 6262 break; 6263 case FPZero: 6264 overflow_to_inf = false; 6265 break; 6266 default: 6267 break; 6268 } 6269 FPProcessException(); // FPExc_Overflow and FPExc_Inexact. 6270 if (overflow_to_inf) { 6271 return (sign == 1) ? 
kFP64NegativeInfinity : kFP64PositiveInfinity; 6272 } else { 6273 // Return FPMaxNormal(sign). 6274 if (IsFloat16<T>()) { 6275 return Float16Pack(sign, 0x1f, 0x3ff); 6276 } else if (IsFloat32<T>()) { 6277 return FloatPack(sign, 0xfe, 0x07fffff); 6278 } else { 6279 VIXL_ASSERT(IsFloat64<T>()); 6280 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl); 6281 } 6282 } 6283 } else { 6284 uint64_t fraction; 6285 int exp, result_exp; 6286 6287 if (IsFloat16<T>()) { 6288 sign = Float16Sign(op); 6289 exp = Float16Exp(op); 6290 fraction = Float16Mantissa(op); 6291 fraction <<= 42; 6292 } else if (IsFloat32<T>()) { 6293 sign = FloatSign(op); 6294 exp = FloatExp(op); 6295 fraction = FloatMantissa(op); 6296 fraction <<= 29; 6297 } else { 6298 VIXL_ASSERT(IsFloat64<T>()); 6299 sign = DoubleSign(op); 6300 exp = DoubleExp(op); 6301 fraction = DoubleMantissa(op); 6302 } 6303 6304 if (exp == 0) { 6305 if (Bits(fraction, 51, 51) == 0) { 6306 exp -= 1; 6307 fraction = Bits(fraction, 49, 0) << 2; 6308 } else { 6309 fraction = Bits(fraction, 50, 0) << 1; 6310 } 6311 } 6312 6313 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); 6314 6315 if (IsFloat16<T>()) { 6316 result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30. 6317 } else if (IsFloat32<T>()) { 6318 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254. 6319 } else { 6320 VIXL_ASSERT(IsFloat64<T>()); 6321 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046. 
6322 } 6323 6324 double estimate = recip_estimate(scaled); 6325 6326 fraction = DoubleMantissa(estimate); 6327 if (result_exp == 0) { 6328 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); 6329 } else if (result_exp == -1) { 6330 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); 6331 result_exp = 0; 6332 } 6333 if (IsFloat16<T>()) { 6334 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0)); 6335 uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42)); 6336 return Float16Pack(sign, exp_bits, frac_bits); 6337 } else if (IsFloat32<T>()) { 6338 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 6339 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); 6340 return FloatPack(sign, exp_bits, frac_bits); 6341 } else { 6342 VIXL_ASSERT(IsFloat64<T>()); 6343 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); 6344 } 6345 } 6346 } 6347 6348 6349 LogicVRegister Simulator::frecpe(VectorFormat vform, 6350 LogicVRegister dst, 6351 const LogicVRegister& src, 6352 FPRounding round) { 6353 dst.ClearForWrite(vform); 6354 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 6355 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6356 SimFloat16 input = src.Float<SimFloat16>(i); 6357 dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round)); 6358 } 6359 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 6360 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6361 float input = src.Float<float>(i); 6362 dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round)); 6363 } 6364 } else { 6365 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 6366 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6367 double input = src.Float<double>(i); 6368 dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round)); 6369 } 6370 } 6371 return dst; 6372 } 6373 6374 6375 LogicVRegister Simulator::ursqrte(VectorFormat vform, 6376 LogicVRegister dst, 6377 const LogicVRegister& src) { 
6378 dst.ClearForWrite(vform); 6379 uint64_t operand; 6380 uint32_t result; 6381 double dp_operand, dp_result; 6382 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6383 operand = src.Uint(vform, i); 6384 if (operand <= 0x3FFFFFFF) { 6385 result = 0xFFFFFFFF; 6386 } else { 6387 dp_operand = operand * std::pow(2.0, -32); 6388 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); 6389 result = static_cast<uint32_t>(dp_result); 6390 } 6391 dst.SetUint(vform, i, result); 6392 } 6393 return dst; 6394 } 6395 6396 6397 // Based on reference C function recip_estimate from ARM ARM. 6398 double Simulator::recip_estimate(double a) { 6399 int q, s; 6400 double r; 6401 q = static_cast<int>(a * 512.0); 6402 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); 6403 s = static_cast<int>(256.0 * r + 0.5); 6404 return static_cast<double>(s) / 256.0; 6405 } 6406 6407 6408 LogicVRegister Simulator::urecpe(VectorFormat vform, 6409 LogicVRegister dst, 6410 const LogicVRegister& src) { 6411 dst.ClearForWrite(vform); 6412 uint64_t operand; 6413 uint32_t result; 6414 double dp_operand, dp_result; 6415 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6416 operand = src.Uint(vform, i); 6417 if (operand <= 0x7FFFFFFF) { 6418 result = 0xFFFFFFFF; 6419 } else { 6420 dp_operand = operand * std::pow(2.0, -32); 6421 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); 6422 result = static_cast<uint32_t>(dp_result); 6423 } 6424 dst.SetUint(vform, i, result); 6425 } 6426 return dst; 6427 } 6428 6429 LogicPRegister Simulator::pfalse(LogicPRegister dst) { 6430 dst.Clear(); 6431 return dst; 6432 } 6433 6434 LogicPRegister Simulator::pfirst(LogicPRegister dst, 6435 const LogicPRegister& pg, 6436 const LogicPRegister& src) { 6437 int first_pg = GetFirstActive(kFormatVnB, pg); 6438 VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB)); 6439 mov(dst, src); 6440 if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true); 6441 return dst; 6442 } 6443 6444 LogicPRegister 
Simulator::ptrue(VectorFormat vform, 6445 LogicPRegister dst, 6446 int pattern) { 6447 int count = GetPredicateConstraintLaneCount(vform, pattern); 6448 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6449 dst.SetActive(vform, i, i < count); 6450 } 6451 return dst; 6452 } 6453 6454 LogicPRegister Simulator::pnext(VectorFormat vform, 6455 LogicPRegister dst, 6456 const LogicPRegister& pg, 6457 const LogicPRegister& src) { 6458 int next = GetLastActive(vform, src) + 1; 6459 while (next < LaneCountFromFormat(vform)) { 6460 if (pg.IsActive(vform, next)) break; 6461 next++; 6462 } 6463 6464 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6465 dst.SetActive(vform, i, (i == next)); 6466 } 6467 return dst; 6468 } 6469 6470 template <typename T> 6471 LogicVRegister Simulator::frecpx(VectorFormat vform, 6472 LogicVRegister dst, 6473 const LogicVRegister& src) { 6474 dst.ClearForWrite(vform); 6475 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6476 T op = src.Float<T>(i); 6477 T result; 6478 if (IsNaN(op)) { 6479 result = FPProcessNaN(op); 6480 } else { 6481 int exp; 6482 uint32_t sign; 6483 if (IsFloat16<T>()) { 6484 sign = Float16Sign(op); 6485 exp = Float16Exp(op); 6486 exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0)); 6487 result = Float16Pack(sign, exp, 0); 6488 } else if (IsFloat32<T>()) { 6489 sign = FloatSign(op); 6490 exp = FloatExp(op); 6491 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); 6492 result = FloatPack(sign, exp, 0); 6493 } else { 6494 VIXL_ASSERT(IsFloat64<T>()); 6495 sign = DoubleSign(op); 6496 exp = DoubleExp(op); 6497 exp = (exp == 0) ? 
(0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); 6498 result = DoublePack(sign, exp, 0); 6499 } 6500 } 6501 dst.SetFloat(i, result); 6502 } 6503 return dst; 6504 } 6505 6506 6507 LogicVRegister Simulator::frecpx(VectorFormat vform, 6508 LogicVRegister dst, 6509 const LogicVRegister& src) { 6510 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 6511 frecpx<SimFloat16>(vform, dst, src); 6512 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 6513 frecpx<float>(vform, dst, src); 6514 } else { 6515 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 6516 frecpx<double>(vform, dst, src); 6517 } 6518 return dst; 6519 } 6520 6521 LogicVRegister Simulator::flogb(VectorFormat vform, 6522 LogicVRegister dst, 6523 const LogicVRegister& src) { 6524 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6525 double op = 0.0; 6526 switch (vform) { 6527 case kFormatVnH: 6528 op = FPToDouble(src.Float<SimFloat16>(i), kIgnoreDefaultNaN); 6529 break; 6530 case kFormatVnS: 6531 op = src.Float<float>(i); 6532 break; 6533 case kFormatVnD: 6534 op = src.Float<double>(i); 6535 break; 6536 default: 6537 VIXL_UNREACHABLE(); 6538 } 6539 6540 switch (std::fpclassify(op)) { 6541 case FP_INFINITE: 6542 dst.SetInt(vform, i, MaxIntFromFormat(vform)); 6543 break; 6544 case FP_NAN: 6545 case FP_ZERO: 6546 dst.SetInt(vform, i, MinIntFromFormat(vform)); 6547 break; 6548 case FP_SUBNORMAL: { 6549 // DoubleMantissa returns the mantissa of its input, leaving 12 zero 6550 // bits where the sign and exponent would be. We subtract 12 to 6551 // find the number of leading zero bits in the mantissa itself. 6552 int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12; 6553 // Log2 of a subnormal is the lowest exponent a normal number can 6554 // represent, together with the zeros in the mantissa. 6555 dst.SetInt(vform, i, -1023 - mant_zero_count); 6556 break; 6557 } 6558 case FP_NORMAL: 6559 // Log2 of a normal number is the exponent minus the bias. 
6560 dst.SetInt(vform, i, static_cast<int64_t>(DoubleExp(op)) - 1023); 6561 break; 6562 } 6563 } 6564 return dst; 6565 } 6566 6567 LogicVRegister Simulator::ftsmul(VectorFormat vform, 6568 LogicVRegister dst, 6569 const LogicVRegister& src1, 6570 const LogicVRegister& src2) { 6571 SimVRegister maybe_neg_src1; 6572 6573 // The bottom bit of src2 controls the sign of the result. Use it to 6574 // conditionally invert the sign of one `fmul` operand. 6575 shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1); 6576 eor(vform, maybe_neg_src1, maybe_neg_src1, src1); 6577 6578 // Multiply src1 by the modified neg_src1, which is potentially its negation. 6579 // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1, 6580 // rather than neg_src1, must be the first source argument. 6581 fmul(vform, dst, src1, maybe_neg_src1); 6582 6583 return dst; 6584 } 6585 6586 LogicVRegister Simulator::ftssel(VectorFormat vform, 6587 LogicVRegister dst, 6588 const LogicVRegister& src1, 6589 const LogicVRegister& src2) { 6590 unsigned lane_bits = LaneSizeInBitsFromFormat(vform); 6591 uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1); 6592 uint64_t one; 6593 6594 if (lane_bits == kHRegSize) { 6595 one = Float16ToRawbits(Float16(1.0)); 6596 } else if (lane_bits == kSRegSize) { 6597 one = FloatToRawbits(1.0); 6598 } else { 6599 VIXL_ASSERT(lane_bits == kDRegSize); 6600 one = DoubleToRawbits(1.0); 6601 } 6602 6603 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6604 // Use integer accessors for this operation, as this is a data manipulation 6605 // task requiring no calculation. 6606 uint64_t op = src1.Uint(vform, i); 6607 6608 // Only the bottom two bits of the src2 register are significant, indicating 6609 // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1 6610 // determines the sign of the value written to dst. 
6611 uint64_t q = src2.Uint(vform, i); 6612 if ((q & 1) == 1) op = one; 6613 if ((q & 2) == 2) op ^= sign_bit; 6614 6615 dst.SetUint(vform, i, op); 6616 } 6617 6618 return dst; 6619 } 6620 6621 template <typename T> 6622 LogicVRegister Simulator::FTMaddHelper(VectorFormat vform, 6623 LogicVRegister dst, 6624 const LogicVRegister& src1, 6625 const LogicVRegister& src2, 6626 uint64_t coeff_pos, 6627 uint64_t coeff_neg) { 6628 SimVRegister zero; 6629 dup_immediate(kFormatVnB, zero, 0); 6630 6631 SimVRegister cf; 6632 SimVRegister cfn; 6633 dup_immediate(vform, cf, coeff_pos); 6634 dup_immediate(vform, cfn, coeff_neg); 6635 6636 // The specification requires testing the top bit of the raw value, rather 6637 // than the sign of the floating point number, so use an integer comparison 6638 // here. 6639 SimPRegister is_neg; 6640 SVEIntCompareVectorsHelper(lt, 6641 vform, 6642 is_neg, 6643 GetPTrue(), 6644 src2, 6645 zero, 6646 false, 6647 LeaveFlags); 6648 mov_merging(vform, cf, is_neg, cfn); 6649 6650 SimVRegister temp; 6651 fabs_<T>(vform, temp, src2); 6652 fmla<T>(vform, cf, cf, src1, temp); 6653 mov(vform, dst, cf); 6654 return dst; 6655 } 6656 6657 6658 LogicVRegister Simulator::ftmad(VectorFormat vform, 6659 LogicVRegister dst, 6660 const LogicVRegister& src1, 6661 const LogicVRegister& src2, 6662 unsigned index) { 6663 static const uint64_t ftmad_coeff16[] = {0x3c00, 6664 0xb155, 6665 0x2030, 6666 0x0000, 6667 0x0000, 6668 0x0000, 6669 0x0000, 6670 0x0000, 6671 0x3c00, 6672 0xb800, 6673 0x293a, 6674 0x0000, 6675 0x0000, 6676 0x0000, 6677 0x0000, 6678 0x0000}; 6679 6680 static const uint64_t ftmad_coeff32[] = {0x3f800000, 6681 0xbe2aaaab, 6682 0x3c088886, 6683 0xb95008b9, 6684 0x36369d6d, 6685 0x00000000, 6686 0x00000000, 6687 0x00000000, 6688 0x3f800000, 6689 0xbf000000, 6690 0x3d2aaaa6, 6691 0xbab60705, 6692 0x37cd37cc, 6693 0x00000000, 6694 0x00000000, 6695 0x00000000}; 6696 6697 static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000, 6698 
0xbfc5555555555543, 6699 0x3f8111111110f30c, 6700 0xbf2a01a019b92fc6, 6701 0x3ec71de351f3d22b, 6702 0xbe5ae5e2b60f7b91, 6703 0x3de5d8408868552f, 6704 0x0000000000000000, 6705 0x3ff0000000000000, 6706 0xbfe0000000000000, 6707 0x3fa5555555555536, 6708 0xbf56c16c16c13a0b, 6709 0x3efa01a019b1e8d8, 6710 0xbe927e4f7282f468, 6711 0x3e21ee96d2641b13, 6712 0xbda8f76380fbb401}; 6713 VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64)); 6714 VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64)); 6715 VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64)); 6716 6717 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 6718 FTMaddHelper<SimFloat16>(vform, 6719 dst, 6720 src1, 6721 src2, 6722 ftmad_coeff16[index], 6723 ftmad_coeff16[index + 8]); 6724 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 6725 FTMaddHelper<float>(vform, 6726 dst, 6727 src1, 6728 src2, 6729 ftmad_coeff32[index], 6730 ftmad_coeff32[index + 8]); 6731 } else { 6732 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 6733 FTMaddHelper<double>(vform, 6734 dst, 6735 src1, 6736 src2, 6737 ftmad_coeff64[index], 6738 ftmad_coeff64[index + 8]); 6739 } 6740 return dst; 6741 } 6742 6743 LogicVRegister Simulator::fexpa(VectorFormat vform, 6744 LogicVRegister dst, 6745 const LogicVRegister& src) { 6746 static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045, 6747 0x005d, 0x0075, 0x008e, 0x00a8, 6748 0x00c2, 0x00dc, 0x00f8, 0x0114, 6749 0x0130, 0x014d, 0x016b, 0x0189, 6750 0x01a8, 0x01c8, 0x01e8, 0x0209, 6751 0x022b, 0x024e, 0x0271, 0x0295, 6752 0x02ba, 0x02e0, 0x0306, 0x032e, 6753 0x0356, 0x037f, 0x03a9, 0x03d4}; 6754 6755 static const uint64_t fexpa_coeff32[] = 6756 {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f, 6757 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b, 6758 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532, 6759 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 
0x2b7a3a, 6760 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf, 6761 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75, 6762 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd, 6763 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a, 6764 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3, 6765 0x7d3e0c}; 6766 6767 static const uint64_t fexpa_coeff64[] = 6768 {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8, 6769 0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0, 6770 0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6, 6771 0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b, 6772 0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7, 6773 0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0, 6774 0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da, 6775 0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225, 6776 0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9, 6777 0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed, 6778 0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50, 6779 0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf, 6780 0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2, 6781 0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c, 6782 0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6, 6783 0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8}; 6784 6785 unsigned lane_size = LaneSizeInBitsFromFormat(vform); 6786 int index_highbit = 5; 6787 int op_highbit, op_shift; 6788 const uint64_t* fexpa_coeff; 6789 6790 if (lane_size == kHRegSize) { 6791 index_highbit = 4; 6792 VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1))); 6793 fexpa_coeff = fexpa_coeff16; 6794 op_highbit = 9; 6795 op_shift = 10; 6796 
} else if (lane_size == kSRegSize) { 6797 VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1))); 6798 fexpa_coeff = fexpa_coeff32; 6799 op_highbit = 13; 6800 op_shift = 23; 6801 } else { 6802 VIXL_ASSERT(lane_size == kDRegSize); 6803 VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1))); 6804 fexpa_coeff = fexpa_coeff64; 6805 op_highbit = 16; 6806 op_shift = 52; 6807 } 6808 6809 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6810 uint64_t op = src.Uint(vform, i); 6811 uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)]; 6812 result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift); 6813 dst.SetUint(vform, i, result); 6814 } 6815 return dst; 6816 } 6817 6818 template <typename T> 6819 LogicVRegister Simulator::fscale(VectorFormat vform, 6820 LogicVRegister dst, 6821 const LogicVRegister& src1, 6822 const LogicVRegister& src2) { 6823 T two = T(2.0); 6824 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6825 T src1_val = src1.Float<T>(i); 6826 if (!IsNaN(src1_val)) { 6827 int64_t scale = src2.Int(vform, i); 6828 // TODO: this is a low-performance implementation, but it's simple and 6829 // less likely to be buggy. Consider replacing it with something faster. 6830 6831 // Scales outside of these bounds become infinity or zero, so there's no 6832 // point iterating further. 6833 scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048); 6834 6835 // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and 6836 // decrement scale until it's zero. 6837 while (scale-- > 0) { 6838 src1_val = FPMul(src1_val, two); 6839 } 6840 6841 // If scale is negative, divide by two and increment scale until it's 6842 // zero. Initially, scale is (src2 - 1), so we pre-increment. 
6843 while (++scale < 0) { 6844 src1_val = FPDiv(src1_val, two); 6845 } 6846 } 6847 dst.SetFloat<T>(i, src1_val); 6848 } 6849 return dst; 6850 } 6851 6852 LogicVRegister Simulator::fscale(VectorFormat vform, 6853 LogicVRegister dst, 6854 const LogicVRegister& src1, 6855 const LogicVRegister& src2) { 6856 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 6857 fscale<SimFloat16>(vform, dst, src1, src2); 6858 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 6859 fscale<float>(vform, dst, src1, src2); 6860 } else { 6861 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 6862 fscale<double>(vform, dst, src1, src2); 6863 } 6864 return dst; 6865 } 6866 6867 LogicVRegister Simulator::scvtf(VectorFormat vform, 6868 unsigned dst_data_size_in_bits, 6869 unsigned src_data_size_in_bits, 6870 LogicVRegister dst, 6871 const LogicPRegister& pg, 6872 const LogicVRegister& src, 6873 FPRounding round, 6874 int fbits) { 6875 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); 6876 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); 6877 dst.ClearForWrite(vform); 6878 6879 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6880 if (!pg.IsActive(vform, i)) continue; 6881 6882 int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1, 6883 0, 6884 src.Uint(vform, i)); 6885 6886 switch (dst_data_size_in_bits) { 6887 case kHRegSize: { 6888 SimFloat16 result = FixedToFloat16(value, fbits, round); 6889 dst.SetUint(vform, i, Float16ToRawbits(result)); 6890 break; 6891 } 6892 case kSRegSize: { 6893 float result = FixedToFloat(value, fbits, round); 6894 dst.SetUint(vform, i, FloatToRawbits(result)); 6895 break; 6896 } 6897 case kDRegSize: { 6898 double result = FixedToDouble(value, fbits, round); 6899 dst.SetUint(vform, i, DoubleToRawbits(result)); 6900 break; 6901 } 6902 default: 6903 VIXL_UNIMPLEMENTED(); 6904 break; 6905 } 6906 } 6907 6908 return dst; 6909 } 6910 6911 LogicVRegister Simulator::scvtf(VectorFormat vform, 
6912 LogicVRegister dst, 6913 const LogicVRegister& src, 6914 int fbits, 6915 FPRounding round) { 6916 return scvtf(vform, 6917 LaneSizeInBitsFromFormat(vform), 6918 LaneSizeInBitsFromFormat(vform), 6919 dst, 6920 GetPTrue(), 6921 src, 6922 round, 6923 fbits); 6924 } 6925 6926 LogicVRegister Simulator::ucvtf(VectorFormat vform, 6927 unsigned dst_data_size_in_bits, 6928 unsigned src_data_size_in_bits, 6929 LogicVRegister dst, 6930 const LogicPRegister& pg, 6931 const LogicVRegister& src, 6932 FPRounding round, 6933 int fbits) { 6934 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits); 6935 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits); 6936 dst.ClearForWrite(vform); 6937 6938 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 6939 if (!pg.IsActive(vform, i)) continue; 6940 6941 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1, 6942 0, 6943 src.Uint(vform, i)); 6944 6945 switch (dst_data_size_in_bits) { 6946 case kHRegSize: { 6947 SimFloat16 result = UFixedToFloat16(value, fbits, round); 6948 dst.SetUint(vform, i, Float16ToRawbits(result)); 6949 break; 6950 } 6951 case kSRegSize: { 6952 float result = UFixedToFloat(value, fbits, round); 6953 dst.SetUint(vform, i, FloatToRawbits(result)); 6954 break; 6955 } 6956 case kDRegSize: { 6957 double result = UFixedToDouble(value, fbits, round); 6958 dst.SetUint(vform, i, DoubleToRawbits(result)); 6959 break; 6960 } 6961 default: 6962 VIXL_UNIMPLEMENTED(); 6963 break; 6964 } 6965 } 6966 6967 return dst; 6968 } 6969 6970 LogicVRegister Simulator::ucvtf(VectorFormat vform, 6971 LogicVRegister dst, 6972 const LogicVRegister& src, 6973 int fbits, 6974 FPRounding round) { 6975 return ucvtf(vform, 6976 LaneSizeInBitsFromFormat(vform), 6977 LaneSizeInBitsFromFormat(vform), 6978 dst, 6979 GetPTrue(), 6980 src, 6981 round, 6982 fbits); 6983 } 6984 6985 LogicVRegister Simulator::unpk(VectorFormat vform, 6986 LogicVRegister dst, 6987 const LogicVRegister& src, 6988 
UnpackType unpack_type, 6989 ExtendType extend_type) { 6990 VectorFormat vform_half = VectorFormatHalfWidth(vform); 6991 const int lane_count = LaneCountFromFormat(vform); 6992 const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count; 6993 6994 switch (extend_type) { 6995 case kSignedExtend: { 6996 int64_t result[kZRegMaxSizeInBytes]; 6997 for (int i = 0; i < lane_count; ++i) { 6998 result[i] = src.Int(vform_half, i + src_start_lane); 6999 } 7000 for (int i = 0; i < lane_count; ++i) { 7001 dst.SetInt(vform, i, result[i]); 7002 } 7003 break; 7004 } 7005 case kUnsignedExtend: { 7006 uint64_t result[kZRegMaxSizeInBytes]; 7007 for (int i = 0; i < lane_count; ++i) { 7008 result[i] = src.Uint(vform_half, i + src_start_lane); 7009 } 7010 for (int i = 0; i < lane_count; ++i) { 7011 dst.SetUint(vform, i, result[i]); 7012 } 7013 break; 7014 } 7015 default: 7016 VIXL_UNREACHABLE(); 7017 } 7018 return dst; 7019 } 7020 7021 LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond, 7022 VectorFormat vform, 7023 LogicPRegister dst, 7024 const LogicPRegister& mask, 7025 const LogicVRegister& src1, 7026 const LogicVRegister& src2, 7027 bool is_wide_elements, 7028 FlagsUpdate flags) { 7029 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { 7030 bool result = false; 7031 if (mask.IsActive(vform, lane)) { 7032 int64_t op1 = 0xbadbeef; 7033 int64_t op2 = 0xbadbeef; 7034 int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize; 7035 switch (cond) { 7036 case eq: 7037 case ge: 7038 case gt: 7039 case lt: 7040 case le: 7041 case ne: 7042 op1 = src1.Int(vform, lane); 7043 op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane) 7044 : src2.Int(vform, lane); 7045 break; 7046 case hi: 7047 case hs: 7048 case ls: 7049 case lo: 7050 op1 = src1.Uint(vform, lane); 7051 op2 = is_wide_elements ? 
src2.Uint(kFormatVnD, d_lane) 7052 : src2.Uint(vform, lane); 7053 break; 7054 default: 7055 VIXL_UNREACHABLE(); 7056 } 7057 7058 switch (cond) { 7059 case eq: 7060 result = (op1 == op2); 7061 break; 7062 case ne: 7063 result = (op1 != op2); 7064 break; 7065 case ge: 7066 result = (op1 >= op2); 7067 break; 7068 case gt: 7069 result = (op1 > op2); 7070 break; 7071 case le: 7072 result = (op1 <= op2); 7073 break; 7074 case lt: 7075 result = (op1 < op2); 7076 break; 7077 case hs: 7078 result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2)); 7079 break; 7080 case hi: 7081 result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2)); 7082 break; 7083 case ls: 7084 result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2)); 7085 break; 7086 case lo: 7087 result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2)); 7088 break; 7089 default: 7090 VIXL_UNREACHABLE(); 7091 } 7092 } 7093 dst.SetActive(vform, lane, result); 7094 } 7095 7096 if (flags == SetFlags) PredTest(vform, mask, dst); 7097 7098 return dst; 7099 } 7100 7101 LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op, 7102 VectorFormat vform, 7103 LogicVRegister dst, 7104 const LogicVRegister& src1, 7105 const LogicVRegister& src2, 7106 bool is_wide_elements) { 7107 unsigned lane_size = LaneSizeInBitsFromFormat(vform); 7108 VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform; 7109 7110 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) { 7111 int shift_src_lane = lane; 7112 if (is_wide_elements) { 7113 // If the shift amount comes from wide elements, select the D-sized lane 7114 // which occupies the corresponding lanes of the value to be shifted. 7115 shift_src_lane = (lane * lane_size) / kDRegSize; 7116 } 7117 uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane); 7118 7119 // Saturate shift_amount to the size of the lane that will be shifted. 
7120 if (shift_amount > lane_size) shift_amount = lane_size; 7121 7122 uint64_t value = src1.Uint(vform, lane); 7123 int64_t result = ShiftOperand(lane_size, 7124 value, 7125 shift_op, 7126 static_cast<unsigned>(shift_amount)); 7127 dst.SetUint(vform, lane, result); 7128 } 7129 7130 return dst; 7131 } 7132 7133 LogicVRegister Simulator::asrd(VectorFormat vform, 7134 LogicVRegister dst, 7135 const LogicVRegister& src1, 7136 int shift) { 7137 VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <= 7138 LaneSizeInBitsFromFormat(vform))); 7139 7140 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 7141 int64_t value = src1.Int(vform, i); 7142 if (shift <= 63) { 7143 if (value < 0) { 7144 // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely 7145 // cast to int64_t, and cannot cause signed overflow in the result. 7146 value = value + GetUintMask(shift); 7147 } 7148 value = ShiftOperand(kDRegSize, value, ASR, shift); 7149 } else { 7150 value = 0; 7151 } 7152 dst.SetInt(vform, i, value); 7153 } 7154 return dst; 7155 } 7156 7157 LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper( 7158 LogicalOp logical_op, 7159 VectorFormat vform, 7160 LogicVRegister zd, 7161 const LogicVRegister& zn, 7162 const LogicVRegister& zm) { 7163 VIXL_ASSERT(IsSVEFormat(vform)); 7164 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 7165 uint64_t op1 = zn.Uint(vform, i); 7166 uint64_t op2 = zm.Uint(vform, i); 7167 uint64_t result = 0; 7168 switch (logical_op) { 7169 case AND: 7170 result = op1 & op2; 7171 break; 7172 case BIC: 7173 result = op1 & ~op2; 7174 break; 7175 case EOR: 7176 result = op1 ^ op2; 7177 break; 7178 case ORR: 7179 result = op1 | op2; 7180 break; 7181 default: 7182 VIXL_UNIMPLEMENTED(); 7183 } 7184 zd.SetUint(vform, i, result); 7185 } 7186 7187 return zd; 7188 } 7189 7190 LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op, 7191 LogicPRegister pd, 7192 const LogicPRegister& pn, 7193 const LogicPRegister& pm) { 
7194 for (int i = 0; i < pn.GetChunkCount(); i++) { 7195 LogicPRegister::ChunkType op1 = pn.GetChunk(i); 7196 LogicPRegister::ChunkType op2 = pm.GetChunk(i); 7197 LogicPRegister::ChunkType result = 0; 7198 switch (op) { 7199 case ANDS_p_p_pp_z: 7200 case AND_p_p_pp_z: 7201 result = op1 & op2; 7202 break; 7203 case BICS_p_p_pp_z: 7204 case BIC_p_p_pp_z: 7205 result = op1 & ~op2; 7206 break; 7207 case EORS_p_p_pp_z: 7208 case EOR_p_p_pp_z: 7209 result = op1 ^ op2; 7210 break; 7211 case NANDS_p_p_pp_z: 7212 case NAND_p_p_pp_z: 7213 result = ~(op1 & op2); 7214 break; 7215 case NORS_p_p_pp_z: 7216 case NOR_p_p_pp_z: 7217 result = ~(op1 | op2); 7218 break; 7219 case ORNS_p_p_pp_z: 7220 case ORN_p_p_pp_z: 7221 result = op1 | ~op2; 7222 break; 7223 case ORRS_p_p_pp_z: 7224 case ORR_p_p_pp_z: 7225 result = op1 | op2; 7226 break; 7227 default: 7228 VIXL_UNIMPLEMENTED(); 7229 } 7230 pd.SetChunk(i, result); 7231 } 7232 return pd; 7233 } 7234 7235 LogicVRegister Simulator::SVEBitwiseImmHelper( 7236 SVEBitwiseLogicalWithImm_UnpredicatedOp op, 7237 VectorFormat vform, 7238 LogicVRegister zd, 7239 uint64_t imm) { 7240 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 7241 uint64_t op1 = zd.Uint(vform, i); 7242 uint64_t result = 0; 7243 switch (op) { 7244 case AND_z_zi: 7245 result = op1 & imm; 7246 break; 7247 case EOR_z_zi: 7248 result = op1 ^ imm; 7249 break; 7250 case ORR_z_zi: 7251 result = op1 | imm; 7252 break; 7253 default: 7254 VIXL_UNIMPLEMENTED(); 7255 } 7256 zd.SetUint(vform, i, result); 7257 } 7258 7259 return zd; 7260 } 7261 7262 void Simulator::SVEStructuredStoreHelper(VectorFormat vform, 7263 const LogicPRegister& pg, 7264 unsigned zt_code, 7265 const LogicSVEAddressVector& addr) { 7266 VIXL_ASSERT(zt_code < kNumberOfZRegisters); 7267 7268 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); 7269 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2(); 7270 int msize_in_bytes = addr.GetMsizeInBytes(); 7271 int reg_count = addr.GetRegCount(); 7272 7273 
VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2); 7274 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4)); 7275 7276 unsigned zt_codes[4] = {zt_code, 7277 (zt_code + 1) % kNumberOfZRegisters, 7278 (zt_code + 2) % kNumberOfZRegisters, 7279 (zt_code + 3) % kNumberOfZRegisters}; 7280 7281 LogicVRegister zt[4] = { 7282 ReadVRegister(zt_codes[0]), 7283 ReadVRegister(zt_codes[1]), 7284 ReadVRegister(zt_codes[2]), 7285 ReadVRegister(zt_codes[3]), 7286 }; 7287 7288 // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes 7289 // are ignored, so read the source register using the VectorFormat that 7290 // corresponds with the storage format, and multiply the index accordingly. 7291 VectorFormat unpack_vform = 7292 SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2); 7293 int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2; 7294 7295 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 7296 if (!pg.IsActive(vform, i)) continue; 7297 7298 for (int r = 0; r < reg_count; r++) { 7299 uint64_t element_address = addr.GetElementAddress(i, r); 7300 if (!StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address)) { 7301 return; 7302 } 7303 } 7304 } 7305 7306 if (ShouldTraceWrites()) { 7307 PrintRegisterFormat format = GetPrintRegisterFormat(vform); 7308 if (esize_in_bytes_log2 == msize_in_bytes_log2) { 7309 // Use an FP format where it's likely that we're accessing FP data. 7310 format = GetPrintRegisterFormatTryFP(format); 7311 } 7312 // Stores don't represent a change to the source register's value, so only 7313 // print the relevant part of the value. 
7314 format = GetPrintRegPartial(format); 7315 7316 PrintZStructAccess(zt_code, 7317 reg_count, 7318 pg, 7319 format, 7320 msize_in_bytes, 7321 "->", 7322 addr); 7323 } 7324 } 7325 7326 bool Simulator::SVEStructuredLoadHelper(VectorFormat vform, 7327 const LogicPRegister& pg, 7328 unsigned zt_code, 7329 const LogicSVEAddressVector& addr, 7330 bool is_signed) { 7331 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform); 7332 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2(); 7333 int msize_in_bytes = addr.GetMsizeInBytes(); 7334 int reg_count = addr.GetRegCount(); 7335 7336 VIXL_ASSERT(zt_code < kNumberOfZRegisters); 7337 VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2); 7338 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4)); 7339 7340 unsigned zt_codes[4] = {zt_code, 7341 (zt_code + 1) % kNumberOfZRegisters, 7342 (zt_code + 2) % kNumberOfZRegisters, 7343 (zt_code + 3) % kNumberOfZRegisters}; 7344 LogicVRegister zt[4] = { 7345 ReadVRegister(zt_codes[0]), 7346 ReadVRegister(zt_codes[1]), 7347 ReadVRegister(zt_codes[2]), 7348 ReadVRegister(zt_codes[3]), 7349 }; 7350 7351 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 7352 for (int r = 0; r < reg_count; r++) { 7353 uint64_t element_address = addr.GetElementAddress(i, r); 7354 7355 if (!pg.IsActive(vform, i)) { 7356 zt[r].SetUint(vform, i, 0); 7357 continue; 7358 } 7359 7360 if (is_signed) { 7361 if (!LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address)) { 7362 return false; 7363 } 7364 } else { 7365 if (!LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address)) { 7366 return false; 7367 } 7368 } 7369 } 7370 } 7371 7372 if (ShouldTraceVRegs()) { 7373 PrintRegisterFormat format = GetPrintRegisterFormat(vform); 7374 if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) { 7375 // Use an FP format where it's likely that we're accessing FP data. 
7376 format = GetPrintRegisterFormatTryFP(format); 7377 } 7378 PrintZStructAccess(zt_code, 7379 reg_count, 7380 pg, 7381 format, 7382 msize_in_bytes, 7383 "<-", 7384 addr); 7385 } 7386 return true; 7387 } 7388 7389 LogicPRegister Simulator::brka(LogicPRegister pd, 7390 const LogicPRegister& pg, 7391 const LogicPRegister& pn) { 7392 bool break_ = false; 7393 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { 7394 if (pg.IsActive(kFormatVnB, i)) { 7395 pd.SetActive(kFormatVnB, i, !break_); 7396 break_ |= pn.IsActive(kFormatVnB, i); 7397 } 7398 } 7399 7400 return pd; 7401 } 7402 7403 LogicPRegister Simulator::brkb(LogicPRegister pd, 7404 const LogicPRegister& pg, 7405 const LogicPRegister& pn) { 7406 bool break_ = false; 7407 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { 7408 if (pg.IsActive(kFormatVnB, i)) { 7409 break_ |= pn.IsActive(kFormatVnB, i); 7410 pd.SetActive(kFormatVnB, i, !break_); 7411 } 7412 } 7413 7414 return pd; 7415 } 7416 7417 LogicPRegister Simulator::brkn(LogicPRegister pdm, 7418 const LogicPRegister& pg, 7419 const LogicPRegister& pn) { 7420 if (!IsLastActive(kFormatVnB, pg, pn)) { 7421 pfalse(pdm); 7422 } 7423 return pdm; 7424 } 7425 7426 LogicPRegister Simulator::brkpa(LogicPRegister pd, 7427 const LogicPRegister& pg, 7428 const LogicPRegister& pn, 7429 const LogicPRegister& pm) { 7430 bool last_active = IsLastActive(kFormatVnB, pg, pn); 7431 7432 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { 7433 bool active = false; 7434 if (pg.IsActive(kFormatVnB, i)) { 7435 active = last_active; 7436 last_active = last_active && !pm.IsActive(kFormatVnB, i); 7437 } 7438 pd.SetActive(kFormatVnB, i, active); 7439 } 7440 7441 return pd; 7442 } 7443 7444 LogicPRegister Simulator::brkpb(LogicPRegister pd, 7445 const LogicPRegister& pg, 7446 const LogicPRegister& pn, 7447 const LogicPRegister& pm) { 7448 bool last_active = IsLastActive(kFormatVnB, pg, pn); 7449 7450 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) { 
7451 bool active = false; 7452 if (pg.IsActive(kFormatVnB, i)) { 7453 last_active = last_active && !pm.IsActive(kFormatVnB, i); 7454 active = last_active; 7455 } 7456 pd.SetActive(kFormatVnB, i, active); 7457 } 7458 7459 return pd; 7460 } 7461 7462 void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform, 7463 const LogicPRegister& pg, 7464 unsigned zt_code, 7465 const LogicSVEAddressVector& addr, 7466 SVEFaultTolerantLoadType type, 7467 bool is_signed) { 7468 int esize_in_bytes = LaneSizeInBytesFromFormat(vform); 7469 int msize_in_bits = addr.GetMsizeInBits(); 7470 int msize_in_bytes = addr.GetMsizeInBytes(); 7471 7472 VIXL_ASSERT(zt_code < kNumberOfZRegisters); 7473 VIXL_ASSERT(esize_in_bytes >= msize_in_bytes); 7474 VIXL_ASSERT(addr.GetRegCount() == 1); 7475 7476 LogicVRegister zt = ReadVRegister(zt_code); 7477 LogicPRegister ffr = ReadFFR(); 7478 7479 // Non-faulting loads are allowed to fail arbitrarily. To stress user 7480 // code, fail a random element in roughly one in eight full-vector loads. 7481 uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_)); 7482 int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8); 7483 7484 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 7485 uint64_t value = 0; 7486 7487 if (pg.IsActive(vform, i)) { 7488 uint64_t element_address = addr.GetElementAddress(i, 0); 7489 7490 if (type == kSVEFirstFaultLoad) { 7491 // First-faulting loads always load the first active element, regardless 7492 // of FFR. The result will be discarded if its FFR lane is inactive, but 7493 // it could still generate a fault. 7494 VIXL_DEFINE_OR_RETURN(mem_result, 7495 MemReadUint(msize_in_bytes, element_address)); 7496 value = mem_result; 7497 // All subsequent elements have non-fault semantics. 
7498 type = kSVENonFaultLoad; 7499 7500 } else if (ffr.IsActive(vform, i)) { 7501 // Simulation of fault-tolerant loads relies on system calls, and is 7502 // likely to be relatively slow, so we only actually perform the load if 7503 // its FFR lane is active. 7504 7505 bool can_read = (i < fake_fault_at_lane) && 7506 CanReadMemory(element_address, msize_in_bytes); 7507 if (can_read) { 7508 VIXL_DEFINE_OR_RETURN(mem_result, 7509 MemReadUint(msize_in_bytes, element_address)); 7510 value = mem_result; 7511 } else { 7512 // Propagate the fault to the end of FFR. 7513 for (int j = i; j < LaneCountFromFormat(vform); j++) { 7514 ffr.SetActive(vform, j, false); 7515 } 7516 } 7517 } 7518 } 7519 7520 // The architecture permits a few possible results for inactive FFR lanes 7521 // (including those caused by a fault in this instruction). We choose to 7522 // leave the register value unchanged (like merging predication) because 7523 // no other input to this instruction can have the same behaviour. 7524 // 7525 // Note that this behaviour takes precedence over pg's zeroing predication. 7526 7527 if (ffr.IsActive(vform, i)) { 7528 int msb = msize_in_bits - 1; 7529 if (is_signed) { 7530 zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value)); 7531 } else { 7532 zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value)); 7533 } 7534 } 7535 } 7536 7537 if (ShouldTraceVRegs()) { 7538 PrintRegisterFormat format = GetPrintRegisterFormat(vform); 7539 if ((esize_in_bytes == msize_in_bytes) && !is_signed) { 7540 // Use an FP format where it's likely that we're accessing FP data. 7541 format = GetPrintRegisterFormatTryFP(format); 7542 } 7543 // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess 7544 // expects a single mask, so combine the two predicates. 
7545 SimPRegister mask; 7546 SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr); 7547 PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr); 7548 } 7549 } 7550 7551 void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr, 7552 VectorFormat vform, 7553 SVEOffsetModifier mod) { 7554 bool is_signed = instr->ExtractBit(14) == 0; 7555 bool is_ff = instr->ExtractBit(13) == 1; 7556 // Note that these instructions don't use the Dtype encoding. 7557 int msize_in_bytes_log2 = instr->ExtractBits(24, 23); 7558 int scale = instr->ExtractBit(21) * msize_in_bytes_log2; 7559 uint64_t base = ReadXRegister(instr->GetRn(), Reg31IsStackPointer); 7560 LogicSVEAddressVector addr(base, 7561 &ReadVRegister(instr->GetRm()), 7562 vform, 7563 mod, 7564 scale); 7565 addr.SetMsizeInBytesLog2(msize_in_bytes_log2); 7566 if (is_ff) { 7567 SVEFaultTolerantLoadHelper(vform, 7568 ReadPRegister(instr->GetPgLow8()), 7569 instr->GetRt(), 7570 addr, 7571 kSVEFirstFaultLoad, 7572 is_signed); 7573 } else { 7574 SVEStructuredLoadHelper(vform, 7575 ReadPRegister(instr->GetPgLow8()), 7576 instr->GetRt(), 7577 addr, 7578 is_signed); 7579 } 7580 } 7581 7582 int Simulator::GetFirstActive(VectorFormat vform, 7583 const LogicPRegister& pg) const { 7584 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 7585 if (pg.IsActive(vform, i)) return i; 7586 } 7587 return -1; 7588 } 7589 7590 int Simulator::GetLastActive(VectorFormat vform, 7591 const LogicPRegister& pg) const { 7592 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 7593 if (pg.IsActive(vform, i)) return i; 7594 } 7595 return -1; 7596 } 7597 7598 int Simulator::CountActiveLanes(VectorFormat vform, 7599 const LogicPRegister& pg) const { 7600 int count = 0; 7601 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 7602 count += pg.IsActive(vform, i) ? 
1 : 0; 7603 } 7604 return count; 7605 } 7606 7607 int Simulator::CountActiveAndTrueLanes(VectorFormat vform, 7608 const LogicPRegister& pg, 7609 const LogicPRegister& pn) const { 7610 int count = 0; 7611 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 7612 count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0; 7613 } 7614 return count; 7615 } 7616 7617 int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform, 7618 int pattern) const { 7619 VIXL_ASSERT(IsSVEFormat(vform)); 7620 int all = LaneCountFromFormat(vform); 7621 VIXL_ASSERT(all > 0); 7622 7623 switch (pattern) { 7624 case SVE_VL1: 7625 case SVE_VL2: 7626 case SVE_VL3: 7627 case SVE_VL4: 7628 case SVE_VL5: 7629 case SVE_VL6: 7630 case SVE_VL7: 7631 case SVE_VL8: 7632 // VL1-VL8 are encoded directly. 7633 VIXL_STATIC_ASSERT(SVE_VL1 == 1); 7634 VIXL_STATIC_ASSERT(SVE_VL8 == 8); 7635 return (pattern <= all) ? pattern : 0; 7636 case SVE_VL16: 7637 case SVE_VL32: 7638 case SVE_VL64: 7639 case SVE_VL128: 7640 case SVE_VL256: { 7641 // VL16-VL256 are encoded as log2(N) + c. 7642 int min = 16 << (pattern - SVE_VL16); 7643 return (min <= all) ? min : 0; 7644 } 7645 // Special cases. 7646 case SVE_POW2: 7647 return 1 << HighestSetBitPosition(all); 7648 case SVE_MUL4: 7649 return all - (all % 4); 7650 case SVE_MUL3: 7651 return all - (all % 3); 7652 case SVE_ALL: 7653 return all; 7654 } 7655 // Unnamed cases architecturally return 0. 
7656 return 0; 7657 } 7658 7659 LogicPRegister Simulator::match(VectorFormat vform, 7660 LogicPRegister dst, 7661 const LogicVRegister& haystack, 7662 const LogicVRegister& needles, 7663 bool negate_match) { 7664 SimVRegister ztemp; 7665 SimPRegister ptemp; 7666 7667 pfalse(dst); 7668 int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform); 7669 for (int i = 0; i < lanes_per_segment; i++) { 7670 dup_elements_to_segments(vform, ztemp, needles, i); 7671 SVEIntCompareVectorsHelper(eq, 7672 vform, 7673 ptemp, 7674 GetPTrue(), 7675 haystack, 7676 ztemp, 7677 false, 7678 LeaveFlags); 7679 SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp); 7680 } 7681 if (negate_match) { 7682 ptrue(vform, ptemp, SVE_ALL); 7683 SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp); 7684 } 7685 return dst; 7686 } 7687 7688 uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const { 7689 if (IsContiguous()) { 7690 return base_ + (lane * GetRegCount()) * GetMsizeInBytes(); 7691 } 7692 7693 VIXL_ASSERT(IsScatterGather()); 7694 VIXL_ASSERT(vector_ != NULL); 7695 7696 // For scatter-gather accesses, we need to extract the offset from vector_, 7697 // and apply modifiers. 7698 7699 uint64_t offset = 0; 7700 switch (vector_form_) { 7701 case kFormatVnS: 7702 offset = vector_->GetLane<uint32_t>(lane); 7703 break; 7704 case kFormatVnD: 7705 offset = vector_->GetLane<uint64_t>(lane); 7706 break; 7707 default: 7708 VIXL_UNIMPLEMENTED(); 7709 break; 7710 } 7711 7712 switch (vector_mod_) { 7713 case SVE_MUL_VL: 7714 VIXL_UNIMPLEMENTED(); 7715 break; 7716 case SVE_LSL: 7717 // We apply the shift below. There's nothing to do here. 
7718 break; 7719 case NO_SVE_OFFSET_MODIFIER: 7720 VIXL_ASSERT(vector_shift_ == 0); 7721 break; 7722 case SVE_UXTW: 7723 offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset); 7724 break; 7725 case SVE_SXTW: 7726 offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset); 7727 break; 7728 } 7729 7730 return base_ + (offset << vector_shift_); 7731 } 7732 7733 LogicVRegister Simulator::pack_odd_elements(VectorFormat vform, 7734 LogicVRegister dst, 7735 const LogicVRegister& src) { 7736 SimVRegister zero; 7737 zero.Clear(); 7738 return uzp2(vform, dst, src, zero); 7739 } 7740 7741 LogicVRegister Simulator::pack_even_elements(VectorFormat vform, 7742 LogicVRegister dst, 7743 const LogicVRegister& src) { 7744 SimVRegister zero; 7745 zero.Clear(); 7746 return uzp1(vform, dst, src, zero); 7747 } 7748 7749 LogicVRegister Simulator::adcl(VectorFormat vform, 7750 LogicVRegister dst, 7751 const LogicVRegister& src1, 7752 const LogicVRegister& src2, 7753 bool top) { 7754 unsigned reg_size = LaneSizeInBitsFromFormat(vform); 7755 VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize)); 7756 7757 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 7758 uint64_t left = src1.Uint(vform, i + (top ? 1 : 0)); 7759 uint64_t right = dst.Uint(vform, i); 7760 unsigned carry_in = src2.Uint(vform, i + 1) & 1; 7761 std::pair<uint64_t, uint8_t> val_and_flags = 7762 AddWithCarry(reg_size, left, right, carry_in); 7763 7764 // Set even lanes to the result of the addition. 7765 dst.SetUint(vform, i, val_and_flags.first); 7766 7767 // Set odd lanes to the carry flag from the addition. 7768 uint64_t carry_out = (val_and_flags.second >> 1) & 1; 7769 dst.SetUint(vform, i + 1, carry_out); 7770 } 7771 return dst; 7772 } 7773 7774 // Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add 7775 // the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst. 
7776 // 7777 // Matrices of the form: 7778 // 7779 // src1 = ( a b c d e f g h ) src2 = ( A B ) 7780 // ( i j k l m n o p ) ( C D ) 7781 // ( E F ) 7782 // ( G H ) 7783 // ( I J ) 7784 // ( K L ) 7785 // ( M N ) 7786 // ( O P ) 7787 // 7788 // Are stored in the input vector registers as: 7789 // 7790 // 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 7791 // src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ] 7792 // src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ] 7793 // 7794 LogicVRegister Simulator::matmul(VectorFormat vform_dst, 7795 LogicVRegister srcdst, 7796 const LogicVRegister& src1, 7797 const LogicVRegister& src2, 7798 bool src1_signed, 7799 bool src2_signed) { 7800 // Two destination forms are supported: Q register containing four S-sized 7801 // elements (4S) and Z register containing n S-sized elements (VnS). 7802 VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS)); 7803 VectorFormat vform_src = kFormatVnB; 7804 int b_per_segment = kQRegSize / kBRegSize; 7805 int s_per_segment = kQRegSize / kSRegSize; 7806 int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {}; 7807 int segment_count = LaneCountFromFormat(vform_dst) / 4; 7808 for (int seg = 0; seg < segment_count; seg++) { 7809 for (int i = 0; i < 2; i++) { 7810 for (int j = 0; j < 2; j++) { 7811 int dstidx = (2 * i) + j + (seg * s_per_segment); 7812 int64_t sum = srcdst.Int(vform_dst, dstidx); 7813 for (int k = 0; k < 8; k++) { 7814 int idx1 = (8 * i) + k + (seg * b_per_segment); 7815 int idx2 = (8 * j) + k + (seg * b_per_segment); 7816 int64_t e1 = src1_signed ? src1.Int(vform_src, idx1) 7817 : src1.Uint(vform_src, idx1); 7818 int64_t e2 = src2_signed ? 
src2.Int(vform_src, idx2) 7819 : src2.Uint(vform_src, idx2); 7820 sum += e1 * e2; 7821 } 7822 result[dstidx] = sum; 7823 } 7824 } 7825 } 7826 srcdst.SetIntArray(vform_dst, result); 7827 return srcdst; 7828 } 7829 7830 // Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2 7831 // result to the matrix in srcdst, and write back to srcdst. 7832 // 7833 // Matrices of the form: 7834 // 7835 // src1 = ( a b ) src2 = ( A B ) 7836 // ( c d ) ( C D ) 7837 // 7838 // Are stored in the input vector registers as: 7839 // 7840 // 3 2 1 0 7841 // src1 = [ d | c | b | a ] 7842 // src2 = [ D | B | C | A ] 7843 // 7844 template <typename T> 7845 LogicVRegister Simulator::fmatmul(VectorFormat vform, 7846 LogicVRegister srcdst, 7847 const LogicVRegister& src1, 7848 const LogicVRegister& src2) { 7849 T result[kZRegMaxSizeInBytes / sizeof(T)]; 7850 int T_per_segment = 4; 7851 int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T)); 7852 for (int seg = 0; seg < segment_count; seg++) { 7853 int segoff = seg * T_per_segment; 7854 for (int i = 0; i < 2; i++) { 7855 for (int j = 0; j < 2; j++) { 7856 T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff), 7857 src2.Float<T>(2 * j + 0 + segoff)); 7858 T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff), 7859 src2.Float<T>(2 * j + 1 + segoff)); 7860 T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0); 7861 result[2 * i + j + segoff] = FPAdd(sum, prod1); 7862 } 7863 } 7864 } 7865 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 7866 // Elements outside a multiple of 4T are set to zero. This happens only 7867 // for double precision operations, when the VL is a multiple of 128 bits, 7868 // but not a multiple of 256 bits. 7869 T value = (i < (T_per_segment * segment_count)) ? 
result[i] : 0; 7870 srcdst.SetFloat<T>(vform, i, value); 7871 } 7872 return srcdst; 7873 } 7874 7875 LogicVRegister Simulator::fmatmul(VectorFormat vform, 7876 LogicVRegister dst, 7877 const LogicVRegister& src1, 7878 const LogicVRegister& src2) { 7879 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 7880 fmatmul<float>(vform, dst, src1, src2); 7881 } else { 7882 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 7883 fmatmul<double>(vform, dst, src1, src2); 7884 } 7885 return dst; 7886 } 7887 7888 } // namespace aarch64 7889 } // namespace vixl 7890 7891 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64