duckstation

duckstation, archived from the revision just before upstream relicensed it as a proprietary software project; this version is the libre one
git clone https://git.neptards.moe/u3shit/duckstation.git

macro-assembler-aarch64.cc (103670B)


// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "macro-assembler-aarch64.h"

#include <cctype>

namespace vixl {
namespace aarch64 {


void Pool::Release() {
  if (--monitor_ == 0) {
    // Ensure the pool has not been blocked for too long.
    VIXL_ASSERT(masm_->GetCursorOffset() < checkpoint_);
  }
}
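
// Editorial note: `monitor_` appears to count nested pool-blocking scopes,
// so the checkpoint assertion above only runs once the outermost scope is
// released.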


void Pool::SetNextCheckpoint(ptrdiff_t checkpoint) {
  masm_->checkpoint_ = std::min(masm_->checkpoint_, checkpoint);
  checkpoint_ = checkpoint;
}


LiteralPool::LiteralPool(MacroAssembler* masm)
    : Pool(masm),
      size_(0),
      first_use_(-1),
      recommended_checkpoint_(kNoCheckpointRequired) {}


LiteralPool::~LiteralPool() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
  VIXL_ASSERT(IsEmpty());
  VIXL_ASSERT(!IsBlocked());
  for (std::vector<RawLiteral*>::iterator it = deleted_on_destruction_.begin();
       it != deleted_on_destruction_.end();
       it++) {
    delete *it;
  }
}


void LiteralPool::Reset() {
  std::vector<RawLiteral*>::iterator it, end;
  for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
    RawLiteral* literal = *it;
    if (literal->deletion_policy_ == RawLiteral::kDeletedOnPlacementByPool) {
      delete literal;
    }
  }
  entries_.clear();
  size_ = 0;
  first_use_ = -1;
  Pool::Reset();
  recommended_checkpoint_ = kNoCheckpointRequired;
}


void LiteralPool::CheckEmitFor(size_t amount, EmitOption option) {
  if (IsEmpty() || IsBlocked()) return;

  ptrdiff_t distance = masm_->GetCursorOffset() + amount - first_use_;
  if (distance >= kRecommendedLiteralPoolRange) {
    Emit(option);
  }
}


void LiteralPool::CheckEmitForBranch(size_t range) {
  if (IsEmpty() || IsBlocked()) return;
  if (GetMaxSize() >= range) Emit();
}

// We use a subclass to access the protected `ExactAssemblyScope` constructor
// giving us control over the pools. This allows us to use this scope within
// code emitting pools without creating a circular dependency.
// We keep the constructor private to restrict usage of this helper class.
class ExactAssemblyScopeWithoutPoolsCheck : public ExactAssemblyScope {
 private:
  ExactAssemblyScopeWithoutPoolsCheck(MacroAssembler* masm, size_t size)
      : ExactAssemblyScope(masm,
                           size,
                           ExactAssemblyScope::kExactSize,
                           ExactAssemblyScope::kIgnorePools) {}

  friend void LiteralPool::Emit(LiteralPool::EmitOption);
  friend void VeneerPool::Emit(VeneerPool::EmitOption, size_t);
};


void LiteralPool::Emit(EmitOption option) {
  // There is an issue if we are asked to emit a blocked or empty pool.
  VIXL_ASSERT(!IsBlocked());
  VIXL_ASSERT(!IsEmpty());

  size_t pool_size = GetSize();
  size_t emit_size = pool_size;
  if (option == kBranchRequired) emit_size += kInstructionSize;
  Label end_of_pool;

  VIXL_ASSERT(emit_size % kInstructionSize == 0);
  {
    CodeBufferCheckScope guard(masm_,
                               emit_size,
                               CodeBufferCheckScope::kCheck,
                               CodeBufferCheckScope::kExactSize);
#ifdef VIXL_DEBUG
    // Also explicitly disallow usage of the `MacroAssembler` here.
    masm_->SetAllowMacroInstructions(false);
#endif
    if (option == kBranchRequired) {
      ExactAssemblyScopeWithoutPoolsCheck eas_guard(masm_, kInstructionSize);
      masm_->b(&end_of_pool);
    }

    {
      // Marker indicating the size of the literal pool in 32-bit words.
      VIXL_ASSERT((pool_size % kWRegSizeInBytes) == 0);
      ExactAssemblyScopeWithoutPoolsCheck eas_guard(masm_, kInstructionSize);
      masm_->ldr(xzr, static_cast<int>(pool_size / kWRegSizeInBytes));
    }

    // Now populate the literal pool.
    std::vector<RawLiteral*>::iterator it, end;
    for (it = entries_.begin(), end = entries_.end(); it != end; ++it) {
      VIXL_ASSERT((*it)->IsUsed());
      masm_->place(*it);
    }

    if (option == kBranchRequired) masm_->bind(&end_of_pool);
#ifdef VIXL_DEBUG
    masm_->SetAllowMacroInstructions(true);
#endif
  }

  Reset();
}
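
// Editorial note: with `kBranchRequired` the emitted pool therefore looks
// like this (illustrative layout):
//       b    end_of_pool          // skip over the pool data
//       ldr  xzr, #<size_marker>  // marker: pool size in 32-bit words
//       <literal data ...>
//     end_of_pool: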


void LiteralPool::AddEntry(RawLiteral* literal) {
  // A literal must be registered immediately before its first use. Here we
  // cannot control that it is its first use, but we check no code has been
  // emitted since its last use.
  VIXL_ASSERT(masm_->GetCursorOffset() == literal->GetLastUse());

  UpdateFirstUse(masm_->GetCursorOffset());
  VIXL_ASSERT(masm_->GetCursorOffset() >= first_use_);
  entries_.push_back(literal);
  size_ += literal->GetSize();
}


void LiteralPool::UpdateFirstUse(ptrdiff_t use_position) {
  first_use_ = std::min(first_use_, use_position);
  if (first_use_ == -1) {
    first_use_ = use_position;
    SetNextRecommendedCheckpoint(GetNextRecommendedCheckpoint());
    SetNextCheckpoint(first_use_ + Instruction::kLoadLiteralRange);
  } else {
    VIXL_ASSERT(use_position > first_use_);
  }
}


void VeneerPool::Reset() {
  Pool::Reset();
  unresolved_branches_.Reset();
}


void VeneerPool::Release() {
  if (--monitor_ == 0) {
    VIXL_ASSERT(IsEmpty() || masm_->GetCursorOffset() <
                                 unresolved_branches_.GetFirstLimit());
  }
}


void VeneerPool::RegisterUnresolvedBranch(ptrdiff_t branch_pos,
                                          Label* label,
                                          ImmBranchType branch_type) {
  VIXL_ASSERT(!label->IsBound());
  BranchInfo branch_info = BranchInfo(branch_pos, label, branch_type);
  unresolved_branches_.insert(branch_info);
  UpdateNextCheckPoint();
  // TODO: In debug mode, register the label with the assembler to make sure
  // it is bound with masm Bind and not asm bind.
}


void VeneerPool::DeleteUnresolvedBranchInfoForLabel(Label* label) {
  if (IsEmpty()) {
    VIXL_ASSERT(checkpoint_ == kNoCheckpointRequired);
    return;
  }

  if (label->IsLinked()) {
    Label::LabelLinksIterator links_it(label);
    for (; !links_it.Done(); links_it.Advance()) {
      ptrdiff_t link_offset = *links_it.Current();
      Instruction* link = masm_->GetInstructionAt(link_offset);

      // ADR instructions are not handled.
      if (BranchTypeUsesVeneers(link->GetBranchType())) {
        BranchInfo branch_info(link_offset, label, link->GetBranchType());
        unresolved_branches_.erase(branch_info);
      }
    }
  }

  UpdateNextCheckPoint();
}


bool VeneerPool::ShouldEmitVeneer(int64_t first_unreacheable_pc,
                                  size_t amount) {
  ptrdiff_t offset =
      kPoolNonVeneerCodeSize + amount + GetMaxSize() + GetOtherPoolsMaxSize();
  return (masm_->GetCursorOffset() + offset) > first_unreacheable_pc;
}


void VeneerPool::CheckEmitFor(size_t amount, EmitOption option) {
  if (IsEmpty()) return;

  VIXL_ASSERT(masm_->GetCursorOffset() + kPoolNonVeneerCodeSize <
              unresolved_branches_.GetFirstLimit());

  if (IsBlocked()) return;

  if (ShouldEmitVeneers(amount)) {
    Emit(option, amount);
  } else {
    UpdateNextCheckPoint();
  }
}


void VeneerPool::Emit(EmitOption option, size_t amount) {
  // There is an issue if we are asked to emit a blocked or empty pool.
  VIXL_ASSERT(!IsBlocked());
  VIXL_ASSERT(!IsEmpty());

  Label end;
  if (option == kBranchRequired) {
    ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
    masm_->b(&end);
  }

  // We want to avoid generating veneer pools too often, so generate veneers for
  // branches that don't immediately require a veneer but will soon go out of
  // range.
  static const size_t kVeneerEmissionMargin = 1 * KBytes;

  for (BranchInfoSetIterator it(&unresolved_branches_); !it.Done();) {
    BranchInfo* branch_info = it.Current();
    if (ShouldEmitVeneer(branch_info->first_unreacheable_pc_,
                         amount + kVeneerEmissionMargin)) {
      CodeBufferCheckScope scope(masm_,
                                 kVeneerCodeSize,
                                 CodeBufferCheckScope::kCheck,
                                 CodeBufferCheckScope::kExactSize);
      ptrdiff_t branch_pos = branch_info->pc_offset_;
      Instruction* branch = masm_->GetInstructionAt(branch_pos);
      Label* label = branch_info->label_;

      // Patch the branch to point to the current position, and emit a branch
      // to the label.
      Instruction* veneer = masm_->GetCursorAddress<Instruction*>();
      branch->SetImmPCOffsetTarget(veneer);
      {
        ExactAssemblyScopeWithoutPoolsCheck guard(masm_, kInstructionSize);
        masm_->b(label);
      }

      // Update the label. The patched branch no longer points to it.
      label->DeleteLink(branch_pos);

      it.DeleteCurrentAndAdvance();
    } else {
      it.AdvanceToNextType();
    }
  }

  UpdateNextCheckPoint();

  masm_->bind(&end);
}
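
// Editorial note: after this pass, an out-of-range branch has effectively
// been rewritten as (illustrative):
//       cbz  x0, veneer      // original branch, retargeted to the veneer
//       ...
//     veneer:
//       b    label           // unconditional branch with a longer range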


MacroAssembler::MacroAssembler(byte* buffer,
                               size_t capacity,
                               PositionIndependentCodeOption pic)
    : Assembler(buffer, capacity, pic),
#ifdef VIXL_DEBUG
      allow_macro_instructions_(true),
#endif
      generate_simulator_code_(VIXL_AARCH64_GENERATE_SIMULATOR_CODE),
      sp_(sp),
      tmp_list_(ip0, ip1),
      v_tmp_list_(d31),
      p_tmp_list_(CPURegList::Empty(CPURegister::kPRegister)),
      current_scratch_scope_(NULL),
      literal_pool_(this),
      veneer_pool_(this),
      recommended_checkpoint_(Pool::kNoCheckpointRequired),
      fp_nan_propagation_(NoFPMacroNaNPropagationSelected) {
  checkpoint_ = GetNextCheckPoint();
}


MacroAssembler::~MacroAssembler() {}


void MacroAssembler::Reset() {
  Assembler::Reset();

  VIXL_ASSERT(!literal_pool_.IsBlocked());
  literal_pool_.Reset();
  veneer_pool_.Reset();

  checkpoint_ = GetNextCheckPoint();
}


void MacroAssembler::FinalizeCode(FinalizeOption option) {
  if (!literal_pool_.IsEmpty()) {
    // The user may decide to emit more code after Finalize; emit a branch over
    // the pool if that's the case.
    literal_pool_.Emit(option == kUnreachable ? Pool::kNoBranchRequired
                                              : Pool::kBranchRequired);
  }
  VIXL_ASSERT(veneer_pool_.IsEmpty());

  Assembler::FinalizeCode();
}


void MacroAssembler::CheckEmitFor(size_t amount) {
  CheckEmitPoolsFor(amount);
  VIXL_ASSERT(GetBuffer()->HasSpaceFor(amount));
}


void MacroAssembler::CheckEmitPoolsFor(size_t amount) {
  literal_pool_.CheckEmitFor(amount);
  veneer_pool_.CheckEmitFor(amount);
  checkpoint_ = GetNextCheckPoint();
}


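// Editorial note: when `masm` is NULL below, no code is emitted; the helper
// only counts and returns the number of instructions the move would need.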
int MacroAssembler::MoveImmediateHelper(MacroAssembler* masm,
                                        const Register& rd,
                                        uint64_t imm) {
  bool emit_code = (masm != NULL);
  VIXL_ASSERT(IsUint32(imm) || IsInt32(imm) || rd.Is64Bits());
  // The worst case for size is mov 64-bit immediate to sp:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction to move to sp
  MacroEmissionCheckScope guard(masm);

  // Immediates on AArch64 can be produced using an initial value, and zero to
  // three move-keep operations.
  //
  // Initial values can be generated with:
  //  1. 64-bit move zero (movz).
  //  2. 32-bit move inverted (movn).
  //  3. 64-bit move inverted.
  //  4. 32-bit orr immediate.
  //  5. 64-bit orr immediate.
  // Move-keep may then be used to modify each of the 16-bit half words.
  //
  // The code below supports all five initial value generators, and
  // applying move-keep operations to move-zero and move-inverted initial
  // values.
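  //
  // For example (illustrative, editorial): imm = 0x0000123456789abc is
  // materialised as
  //     movz x0, #0x9abc
  //     movk x0, #0x5678, lsl #16
  //     movk x0, #0x1234, lsl #32
  // with the all-zero top halfword skipped entirely.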

  // Try to move the immediate in one instruction, and if that fails, switch to
  // using multiple instructions.
  if (OneInstrMoveImmediateHelper(masm, rd, imm)) {
    return 1;
  } else {
    int instruction_count = 0;
    unsigned reg_size = rd.GetSizeInBits();

    // Generic immediate case. Imm will be represented by
    //   [imm3, imm2, imm1, imm0], where each imm is 16 bits.
    // A move-zero or move-inverted is generated for the first non-zero or
    // non-0xffff immX, and a move-keep for subsequent non-zero immX.

    uint64_t ignored_halfword = 0;
    bool invert_move = false;
    // If the number of 0xffff halfwords is greater than the number of 0x0000
    // halfwords, it's more efficient to use move-inverted.
    if (CountClearHalfWords(~imm, reg_size) >
        CountClearHalfWords(imm, reg_size)) {
      ignored_halfword = 0xffff;
      invert_move = true;
    }

    // Mov instructions can't move values into the stack pointer, so set up a
    // temporary register, if needed.
    UseScratchRegisterScope temps;
    Register temp;
    if (emit_code) {
      temps.Open(masm);
      temp = rd.IsSP() ? temps.AcquireSameSizeAs(rd) : rd;
    }

    // Iterate through the halfwords. Use movn/movz for the first non-ignored
    // halfword, and movk for subsequent halfwords.
    VIXL_ASSERT((reg_size % 16) == 0);
    bool first_mov_done = false;
    for (unsigned i = 0; i < (reg_size / 16); i++) {
      uint64_t imm16 = (imm >> (16 * i)) & 0xffff;
      if (imm16 != ignored_halfword) {
        if (!first_mov_done) {
          if (invert_move) {
            if (emit_code) masm->movn(temp, ~imm16 & 0xffff, 16 * i);
            instruction_count++;
          } else {
            if (emit_code) masm->movz(temp, imm16, 16 * i);
            instruction_count++;
          }
          first_mov_done = true;
        } else {
          // Construct a wider constant.
          if (emit_code) masm->movk(temp, imm16, 16 * i);
          instruction_count++;
        }
      }
    }

    VIXL_ASSERT(first_mov_done);

    // Move the temporary if the original destination register was the stack
    // pointer.
    if (rd.IsSP()) {
      if (emit_code) masm->mov(rd, temp);
      instruction_count++;
    }
    return instruction_count;
  }
}


void MacroAssembler::B(Label* label, BranchType type, Register reg, int bit) {
  VIXL_ASSERT((reg.Is(NoReg) || (type >= kBranchTypeFirstUsingReg)) &&
              ((bit == -1) || (type >= kBranchTypeFirstUsingBit)));
  if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) {
    B(static_cast<Condition>(type), label);
  } else {
    switch (type) {
      case always:
        B(label);
        break;
      case never:
        break;
      case reg_zero:
        Cbz(reg, label);
        break;
      case reg_not_zero:
        Cbnz(reg, label);
        break;
      case reg_bit_clear:
        Tbz(reg, bit, label);
        break;
      case reg_bit_set:
        Tbnz(reg, bit, label);
        break;
      default:
        VIXL_UNREACHABLE();
    }
  }
}


void MacroAssembler::B(Label* label) {
  // We don't need to check the size of the literal pool, because the size of
  // the literal pool is already bounded by the literal range, which is smaller
  // than the range of this branch.
  VIXL_ASSERT(Instruction::GetImmBranchForwardRange(UncondBranchType) >
              Instruction::kLoadLiteralRange);
  SingleEmissionCheckScope guard(this);
  b(label);
}


void MacroAssembler::B(Label* label, Condition cond) {
  // We don't need to check the size of the literal pool, because the size of
  // the literal pool is already bounded by the literal range, which is smaller
  // than the range of this branch.
  VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CondBranchType) >
              Instruction::kLoadLiteralRange);
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT((cond != al) && (cond != nv));
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
    Label done;
    b(&done, InvertCondition(cond));
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            CondBranchType);
    }
    b(label, cond);
  }
}
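
// Editorial note: the bound-but-out-of-range path above synthesises a long
// conditional branch as (illustrative):
//       b.<inverted cond> done   // short-range branch skips the long jump
//       b                 label  // unconditional branch, wider range
//     done:
// Cbnz/Cbz/Tbnz/Tbz below use the same pattern.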


void MacroAssembler::Cbnz(const Register& rt, Label* label) {
  // We don't need to check the size of the literal pool, because the size of
  // the literal pool is already bounded by the literal range, which is smaller
  // than the range of this branch.
  VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CompareBranchType) >
              Instruction::kLoadLiteralRange);
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
    Label done;
    cbz(rt, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            CompareBranchType);
    }
    cbnz(rt, label);
  }
}


void MacroAssembler::Cbz(const Register& rt, Label* label) {
  // We don't need to check the size of the literal pool, because the size of
  // the literal pool is already bounded by the literal range, which is smaller
  // than the range of this branch.
  VIXL_ASSERT(Instruction::GetImmBranchForwardRange(CompareBranchType) >
              Instruction::kLoadLiteralRange);
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, CondBranchType)) {
    Label done;
    cbnz(rt, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            CompareBranchType);
    }
    cbz(rt, label);
  }
}


void MacroAssembler::Tbnz(const Register& rt, unsigned bit_pos, Label* label) {
  // This is to avoid a situation where emitting a veneer for a TBZ/TBNZ branch
  // can become impossible because we emit the literal pool first.
  literal_pool_.CheckEmitForBranch(
      Instruction::GetImmBranchForwardRange(TestBranchType));
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, TestBranchType)) {
    Label done;
    tbz(rt, bit_pos, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            TestBranchType);
    }
    tbnz(rt, bit_pos, label);
  }
}


void MacroAssembler::Tbz(const Register& rt, unsigned bit_pos, Label* label) {
  // This is to avoid a situation where emitting a veneer for a TBZ/TBNZ branch
  // can become impossible because we emit the literal pool first.
  literal_pool_.CheckEmitForBranch(
      Instruction::GetImmBranchForwardRange(TestBranchType));
  VIXL_ASSERT(allow_macro_instructions_);
  VIXL_ASSERT(!rt.IsZero());
  EmissionCheckScope guard(this, 2 * kInstructionSize);

  if (label->IsBound() && LabelIsOutOfRange(label, TestBranchType)) {
    Label done;
    tbnz(rt, bit_pos, &done);
    b(label);
    bind(&done);
  } else {
    if (!label->IsBound()) {
      veneer_pool_.RegisterUnresolvedBranch(GetCursorOffset(),
                                            label,
                                            TestBranchType);
    }
    tbz(rt, bit_pos, label);
  }
}

void MacroAssembler::Bind(Label* label, BranchTargetIdentifier id) {
  VIXL_ASSERT(allow_macro_instructions_);
  veneer_pool_.DeleteUnresolvedBranchInfoForLabel(label);
  if (id == EmitBTI_none) {
    bind(label);
  } else {
    // Emit this inside an ExactAssemblyScope to ensure there are no extra
    // instructions between the bind and the target identifier instruction.
    ExactAssemblyScope scope(this, kInstructionSize);
    bind(label);
    if (id == EmitPACIASP) {
      paciasp();
    } else if (id == EmitPACIBSP) {
      pacibsp();
    } else {
      bti(id);
    }
  }
}

// Bind a label to a specified offset from the start of the buffer.
void MacroAssembler::BindToOffset(Label* label, ptrdiff_t offset) {
  VIXL_ASSERT(allow_macro_instructions_);
  veneer_pool_.DeleteUnresolvedBranchInfoForLabel(label);
  Assembler::BindToOffset(label, offset);
}


void MacroAssembler::And(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, AND);
}


void MacroAssembler::Ands(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ANDS);
}


void MacroAssembler::Tst(const Register& rn, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  Ands(AppropriateZeroRegFor(rn), rn, operand);
}


void MacroAssembler::Bic(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, BIC);
}


void MacroAssembler::Bics(const Register& rd,
                          const Register& rn,
                          const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, BICS);
}


void MacroAssembler::Orr(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORR);
}


void MacroAssembler::Orn(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, ORN);
}


void MacroAssembler::Eor(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EOR);
}


void MacroAssembler::Eon(const Register& rd,
                         const Register& rn,
                         const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  LogicalMacro(rd, rn, operand, EON);
}


void MacroAssembler::LogicalMacro(const Register& rd,
                                  const Register& rn,
                                  const Operand& operand,
                                  LogicalOp op) {
  // The worst case for size is logical immediate to sp:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction to do the operation
  //  * 1 instruction to move to sp
  MacroEmissionCheckScope guard(this);
  UseScratchRegisterScope temps(this);
  // Use `rd` as a temp, if we can.
  temps.Include(rd);
  // We read `rn` after evaluating `operand`.
  temps.Exclude(rn);
  // It doesn't matter if `operand` is in `temps` (e.g. because it aliases `rd`)
  // because we don't need it after it is evaluated.

  if (operand.IsImmediate()) {
    uint64_t immediate = operand.GetImmediate();
    unsigned reg_size = rd.GetSizeInBits();

    // If the operation is NOT, invert the operation and immediate.
    if ((op & NOT) == NOT) {
      op = static_cast<LogicalOp>(op & ~NOT);
      immediate = ~immediate;
    }

    // Ignore the top 32 bits of an immediate if we're moving to a W register.
    if (rd.Is32Bits()) {
      // Check that the top 32 bits are consistent.
      VIXL_ASSERT(((immediate >> kWRegSize) == 0) ||
                  ((immediate >> kWRegSize) == 0xffffffff));
      immediate &= kWRegMask;
    }

    VIXL_ASSERT(rd.Is64Bits() || IsUint32(immediate));

    // Special cases for all set or all clear immediates.
    if (immediate == 0) {
      switch (op) {
        case AND:
          Mov(rd, 0);
          return;
        case ORR:
          VIXL_FALLTHROUGH();
        case EOR:
          Mov(rd, rn);
          return;
        case ANDS:
          VIXL_FALLTHROUGH();
        case BICS:
          break;
        default:
          VIXL_UNREACHABLE();
      }
    } else if ((rd.Is64Bits() && (immediate == UINT64_C(0xffffffffffffffff))) ||
               (rd.Is32Bits() && (immediate == UINT64_C(0x00000000ffffffff)))) {
      switch (op) {
        case AND:
          Mov(rd, rn);
          return;
        case ORR:
          Mov(rd, immediate);
          return;
        case EOR:
          Mvn(rd, rn);
          return;
        case ANDS:
          VIXL_FALLTHROUGH();
        case BICS:
          break;
        default:
          VIXL_UNREACHABLE();
      }
    }

    unsigned n, imm_s, imm_r;
    if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
      // Immediate can be encoded in the instruction.
      LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
    } else {
      // Immediate can't be encoded: synthesize using move immediate.
      Register temp = temps.AcquireSameSizeAs(rn);
      VIXL_ASSERT(!temp.Aliases(rn));

      // If the left-hand input is the stack pointer, we can't pre-shift the
      // immediate, as the encoding won't allow the subsequent post shift.
      PreShiftImmMode mode = rn.IsSP() ? kNoShift : kAnyShift;
      Operand imm_operand = MoveImmediateForShiftedOp(temp, immediate, mode);

      if (rd.Is(sp) || rd.Is(wsp)) {
        // If rd is the stack pointer we cannot use it as the destination
        // register so we use the temp register as an intermediate again.
        Logical(temp, rn, imm_operand, op);
        Mov(rd, temp);
      } else {
        Logical(rd, rn, imm_operand, op);
      }
    }
  } else if (operand.IsExtendedRegister()) {
    VIXL_ASSERT(operand.GetRegister().GetSizeInBits() <= rd.GetSizeInBits());
    // Add/sub extended supports shift <= 4. We want to support exactly the
    // same modes here.
    VIXL_ASSERT(operand.GetShiftAmount() <= 4);
    VIXL_ASSERT(
        operand.GetRegister().Is64Bits() ||
        ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));

    Register temp = temps.AcquireSameSizeAs(rn);
    VIXL_ASSERT(!temp.Aliases(rn));
    EmitExtendShift(temp,
                    operand.GetRegister(),
                    operand.GetExtend(),
                    operand.GetShiftAmount());
    Logical(rd, rn, Operand(temp), op);
  } else {
    // The operand can be encoded in the instruction.
    VIXL_ASSERT(operand.IsShiftedRegister());
    Logical(rd, rn, operand, op);
  }
}


void MacroAssembler::Mov(const Register& rd,
                         const Operand& operand,
                         DiscardMoveMode discard_mode) {
  VIXL_ASSERT(allow_macro_instructions_);
  // The worst case for size is mov immediate with up to 4 instructions.
  MacroEmissionCheckScope guard(this);

  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mov(rd, operand.GetImmediate());
  } else if (operand.IsShiftedRegister() && (operand.GetShiftAmount() != 0)) {
    // Emit a shift instruction if moving a shifted register. This operation
    // could also be achieved using an orr instruction (like orn used by Mvn),
    // but using a shift instruction makes the disassembly clearer.
    EmitShift(rd,
              operand.GetRegister(),
              operand.GetShift(),
              operand.GetShiftAmount());
  } else if (operand.IsExtendedRegister()) {
    // Emit an extend instruction if moving an extended register. This handles
    // extend with post-shift operations, too.
    EmitExtendShift(rd,
                    operand.GetRegister(),
                    operand.GetExtend(),
                    operand.GetShiftAmount());
  } else {
    Mov(rd, operand.GetRegister(), discard_mode);
  }
}


void MacroAssembler::Movi16bitHelper(const VRegister& vd, uint64_t imm) {
  VIXL_ASSERT(IsUint16(imm));
  int byte1 = (imm & 0xff);
  int byte2 = ((imm >> 8) & 0xff);
  if (byte1 == byte2) {
    movi(vd.Is64Bits() ? vd.V8B() : vd.V16B(), byte1);
  } else if (byte1 == 0) {
    movi(vd, byte2, LSL, 8);
  } else if (byte2 == 0) {
    movi(vd, byte1);
  } else if (byte1 == 0xff) {
    mvni(vd, ~byte2 & 0xff, LSL, 8);
  } else if (byte2 == 0xff) {
    mvni(vd, ~byte1 & 0xff);
  } else {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireW();
    movz(temp, imm);
    dup(vd, temp);
  }
}
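
// Editorial examples of the dispatch above (illustrative, for an 8H vd):
//   0x5555 (equal bytes)      -> movi v0.16b, #0x55
//   0x1200 (zero low byte)    -> movi v0.8h, #0x12, lsl #8
//   0x1234 (no special case)  -> movz w<tmp>, #0x1234; dup v0.8h, w<tmp>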


void MacroAssembler::Movi32bitHelper(const VRegister& vd, uint64_t imm) {
  VIXL_ASSERT(IsUint32(imm));

  uint8_t bytes[sizeof(imm)];
  memcpy(bytes, &imm, sizeof(imm));

  // All bytes are either 0x00 or 0xff.
  {
    bool all0orff = true;
    for (int i = 0; i < 4; ++i) {
      if ((bytes[i] != 0) && (bytes[i] != 0xff)) {
        all0orff = false;
        break;
      }
    }

    if (all0orff == true) {
      movi(vd.Is64Bits() ? vd.V1D() : vd.V2D(), ((imm << 32) | imm));
      return;
    }
  }

  // Of the 4 bytes, only one byte is non-zero.
  for (int i = 0; i < 4; i++) {
    if ((imm & (0xff << (i * 8))) == imm) {
      movi(vd, bytes[i], LSL, i * 8);
      return;
    }
  }

  // Of the 4 bytes, only one byte is not 0xff.
  for (int i = 0; i < 4; i++) {
    uint32_t mask = ~(0xff << (i * 8));
    if ((imm & mask) == mask) {
      mvni(vd, ~bytes[i] & 0xff, LSL, i * 8);
      return;
    }
  }

  // Immediate is of the form 0x00MMFFFF.
  if ((imm & 0xff00ffff) == 0x0000ffff) {
    movi(vd, bytes[2], MSL, 16);
    return;
  }

  // Immediate is of the form 0x0000MMFF.
  if ((imm & 0xffff00ff) == 0x000000ff) {
    movi(vd, bytes[1], MSL, 8);
    return;
  }

  // Immediate is of the form 0xFFMM0000.
  if ((imm & 0xff00ffff) == 0xff000000) {
    mvni(vd, ~bytes[2] & 0xff, MSL, 16);
    return;
  }
  // Immediate is of the form 0xFFFFMM00.
  if ((imm & 0xffff00ff) == 0xffff0000) {
    mvni(vd, ~bytes[1] & 0xff, MSL, 8);
    return;
  }
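  // Editorial note: MSL is the "shifting ones" modifier; e.g.
  // movi v0.4s, #0xmm, msl #16 writes 0x00mmffff to each lane, matching the
  // 0x00MMFFFF pattern handled above.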

  // Top and bottom 16-bits are equal.
  if (((imm >> 16) & 0xffff) == (imm & 0xffff)) {
    Movi16bitHelper(vd.Is64Bits() ? vd.V4H() : vd.V8H(), imm & 0xffff);
    return;
  }

  // Default case.
  {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireW();
    Mov(temp, imm);
    dup(vd, temp);
  }
}

void MacroAssembler::Movi64bitHelper(const VRegister& vd, uint64_t imm) {
  // All bytes are either 0x00 or 0xff.
  {
    bool all0orff = true;
    for (int i = 0; i < 8; ++i) {
      int byteval = (imm >> (i * 8)) & 0xff;
      if (byteval != 0 && byteval != 0xff) {
        all0orff = false;
        break;
      }
    }
    if (all0orff == true) {
      movi(vd, imm);
      return;
    }
  }

  // Top and bottom 32-bits are equal.
  if (((imm >> 32) & 0xffffffff) == (imm & 0xffffffff)) {
    Movi32bitHelper(vd.Is64Bits() ? vd.V2S() : vd.V4S(), imm & 0xffffffff);
    return;
  }

  // Default case.
  {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireX();
    Mov(temp, imm);
    if (vd.Is1D()) {
      fmov(vd.D(), temp);
    } else {
      dup(vd.V2D(), temp);
    }
  }
}


void MacroAssembler::Movi(const VRegister& vd,
                          uint64_t imm,
                          Shift shift,
                          int shift_amount) {
  VIXL_ASSERT(allow_macro_instructions_);
  MacroEmissionCheckScope guard(this);
  if (shift_amount != 0 || shift != LSL) {
    movi(vd, imm, shift, shift_amount);
  } else if (vd.Is8B() || vd.Is16B()) {
    // 8-bit immediate.
    VIXL_ASSERT(IsUint8(imm));
    movi(vd, imm);
  } else if (vd.Is4H() || vd.Is8H()) {
    // 16-bit immediate.
    Movi16bitHelper(vd, imm);
  } else if (vd.Is2S() || vd.Is4S()) {
    // 32-bit immediate.
    Movi32bitHelper(vd, imm);
  } else {
    // 64-bit immediate.
    Movi64bitHelper(vd, imm);
  }
}


void MacroAssembler::Movi(const VRegister& vd, uint64_t hi, uint64_t lo) {
  // TODO: Move 128-bit values in a more efficient way.
  VIXL_ASSERT(vd.Is128Bits());
  if (hi == lo) {
    Movi(vd.V2D(), lo);
    return;
  }

  Movi(vd.V1D(), lo);

  if (hi != 0) {
    UseScratchRegisterScope temps(this);
    // TODO: Figure out if using a temporary V register to materialise the
    // immediate is better.
    Register temp = temps.AcquireX();
    Mov(temp, hi);
    Ins(vd.V2D(), 1, temp);
  }
}


void MacroAssembler::Mvn(const Register& rd, const Operand& operand) {
  VIXL_ASSERT(allow_macro_instructions_);
  // The worst case for size is mvn immediate with up to 4 instructions.
  MacroEmissionCheckScope guard(this);

  if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mvn(rd, operand.GetImmediate());
  } else if (operand.IsExtendedRegister()) {
    // Emit two instructions for the extend case. This differs from Mov, as
    // the extend and invert can't be achieved in one instruction.
    EmitExtendShift(rd,
                    operand.GetRegister(),
                    operand.GetExtend(),
                    operand.GetShiftAmount());
    mvn(rd, rd);
  } else {
    // Otherwise, register and shifted register cases can be handled by the
    // assembler directly, using orn.
    mvn(rd, operand);
  }
}


void MacroAssembler::Mov(const Register& rd, uint64_t imm) {
  VIXL_ASSERT(allow_macro_instructions_);
  MoveImmediateHelper(this, rd, imm);
}


void MacroAssembler::Ccmp(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
    ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMN);
  } else {
    ConditionalCompareMacro(rn, operand, nzcv, cond, CCMP);
  }
}
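
// Editorial note (illustrative): Ccmp(x0, -42, NoFlag, eq) is thus emitted
// as ccmn x0, #42, #0, eq, since x0 + 42 sets the same flags as x0 - (-42).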


void MacroAssembler::Ccmn(const Register& rn,
                          const Operand& operand,
                          StatusFlags nzcv,
                          Condition cond) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (operand.IsImmediate() && (operand.GetImmediate() < 0)) {
    ConditionalCompareMacro(rn, -operand.GetImmediate(), nzcv, cond, CCMP);
  } else {
    ConditionalCompareMacro(rn, operand, nzcv, cond, CCMN);
  }
}


void MacroAssembler::ConditionalCompareMacro(const Register& rn,
                                             const Operand& operand,
                                             StatusFlags nzcv,
                                             Condition cond,
                                             ConditionalCompareOp op) {
  VIXL_ASSERT((cond != al) && (cond != nv));
  // The worst case for size is ccmp immediate:
  //  * up to 4 instructions to materialise the constant
  //  * 1 instruction for ccmp
  MacroEmissionCheckScope guard(this);

  if ((operand.IsShiftedRegister() && (operand.GetShiftAmount() == 0)) ||
      (operand.IsImmediate() &&
       IsImmConditionalCompare(operand.GetImmediate()))) {
    // The immediate can be encoded in the instruction, or the operand is an
    // unshifted register: call the assembler.
    ConditionalCompare(rn, operand, nzcv, cond, op);
  } else {
    UseScratchRegisterScope temps(this);
    // The operand isn't directly supported by the instruction: perform the
    // operation on a temporary register.
    Register temp = temps.AcquireSameSizeAs(rn);
    Mov(temp, operand);
    ConditionalCompare(rn, temp, nzcv, cond, op);
  }
}


void MacroAssembler::CselHelper(MacroAssembler* masm,
                                const Register& rd,
                                Operand left,
                                Operand right,
                                Condition cond,
                                bool* should_synthesise_left,
                                bool* should_synthesise_right) {
  bool emit_code = (masm != NULL);

  VIXL_ASSERT(!emit_code || masm->allow_macro_instructions_);
  VIXL_ASSERT((cond != al) && (cond != nv));
  VIXL_ASSERT(!rd.IsZero() && !rd.IsSP());
  VIXL_ASSERT(left.IsImmediate() || !left.GetRegister().IsSP());
  VIXL_ASSERT(right.IsImmediate() || !right.GetRegister().IsSP());

  if (should_synthesise_left != NULL) *should_synthesise_left = false;
  if (should_synthesise_right != NULL) *should_synthesise_right = false;

  // The worst case for size occurs when the inputs are two non-encodable
  // constants:
  //  * up to 4 instructions to materialise the left constant
  //  * up to 4 instructions to materialise the right constant
  //  * 1 instruction for csel
  EmissionCheckScope guard(masm, 9 * kInstructionSize);
  UseScratchRegisterScope temps;
  if (masm != NULL) {
    temps.Open(masm);
  }

  // Try to handle cases where both inputs are immediates.
  bool left_is_immediate = left.IsImmediate() || left.IsZero();
  bool right_is_immediate = right.IsImmediate() || right.IsZero();
  if (left_is_immediate && right_is_immediate &&
      CselSubHelperTwoImmediates(masm,
                                 rd,
                                 left.GetEquivalentImmediate(),
                                 right.GetEquivalentImmediate(),
                                 cond,
                                 should_synthesise_left,
                                 should_synthesise_right)) {
    return;
  }

  // Handle cases where one of the two inputs is -1, 0, or 1.
  bool left_is_small_immediate =
      left_is_immediate && ((-1 <= left.GetEquivalentImmediate()) &&
                            (left.GetEquivalentImmediate() <= 1));
  bool right_is_small_immediate =
      right_is_immediate && ((-1 <= right.GetEquivalentImmediate()) &&
                             (right.GetEquivalentImmediate() <= 1));
  if (right_is_small_immediate || left_is_small_immediate) {
    bool swapped_inputs = false;
    if (!right_is_small_immediate) {
      std::swap(left, right);
      cond = InvertCondition(cond);
      swapped_inputs = true;
    }
    CselSubHelperRightSmallImmediate(masm,
                                     &temps,
                                     rd,
                                     left,
                                     right,
                                     cond,
                                     swapped_inputs ? should_synthesise_right
                                                    : should_synthesise_left);
    return;
  }

  // Otherwise both inputs need to be available in registers. Synthesise them
  // if necessary and emit the `csel`.
  if (!left.IsPlainRegister()) {
    if (emit_code) {
      Register temp = temps.AcquireSameSizeAs(rd);
      masm->Mov(temp, left);
      left = temp;
    }
    if (should_synthesise_left != NULL) *should_synthesise_left = true;
  }
  if (!right.IsPlainRegister()) {
    if (emit_code) {
      Register temp = temps.AcquireSameSizeAs(rd);
      masm->Mov(temp, right);
      right = temp;
    }
    if (should_synthesise_right != NULL) *should_synthesise_right = true;
  }
  if (emit_code) {
    VIXL_ASSERT(left.IsPlainRegister() && right.IsPlainRegister());
    if (left.GetRegister().Is(right.GetRegister())) {
      masm->Mov(rd, left.GetRegister());
    } else {
      masm->csel(rd, left.GetRegister(), right.GetRegister(), cond);
    }
  }
}


bool MacroAssembler::CselSubHelperTwoImmediates(MacroAssembler* masm,
                                                const Register& rd,
                                                int64_t left,
                                                int64_t right,
                                                Condition cond,
                                                bool* should_synthesise_left,
                                                bool* should_synthesise_right) {
  bool emit_code = (masm != NULL);
  if (should_synthesise_left != NULL) *should_synthesise_left = false;
  if (should_synthesise_right != NULL) *should_synthesise_right = false;

  if (left == right) {
    if (emit_code) masm->Mov(rd, left);
    return true;
  } else if (left == -right) {
    if (should_synthesise_right != NULL) *should_synthesise_right = true;
    if (emit_code) {
      masm->Mov(rd, right);
      masm->Cneg(rd, rd, cond);
    }
    return true;
  }

  if (CselSubHelperTwoOrderedImmediates(masm, rd, left, right, cond)) {
    return true;
  } else {
    std::swap(left, right);
    if (CselSubHelperTwoOrderedImmediates(masm,
                                          rd,
                                          left,
                                          right,
                                          InvertCondition(cond))) {
      return true;
    }
  }

  // TODO: Handle more situations. For example handle `csel rd, #5, #6, cond`
  // with `cinc`.
  return false;
}


bool MacroAssembler::CselSubHelperTwoOrderedImmediates(MacroAssembler* masm,
                                                       const Register& rd,
                                                       int64_t left,
                                                       int64_t right,
                                                       Condition cond) {
  bool emit_code = (masm != NULL);

  if ((left == 1) && (right == 0)) {
    if (emit_code) masm->cset(rd, cond);
    return true;
  } else if ((left == -1) && (right == 0)) {
    if (emit_code) masm->csetm(rd, cond);
    return true;
  }
  return false;
}
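
// Editorial note (illustrative): via the helpers above, Csel(x0, 1, 0, eq)
// reduces to cset x0, eq, and Csel(x0, -1, 0, eq) to csetm x0, eq.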
   1314 
   1315 
   1316 void MacroAssembler::CselSubHelperRightSmallImmediate(
   1317     MacroAssembler* masm,
   1318     UseScratchRegisterScope* temps,
   1319     const Register& rd,
   1320     const Operand& left,
   1321     const Operand& right,
   1322     Condition cond,
   1323     bool* should_synthesise_left) {
   1324   bool emit_code = (masm != NULL);
   1325   VIXL_ASSERT((right.IsImmediate() || right.IsZero()) &&
   1326               (-1 <= right.GetEquivalentImmediate()) &&
   1327               (right.GetEquivalentImmediate() <= 1));
   1328   Register left_register;
   1329 
   1330   if (left.IsPlainRegister()) {
   1331     left_register = left.GetRegister();
   1332   } else {
   1333     if (emit_code) {
   1334       left_register = temps->AcquireSameSizeAs(rd);
   1335       masm->Mov(left_register, left);
   1336     }
   1337     if (should_synthesise_left != NULL) *should_synthesise_left = true;
   1338   }
   1339   if (emit_code) {
   1340     int64_t imm = right.GetEquivalentImmediate();
   1341     Register zr = AppropriateZeroRegFor(rd);
   1342     if (imm == 0) {
   1343       masm->csel(rd, left_register, zr, cond);
   1344     } else if (imm == 1) {
   1345       masm->csinc(rd, left_register, zr, cond);
   1346     } else {
   1347       VIXL_ASSERT(imm == -1);
   1348       masm->csinv(rd, left_register, zr, cond);
   1349     }
   1350   }
   1351 }
   1352 
   1353 
   1354 void MacroAssembler::Add(const Register& rd,
   1355                          const Register& rn,
   1356                          const Operand& operand,
   1357                          FlagsUpdate S) {
   1358   VIXL_ASSERT(allow_macro_instructions_);
   1359   if (operand.IsImmediate()) {
   1360     int64_t imm = operand.GetImmediate();
   1361     if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
   1362         IsImmAddSub(-imm)) {
   1363       AddSubMacro(rd, rn, -imm, S, SUB);
   1364       return;
   1365     }
   1366   }
   1367   AddSubMacro(rd, rn, operand, S, ADD);
   1368 }
   1369 
   1370 
   1371 void MacroAssembler::Adds(const Register& rd,
   1372                           const Register& rn,
   1373                           const Operand& operand) {
   1374   Add(rd, rn, operand, SetFlags);
   1375 }
   1376 
   1377 #define MINMAX(V)        \
   1378   V(Smax, smax, IsInt8)  \
   1379   V(Smin, smin, IsInt8)  \
   1380   V(Umax, umax, IsUint8) \
   1381   V(Umin, umin, IsUint8)
   1382 
   1383 #define VIXL_DEFINE_MASM_FUNC(MASM, ASM, RANGE)      \
   1384   void MacroAssembler::MASM(const Register& rd,      \
   1385                             const Register& rn,      \
   1386                             const Operand& op) {     \
   1387     VIXL_ASSERT(allow_macro_instructions_);          \
   1388     if (op.IsImmediate()) {                          \
   1389       int64_t imm = op.GetImmediate();               \
   1390       if (!RANGE(imm)) {                             \
   1391         UseScratchRegisterScope temps(this);         \
   1392         Register temp = temps.AcquireSameSizeAs(rd); \
   1393         Mov(temp, imm);                              \
   1394         MASM(rd, rn, temp);                          \
   1395         return;                                      \
   1396       }                                              \
   1397     }                                                \
   1398     SingleEmissionCheckScope guard(this);            \
   1399     ASM(rd, rn, op);                                 \
   1400   }
   1401 MINMAX(VIXL_DEFINE_MASM_FUNC)
   1402 #undef VIXL_DEFINE_MASM_FUNC
   1403 
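         // For illustration: the min/max macros above accept any immediate,
         // materialising out-of-range values first (scratch register shown as
         // <tmp>), so roughly:
         //   Smax(x0, x1, 42);    // smax x0, x1, #42 (fits the signed 8-bit field)
         //   Smax(x0, x1, 1000);  // mov <tmp>, #1000; smax x0, x1, <tmp>
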
   1404 void MacroAssembler::St2g(const Register& rt, const MemOperand& addr) {
   1405   VIXL_ASSERT(allow_macro_instructions_);
   1406   SingleEmissionCheckScope guard(this);
   1407   st2g(rt, addr);
   1408 }
   1409 
   1410 void MacroAssembler::Stg(const Register& rt, const MemOperand& addr) {
   1411   VIXL_ASSERT(allow_macro_instructions_);
   1412   SingleEmissionCheckScope guard(this);
   1413   stg(rt, addr);
   1414 }
   1415 
   1416 void MacroAssembler::Stgp(const Register& rt1,
   1417                           const Register& rt2,
   1418                           const MemOperand& addr) {
   1419   VIXL_ASSERT(allow_macro_instructions_);
   1420   SingleEmissionCheckScope guard(this);
   1421   stgp(rt1, rt2, addr);
   1422 }
   1423 
   1424 void MacroAssembler::Stz2g(const Register& rt, const MemOperand& addr) {
   1425   VIXL_ASSERT(allow_macro_instructions_);
   1426   SingleEmissionCheckScope guard(this);
   1427   stz2g(rt, addr);
   1428 }
   1429 
   1430 void MacroAssembler::Stzg(const Register& rt, const MemOperand& addr) {
   1431   VIXL_ASSERT(allow_macro_instructions_);
   1432   SingleEmissionCheckScope guard(this);
   1433   stzg(rt, addr);
   1434 }
   1435 
   1436 void MacroAssembler::Ldg(const Register& rt, const MemOperand& addr) {
   1437   VIXL_ASSERT(allow_macro_instructions_);
   1438   SingleEmissionCheckScope guard(this);
   1439   ldg(rt, addr);
   1440 }
   1441 
   1442 void MacroAssembler::Sub(const Register& rd,
   1443                          const Register& rn,
   1444                          const Operand& operand,
   1445                          FlagsUpdate S) {
   1446   VIXL_ASSERT(allow_macro_instructions_);
   1447   if (operand.IsImmediate()) {
   1448     int64_t imm = operand.GetImmediate();
   1449     if ((imm < 0) && (imm != std::numeric_limits<int64_t>::min()) &&
   1450         IsImmAddSub(-imm)) {
   1451       AddSubMacro(rd, rn, -imm, S, ADD);
   1452       return;
   1453     }
   1454   }
   1455   AddSubMacro(rd, rn, operand, S, SUB);
   1456 }
   1457 
   1458 
   1459 void MacroAssembler::Subs(const Register& rd,
   1460                           const Register& rn,
   1461                           const Operand& operand) {
   1462   Sub(rd, rn, operand, SetFlags);
   1463 }
   1464 
   1465 
   1466 void MacroAssembler::Cmn(const Register& rn, const Operand& operand) {
   1467   VIXL_ASSERT(allow_macro_instructions_);
   1468   Adds(AppropriateZeroRegFor(rn), rn, operand);
   1469 }
   1470 
   1471 
   1472 void MacroAssembler::Cmp(const Register& rn, const Operand& operand) {
   1473   VIXL_ASSERT(allow_macro_instructions_);
   1474   Subs(AppropriateZeroRegFor(rn), rn, operand);
   1475 }
   1476 
   1477 
   1478 void MacroAssembler::Fcmp(const VRegister& fn, double value, FPTrapFlags trap) {
   1479   VIXL_ASSERT(allow_macro_instructions_);
   1480   // The worst case for size is:
    1481   //  * 1 instruction to materialise the constant, using the literal pool if necessary
   1482   //  * 1 instruction for fcmp{e}
   1483   MacroEmissionCheckScope guard(this);
   1484   if (value != 0.0) {
   1485     UseScratchRegisterScope temps(this);
   1486     VRegister tmp = temps.AcquireSameSizeAs(fn);
   1487     Fmov(tmp, value);
   1488     FPCompareMacro(fn, tmp, trap);
   1489   } else {
   1490     FPCompareMacro(fn, value, trap);
   1491   }
   1492 }
   1493 
   1494 
   1495 void MacroAssembler::Fcmpe(const VRegister& fn, double value) {
   1496   Fcmp(fn, value, EnableTrap);
   1497 }
   1498 
   1499 
   1500 void MacroAssembler::Fmov(VRegister vd, double imm) {
   1501   VIXL_ASSERT(allow_macro_instructions_);
   1502   // Floating point immediates are loaded through the literal pool.
   1503   MacroEmissionCheckScope guard(this);
   1504   uint64_t rawbits = DoubleToRawbits(imm);
   1505 
   1506   if (rawbits == 0) {
   1507     fmov(vd.D(), xzr);
   1508     return;
   1509   }
   1510 
   1511   if (vd.Is1H() || vd.Is4H() || vd.Is8H()) {
   1512     Fmov(vd, Float16(imm));
   1513     return;
   1514   }
   1515 
   1516   if (vd.Is1S() || vd.Is2S() || vd.Is4S()) {
   1517     Fmov(vd, static_cast<float>(imm));
   1518     return;
   1519   }
   1520 
   1521   VIXL_ASSERT(vd.Is1D() || vd.Is2D());
   1522   if (IsImmFP64(rawbits)) {
   1523     fmov(vd, imm);
   1524   } else if (vd.IsScalar()) {
   1525     ldr(vd,
   1526         new Literal<double>(imm,
   1527                             &literal_pool_,
   1528                             RawLiteral::kDeletedOnPlacementByPool));
   1529   } else {
   1530     // TODO: consider NEON support for load literal.
   1531     Movi(vd, rawbits);
   1532   }
   1533 }
   1534 
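         // For illustration, only a small set of values fits the fmov immediate
         // encoding, so roughly:
         //   Fmov(d0, 0.0);  // fmov d0, xzr
         //   Fmov(d0, 1.0);  // fmov d0, #1.0 (encodable immediate)
         //   Fmov(d0, 1.1);  // not encodable: ldr d0 from the literal pool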
   1535 
   1536 void MacroAssembler::Fmov(VRegister vd, float imm) {
   1537   VIXL_ASSERT(allow_macro_instructions_);
   1538   // Floating point immediates are loaded through the literal pool.
   1539   MacroEmissionCheckScope guard(this);
   1540   uint32_t rawbits = FloatToRawbits(imm);
   1541 
   1542   if (rawbits == 0) {
   1543     fmov(vd.S(), wzr);
   1544     return;
   1545   }
   1546 
   1547   if (vd.Is1H() || vd.Is4H() || vd.Is8H()) {
   1548     Fmov(vd, Float16(imm));
   1549     return;
   1550   }
   1551 
   1552   if (vd.Is1D() || vd.Is2D()) {
   1553     Fmov(vd, static_cast<double>(imm));
   1554     return;
   1555   }
   1556 
   1557   VIXL_ASSERT(vd.Is1S() || vd.Is2S() || vd.Is4S());
   1558   if (IsImmFP32(rawbits)) {
   1559     fmov(vd, imm);
   1560   } else if (vd.IsScalar()) {
   1561     ldr(vd,
   1562         new Literal<float>(imm,
   1563                            &literal_pool_,
   1564                            RawLiteral::kDeletedOnPlacementByPool));
   1565   } else {
   1566     // TODO: consider NEON support for load literal.
   1567     Movi(vd, rawbits);
   1568   }
   1569 }
   1570 
   1571 
   1572 void MacroAssembler::Fmov(VRegister vd, Float16 imm) {
   1573   VIXL_ASSERT(allow_macro_instructions_);
   1574   MacroEmissionCheckScope guard(this);
   1575 
   1576   if (vd.Is1S() || vd.Is2S() || vd.Is4S()) {
   1577     Fmov(vd, FPToFloat(imm, kIgnoreDefaultNaN));
   1578     return;
   1579   }
   1580 
   1581   if (vd.Is1D() || vd.Is2D()) {
   1582     Fmov(vd, FPToDouble(imm, kIgnoreDefaultNaN));
   1583     return;
   1584   }
   1585 
   1586   VIXL_ASSERT(vd.Is1H() || vd.Is4H() || vd.Is8H());
   1587   uint16_t rawbits = Float16ToRawbits(imm);
   1588   if (IsImmFP16(imm)) {
   1589     fmov(vd, imm);
   1590   } else {
   1591     if (vd.IsScalar()) {
   1592       if (rawbits == 0x0) {
   1593         fmov(vd, wzr);
   1594       } else {
   1595         // We can use movz instead of the literal pool.
   1596         UseScratchRegisterScope temps(this);
   1597         Register temp = temps.AcquireW();
   1598         Mov(temp, rawbits);
   1599         Fmov(vd, temp);
   1600       }
   1601     } else {
   1602       // TODO: consider NEON support for load literal.
   1603       Movi(vd, static_cast<uint64_t>(rawbits));
   1604     }
   1605   }
   1606 }
   1607 
   1608 
   1609 void MacroAssembler::Neg(const Register& rd, const Operand& operand) {
   1610   VIXL_ASSERT(allow_macro_instructions_);
   1611   if (operand.IsImmediate()) {
   1612     Mov(rd, -operand.GetImmediate());
   1613   } else {
   1614     Sub(rd, AppropriateZeroRegFor(rd), operand);
   1615   }
   1616 }
   1617 
   1618 
   1619 void MacroAssembler::Negs(const Register& rd, const Operand& operand) {
   1620   VIXL_ASSERT(allow_macro_instructions_);
   1621   Subs(rd, AppropriateZeroRegFor(rd), operand);
   1622 }
   1623 
   1624 
   1625 bool MacroAssembler::TryOneInstrMoveImmediate(const Register& dst,
   1626                                               uint64_t imm) {
   1627   return OneInstrMoveImmediateHelper(this, dst, imm);
   1628 }
   1629 
   1630 
   1631 Operand MacroAssembler::MoveImmediateForShiftedOp(const Register& dst,
   1632                                                   uint64_t imm,
   1633                                                   PreShiftImmMode mode) {
   1634   int reg_size = dst.GetSizeInBits();
   1635 
   1636   // Encode the immediate in a single move instruction, if possible.
   1637   if (TryOneInstrMoveImmediate(dst, imm)) {
   1638     // The move was successful; nothing to do here.
   1639   } else {
   1640     // Pre-shift the immediate to the least-significant bits of the register.
   1641     int shift_low = CountTrailingZeros(imm, reg_size);
   1642     if (mode == kLimitShiftForSP) {
   1643       // When applied to the stack pointer, the subsequent arithmetic operation
   1644       // can use the extend form to shift left by a maximum of four bits. Right
   1645       // shifts are not allowed, so we filter them out later before the new
   1646       // immediate is tested.
   1647       shift_low = std::min(shift_low, 4);
   1648     }
   1649     // TryOneInstrMoveImmediate handles `imm` with a value of zero, so shift_low
   1650     // must lie in the range [0, 63], and the shifts below are well-defined.
   1651     VIXL_ASSERT((shift_low >= 0) && (shift_low < 64));
   1652     // imm_low = imm >> shift_low (with sign extension)
   1653     uint64_t imm_low = ExtractSignedBitfield64(63, shift_low, imm);
   1654 
   1655     // Pre-shift the immediate to the most-significant bits of the register,
   1656     // inserting set bits in the least-significant bits.
   1657     int shift_high = CountLeadingZeros(imm, reg_size);
   1658     VIXL_ASSERT((shift_high >= 0) && (shift_high < 64));
   1659     uint64_t imm_high = (imm << shift_high) | GetUintMask(shift_high);
   1660 
   1661     if ((mode != kNoShift) && TryOneInstrMoveImmediate(dst, imm_low)) {
   1662       // The new immediate has been moved into the destination's low bits:
   1663       // return a new leftward-shifting operand.
   1664       return Operand(dst, LSL, shift_low);
   1665     } else if ((mode == kAnyShift) && TryOneInstrMoveImmediate(dst, imm_high)) {
   1666       // The new immediate has been moved into the destination's high bits:
   1667       // return a new rightward-shifting operand.
   1668       return Operand(dst, LSR, shift_high);
   1669     } else {
   1670       Mov(dst, imm);
   1671     }
   1672   }
   1673   return Operand(dst);
   1674 }
   1675 
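         // For illustration: an immediate such as 0x12340 would need two move
         // instructions on its own, but pre-shifting lets the caller fold the
         // shift into its arithmetic (scratch register shown as <tmp>), roughly:
         //   Add(x0, x1, 0x12340);  // mov <tmp>, #0x48d; add x0, x1, <tmp>, lsl #6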
   1676 
   1677 void MacroAssembler::Move(const GenericOperand& dst,
   1678                           const GenericOperand& src) {
   1679   if (dst.Equals(src)) {
   1680     return;
   1681   }
   1682 
   1683   VIXL_ASSERT(dst.IsValid() && src.IsValid());
   1684 
   1685   // The sizes of the operands must match exactly.
   1686   VIXL_ASSERT(dst.GetSizeInBits() == src.GetSizeInBits());
   1687   VIXL_ASSERT(dst.GetSizeInBits() <= kXRegSize);
   1688   int operand_size = static_cast<int>(dst.GetSizeInBits());
   1689 
   1690   if (dst.IsCPURegister() && src.IsCPURegister()) {
   1691     CPURegister dst_reg = dst.GetCPURegister();
   1692     CPURegister src_reg = src.GetCPURegister();
   1693     if (dst_reg.IsRegister() && src_reg.IsRegister()) {
   1694       Mov(Register(dst_reg), Register(src_reg));
   1695     } else if (dst_reg.IsVRegister() && src_reg.IsVRegister()) {
   1696       Fmov(VRegister(dst_reg), VRegister(src_reg));
   1697     } else {
   1698       if (dst_reg.IsRegister()) {
   1699         Fmov(Register(dst_reg), VRegister(src_reg));
   1700       } else {
   1701         Fmov(VRegister(dst_reg), Register(src_reg));
   1702       }
   1703     }
   1704     return;
   1705   }
   1706 
   1707   if (dst.IsMemOperand() && src.IsMemOperand()) {
   1708     UseScratchRegisterScope temps(this);
   1709     CPURegister temp = temps.AcquireCPURegisterOfSize(operand_size);
   1710     Ldr(temp, src.GetMemOperand());
   1711     Str(temp, dst.GetMemOperand());
   1712     return;
   1713   }
   1714 
   1715   if (dst.IsCPURegister()) {
   1716     Ldr(dst.GetCPURegister(), src.GetMemOperand());
   1717   } else {
   1718     Str(src.GetCPURegister(), dst.GetMemOperand());
   1719   }
   1720 }
   1721 
   1722 
   1723 void MacroAssembler::ComputeAddress(const Register& dst,
   1724                                     const MemOperand& mem_op) {
   1725   // We cannot handle pre-indexing or post-indexing.
   1726   VIXL_ASSERT(mem_op.GetAddrMode() == Offset);
   1727   Register base = mem_op.GetBaseRegister();
   1728   if (mem_op.IsImmediateOffset()) {
   1729     Add(dst, base, mem_op.GetOffset());
   1730   } else {
   1731     VIXL_ASSERT(mem_op.IsRegisterOffset());
   1732     Register reg_offset = mem_op.GetRegisterOffset();
   1733     Shift shift = mem_op.GetShift();
   1734     Extend extend = mem_op.GetExtend();
   1735     if (shift == NO_SHIFT) {
   1736       VIXL_ASSERT(extend != NO_EXTEND);
   1737       Add(dst, base, Operand(reg_offset, extend, mem_op.GetShiftAmount()));
   1738     } else {
   1739       VIXL_ASSERT(extend == NO_EXTEND);
   1740       Add(dst, base, Operand(reg_offset, shift, mem_op.GetShiftAmount()));
   1741     }
   1742   }
   1743 }
   1744 
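         // For illustration, ComputeAddress flattens an addressing mode into a
         // plain base register, roughly:
         //   ComputeAddress(x0, MemOperand(x1, 16));           // add x0, x1, #16
         //   ComputeAddress(x0, MemOperand(x1, w2, SXTW, 2));  // add x0, x1, w2, sxtw #2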
   1745 
   1746 void MacroAssembler::AddSubMacro(const Register& rd,
   1747                                  const Register& rn,
   1748                                  const Operand& operand,
   1749                                  FlagsUpdate S,
   1750                                  AddSubOp op) {
   1751   // Worst case is add/sub immediate:
   1752   //  * up to 4 instructions to materialise the constant
   1753   //  * 1 instruction for add/sub
   1754   MacroEmissionCheckScope guard(this);
   1755 
   1756   if (operand.IsZero() && rd.Is(rn) && rd.Is64Bits() && rn.Is64Bits() &&
   1757       (S == LeaveFlags)) {
   1758     // The instruction would be a nop. Avoid generating useless code.
   1759     return;
   1760   }
   1761 
   1762   if ((operand.IsImmediate() && !IsImmAddSub(operand.GetImmediate())) ||
   1763       (rn.IsZero() && !operand.IsShiftedRegister()) ||
   1764       (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
   1765     UseScratchRegisterScope temps(this);
   1766     // Use `rd` as a temp, if we can.
   1767     temps.Include(rd);
   1768     // We read `rn` after evaluating `operand`.
   1769     temps.Exclude(rn);
    1770     // It doesn't matter if `operand` is in `temps` (e.g. because it aliases
   1771     // `rd`) because we don't need it after it is evaluated.
   1772     Register temp = temps.AcquireSameSizeAs(rn);
   1773     if (operand.IsImmediate()) {
   1774       PreShiftImmMode mode = kAnyShift;
   1775 
   1776       // If the destination or source register is the stack pointer, we can
   1777       // only pre-shift the immediate right by values supported in the add/sub
   1778       // extend encoding.
   1779       if (rd.IsSP()) {
   1780         // If the destination is SP and flags will be set, we can't pre-shift
   1781         // the immediate at all.
   1782         mode = (S == SetFlags) ? kNoShift : kLimitShiftForSP;
   1783       } else if (rn.IsSP()) {
   1784         mode = kLimitShiftForSP;
   1785       }
   1786 
   1787       Operand imm_operand =
   1788           MoveImmediateForShiftedOp(temp, operand.GetImmediate(), mode);
   1789       AddSub(rd, rn, imm_operand, S, op);
   1790     } else {
   1791       Mov(temp, operand);
   1792       AddSub(rd, rn, temp, S, op);
   1793     }
   1794   } else {
   1795     AddSub(rd, rn, operand, S, op);
   1796   }
   1797 }
   1798 
   1799 
   1800 void MacroAssembler::Adc(const Register& rd,
   1801                          const Register& rn,
   1802                          const Operand& operand) {
   1803   VIXL_ASSERT(allow_macro_instructions_);
   1804   AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, ADC);
   1805 }
   1806 
   1807 
   1808 void MacroAssembler::Adcs(const Register& rd,
   1809                           const Register& rn,
   1810                           const Operand& operand) {
   1811   VIXL_ASSERT(allow_macro_instructions_);
   1812   AddSubWithCarryMacro(rd, rn, operand, SetFlags, ADC);
   1813 }
   1814 
   1815 
   1816 void MacroAssembler::Sbc(const Register& rd,
   1817                          const Register& rn,
   1818                          const Operand& operand) {
   1819   VIXL_ASSERT(allow_macro_instructions_);
   1820   AddSubWithCarryMacro(rd, rn, operand, LeaveFlags, SBC);
   1821 }
   1822 
   1823 
   1824 void MacroAssembler::Sbcs(const Register& rd,
   1825                           const Register& rn,
   1826                           const Operand& operand) {
   1827   VIXL_ASSERT(allow_macro_instructions_);
   1828   AddSubWithCarryMacro(rd, rn, operand, SetFlags, SBC);
   1829 }
   1830 
   1831 
   1832 void MacroAssembler::Ngc(const Register& rd, const Operand& operand) {
   1833   VIXL_ASSERT(allow_macro_instructions_);
   1834   Register zr = AppropriateZeroRegFor(rd);
   1835   Sbc(rd, zr, operand);
   1836 }
   1837 
   1838 
   1839 void MacroAssembler::Ngcs(const Register& rd, const Operand& operand) {
   1840   VIXL_ASSERT(allow_macro_instructions_);
   1841   Register zr = AppropriateZeroRegFor(rd);
   1842   Sbcs(rd, zr, operand);
   1843 }
   1844 
   1845 
   1846 void MacroAssembler::AddSubWithCarryMacro(const Register& rd,
   1847                                           const Register& rn,
   1848                                           const Operand& operand,
   1849                                           FlagsUpdate S,
   1850                                           AddSubWithCarryOp op) {
   1851   VIXL_ASSERT(rd.GetSizeInBits() == rn.GetSizeInBits());
    1852   // Worst case is adc/sbc with an immediate:
    1853   //  * up to 4 instructions to materialise the constant
    1854   //  * 1 instruction for adc/sbc
   1855   MacroEmissionCheckScope guard(this);
   1856   UseScratchRegisterScope temps(this);
   1857   // Use `rd` as a temp, if we can.
   1858   temps.Include(rd);
   1859   // We read `rn` after evaluating `operand`.
   1860   temps.Exclude(rn);
    1861   // It doesn't matter if `operand` is in `temps` (e.g. because it aliases `rd`)
   1862   // because we don't need it after it is evaluated.
   1863 
   1864   if (operand.IsImmediate() ||
   1865       (operand.IsShiftedRegister() && (operand.GetShift() == ROR))) {
    1866     // Add/sub with carry (immediate or ROR shifted register).
   1867     Register temp = temps.AcquireSameSizeAs(rn);
   1868     Mov(temp, operand);
   1869     AddSubWithCarry(rd, rn, Operand(temp), S, op);
   1870   } else if (operand.IsShiftedRegister() && (operand.GetShiftAmount() != 0)) {
   1871     // Add/sub with carry (shifted register).
   1872     VIXL_ASSERT(operand.GetRegister().GetSizeInBits() == rd.GetSizeInBits());
   1873     VIXL_ASSERT(operand.GetShift() != ROR);
   1874     VIXL_ASSERT(
   1875         IsUintN(rd.GetSizeInBits() == kXRegSize ? kXRegSizeLog2 : kWRegSizeLog2,
   1876                 operand.GetShiftAmount()));
   1877     Register temp = temps.AcquireSameSizeAs(rn);
   1878     EmitShift(temp,
   1879               operand.GetRegister(),
   1880               operand.GetShift(),
   1881               operand.GetShiftAmount());
   1882     AddSubWithCarry(rd, rn, Operand(temp), S, op);
   1883   } else if (operand.IsExtendedRegister()) {
   1884     // Add/sub with carry (extended register).
   1885     VIXL_ASSERT(operand.GetRegister().GetSizeInBits() <= rd.GetSizeInBits());
   1886     // Add/sub extended supports a shift <= 4. We want to support exactly the
   1887     // same modes.
   1888     VIXL_ASSERT(operand.GetShiftAmount() <= 4);
   1889     VIXL_ASSERT(
   1890         operand.GetRegister().Is64Bits() ||
   1891         ((operand.GetExtend() != UXTX) && (operand.GetExtend() != SXTX)));
   1892     Register temp = temps.AcquireSameSizeAs(rn);
   1893     EmitExtendShift(temp,
   1894                     operand.GetRegister(),
   1895                     operand.GetExtend(),
   1896                     operand.GetShiftAmount());
   1897     AddSubWithCarry(rd, rn, Operand(temp), S, op);
   1898   } else {
   1899     // The addressing mode is directly supported by the instruction.
   1900     AddSubWithCarry(rd, rn, operand, S, op);
   1901   }
   1902 }
   1903 
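         // For illustration: adc/sbc accept only a plain register operand, so
         // the macro above flattens anything else into a scratch register
         // (shown as <tmp>), roughly:
         //   Adc(x0, x1, 123);                  // mov <tmp>, #123; adc x0, x1, <tmp>
         //   Adc(x0, x1, Operand(x2, LSL, 4));  // lsl <tmp>, x2, #4; adc x0, x1, <tmp>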
   1904 
   1905 void MacroAssembler::Rmif(const Register& xn,
   1906                           unsigned shift,
   1907                           StatusFlags flags) {
   1908   VIXL_ASSERT(allow_macro_instructions_);
   1909   SingleEmissionCheckScope guard(this);
   1910   rmif(xn, shift, flags);
   1911 }
   1912 
   1913 
   1914 void MacroAssembler::Setf8(const Register& wn) {
   1915   VIXL_ASSERT(allow_macro_instructions_);
   1916   SingleEmissionCheckScope guard(this);
   1917   setf8(wn);
   1918 }
   1919 
   1920 
   1921 void MacroAssembler::Setf16(const Register& wn) {
   1922   VIXL_ASSERT(allow_macro_instructions_);
   1923   SingleEmissionCheckScope guard(this);
   1924   setf16(wn);
   1925 }
   1926 
   1927 
   1928 #define DEFINE_FUNCTION(FN, REGTYPE, REG, OP)                          \
   1929   void MacroAssembler::FN(const REGTYPE REG, const MemOperand& addr) { \
   1930     VIXL_ASSERT(allow_macro_instructions_);                            \
   1931     LoadStoreMacro(REG, addr, OP);                                     \
   1932   }
   1933 LS_MACRO_LIST(DEFINE_FUNCTION)
   1934 #undef DEFINE_FUNCTION
   1935 
   1936 
   1937 void MacroAssembler::LoadStoreMacro(const CPURegister& rt,
   1938                                     const MemOperand& addr,
   1939                                     LoadStoreOp op) {
   1940   VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsImmediatePostIndex() ||
   1941               addr.IsImmediatePreIndex() || addr.IsRegisterOffset());
   1942 
   1943   // Worst case is ldr/str pre/post index:
   1944   //  * 1 instruction for ldr/str
   1945   //  * up to 4 instructions to materialise the constant
   1946   //  * 1 instruction to update the base
   1947   MacroEmissionCheckScope guard(this);
   1948 
   1949   int64_t offset = addr.GetOffset();
   1950   unsigned access_size = CalcLSDataSize(op);
   1951 
   1952   // Check if an immediate offset fits in the immediate field of the
   1953   // appropriate instruction. If not, emit two instructions to perform
   1954   // the operation.
   1955   if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, access_size) &&
   1956       !IsImmLSUnscaled(offset)) {
   1957     // Immediate offset that can't be encoded using unsigned or unscaled
   1958     // addressing modes.
   1959     UseScratchRegisterScope temps(this);
   1960     Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister());
   1961     Mov(temp, addr.GetOffset());
   1962     LoadStore(rt, MemOperand(addr.GetBaseRegister(), temp), op);
   1963   } else if (addr.IsImmediatePostIndex() && !IsImmLSUnscaled(offset)) {
   1964     // Post-index beyond unscaled addressing range.
   1965     LoadStore(rt, MemOperand(addr.GetBaseRegister()), op);
   1966     Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset));
   1967   } else if (addr.IsImmediatePreIndex() && !IsImmLSUnscaled(offset)) {
   1968     // Pre-index beyond unscaled addressing range.
   1969     Add(addr.GetBaseRegister(), addr.GetBaseRegister(), Operand(offset));
   1970     LoadStore(rt, MemOperand(addr.GetBaseRegister()), op);
   1971   } else {
   1972     // Encodable in one load/store instruction.
   1973     LoadStore(rt, addr, op);
   1974   }
   1975 }
   1976 
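         // For illustration: an offset that fits neither the scaled nor the
         // unscaled encoding is materialised into a scratch register (shown as
         // <tmp>), roughly:
         //   Ldr(x0, MemOperand(x1, 0x123456));  // mov <tmp>, #0x123456;
         //                                       // ldr x0, [x1, <tmp>]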
   1977 
   1978 #define DEFINE_FUNCTION(FN, REGTYPE, REG, REG2, OP) \
   1979   void MacroAssembler::FN(const REGTYPE REG,        \
   1980                           const REGTYPE REG2,       \
   1981                           const MemOperand& addr) { \
   1982     VIXL_ASSERT(allow_macro_instructions_);         \
   1983     LoadStorePairMacro(REG, REG2, addr, OP);        \
   1984   }
   1985 LSPAIR_MACRO_LIST(DEFINE_FUNCTION)
   1986 #undef DEFINE_FUNCTION
   1987 
   1988 void MacroAssembler::LoadStorePairMacro(const CPURegister& rt,
   1989                                         const CPURegister& rt2,
   1990                                         const MemOperand& addr,
   1991                                         LoadStorePairOp op) {
   1992   // TODO(all): Should we support register offset for load-store-pair?
   1993   VIXL_ASSERT(!addr.IsRegisterOffset());
   1994   // Worst case is ldp/stp immediate:
   1995   //  * 1 instruction for ldp/stp
   1996   //  * up to 4 instructions to materialise the constant
   1997   //  * 1 instruction to update the base
   1998   MacroEmissionCheckScope guard(this);
   1999 
   2000   int64_t offset = addr.GetOffset();
   2001   unsigned access_size = CalcLSPairDataSize(op);
   2002 
   2003   // Check if the offset fits in the immediate field of the appropriate
   2004   // instruction. If not, emit two instructions to perform the operation.
   2005   if (IsImmLSPair(offset, access_size)) {
   2006     // Encodable in one load/store pair instruction.
   2007     LoadStorePair(rt, rt2, addr, op);
   2008   } else {
   2009     Register base = addr.GetBaseRegister();
   2010     if (addr.IsImmediateOffset()) {
   2011       UseScratchRegisterScope temps(this);
   2012       Register temp = temps.AcquireSameSizeAs(base);
   2013       Add(temp, base, offset);
   2014       LoadStorePair(rt, rt2, MemOperand(temp), op);
   2015     } else if (addr.IsImmediatePostIndex()) {
   2016       LoadStorePair(rt, rt2, MemOperand(base), op);
   2017       Add(base, base, offset);
   2018     } else {
   2019       VIXL_ASSERT(addr.IsImmediatePreIndex());
   2020       Add(base, base, offset);
   2021       LoadStorePair(rt, rt2, MemOperand(base), op);
   2022     }
   2023   }
   2024 }
   2025 
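         // For illustration: the ldp/stp immediate is a signed, scaled 7-bit
         // field ([-512, 504] in steps of 8 for X registers), so other offsets
         // are rebased through a scratch register (shown as <tmp>), roughly:
         //   Ldp(x0, x1, MemOperand(x2, 1024));  // add <tmp>, x2, #1024;
         //                                       // ldp x0, x1, [<tmp>]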
   2026 
   2027 void MacroAssembler::Prfm(PrefetchOperation op, const MemOperand& addr) {
   2028   MacroEmissionCheckScope guard(this);
   2029 
   2030   // There are no pre- or post-index modes for prfm.
   2031   VIXL_ASSERT(addr.IsImmediateOffset() || addr.IsRegisterOffset());
   2032 
   2033   // The access size is implicitly 8 bytes for all prefetch operations.
   2034   unsigned size = kXRegSizeInBytesLog2;
   2035 
   2036   // Check if an immediate offset fits in the immediate field of the
   2037   // appropriate instruction. If not, emit two instructions to perform
   2038   // the operation.
   2039   if (addr.IsImmediateOffset() && !IsImmLSScaled(addr.GetOffset(), size) &&
   2040       !IsImmLSUnscaled(addr.GetOffset())) {
   2041     // Immediate offset that can't be encoded using unsigned or unscaled
   2042     // addressing modes.
   2043     UseScratchRegisterScope temps(this);
   2044     Register temp = temps.AcquireSameSizeAs(addr.GetBaseRegister());
   2045     Mov(temp, addr.GetOffset());
   2046     Prefetch(op, MemOperand(addr.GetBaseRegister(), temp));
   2047   } else {
   2048     // Simple register-offsets are encodable in one instruction.
   2049     Prefetch(op, addr);
   2050   }
   2051 }
   2052 
   2053 
   2054 void MacroAssembler::Push(const CPURegister& src0,
   2055                           const CPURegister& src1,
   2056                           const CPURegister& src2,
   2057                           const CPURegister& src3) {
   2058   VIXL_ASSERT(allow_macro_instructions_);
   2059   VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
   2060   VIXL_ASSERT(src0.IsValid());
   2061 
   2062   int count = 1 + src1.IsValid() + src2.IsValid() + src3.IsValid();
   2063   int size = src0.GetSizeInBytes();
   2064 
   2065   PrepareForPush(count, size);
   2066   PushHelper(count, size, src0, src1, src2, src3);
   2067 }
   2068 
   2069 
   2070 void MacroAssembler::Pop(const CPURegister& dst0,
   2071                          const CPURegister& dst1,
   2072                          const CPURegister& dst2,
   2073                          const CPURegister& dst3) {
   2074   // It is not valid to pop into the same register more than once in one
   2075   // instruction, not even into the zero register.
   2076   VIXL_ASSERT(allow_macro_instructions_);
   2077   VIXL_ASSERT(!AreAliased(dst0, dst1, dst2, dst3));
   2078   VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
   2079   VIXL_ASSERT(dst0.IsValid());
   2080 
   2081   int count = 1 + dst1.IsValid() + dst2.IsValid() + dst3.IsValid();
   2082   int size = dst0.GetSizeInBytes();
   2083 
   2084   PrepareForPop(count, size);
   2085   PopHelper(count, size, dst0, dst1, dst2, dst3);
   2086 }
   2087 
   2088 
   2089 void MacroAssembler::PushCPURegList(CPURegList registers) {
   2090   VIXL_ASSERT(!registers.Overlaps(*GetScratchRegisterList()));
   2091   VIXL_ASSERT(!registers.Overlaps(*GetScratchVRegisterList()));
   2092   VIXL_ASSERT(allow_macro_instructions_);
   2093 
   2094   int reg_size = registers.GetRegisterSizeInBytes();
   2095   PrepareForPush(registers.GetCount(), reg_size);
   2096 
   2097   // Bump the stack pointer and store two registers at the bottom.
   2098   int size = registers.GetTotalSizeInBytes();
   2099   const CPURegister& bottom_0 = registers.PopLowestIndex();
   2100   const CPURegister& bottom_1 = registers.PopLowestIndex();
   2101   if (bottom_0.IsValid() && bottom_1.IsValid()) {
   2102     Stp(bottom_0, bottom_1, MemOperand(StackPointer(), -size, PreIndex));
   2103   } else if (bottom_0.IsValid()) {
   2104     Str(bottom_0, MemOperand(StackPointer(), -size, PreIndex));
   2105   }
   2106 
   2107   int offset = 2 * reg_size;
   2108   while (!registers.IsEmpty()) {
   2109     const CPURegister& src0 = registers.PopLowestIndex();
   2110     const CPURegister& src1 = registers.PopLowestIndex();
   2111     if (src1.IsValid()) {
   2112       Stp(src0, src1, MemOperand(StackPointer(), offset));
   2113     } else {
   2114       Str(src0, MemOperand(StackPointer(), offset));
   2115     }
   2116     offset += 2 * reg_size;
   2117   }
   2118 }
   2119 
   2120 
   2121 void MacroAssembler::PopCPURegList(CPURegList registers) {
   2122   VIXL_ASSERT(!registers.Overlaps(*GetScratchRegisterList()));
   2123   VIXL_ASSERT(!registers.Overlaps(*GetScratchVRegisterList()));
   2124   VIXL_ASSERT(allow_macro_instructions_);
   2125 
   2126   int reg_size = registers.GetRegisterSizeInBytes();
   2127   PrepareForPop(registers.GetCount(), reg_size);
    2128 
   2130   int size = registers.GetTotalSizeInBytes();
   2131   const CPURegister& bottom_0 = registers.PopLowestIndex();
   2132   const CPURegister& bottom_1 = registers.PopLowestIndex();
   2133 
   2134   int offset = 2 * reg_size;
   2135   while (!registers.IsEmpty()) {
   2136     const CPURegister& dst0 = registers.PopLowestIndex();
   2137     const CPURegister& dst1 = registers.PopLowestIndex();
   2138     if (dst1.IsValid()) {
   2139       Ldp(dst0, dst1, MemOperand(StackPointer(), offset));
   2140     } else {
   2141       Ldr(dst0, MemOperand(StackPointer(), offset));
   2142     }
   2143     offset += 2 * reg_size;
   2144   }
   2145 
   2146   // Load the two registers at the bottom and drop the stack pointer.
   2147   if (bottom_0.IsValid() && bottom_1.IsValid()) {
   2148     Ldp(bottom_0, bottom_1, MemOperand(StackPointer(), size, PostIndex));
   2149   } else if (bottom_0.IsValid()) {
   2150     Ldr(bottom_0, MemOperand(StackPointer(), size, PostIndex));
   2151   }
   2152 }
   2153 
   2154 
   2155 void MacroAssembler::PushMultipleTimes(int count, Register src) {
   2156   VIXL_ASSERT(allow_macro_instructions_);
   2157   int size = src.GetSizeInBytes();
   2158 
   2159   PrepareForPush(count, size);
   2160   // Push up to four registers at a time if possible because if the current
    2161   // stack pointer is sp and the register size is 32 bits, registers must be
    2162   // pushed in blocks of four in order to maintain the 16-byte alignment for sp.
   2163   while (count >= 4) {
   2164     PushHelper(4, size, src, src, src, src);
   2165     count -= 4;
   2166   }
   2167   if (count >= 2) {
   2168     PushHelper(2, size, src, src, NoReg, NoReg);
   2169     count -= 2;
   2170   }
   2171   if (count == 1) {
   2172     PushHelper(1, size, src, NoReg, NoReg, NoReg);
   2173     count -= 1;
   2174   }
   2175   VIXL_ASSERT(count == 0);
   2176 }
   2177 
   2178 
   2179 void MacroAssembler::PushHelper(int count,
   2180                                 int size,
   2181                                 const CPURegister& src0,
   2182                                 const CPURegister& src1,
   2183                                 const CPURegister& src2,
   2184                                 const CPURegister& src3) {
   2185   // Ensure that we don't unintentionally modify scratch or debug registers.
   2186   // Worst case for size is 2 stp.
   2187   ExactAssemblyScope scope(this,
   2188                            2 * kInstructionSize,
   2189                            ExactAssemblyScope::kMaximumSize);
   2190 
   2191   VIXL_ASSERT(AreSameSizeAndType(src0, src1, src2, src3));
   2192   VIXL_ASSERT(size == src0.GetSizeInBytes());
   2193 
   2194   // When pushing multiple registers, the store order is chosen such that
   2195   // Push(a, b) is equivalent to Push(a) followed by Push(b).
   2196   switch (count) {
   2197     case 1:
   2198       VIXL_ASSERT(src1.IsNone() && src2.IsNone() && src3.IsNone());
   2199       str(src0, MemOperand(StackPointer(), -1 * size, PreIndex));
   2200       break;
   2201     case 2:
   2202       VIXL_ASSERT(src2.IsNone() && src3.IsNone());
   2203       stp(src1, src0, MemOperand(StackPointer(), -2 * size, PreIndex));
   2204       break;
   2205     case 3:
   2206       VIXL_ASSERT(src3.IsNone());
   2207       stp(src2, src1, MemOperand(StackPointer(), -3 * size, PreIndex));
   2208       str(src0, MemOperand(StackPointer(), 2 * size));
   2209       break;
   2210     case 4:
   2211       // Skip over 4 * size, then fill in the gap. This allows four W registers
   2212       // to be pushed using sp, whilst maintaining 16-byte alignment for sp at
   2213       // all times.
   2214       stp(src3, src2, MemOperand(StackPointer(), -4 * size, PreIndex));
   2215       stp(src1, src0, MemOperand(StackPointer(), 2 * size));
   2216       break;
   2217     default:
   2218       VIXL_UNREACHABLE();
   2219   }
   2220 }
   2221 
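         // For illustration, Push(w0, w1, w2, w3) with sp as the stack pointer
         // takes the count == 4 path above and is emitted roughly as:
         //   stp w3, w2, [sp, #-16]!  // w3 -> [sp], w2 -> [sp, #4]
         //   stp w1, w0, [sp, #8]     // w1 -> [sp, #8], w0 -> [sp, #12]
         // so w0, the register pushed first, lands at the highest address.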
   2222 
   2223 void MacroAssembler::PopHelper(int count,
   2224                                int size,
   2225                                const CPURegister& dst0,
   2226                                const CPURegister& dst1,
   2227                                const CPURegister& dst2,
   2228                                const CPURegister& dst3) {
   2229   // Ensure that we don't unintentionally modify scratch or debug registers.
   2230   // Worst case for size is 2 ldp.
   2231   ExactAssemblyScope scope(this,
   2232                            2 * kInstructionSize,
   2233                            ExactAssemblyScope::kMaximumSize);
   2234 
   2235   VIXL_ASSERT(AreSameSizeAndType(dst0, dst1, dst2, dst3));
   2236   VIXL_ASSERT(size == dst0.GetSizeInBytes());
   2237 
   2238   // When popping multiple registers, the load order is chosen such that
   2239   // Pop(a, b) is equivalent to Pop(a) followed by Pop(b).
   2240   switch (count) {
   2241     case 1:
   2242       VIXL_ASSERT(dst1.IsNone() && dst2.IsNone() && dst3.IsNone());
   2243       ldr(dst0, MemOperand(StackPointer(), 1 * size, PostIndex));
   2244       break;
   2245     case 2:
   2246       VIXL_ASSERT(dst2.IsNone() && dst3.IsNone());
   2247       ldp(dst0, dst1, MemOperand(StackPointer(), 2 * size, PostIndex));
   2248       break;
   2249     case 3:
   2250       VIXL_ASSERT(dst3.IsNone());
   2251       ldr(dst2, MemOperand(StackPointer(), 2 * size));
   2252       ldp(dst0, dst1, MemOperand(StackPointer(), 3 * size, PostIndex));
   2253       break;
   2254     case 4:
   2255       // Load the higher addresses first, then load the lower addresses and skip
   2256       // the whole block in the second instruction. This allows four W registers
   2257       // to be popped using sp, whilst maintaining 16-byte alignment for sp at
   2258       // all times.
   2259       ldp(dst2, dst3, MemOperand(StackPointer(), 2 * size));
   2260       ldp(dst0, dst1, MemOperand(StackPointer(), 4 * size, PostIndex));
   2261       break;
   2262     default:
   2263       VIXL_UNREACHABLE();
   2264   }
   2265 }
   2266 
   2267 
   2268 void MacroAssembler::PrepareForPush(int count, int size) {
   2269   if (sp.Is(StackPointer())) {
   2270     // If the current stack pointer is sp, then it must be aligned to 16 bytes
   2271     // on entry and the total size of the specified registers must also be a
   2272     // multiple of 16 bytes.
   2273     VIXL_ASSERT((count * size) % 16 == 0);
   2274   } else {
   2275     // Even if the current stack pointer is not the system stack pointer (sp),
   2276     // the system stack pointer will still be modified in order to comply with
   2277     // ABI rules about accessing memory below the system stack pointer.
   2278     BumpSystemStackPointer(count * size);
   2279   }
   2280 }
   2281 
   2282 
   2283 void MacroAssembler::PrepareForPop(int count, int size) {
   2284   USE(count, size);
   2285   if (sp.Is(StackPointer())) {
   2286     // If the current stack pointer is sp, then it must be aligned to 16 bytes
   2287     // on entry and the total size of the specified registers must also be a
   2288     // multiple of 16 bytes.
   2289     VIXL_ASSERT((count * size) % 16 == 0);
   2290   }
   2291 }
   2292 
   2293 void MacroAssembler::Poke(const Register& src, const Operand& offset) {
   2294   VIXL_ASSERT(allow_macro_instructions_);
   2295   if (offset.IsImmediate()) {
   2296     VIXL_ASSERT(offset.GetImmediate() >= 0);
   2297   }
   2298 
   2299   Str(src, MemOperand(StackPointer(), offset));
   2300 }
   2301 
   2302 
   2303 void MacroAssembler::Peek(const Register& dst, const Operand& offset) {
   2304   VIXL_ASSERT(allow_macro_instructions_);
   2305   if (offset.IsImmediate()) {
   2306     VIXL_ASSERT(offset.GetImmediate() >= 0);
   2307   }
   2308 
   2309   Ldr(dst, MemOperand(StackPointer(), offset));
   2310 }
   2311 
   2312 
   2313 void MacroAssembler::Claim(const Operand& size) {
   2314   VIXL_ASSERT(allow_macro_instructions_);
   2315 
   2316   if (size.IsZero()) {
   2317     return;
   2318   }
   2319 
   2320   if (size.IsImmediate()) {
   2321     VIXL_ASSERT(size.GetImmediate() > 0);
   2322     if (sp.Is(StackPointer())) {
   2323       VIXL_ASSERT((size.GetImmediate() % 16) == 0);
   2324     }
   2325   }
   2326 
   2327   if (!sp.Is(StackPointer())) {
   2328     BumpSystemStackPointer(size);
   2329   }
   2330 
   2331   Sub(StackPointer(), StackPointer(), size);
   2332 }
   2333 
   2334 
   2335 void MacroAssembler::Drop(const Operand& size) {
   2336   VIXL_ASSERT(allow_macro_instructions_);
   2337 
   2338   if (size.IsZero()) {
   2339     return;
   2340   }
   2341 
   2342   if (size.IsImmediate()) {
   2343     VIXL_ASSERT(size.GetImmediate() > 0);
   2344     if (sp.Is(StackPointer())) {
   2345       VIXL_ASSERT((size.GetImmediate() % 16) == 0);
   2346     }
   2347   }
   2348 
   2349   Add(StackPointer(), StackPointer(), size);
   2350 }
   2351 
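         // For illustration, Claim and Drop bracket a scratch area that Poke and
         // Peek then address relative to the stack pointer, roughly:
         //   Claim(16);    // sub sp, sp, #16 (multiples of 16 when using sp)
         //   Poke(x0, 0);  // str x0, [sp]
         //   Peek(x1, 8);  // ldr x1, [sp, #8]
         //   Drop(16);     // add sp, sp, #16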
   2352 
   2353 void MacroAssembler::PushCalleeSavedRegisters() {
   2354   // Ensure that the macro-assembler doesn't use any scratch registers.
   2355   // 10 stp will be emitted.
    2356   // TODO(all): Should we use GetCalleeSaved and SavedFP?
   2357   ExactAssemblyScope scope(this, 10 * kInstructionSize);
   2358 
   2359   // This method must not be called unless the current stack pointer is sp.
   2360   VIXL_ASSERT(sp.Is(StackPointer()));
   2361 
   2362   MemOperand tos(sp, -2 * static_cast<int>(kXRegSizeInBytes), PreIndex);
   2363 
   2364   stp(x29, x30, tos);
   2365   stp(x27, x28, tos);
   2366   stp(x25, x26, tos);
   2367   stp(x23, x24, tos);
   2368   stp(x21, x22, tos);
   2369   stp(x19, x20, tos);
   2370 
   2371   stp(d14, d15, tos);
   2372   stp(d12, d13, tos);
   2373   stp(d10, d11, tos);
   2374   stp(d8, d9, tos);
   2375 }
   2376 
   2377 
   2378 void MacroAssembler::PopCalleeSavedRegisters() {
   2379   // Ensure that the macro-assembler doesn't use any scratch registers.
   2380   // 10 ldp will be emitted.
    2381   // TODO(all): Should we use GetCalleeSaved and SavedFP?
   2382   ExactAssemblyScope scope(this, 10 * kInstructionSize);
   2383 
   2384   // This method must not be called unless the current stack pointer is sp.
   2385   VIXL_ASSERT(sp.Is(StackPointer()));
   2386 
   2387   MemOperand tos(sp, 2 * kXRegSizeInBytes, PostIndex);
   2388 
   2389   ldp(d8, d9, tos);
   2390   ldp(d10, d11, tos);
   2391   ldp(d12, d13, tos);
   2392   ldp(d14, d15, tos);
   2393 
   2394   ldp(x19, x20, tos);
   2395   ldp(x21, x22, tos);
   2396   ldp(x23, x24, tos);
   2397   ldp(x25, x26, tos);
   2398   ldp(x27, x28, tos);
   2399   ldp(x29, x30, tos);
   2400 }
   2401 
   2402 void MacroAssembler::LoadCPURegList(CPURegList registers,
   2403                                     const MemOperand& src) {
   2404   LoadStoreCPURegListHelper(kLoad, registers, src);
   2405 }
   2406 
   2407 void MacroAssembler::StoreCPURegList(CPURegList registers,
   2408                                      const MemOperand& dst) {
   2409   LoadStoreCPURegListHelper(kStore, registers, dst);
   2410 }
   2411 
   2412 
   2413 void MacroAssembler::LoadStoreCPURegListHelper(LoadStoreCPURegListAction op,
   2414                                                CPURegList registers,
   2415                                                const MemOperand& mem) {
   2416   // We do not handle pre-indexing or post-indexing.
   2417   VIXL_ASSERT(!(mem.IsPreIndex() || mem.IsPostIndex()));
   2418   VIXL_ASSERT(!registers.Overlaps(tmp_list_));
   2419   VIXL_ASSERT(!registers.Overlaps(v_tmp_list_));
   2420   VIXL_ASSERT(!registers.Overlaps(p_tmp_list_));
   2421   VIXL_ASSERT(!registers.IncludesAliasOf(sp));
   2422 
   2423   UseScratchRegisterScope temps(this);
   2424 
   2425   MemOperand loc = BaseMemOperandForLoadStoreCPURegList(registers, mem, &temps);
   2426   const int reg_size = registers.GetRegisterSizeInBytes();
   2427 
   2428   VIXL_ASSERT(IsPowerOf2(reg_size));
   2429 
   2430   // Since we are operating on register pairs, we would like to align on double
   2431   // the standard size; on the other hand, we don't want to insert an extra
   2432   // operation, which will happen if the number of registers is even. Note that
   2433   // the alignment of the base pointer is unknown here, but we assume that it
   2434   // is more likely to be aligned.
   2435   if (((loc.GetOffset() & (2 * reg_size - 1)) != 0) &&
   2436       ((registers.GetCount() % 2) != 0)) {
   2437     if (op == kStore) {
   2438       Str(registers.PopLowestIndex(), loc);
   2439     } else {
   2440       VIXL_ASSERT(op == kLoad);
   2441       Ldr(registers.PopLowestIndex(), loc);
   2442     }
   2443     loc.AddOffset(reg_size);
   2444   }
   2445   while (registers.GetCount() >= 2) {
   2446     const CPURegister& dst0 = registers.PopLowestIndex();
   2447     const CPURegister& dst1 = registers.PopLowestIndex();
   2448     if (op == kStore) {
   2449       Stp(dst0, dst1, loc);
   2450     } else {
   2451       VIXL_ASSERT(op == kLoad);
   2452       Ldp(dst0, dst1, loc);
   2453     }
   2454     loc.AddOffset(2 * reg_size);
   2455   }
   2456   if (!registers.IsEmpty()) {
   2457     if (op == kStore) {
   2458       Str(registers.PopLowestIndex(), loc);
   2459     } else {
   2460       VIXL_ASSERT(op == kLoad);
   2461       Ldr(registers.PopLowestIndex(), loc);
   2462     }
   2463   }
   2464 }
   2465 
   2466 MemOperand MacroAssembler::BaseMemOperandForLoadStoreCPURegList(
   2467     const CPURegList& registers,
   2468     const MemOperand& mem,
   2469     UseScratchRegisterScope* scratch_scope) {
   2470   // If necessary, pre-compute the base address for the accesses.
   2471   if (mem.IsRegisterOffset()) {
   2472     Register reg_base = scratch_scope->AcquireX();
   2473     ComputeAddress(reg_base, mem);
   2474     return MemOperand(reg_base);
   2475 
   2476   } else if (mem.IsImmediateOffset()) {
   2477     int reg_size = registers.GetRegisterSizeInBytes();
   2478     int total_size = registers.GetTotalSizeInBytes();
   2479     int64_t min_offset = mem.GetOffset();
   2480     int64_t max_offset =
   2481         mem.GetOffset() + std::max(0, total_size - 2 * reg_size);
   2482     if ((registers.GetCount() >= 2) &&
   2483         (!Assembler::IsImmLSPair(min_offset, WhichPowerOf2(reg_size)) ||
   2484          !Assembler::IsImmLSPair(max_offset, WhichPowerOf2(reg_size)))) {
   2485       Register reg_base = scratch_scope->AcquireX();
   2486       ComputeAddress(reg_base, mem);
   2487       return MemOperand(reg_base);
   2488     }
   2489   }
   2490 
   2491   return mem;
   2492 }
   2493 
   2494 void MacroAssembler::BumpSystemStackPointer(const Operand& space) {
   2495   VIXL_ASSERT(!sp.Is(StackPointer()));
   2496   // TODO: Several callers rely on this not using scratch registers, so we use
   2497   // the assembler directly here. However, this means that large immediate
   2498   // values of 'space' cannot be handled.
   2499   ExactAssemblyScope scope(this, kInstructionSize);
   2500   sub(sp, StackPointer(), space);
   2501 }
   2502 
   2503 
   2504 // TODO(all): Fix printf for NEON and SVE registers.
   2505 
   2506 // This is the main Printf implementation. All callee-saved registers are
   2507 // preserved, but NZCV and the caller-saved registers may be clobbered.
   2508 void MacroAssembler::PrintfNoPreserve(const char* format,
   2509                                       const CPURegister& arg0,
   2510                                       const CPURegister& arg1,
   2511                                       const CPURegister& arg2,
   2512                                       const CPURegister& arg3) {
   2513   // We cannot handle a caller-saved stack pointer. It doesn't make much sense
   2514   // in most cases anyway, so this restriction shouldn't be too serious.
   2515   VIXL_ASSERT(!kCallerSaved.IncludesAliasOf(StackPointer()));
   2516 
   2517   // The provided arguments, and their proper PCS registers.
   2518   CPURegister args[kPrintfMaxArgCount] = {arg0, arg1, arg2, arg3};
   2519   CPURegister pcs[kPrintfMaxArgCount];
   2520 
   2521   int arg_count = kPrintfMaxArgCount;
   2522 
   2523   // The PCS varargs registers for printf. Note that x0 is used for the printf
   2524   // format string.
   2525   static const CPURegList kPCSVarargs =
   2526       CPURegList(CPURegister::kRegister, kXRegSize, 1, arg_count);
   2527   static const CPURegList kPCSVarargsV =
   2528       CPURegList(CPURegister::kVRegister, kDRegSize, 0, arg_count - 1);
   2529 
   2530   // We can use caller-saved registers as scratch values, except for the
   2531   // arguments and the PCS registers where they might need to go.
   2532   UseScratchRegisterScope temps(this);
   2533   temps.Include(kCallerSaved);
   2534   temps.Include(kCallerSavedV);
   2535   temps.Exclude(kPCSVarargs);
   2536   temps.Exclude(kPCSVarargsV);
   2537   temps.Exclude(arg0, arg1, arg2, arg3);
   2538 
   2539   // Copies of the arg lists that we can iterate through.
   2540   CPURegList pcs_varargs = kPCSVarargs;
   2541   CPURegList pcs_varargs_fp = kPCSVarargsV;
   2542 
   2543   // Place the arguments. There are lots of clever tricks and optimizations we
   2544   // could use here, but Printf is a debug tool so instead we just try to keep
   2545   // it simple: Move each input that isn't already in the right place to a
   2546   // scratch register, then move everything back.
   2547   for (unsigned i = 0; i < kPrintfMaxArgCount; i++) {
   2548     // Work out the proper PCS register for this argument.
   2549     if (args[i].IsRegister()) {
   2550       pcs[i] = pcs_varargs.PopLowestIndex().X();
   2551       // We might only need a W register here. We need to know the size of the
   2552       // argument so we can properly encode it for the simulator call.
   2553       if (args[i].Is32Bits()) pcs[i] = pcs[i].W();
   2554     } else if (args[i].IsVRegister()) {
   2555       // In C, floats are always cast to doubles for varargs calls.
   2556       pcs[i] = pcs_varargs_fp.PopLowestIndex().D();
   2557     } else {
   2558       VIXL_ASSERT(args[i].IsNone());
   2559       arg_count = i;
   2560       break;
   2561     }
   2562 
   2563     // If the argument is already in the right place, leave it where it is.
   2564     if (args[i].Aliases(pcs[i])) continue;
   2565 
   2566     // Otherwise, if the argument is in a PCS argument register, allocate an
   2567     // appropriate scratch register and then move it out of the way.
   2568     if (kPCSVarargs.IncludesAliasOf(args[i]) ||
   2569         kPCSVarargsV.IncludesAliasOf(args[i])) {
   2570       if (args[i].IsRegister()) {
   2571         Register old_arg = Register(args[i]);
   2572         Register new_arg = temps.AcquireSameSizeAs(old_arg);
   2573         Mov(new_arg, old_arg);
   2574         args[i] = new_arg;
   2575       } else {
   2576         VRegister old_arg(args[i]);
   2577         VRegister new_arg = temps.AcquireSameSizeAs(old_arg);
   2578         Fmov(new_arg, old_arg);
   2579         args[i] = new_arg;
   2580       }
   2581     }
   2582   }
   2583 
   2584   // Do a second pass to move values into their final positions and perform any
   2585   // conversions that may be required.
   2586   for (int i = 0; i < arg_count; i++) {
   2587     VIXL_ASSERT(pcs[i].GetType() == args[i].GetType());
   2588     if (pcs[i].IsRegister()) {
   2589       Mov(Register(pcs[i]), Register(args[i]), kDiscardForSameWReg);
   2590     } else {
   2591       VIXL_ASSERT(pcs[i].IsVRegister());
   2592       if (pcs[i].GetSizeInBits() == args[i].GetSizeInBits()) {
   2593         Fmov(VRegister(pcs[i]), VRegister(args[i]));
   2594       } else {
   2595         Fcvt(VRegister(pcs[i]), VRegister(args[i]));
   2596       }
   2597     }
   2598   }
   2599 
   2600   // Load the format string into x0, as per the procedure-call standard.
   2601   //
   2602   // To make the code as portable as possible, the format string is encoded
   2603   // directly in the instruction stream. It might be cleaner to encode it in a
   2604   // literal pool, but since Printf is usually used for debugging, it is
   2605   // beneficial for it to be minimally dependent on other features.
   2606   temps.Exclude(x0);
   2607   Label format_address;
   2608   Adr(x0, &format_address);
   2609 
   2610   // Emit the format string directly in the instruction stream.
   2611   {
   2612     BlockPoolsScope scope(this);
   2613     // Data emitted:
   2614     //   branch
   2615     //   strlen(format) + 1 (includes null termination)
   2616     //   padding to next instruction
   2617     //   unreachable
   2618     EmissionCheckScope guard(this,
   2619                              AlignUp(strlen(format) + 1, kInstructionSize) +
   2620                                  2 * kInstructionSize);
   2621     Label after_data;
   2622     B(&after_data);
   2623     Bind(&format_address);
   2624     EmitString(format);
   2625     Unreachable();
   2626     Bind(&after_data);
   2627   }
   2628 
   2629   // We don't pass any arguments on the stack, but we still need to align the C
   2630   // stack pointer to a 16-byte boundary for PCS compliance.
   2631   if (!sp.Is(StackPointer())) {
   2632     Bic(sp, StackPointer(), 0xf);
   2633   }
   2634 
   2635   // Actually call printf. This part needs special handling for the simulator,
   2636   // since the system printf function will use a different instruction set and
   2637   // the procedure-call standard will not be compatible.
   2638   if (generate_simulator_code_) {
   2639     ExactAssemblyScope scope(this, kPrintfLength);
   2640     hlt(kPrintfOpcode);
   2641     dc32(arg_count);  // kPrintfArgCountOffset
   2642 
   2643     // Determine the argument pattern.
   2644     uint32_t arg_pattern_list = 0;
   2645     for (int i = 0; i < arg_count; i++) {
   2646       uint32_t arg_pattern;
   2647       if (pcs[i].IsRegister()) {
   2648         arg_pattern = pcs[i].Is32Bits() ? kPrintfArgW : kPrintfArgX;
   2649       } else {
   2650         VIXL_ASSERT(pcs[i].Is64Bits());
   2651         arg_pattern = kPrintfArgD;
   2652       }
   2653       VIXL_ASSERT(arg_pattern < (1 << kPrintfArgPatternBits));
   2654       arg_pattern_list |= (arg_pattern << (kPrintfArgPatternBits * i));
   2655     }
   2656     dc32(arg_pattern_list);  // kPrintfArgPatternListOffset
   2657   } else {
   2658     Register tmp = temps.AcquireX();
   2659     Mov(tmp, reinterpret_cast<uintptr_t>(printf));
   2660     Blr(tmp);
   2661   }
   2662 }
   2663 
   2664 
   2665 void MacroAssembler::Printf(const char* format,
   2666                             CPURegister arg0,
   2667                             CPURegister arg1,
   2668                             CPURegister arg2,
   2669                             CPURegister arg3) {
   2670   // We can only print sp if it is the current stack pointer.
   2671   if (!sp.Is(StackPointer())) {
   2672     VIXL_ASSERT(!sp.Aliases(arg0));
   2673     VIXL_ASSERT(!sp.Aliases(arg1));
   2674     VIXL_ASSERT(!sp.Aliases(arg2));
   2675     VIXL_ASSERT(!sp.Aliases(arg3));
   2676   }
   2677 
   2678   // Make sure that the macro assembler doesn't try to use any of our arguments
   2679   // as scratch registers.
   2680   UseScratchRegisterScope exclude_all(this);
   2681   exclude_all.ExcludeAll();
   2682 
   2683   // Preserve all caller-saved registers as well as NZCV.
   2684   // If sp is the stack pointer, PushCPURegList asserts that the size of each
   2685   // list is a multiple of 16 bytes.
   2686   PushCPURegList(kCallerSaved);
   2687   PushCPURegList(kCallerSavedV);
   2688 
   2689   {
   2690     UseScratchRegisterScope temps(this);
   2691     // We can use caller-saved registers as scratch values (except for argN).
   2692     temps.Include(kCallerSaved);
   2693     temps.Include(kCallerSavedV);
   2694     temps.Exclude(arg0, arg1, arg2, arg3);
   2695 
   2696     // If any of the arguments are the current stack pointer, allocate a new
   2697     // register for them, and adjust the value to compensate for pushing the
   2698     // caller-saved registers.
   2699     bool arg0_sp = StackPointer().Aliases(arg0);
   2700     bool arg1_sp = StackPointer().Aliases(arg1);
   2701     bool arg2_sp = StackPointer().Aliases(arg2);
   2702     bool arg3_sp = StackPointer().Aliases(arg3);
   2703     if (arg0_sp || arg1_sp || arg2_sp || arg3_sp) {
   2704       // Allocate a register to hold the original stack pointer value, to pass
   2705       // to PrintfNoPreserve as an argument.
   2706       Register arg_sp = temps.AcquireX();
   2707       Add(arg_sp,
   2708           StackPointer(),
   2709           kCallerSaved.GetTotalSizeInBytes() +
   2710               kCallerSavedV.GetTotalSizeInBytes());
   2711       if (arg0_sp) arg0 = Register(arg_sp.GetCode(), arg0.GetSizeInBits());
   2712       if (arg1_sp) arg1 = Register(arg_sp.GetCode(), arg1.GetSizeInBits());
   2713       if (arg2_sp) arg2 = Register(arg_sp.GetCode(), arg2.GetSizeInBits());
   2714       if (arg3_sp) arg3 = Register(arg_sp.GetCode(), arg3.GetSizeInBits());
   2715     }
   2716 
   2717     // Preserve NZCV.
   2718     Register tmp = temps.AcquireX();
   2719     Mrs(tmp, NZCV);
   2720     Push(tmp, xzr);
   2721     temps.Release(tmp);
   2722 
   2723     PrintfNoPreserve(format, arg0, arg1, arg2, arg3);
   2724 
   2725     // Restore NZCV.
   2726     tmp = temps.AcquireX();
   2727     Pop(xzr, tmp);
   2728     Msr(NZCV, tmp);
   2729     temps.Release(tmp);
   2730   }
   2731 
   2732   PopCPURegList(kCallerSavedV);
   2733   PopCPURegList(kCallerSaved);
   2734 }
   2735 
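         // For illustration, a typical debugging call passes up to four register
         // arguments matching the format string, e.g.:
         //   Printf("x0: %" PRIx64 ", d1: %g\n", x0, d1);
         // Caller-saved registers and NZCV are preserved at the cost of a large
         // push/pop sequence, so this is intended for debugging only.
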
   2736 void MacroAssembler::Trace(TraceParameters parameters, TraceCommand command) {
   2737   VIXL_ASSERT(allow_macro_instructions_);
   2738 
   2739   if (generate_simulator_code_) {
   2740     // The arguments to the trace pseudo instruction need to be contiguous in
   2741     // memory, so make sure we don't try to emit a literal pool.
   2742     ExactAssemblyScope scope(this, kTraceLength);
   2743 
   2744     Label start;
   2745     bind(&start);
   2746 
   2747     // Refer to simulator-aarch64.h for a description of the marker and its
   2748     // arguments.
   2749     hlt(kTraceOpcode);
   2750 
   2751     VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kTraceParamsOffset);
   2752     dc32(parameters);
   2753 
   2754     VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kTraceCommandOffset);
   2755     dc32(command);
   2756   } else {
   2757     // Emit nothing on real hardware.
   2758     USE(parameters, command);
   2759   }
   2760 }
   2761 
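        // For reference, the sequence emitted by Trace is laid out as follows
        // (a sketch matching the offset asserts above):
        //
        //   hlt kTraceOpcode   ; the marker, at offset 0
        //   .word parameters   ; at kTraceParamsOffset
        //   .word command      ; at kTraceCommandOffset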
   2762 
   2763 void MacroAssembler::Log(TraceParameters parameters) {
   2764   VIXL_ASSERT(allow_macro_instructions_);
   2765 
   2766   if (generate_simulator_code_) {
   2767     // The arguments to the log pseudo instruction need to be contiguous in
   2768     // memory, so make sure we don't try to emit a literal pool.
   2769     ExactAssemblyScope scope(this, kLogLength);
   2770 
   2771     Label start;
   2772     bind(&start);
   2773 
   2774     // Refer to simulator-aarch64.h for a description of the marker and its
   2775     // arguments.
   2776     hlt(kLogOpcode);
   2777 
   2778     VIXL_ASSERT(GetSizeOfCodeGeneratedSince(&start) == kLogParamsOffset);
   2779     dc32(parameters);
   2780   } else {
   2781     // Emit nothing on real hardware.
   2782     USE(parameters);
   2783   }
   2784 }
   2785 
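        // Log emits the same marker-plus-data layout as Trace above, but with
        // only the parameters word and no command word.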
   2786 
   2787 void MacroAssembler::SetSimulatorCPUFeatures(const CPUFeatures& features) {
   2788   ConfigureSimulatorCPUFeaturesHelper(features, kSetCPUFeaturesOpcode);
   2789 }
   2790 
   2791 
   2792 void MacroAssembler::EnableSimulatorCPUFeatures(const CPUFeatures& features) {
   2793   ConfigureSimulatorCPUFeaturesHelper(features, kEnableCPUFeaturesOpcode);
   2794 }
   2795 
   2796 
   2797 void MacroAssembler::DisableSimulatorCPUFeatures(const CPUFeatures& features) {
   2798   ConfigureSimulatorCPUFeaturesHelper(features, kDisableCPUFeaturesOpcode);
   2799 }
   2800 
   2801 
   2802 void MacroAssembler::ConfigureSimulatorCPUFeaturesHelper(
   2803     const CPUFeatures& features, DebugHltOpcode action) {
   2804   VIXL_ASSERT(allow_macro_instructions_);
   2805   VIXL_ASSERT(generate_simulator_code_);
   2806 
   2807   typedef ConfigureCPUFeaturesElementType ElementType;
   2808   VIXL_ASSERT(CPUFeatures::kNumberOfFeatures <=
   2809               std::numeric_limits<ElementType>::max());
   2810 
   2811   size_t count = features.Count();
   2812 
   2813   size_t preamble_length = kConfigureCPUFeaturesListOffset;
   2814   size_t list_length = (count + 1) * sizeof(ElementType);
   2815   size_t padding_length = AlignUp(list_length, kInstructionSize) - list_length;
   2816 
   2817   size_t total_length = preamble_length + list_length + padding_length;
   2818 
   2819   // Check the overall code size as well as the size of each component.
   2820   ExactAssemblyScope guard_total(this, total_length);
   2821 
   2822   {  // Preamble: the opcode itself.
   2823     ExactAssemblyScope guard_preamble(this, preamble_length);
   2824     hlt(action);
   2825   }
   2826   {  // A kNone-terminated list of features.
   2827     ExactAssemblyScope guard_list(this, list_length);
   2828     for (CPUFeatures::const_iterator it = features.begin();
   2829          it != features.end();
   2830          ++it) {
   2831       dc(static_cast<ElementType>(*it));
   2832     }
   2833     dc(static_cast<ElementType>(CPUFeatures::kNone));
   2834   }
   2835   {  // Padding for instruction alignment.
   2836     ExactAssemblyScope guard_padding(this, padding_length);
   2837     for (size_t size = 0; size < padding_length; size += sizeof(ElementType)) {
   2838       // The exact value is arbitrary.
   2839       dc(static_cast<ElementType>(CPUFeatures::kNone));
   2840     }
   2841   }
   2842 }
   2843 
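        // Worked example of the size arithmetic above, assuming (for
        // illustration only) a one-byte ElementType: three features give
        // list_length = (3 + 1) * 1 = 4 bytes, already a multiple of
        // kInstructionSize (4 bytes), so padding_length = 0. Four features
        // give list_length = 5, so padding_length = AlignUp(5, 4) - 5 = 3,
        // emitted as extra kNone elements to restore instruction alignment.
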
   2844 void MacroAssembler::SaveSimulatorCPUFeatures() {
   2845   VIXL_ASSERT(allow_macro_instructions_);
   2846   VIXL_ASSERT(generate_simulator_code_);
   2847   SingleEmissionCheckScope guard(this);
   2848   hlt(kSaveCPUFeaturesOpcode);
   2849 }
   2850 
   2851 
   2852 void MacroAssembler::RestoreSimulatorCPUFeatures() {
   2853   VIXL_ASSERT(allow_macro_instructions_);
   2854   VIXL_ASSERT(generate_simulator_code_);
   2855   SingleEmissionCheckScope guard(this);
   2856   hlt(kRestoreCPUFeaturesOpcode);
   2857 }
   2858 
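        // A plausible pairing of the helpers above (a sketch, not a protocol
        // fixed by this file): SaveSimulatorCPUFeatures(), then Enable- or
        // DisableSimulatorCPUFeatures() around a region of code, then
        // RestoreSimulatorCPUFeatures() to put the simulated set back.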
   2859 
   2860 void UseScratchRegisterScope::Open(MacroAssembler* masm) {
   2861   VIXL_ASSERT(masm_ == NULL);
   2862   VIXL_ASSERT(masm != NULL);
   2863   masm_ = masm;
   2864 
   2865   CPURegList* available = masm->GetScratchRegisterList();
   2866   CPURegList* available_v = masm->GetScratchVRegisterList();
   2867   CPURegList* available_p = masm->GetScratchPRegisterList();
   2868   old_available_ = available->GetList();
   2869   old_available_v_ = available_v->GetList();
   2870   old_available_p_ = available_p->GetList();
   2871   VIXL_ASSERT(available->GetType() == CPURegister::kRegister);
   2872   VIXL_ASSERT(available_v->GetType() == CPURegister::kVRegister);
   2873   VIXL_ASSERT(available_p->GetType() == CPURegister::kPRegister);
   2874 
   2875   parent_ = masm->GetCurrentScratchRegisterScope();
   2876   masm->SetCurrentScratchRegisterScope(this);
   2877 }
   2878 
   2879 
   2880 void UseScratchRegisterScope::Close() {
   2881   if (masm_ != NULL) {
   2882     // Ensure that scopes nest perfectly, and do not outlive their parents.
   2883     // This is a run-time check because the order of destruction of objects in
   2884     // the _same_ scope is implementation-defined, and is likely to change in
   2885     // optimised builds.
   2886     VIXL_CHECK(masm_->GetCurrentScratchRegisterScope() == this);
   2887     masm_->SetCurrentScratchRegisterScope(parent_);
   2888 
   2889     masm_->GetScratchRegisterList()->SetList(old_available_);
   2890     masm_->GetScratchVRegisterList()->SetList(old_available_v_);
   2891     masm_->GetScratchPRegisterList()->SetList(old_available_p_);
   2892 
   2893     masm_ = NULL;
   2894   }
   2895 }
   2896 
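        // Typical nesting (an illustrative sketch; scopes must close in
        // strict LIFO order, as the VIXL_CHECK in Close() enforces):
        //
        //   {
        //     UseScratchRegisterScope outer(&masm);
        //     Register a = outer.AcquireX();
        //     {
        //       UseScratchRegisterScope inner(&masm);
        //       Register b = inner.AcquireX();  // Guaranteed distinct from a.
        //     }  // b returns to the available pool here.
        //   }    // a returns to the available pool here.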
   2897 
   2898 bool UseScratchRegisterScope::IsAvailable(const CPURegister& reg) const {
   2899   return masm_->GetScratchRegisterList()->IncludesAliasOf(reg) ||
   2900          masm_->GetScratchVRegisterList()->IncludesAliasOf(reg) ||
   2901          masm_->GetScratchPRegisterList()->IncludesAliasOf(reg);
   2902 }
   2903 
   2904 Register UseScratchRegisterScope::AcquireRegisterOfSize(int size_in_bits) {
   2905   int code = AcquireFrom(masm_->GetScratchRegisterList()).GetCode();
   2906   return Register(code, size_in_bits);
   2907 }
   2908 
   2909 
   2910 VRegister UseScratchRegisterScope::AcquireVRegisterOfSize(int size_in_bits) {
   2911   int code = AcquireFrom(masm_->GetScratchVRegisterList()).GetCode();
   2912   return VRegister(code, size_in_bits);
   2913 }
   2914 
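        // E.g. AcquireRegisterOfSize(kXRegSize) hands out a fresh 64-bit
        // register, matching the AcquireX() convenience used elsewhere in
        // this file.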
   2915 
   2916 void UseScratchRegisterScope::Release(const CPURegister& reg) {
   2917   VIXL_ASSERT(masm_ != NULL);
   2918 
   2919   // Release(NoReg) has no effect.
   2920   if (reg.IsNone()) return;
   2921 
   2922   ReleaseByCode(GetAvailableListFor(reg.GetBank()), reg.GetCode());
   2923 }
   2924 
   2925 
   2926 void UseScratchRegisterScope::Include(const CPURegList& list) {
   2927   VIXL_ASSERT(masm_ != NULL);
   2928 
   2929   // Including an empty list has no effect.
   2930   if (list.IsEmpty()) return;
   2931   VIXL_ASSERT(list.GetType() != CPURegister::kNoRegister);
   2932 
   2933   RegList reg_list = list.GetList();
   2934   if (list.GetType() == CPURegister::kRegister) {
   2935     // Make sure that neither sp nor xzr are included in the list.
   2936     reg_list &= ~(xzr.GetBit() | sp.GetBit());
   2937   }
   2938 
   2939   IncludeByRegList(GetAvailableListFor(list.GetBank()), reg_list);
   2940 }
   2941 
   2942 
   2943 void UseScratchRegisterScope::Include(const Register& reg1,
   2944                                       const Register& reg2,
   2945                                       const Register& reg3,
   2946                                       const Register& reg4) {
   2947   VIXL_ASSERT(masm_ != NULL);
   2948   RegList include =
   2949       reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
   2950   // Make sure that neither sp nor xzr are included in the list.
   2951   include &= ~(xzr.GetBit() | sp.GetBit());
   2952 
   2953   IncludeByRegList(masm_->GetScratchRegisterList(), include);
   2954 }
   2955 
   2956 
   2957 void UseScratchRegisterScope::Include(const VRegister& reg1,
   2958                                       const VRegister& reg2,
   2959                                       const VRegister& reg3,
   2960                                       const VRegister& reg4) {
   2961   RegList include =
   2962       reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
   2963   IncludeByRegList(masm_->GetScratchVRegisterList(), include);
   2964 }
   2965 
   2966 
   2967 void UseScratchRegisterScope::Include(const CPURegister& reg1,
   2968                                       const CPURegister& reg2,
   2969                                       const CPURegister& reg3,
   2970                                       const CPURegister& reg4) {
   2971   RegList include = 0;
   2972   RegList include_v = 0;
   2973   RegList include_p = 0;
   2974 
   2975   const CPURegister regs[] = {reg1, reg2, reg3, reg4};
   2976 
   2977   for (size_t i = 0; i < ArrayLength(regs); i++) {
   2978     RegList bit = regs[i].GetBit();
   2979     switch (regs[i].GetBank()) {
   2980       case CPURegister::kNoRegisterBank:
   2981         // Include(NoReg) has no effect.
   2982         VIXL_ASSERT(regs[i].IsNone());
   2983         break;
   2984       case CPURegister::kRRegisterBank:
   2985         include |= bit;
   2986         break;
   2987       case CPURegister::kVRegisterBank:
   2988         include_v |= bit;
   2989         break;
   2990       case CPURegister::kPRegisterBank:
   2991         include_p |= bit;
   2992         break;
   2993     }
   2994   }
   2995 
   2996   IncludeByRegList(masm_->GetScratchRegisterList(), include);
   2997   IncludeByRegList(masm_->GetScratchVRegisterList(), include_v);
   2998   IncludeByRegList(masm_->GetScratchPRegisterList(), include_p);
   2999 }
   3000 
   3001 
   3002 void UseScratchRegisterScope::Exclude(const CPURegList& list) {
   3003   ExcludeByRegList(GetAvailableListFor(list.GetBank()), list.GetList());
   3004 }
   3005 
   3006 
   3007 void UseScratchRegisterScope::Exclude(const Register& reg1,
   3008                                       const Register& reg2,
   3009                                       const Register& reg3,
   3010                                       const Register& reg4) {
   3011   RegList exclude =
   3012       reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
   3013   ExcludeByRegList(masm_->GetScratchRegisterList(), exclude);
   3014 }
   3015 
   3016 
   3017 void UseScratchRegisterScope::Exclude(const VRegister& reg1,
   3018                                       const VRegister& reg2,
   3019                                       const VRegister& reg3,
   3020                                       const VRegister& reg4) {
   3021   RegList exclude_v =
   3022       reg1.GetBit() | reg2.GetBit() | reg3.GetBit() | reg4.GetBit();
   3023   ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v);
   3024 }
   3025 
   3026 
   3027 void UseScratchRegisterScope::Exclude(const CPURegister& reg1,
   3028                                       const CPURegister& reg2,
   3029                                       const CPURegister& reg3,
   3030                                       const CPURegister& reg4) {
   3031   RegList exclude = 0;
   3032   RegList exclude_v = 0;
   3033   RegList exclude_p = 0;
   3034 
   3035   const CPURegister regs[] = {reg1, reg2, reg3, reg4};
   3036 
   3037   for (size_t i = 0; i < ArrayLength(regs); i++) {
   3038     RegList bit = regs[i].GetBit();
   3039     switch (regs[i].GetBank()) {
   3040       case CPURegister::kNoRegisterBank:
   3041         // Exclude(NoReg) has no effect.
   3042         VIXL_ASSERT(regs[i].IsNone());
   3043         break;
   3044       case CPURegister::kRRegisterBank:
   3045         exclude |= bit;
   3046         break;
   3047       case CPURegister::kVRegisterBank:
   3048         exclude_v |= bit;
   3049         break;
   3050       case CPURegister::kPRegisterBank:
   3051         exclude_p |= bit;
   3052         break;
   3053     }
   3054   }
   3055 
   3056   ExcludeByRegList(masm_->GetScratchRegisterList(), exclude);
   3057   ExcludeByRegList(masm_->GetScratchVRegisterList(), exclude_v);
   3058   ExcludeByRegList(masm_->GetScratchPRegisterList(), exclude_p);
   3059 }
   3060 
   3061 
   3062 void UseScratchRegisterScope::ExcludeAll() {
   3063   ExcludeByRegList(masm_->GetScratchRegisterList(),
   3064                    masm_->GetScratchRegisterList()->GetList());
   3065   ExcludeByRegList(masm_->GetScratchVRegisterList(),
   3066                    masm_->GetScratchVRegisterList()->GetList());
   3067   ExcludeByRegList(masm_->GetScratchPRegisterList(),
   3068                    masm_->GetScratchPRegisterList()->GetList());
   3069 }
   3070 
   3071 
   3072 CPURegister UseScratchRegisterScope::AcquireFrom(CPURegList* available,
   3073                                                  RegList mask) {
   3074   VIXL_CHECK((available->GetList() & mask) != 0);
   3075   CPURegister result = available->PopLowestIndex(mask);
   3076   VIXL_ASSERT(!AreAliased(result, xzr, sp));
   3077   return result;
   3078 }
   3079 
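        // E.g. with x0 and x1 both available, AcquireFrom(available, 0x3)
        // pops the lowest-indexed match and returns the code-0 register,
        // leaving only x1 in the list (0x3 being the RegList mask for
        // register codes 0 and 1).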
   3080 
   3081 void UseScratchRegisterScope::ReleaseByCode(CPURegList* available, int code) {
   3082   ReleaseByRegList(available, static_cast<RegList>(1) << code);
   3083 }
   3084 
   3085 
   3086 void UseScratchRegisterScope::ReleaseByRegList(CPURegList* available,
   3087                                                RegList regs) {
   3088   available->SetList(available->GetList() | regs);
   3089 }
   3090 
   3091 
   3092 void UseScratchRegisterScope::IncludeByRegList(CPURegList* available,
   3093                                                RegList regs) {
   3094   available->SetList(available->GetList() | regs);
   3095 }
   3096 
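        // Note that ReleaseByRegList and IncludeByRegList have the same
        // bitwise effect; they are kept separate because Release hands back a
        // register previously acquired from the pool, while Include widens
        // the set of registers a scope may allocate.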
   3097 
   3098 void UseScratchRegisterScope::ExcludeByRegList(CPURegList* available,
   3099                                                RegList exclude) {
   3100   available->SetList(available->GetList() & ~exclude);
   3101 }
   3102 
   3103 CPURegList* UseScratchRegisterScope::GetAvailableListFor(
   3104     CPURegister::RegisterBank bank) {
   3105   switch (bank) {
   3106     case CPURegister::kNoRegisterBank:
   3107       return NULL;
   3108     case CPURegister::kRRegisterBank:
   3109       return masm_->GetScratchRegisterList();
   3110     case CPURegister::kVRegisterBank:
   3111       return masm_->GetScratchVRegisterList();
   3112     case CPURegister::kPRegisterBank:
   3113       return masm_->GetScratchPRegisterList();
   3114   }
   3115   VIXL_UNREACHABLE();
   3116   return NULL;
   3117 }
   3118 
   3119 }  // namespace aarch64
   3120 }  // namespace vixl