duckstation

duckstation, archived from the revision just before upstream changed it to a proprietary software project; this version is the libre one.
git clone https://git.neptards.moe/u3shit/duckstation.git
Log | Files | Refs | README | LICENSE

assembler.cpp (84419B)


      1 #include <biscuit/assert.hpp>
      2 #include <biscuit/assembler.hpp>
      3 
      4 #include <cstring>
      5 #include <utility>
      6 
      7 namespace biscuit {
      8 namespace {
      9 // Determines if a value lies within the range of a 6-bit immediate.
     10 [[nodiscard]] bool IsValidSigned6BitImm(ptrdiff_t value) noexcept {
     11     return value >= -32 && value <= 31;
     12 }
     13 
     14 // S-type and I-type immediates are 12 bits in size
     15 [[nodiscard]] bool IsValidSigned12BitImm(ptrdiff_t value) noexcept {
     16     return value >= -2048 && value <= 2047;
     17 }
     18 
     19 // B-type immediates only provide -4KiB to +4KiB range branches.
     20 [[nodiscard]] bool IsValidBTypeImm(ptrdiff_t value) noexcept {
     21     return value >= -4096 && value <= 4095;
     22 }
     23 
     24 // J-type immediates only provide -1MiB to +1MiB range branches.
     25 [[nodiscard]] bool IsValidJTypeImm(ptrdiff_t value) noexcept {
     26     return value >= -0x80000 && value <= 0x7FFFF;
     27 }
     28 
     29 // CB-type immediates only provide -256B to +256B range branches.
     30 [[nodiscard]] bool IsValidCBTypeImm(ptrdiff_t value) noexcept {
     31     return value >= -256 && value <= 255;
     32 }
     33 
     34 // CJ-type immediates only provide -2KiB to +2KiB range branches.
     35 [[nodiscard]] bool IsValidCJTypeImm(ptrdiff_t value) noexcept {
     36     return IsValidSigned12BitImm(value);
     37 }
     38 
     39 // Determines whether or not the register fits in 3-bit compressed encoding.
     40 [[nodiscard]] bool IsValid3BitCompressedReg(Register reg) noexcept {
     41     const auto index = reg.Index();
     42     return index >= 8 && index <= 15;
     43 }
     44 
     45 // Determines whether or not the given shift amount is valid for a compressed shift instruction
     46 [[nodiscard]] bool IsValidCompressedShiftAmount(uint32_t shift) noexcept {
     47     return shift > 0 && shift <= 64;
     48 }
     49 
     50 // Turns a compressed register into its encoding.
     51 [[nodiscard]] uint32_t CompressedRegTo3BitEncoding(Register reg) noexcept {
     52     return reg.Index() - 8;
     53 }
     54 
     55 // Transforms a regular value into an immediate encoded in a B-type instruction.
     56 [[nodiscard]] uint32_t TransformToBTypeImm(uint32_t imm) noexcept {
     57     // clang-format off
     58     return ((imm & 0x07E0) << 20) |
     59            ((imm & 0x1000) << 19) |
     60            ((imm & 0x001E) << 7) |
     61            ((imm & 0x0800) >> 4);
     62     // clang-format on
     63 }
     64 
     65 // Transforms a regular value into an immediate encoded in a J-type instruction.
     66 [[nodiscard]] uint32_t TransformToJTypeImm(uint32_t imm) noexcept {
     67     // clang-format off
     68     return ((imm & 0x0FF000) >> 0) |
     69            ((imm & 0x000800) << 9) |
     70            ((imm & 0x0007FE) << 20) |
     71            ((imm & 0x100000) << 11);
     72     // clang-format on
     73 }
     74 
     75 // Transforms a regular value into an immediate encoded in a CB-type instruction.
     76 [[nodiscard]] uint32_t TransformToCBTypeImm(uint32_t imm) noexcept {
     77     // clang-format off
     78     return ((imm & 0x0C0) >> 1) |
     79            ((imm & 0x006) << 2) |
     80            ((imm & 0x020) >> 3) |
     81            ((imm & 0x018) << 7) |
     82            ((imm & 0x100) << 4);
     83     // clang-format on
     84 }
     85 
     86 // Transforms a regular value into an immediate encoded in a CJ-type instruction.
     87 [[nodiscard]] uint32_t TransformToCJTypeImm(uint32_t imm) noexcept {
     88     // clang-format off
     89     return ((imm & 0x800) << 1) |
     90            ((imm & 0x010) << 7) |
     91            ((imm & 0x300) << 1) |
     92            ((imm & 0x400) >> 2) |
     93            ((imm & 0x040) << 1) |
     94            ((imm & 0x080) >> 1) |
     95            ((imm & 0x00E) << 4) |
     96            ((imm & 0x020) >> 3);
     97     // clang-format on
     98 }
     99 
    100 // Emits a B type RISC-V instruction. These consist of:
    101 // imm[12|10:5] | rs2 | rs1 | funct3 | imm[4:1] | imm[11] | opcode
    102 void EmitBType(CodeBuffer& buffer, uint32_t imm, GPR rs2, GPR rs1, uint32_t funct3, uint32_t opcode) noexcept {
    103     imm &= 0x1FFE;
    104 
    105     buffer.Emit32(TransformToBTypeImm(imm) | (rs2.Index() << 20) | (rs1.Index() << 15) | ((funct3 & 0b111) << 12) | (opcode & 0x7F));
    106 }
    107 
    108 // Emits a I type RISC-V instruction. These consist of:
    109 // imm[11:0] | rs1 | funct3 | rd | opcode
    110 void EmitIType(CodeBuffer& buffer, uint32_t imm, Register rs1, uint32_t funct3, Register rd, uint32_t opcode) noexcept {
    111     imm &= 0xFFF;
    112 
    113     buffer.Emit32((imm << 20) | (rs1.Index() << 15) | ((funct3 & 0b111) << 12) | (rd.Index() << 7) | (opcode & 0x7F));
    114 }
    115 
    116 // Emits a J type RISC-V instruction. These consist of:
    117 // imm[20|10:1|11|19:12] | rd | opcode
    118 void EmitJType(CodeBuffer& buffer, uint32_t imm, GPR rd, uint32_t opcode) noexcept {
    119     imm &= 0x1FFFFE;
    120 
    121     buffer.Emit32(TransformToJTypeImm(imm) | rd.Index() << 7 | (opcode & 0x7F));
    122 }
    123 
    124 // Emits a R type RISC instruction. These consist of:
    125 // funct7 | rs2 | rs1 | funct3 | rd | opcode
    126 void EmitRType(CodeBuffer& buffer, uint32_t funct7, Register rs2, Register rs1, uint32_t funct3,
    127                Register rd, uint32_t opcode) noexcept {
    128     // clang-format off
    129     const auto value = ((funct7 & 0xFF) << 25) |
    130                        (rs2.Index() << 20) |
    131                        (rs1.Index() << 15) |
    132                        ((funct3 & 0b111) << 12) |
    133                        (rd.Index() << 7) |
    134                        (opcode & 0x7F);
    135     // clang-format off
    136 
    137     buffer.Emit32(value);
    138 }
    139 
    140 // Emits a R type RISC instruction. These consist of:
    141 // funct7 | rs2 | rs1 | funct3 | rd | opcode
    142 void EmitRType(CodeBuffer& buffer, uint32_t funct7, FPR rs2, FPR rs1, RMode funct3, FPR rd, uint32_t opcode) noexcept {
    143     EmitRType(buffer, funct7, rs2, rs1, static_cast<uint32_t>(funct3), rd, opcode);
    144 }
    145 
    146 // Emits a R4 type RISC instruction. These consist of:
    147 // rs3 | funct2 | rs2 | rs1 | funct3 | rd | opcode
    148 void EmitR4Type(CodeBuffer& buffer, FPR rs3, uint32_t funct2, FPR rs2, FPR rs1, RMode funct3, FPR rd, uint32_t opcode) noexcept {
    149     const auto reg_bits = (rs3.Index() << 27) | (rs2.Index() << 20) | (rs1.Index() << 15) | (rd.Index() << 7);
    150     const auto funct_bits = ((funct2 & 0b11) << 25) | (static_cast<uint32_t>(funct3) << 12);
    151     buffer.Emit32(reg_bits | funct_bits | (opcode & 0x7F));
    152 }
    153 
    154 // Emits a S type RISC-V instruction. These consist of:
    155 // imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode
    156 void EmitSType(CodeBuffer& buffer, uint32_t imm, Register rs2, GPR rs1, uint32_t funct3, uint32_t opcode) noexcept {
    157     imm &= 0xFFF;
    158 
    159     // clang-format off
    160     const auto new_imm = ((imm & 0x01F) << 7) |
    161                          ((imm & 0xFE0) << 20);
    162     // clang-format on
    163 
    164     buffer.Emit32(new_imm | (rs2.Index() << 20) | (rs1.Index() << 15) | ((funct3 & 0b111) << 12) | (opcode & 0x7F));
    165 }
    166 
    167 // Emits a U type RISC-V instruction. These consist of:
    168 // imm[31:12] | rd | opcode
    169 void EmitUType(CodeBuffer& buffer, uint32_t imm, GPR rd, uint32_t opcode) noexcept {
    170     buffer.Emit32((imm & 0x000FFFFF) << 12 | rd.Index() << 7 | (opcode & 0x7F));
    171 }
    172 
    173 // Emits an atomic instruction.
    174 void EmitAtomic(CodeBuffer& buffer, uint32_t funct5, Ordering ordering, GPR rs2, GPR rs1,
    175                 uint32_t funct3, GPR rd, uint32_t opcode) noexcept {
    176     const auto funct7 = (funct5 << 2) | static_cast<uint32_t>(ordering);
    177     EmitRType(buffer, funct7, rs2, rs1, funct3, rd, opcode);
    178 }
    179 
    180 // Emits a fence instruction
    181 void EmitFENCE(CodeBuffer& buffer, uint32_t fm, FenceOrder pred, FenceOrder succ,
    182                GPR rs, uint32_t funct3, GPR rd, uint32_t opcode) noexcept {
    183     // clang-format off
    184     buffer.Emit32(((fm & 0b1111) << 28) |
    185                   (static_cast<uint32_t>(pred) << 24) |
    186                   (static_cast<uint32_t>(succ) << 20) |
    187                   (rs.Index() << 15) |
    188                   ((funct3 & 0b111) << 12) |
    189                   (rd.Index() << 7) |
    190                   (opcode & 0x7F));
    191     // clang-format on
    192 }
    193 
    194 // Emits a compressed branch instruction. These consist of:
    195 // funct3 | imm[8|4:3] | rs | imm[7:6|2:1|5] | op
    196 void EmitCompressedBranch(CodeBuffer& buffer, uint32_t funct3, int32_t offset, GPR rs, uint32_t op) noexcept {
    197     BISCUIT_ASSERT(IsValidCBTypeImm(offset));
    198     BISCUIT_ASSERT(IsValid3BitCompressedReg(rs));
    199 
    200     const auto transformed_imm = TransformToCBTypeImm(static_cast<uint32_t>(offset));
    201     const auto rs_san = CompressedRegTo3BitEncoding(rs);
    202     buffer.Emit16(((funct3 & 0b111) << 13) | transformed_imm | (rs_san << 7) | (op & 0b11));
    203 }
    204 
    205 // Emits a compressed jump instruction. These consist of:
    206 // funct3 | imm | op
    207 void EmitCompressedJump(CodeBuffer& buffer, uint32_t funct3, int32_t offset, uint32_t op) noexcept {
    208     BISCUIT_ASSERT(IsValidCJTypeImm(offset));
    209     buffer.Emit16(TransformToCJTypeImm(static_cast<uint32_t>(offset)) | ((funct3 & 0b111) << 13) | (op & 0b11));
    210 }
    211 
    212 // Emits a compress immediate instruction. These consist of:
    213 // funct3 | imm | rd | imm | op
    214 void EmitCompressedImmediate(CodeBuffer& buffer, uint32_t funct3, uint32_t imm, GPR rd, uint32_t op) noexcept {
    215     BISCUIT_ASSERT(rd != x0);
    216     const auto new_imm = ((imm & 0b11111) << 2) | ((imm & 0b100000) << 7);
    217     buffer.Emit16(((funct3 & 0b111) << 13) | new_imm | (rd.Index() << 7) | (op & 0b11));
    218 }
    219 
    220 // Emits a compressed load instruction. These consist of:
    221 // funct3 | imm | rs1 | imm | rd | op
    222 void EmitCompressedLoad(CodeBuffer& buffer, uint32_t funct3, uint32_t imm, GPR rs, Register rd, uint32_t op) noexcept {
    223     BISCUIT_ASSERT(IsValid3BitCompressedReg(rs));
    224     BISCUIT_ASSERT(IsValid3BitCompressedReg(rd));
    225 
    226     imm &= 0xF8;
    227 
    228     const auto imm_enc = ((imm & 0x38) << 7) | ((imm & 0xC0) >> 1);
    229     const auto rd_san = CompressedRegTo3BitEncoding(rd);
    230     const auto rs_san = CompressedRegTo3BitEncoding(rs);
    231     buffer.Emit16(((funct3 & 0b111) << 13) | imm_enc | (rs_san << 7) | (rd_san << 2) | (op & 0b11));
    232 }
    233 
    234 // Emits a compressed register arithmetic instruction. These consist of:
    235 // funct6 | rd | funct2 | rs | op
    236 void EmitCompressedRegArith(CodeBuffer& buffer, uint32_t funct6, GPR rd, uint32_t funct2, GPR rs, uint32_t op) noexcept {
    237     BISCUIT_ASSERT(IsValid3BitCompressedReg(rs));
    238     BISCUIT_ASSERT(IsValid3BitCompressedReg(rd));
    239 
    240     const auto rd_san = CompressedRegTo3BitEncoding(rd);
    241     const auto rs_san = CompressedRegTo3BitEncoding(rs);
    242     buffer.Emit16(((funct6 & 0b111111) << 10) | (rd_san << 7) | ((funct2 & 0b11) << 5) | (rs_san << 2) | (op & 0b11));
    243 }
    244 
    245 // Emits a compressed store instruction. These consist of:
    246 // funct3 | imm | rs1 | imm | rs2 | op
    247 void EmitCompressedStore(CodeBuffer& buffer, uint32_t funct3, uint32_t imm, GPR rs1, Register rs2, uint32_t op) noexcept {
    248     // This has the same format as a compressed load, with rs2 taking the place of rd.
    249     // We can reuse the code we've already written to handle this.
    250     EmitCompressedLoad(buffer, funct3, imm, rs1, rs2, op);
    251 }
    252 
    253 // Emits a compressed wide immediate instruction. These consist of:
    254 // funct3 | imm | rd | opcode
    255 void EmitCompressedWideImmediate(CodeBuffer& buffer, uint32_t funct3, uint32_t imm, GPR rd, uint32_t op) noexcept {
    256     BISCUIT_ASSERT(IsValid3BitCompressedReg(rd));
    257     const auto rd_sanitized = CompressedRegTo3BitEncoding(rd);
    258     buffer.Emit16(((funct3 & 0b111) << 13) | ((imm & 0xFF) << 5) | (rd_sanitized << 2) | (op & 0b11));
    259 }
    260 } // Anonymous namespace
    261 
    262 Assembler::Assembler(size_t capacity)
    263     : m_buffer(capacity) {}
    264 
    265 Assembler::Assembler(uint8_t* buffer, size_t capacity)
    266     : m_buffer(buffer, capacity) {}
    267 
    268 Assembler::~Assembler() = default;
    269 
    270 CodeBuffer& Assembler::GetCodeBuffer() {
    271     return m_buffer;
    272 }
    273 
    274 CodeBuffer Assembler::SwapCodeBuffer(CodeBuffer&& buffer) noexcept {
    275     return std::exchange(m_buffer, std::move(buffer));
    276 }
    277 
    278 void Assembler::Bind(Label* label) {
    279     BindToOffset(label, m_buffer.GetCursorOffset());
    280 }
    281 
    282 void Assembler::ADD(GPR rd, GPR lhs, GPR rhs) noexcept {
    283     EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b000, rd, 0b0110011);
    284 }
    285 
    286 void Assembler::ADDI(GPR rd, GPR rs, int32_t imm) noexcept {
    287     EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b000, rd, 0b0010011);
    288 }
    289 
    290 void Assembler::AND(GPR rd, GPR lhs, GPR rhs) noexcept {
    291     EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b111, rd, 0b0110011);
    292 }
    293 
    294 void Assembler::ANDI(GPR rd, GPR rs, uint32_t imm) noexcept {
    295     EmitIType(m_buffer, imm, rs, 0b111, rd, 0b0010011);
    296 }
    297 
    298 void Assembler::AUIPC(GPR rd, int32_t imm) noexcept {
    299     EmitUType(m_buffer, static_cast<uint32_t>(imm), rd, 0b0010111);
    300 }
    301 
    302 void Assembler::BEQ(GPR rs1, GPR rs2, Label* label) noexcept {
    303     const auto address = LinkAndGetOffset(label);
    304     BEQ(rs1, rs2, static_cast<int32_t>(address));
    305 }
    306 
    307 void Assembler::BEQZ(GPR rs, Label* label) noexcept {
    308     const auto address = LinkAndGetOffset(label);
    309     BEQZ(rs, static_cast<int32_t>(address));
    310 }
    311 
    312 void Assembler::BGE(GPR rs1, GPR rs2, Label* label) noexcept {
    313     const auto address = LinkAndGetOffset(label);
    314     BGE(rs1, rs2, static_cast<int32_t>(address));
    315 }
    316 
    317 void Assembler::BGEU(GPR rs1, GPR rs2, Label* label) noexcept {
    318     const auto address = LinkAndGetOffset(label);
    319     BGEU(rs1, rs2, static_cast<int32_t>(address));
    320 }
    321 
    322 void Assembler::BGEZ(GPR rs, Label* label) noexcept {
    323     const auto address = LinkAndGetOffset(label);
    324     BGEZ(rs, static_cast<int32_t>(address));
    325 }
    326 
    327 void Assembler::BGT(GPR rs, GPR rt, Label* label) noexcept {
    328     const auto address = LinkAndGetOffset(label);
    329     BGT(rs, rt, static_cast<int32_t>(address));
    330 }
    331 
    332 void Assembler::BGTU(GPR rs, GPR rt, Label* label) noexcept {
    333     const auto address = LinkAndGetOffset(label);
    334     BGTU(rs, rt, static_cast<int32_t>(address));
    335 }
    336 
    337 void Assembler::BGTZ(GPR rs, Label* label) noexcept {
    338     const auto address = LinkAndGetOffset(label);
    339     BGTZ(rs, static_cast<int32_t>(address));
    340 }
    341 
    342 void Assembler::BLE(GPR rs, GPR rt, Label* label) noexcept {
    343     const auto address = LinkAndGetOffset(label);
    344     BLE(rs, rt, static_cast<int32_t>(address));
    345 }
    346 
    347 void Assembler::BLEU(GPR rs, GPR rt, Label* label) noexcept {
    348     const auto address = LinkAndGetOffset(label);
    349     BLEU(rs, rt, static_cast<int32_t>(address));
    350 }
    351 
    352 void Assembler::BLEZ(GPR rs, Label* label) noexcept {
    353     const auto address = LinkAndGetOffset(label);
    354     BLEZ(rs, static_cast<int32_t>(address));
    355 }
    356 
    357 void Assembler::BLT(GPR rs1, GPR rs2, Label* label) noexcept {
    358     const auto address = LinkAndGetOffset(label);
    359     BLT(rs1, rs2, static_cast<int32_t>(address));
    360 }
    361 
    362 void Assembler::BLTU(GPR rs1, GPR rs2, Label* label) noexcept {
    363     const auto address = LinkAndGetOffset(label);
    364     BLTU(rs1, rs2, static_cast<int32_t>(address));
    365 }
    366 
    367 void Assembler::BLTZ(GPR rs, Label* label) noexcept {
    368     const auto address = LinkAndGetOffset(label);
    369     BLTZ(rs, static_cast<int32_t>(address));
    370 }
    371 
    372 void Assembler::BNE(GPR rs1, GPR rs2, Label* label) noexcept {
    373     const auto address = LinkAndGetOffset(label);
    374     BNE(rs1, rs2, static_cast<int32_t>(address));
    375 }
    376 
    377 void Assembler::BNEZ(GPR rs, Label* label) noexcept {
    378     const auto address = LinkAndGetOffset(label);
    379     BNEZ(rs, static_cast<int32_t>(address));
    380 }
    381 
    382 void Assembler::BEQ(GPR rs1, GPR rs2, int32_t imm) noexcept {
    383     BISCUIT_ASSERT(IsValidBTypeImm(imm));
    384     EmitBType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b000, 0b1100011);
    385 }
    386 
    387 void Assembler::BEQZ(GPR rs, int32_t imm) noexcept {
    388     BEQ(rs, x0, imm);
    389 }
    390 
    391 void Assembler::BGE(GPR rs1, GPR rs2, int32_t imm) noexcept {
    392     BISCUIT_ASSERT(IsValidBTypeImm(imm));
    393     EmitBType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b101, 0b1100011);
    394 }
    395 
    396 void Assembler::BGEU(GPR rs1, GPR rs2, int32_t imm) noexcept {
    397     BISCUIT_ASSERT(IsValidBTypeImm(imm));
    398     EmitBType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b111, 0b1100011);
    399 }
    400 
    401 void Assembler::BGEZ(GPR rs, int32_t imm) noexcept {
    402     BGE(rs, x0, imm);
    403 }
    404 
    405 void Assembler::BGT(GPR rs, GPR rt, int32_t imm) noexcept {
    406     BLT(rt, rs, imm);
    407 }
    408 
    409 void Assembler::BGTU(GPR rs, GPR rt, int32_t imm) noexcept {
    410     BLTU(rt, rs, imm);
    411 }
    412 
    413 void Assembler::BGTZ(GPR rs, int32_t imm) noexcept {
    414     BLT(x0, rs, imm);
    415 }
    416 
    417 void Assembler::BLE(GPR rs, GPR rt, int32_t imm) noexcept {
    418     BGE(rt, rs, imm);
    419 }
    420 
    421 void Assembler::BLEU(GPR rs, GPR rt, int32_t imm) noexcept {
    422     BGEU(rt, rs, imm);
    423 }
    424 
    425 void Assembler::BLEZ(GPR rs, int32_t imm) noexcept {
    426     BGE(x0, rs, imm);
    427 }
    428 
    429 void Assembler::BLT(GPR rs1, GPR rs2, int32_t imm) noexcept {
    430     BISCUIT_ASSERT(IsValidBTypeImm(imm));
    431     EmitBType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b100, 0b1100011);
    432 }
    433 
    434 void Assembler::BLTU(GPR rs1, GPR rs2, int32_t imm) noexcept {
    435     BISCUIT_ASSERT(IsValidBTypeImm(imm));
    436     EmitBType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b110, 0b1100011);
    437 }
    438 
    439 void Assembler::BLTZ(GPR rs, int32_t imm) noexcept {
    440     BLT(rs, x0, imm);
    441 }
    442 
    443 void Assembler::BNE(GPR rs1, GPR rs2, int32_t imm) noexcept {
    444     BISCUIT_ASSERT(IsValidBTypeImm(imm));
    445     EmitBType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b001, 0b1100011);
    446 }
    447 
    448 void Assembler::BNEZ(GPR rs, int32_t imm) noexcept {
    449     BNE(x0, rs, imm);
    450 }
    451 
    452 void Assembler::CALL(int32_t offset) noexcept {
    453     const auto uimm = static_cast<uint32_t>(offset);
    454     const auto lower = uimm & 0xFFF;
    455     const auto upper = (uimm & 0xFFFFF000) >> 12;
    456     const auto needs_increment = (uimm & 0x800) != 0;
    457 
    458     // Sign-extend the lower portion if the MSB of it is set.
    459     const auto new_lower = needs_increment ? static_cast<int32_t>(lower << 20) >> 20
    460                                            : static_cast<int32_t>(lower);
    461     const auto new_upper = needs_increment ? upper + 1 : upper;
    462 
    463     AUIPC(x1, static_cast<int32_t>(new_upper));
    464     JALR(x1, new_lower, x1);
    465 }
    466 
    467 void Assembler::EBREAK() noexcept {
    468     m_buffer.Emit32(0x00100073);
    469 }
    470 
    471 void Assembler::ECALL() noexcept {
    472     m_buffer.Emit32(0x00000073);
    473 }
    474 
    475 void Assembler::FENCE() noexcept {
    476     FENCE(FenceOrder::IORW, FenceOrder::IORW);
    477 }
    478 
    479 void Assembler::FENCE(FenceOrder pred, FenceOrder succ) noexcept {
    480     EmitFENCE(m_buffer, 0b0000, pred, succ, x0, 0b000, x0, 0b0001111);
    481 }
    482 
    483 void Assembler::FENCEI(GPR rd, GPR rs, uint32_t imm) noexcept {
    484     m_buffer.Emit32(((imm & 0xFFF) << 20) | (rs.Index() << 15) | 0x1000U | (rd.Index() << 7) | 0b0001111);
    485 }
    486 
    487 void Assembler::FENCETSO() noexcept {
    488     EmitFENCE(m_buffer, 0b1000, FenceOrder::RW, FenceOrder::RW, x0, 0b000, x0, 0b0001111);
    489 }
    490 
    491 void Assembler::J(Label* label) noexcept {
    492     const auto address = LinkAndGetOffset(label);
    493     BISCUIT_ASSERT(IsValidJTypeImm(address));
    494     J(static_cast<int32_t>(address));
    495 }
    496 
    497 void Assembler::JAL(Label* label) noexcept {
    498     const auto address = LinkAndGetOffset(label);
    499     BISCUIT_ASSERT(IsValidJTypeImm(address));
    500     JAL(static_cast<int32_t>(address));
    501 }
    502 
    503 void Assembler::JAL(GPR rd, Label* label) noexcept {
    504     const auto address = LinkAndGetOffset(label);
    505     BISCUIT_ASSERT(IsValidJTypeImm(address));
    506     JAL(rd, static_cast<int32_t>(address));
    507 }
    508 
    509 void Assembler::J(int32_t imm) noexcept {
    510     BISCUIT_ASSERT(IsValidJTypeImm(imm));
    511     JAL(x0, imm);
    512 }
    513 
    514 void Assembler::JAL(int32_t imm) noexcept {
    515     BISCUIT_ASSERT(IsValidJTypeImm(imm));
    516     EmitJType(m_buffer, static_cast<uint32_t>(imm), x1, 0b1101111);
    517 }
    518 
    519 void Assembler::JAL(GPR rd, int32_t imm) noexcept {
    520     BISCUIT_ASSERT(IsValidJTypeImm(imm));
    521     EmitJType(m_buffer, static_cast<uint32_t>(imm), rd, 0b1101111);
    522 }
    523 
    524 void Assembler::JALR(GPR rs) noexcept {
    525     JALR(x1, 0, rs);
    526 }
    527 
    528 void Assembler::JALR(GPR rd, int32_t imm, GPR rs1) noexcept {
    529     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    530     EmitIType(m_buffer, static_cast<uint32_t>(imm), rs1, 0b000, rd, 0b1100111);
    531 }
    532 
    533 void Assembler::JR(GPR rs) noexcept {
    534     JALR(x0, 0, rs);
    535 }
    536 
    537 void Assembler::LB(GPR rd, int32_t imm, GPR rs) noexcept {
    538     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    539     EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b000, rd, 0b0000011);
    540 }
    541 
    542 void Assembler::LBU(GPR rd, int32_t imm, GPR rs) noexcept {
    543     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    544     EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b100, rd, 0b0000011);
    545 }
    546 
    547 void Assembler::LH(GPR rd, int32_t imm, GPR rs) noexcept {
    548     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    549     EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b001, rd, 0b0000011);
    550 }
    551 
    552 void Assembler::LHU(GPR rd, int32_t imm, GPR rs) noexcept {
    553     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    554     EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b101, rd, 0b0000011);
    555 }
    556 
    557 void Assembler::LI(GPR rd, uint32_t imm) noexcept {
    558     const auto lower = imm & 0xFFF;
    559     const auto upper = (imm & 0xFFFFF000) >> 12;
    560     const auto simm = static_cast<int32_t>(imm);
    561 
    562     // If the immediate can fit within 12 bits, we only need to emit an ADDI.
    563     if (IsValidSigned12BitImm(simm)) {
    564         ADDI(rd, x0, static_cast<int32_t>(lower));
    565     } else {
    566         const bool needs_increment = (lower & 0x800) != 0;
    567         const auto upper_imm = needs_increment ? upper + 1 : upper;
    568 
    569         // Note that we add 1 to the upper portion of the immediate if the lower
    570         // immediate's most significant bit is set. This is necessary, as ADDI
    571         // sign-extends its 12-bit immediate before performing addition.
    572         //
    573         // In the event of the sign-extension, this means that we'll be adding
    574         // an equivalent of "lower - 4096" to the upper immediate.
    575         //
    576         // We add 1 to the upper part of the immediate. the upper part's least
    577         // significant bit is bit 12. Adding 1 to this bit is equivalent to adding
    578         // 4096, which counteracts the sign-extension, preserving the value.
    579 
    580         LUI(rd, upper_imm);
    581         ADDI(rd, rd, static_cast<int32_t>(lower));
    582     }
    583 }
    584 
    585 void Assembler::LUI(GPR rd, uint32_t imm) noexcept {
    586     EmitUType(m_buffer, imm, rd, 0b0110111);
    587 }
    588 
    589 void Assembler::LW(GPR rd, int32_t imm, GPR rs) noexcept {
    590     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    591     EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b010, rd, 0b0000011);
    592 }
    593 
    594 void Assembler::MV(GPR rd, GPR rs) noexcept {
    595     ADDI(rd, rs, 0);
    596 }
    597 
    598 void Assembler::NEG(GPR rd, GPR rs) noexcept {
    599     SUB(rd, x0, rs);
    600 }
    601 
    602 void Assembler::NOP() noexcept {
    603     ADDI(x0, x0, 0);
    604 }
    605 
    606 void Assembler::NOT(GPR rd, GPR rs) noexcept {
    607     XORI(rd, rs, UINT32_MAX);
    608 }
    609 
    610 void Assembler::OR(GPR rd, GPR lhs, GPR rhs) noexcept {
    611     EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b110, rd, 0b0110011);
    612 }
    613 
    614 void Assembler::ORI(GPR rd, GPR rs, uint32_t imm) noexcept {
    615     EmitIType(m_buffer, imm, rs, 0b110, rd, 0b0010011);
    616 }
    617 
    618 void Assembler::PAUSE() noexcept {
    619     m_buffer.Emit32(0x0100000F);
    620 }
    621 
    622 void Assembler::RET() noexcept {
    623     JALR(x0, 0, x1);
    624 }
    625 
    626 void Assembler::SB(GPR rs2, int32_t imm, GPR rs1) noexcept {
    627     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    628     EmitSType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b000, 0b0100011);
    629 }
    630 
    631 void Assembler::SEQZ(GPR rd, GPR rs) noexcept {
    632     SLTIU(rd, rs, 1);
    633 }
    634 
    635 void Assembler::SGTZ(GPR rd, GPR rs) noexcept {
    636     SLT(rd, x0, rs);
    637 }
    638 
    639 void Assembler::SH(GPR rs2, int32_t imm, GPR rs1) noexcept {
    640     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    641     EmitSType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b001, 0b0100011);
    642 }
    643 
    644 void Assembler::SLL(GPR rd, GPR lhs, GPR rhs) noexcept {
    645     EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b001, rd, 0b0110011);
    646 }
    647 
    648 void Assembler::SLLI(GPR rd, GPR rs, uint32_t shift) noexcept {
    649     BISCUIT_ASSERT(shift <= 31);
    650     EmitIType(m_buffer, shift & 0x1F, rs, 0b001, rd, 0b0010011);
    651 }
    652 
    653 void Assembler::SLT(GPR rd, GPR lhs, GPR rhs) noexcept {
    654     EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b010, rd, 0b0110011);
    655 }
    656 
    657 void Assembler::SLTI(GPR rd, GPR rs, int32_t imm) noexcept {
    658     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    659     EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b010, rd, 0b0010011);
    660 }
    661 
    662 void Assembler::SLTIU(GPR rd, GPR rs, int32_t imm) noexcept {
    663     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    664     EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b011, rd, 0b0010011);
    665 }
    666 
    667 void Assembler::SLTU(GPR rd, GPR lhs, GPR rhs) noexcept {
    668     EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b011, rd, 0b0110011);
    669 }
    670 
    671 void Assembler::SLTZ(GPR rd, GPR rs) noexcept {
    672     SLT(rd, rs, x0);
    673 }
    674 
    675 void Assembler::SNEZ(GPR rd, GPR rs) noexcept {
    676     SLTU(rd, x0, rs);
    677 }
    678 
    679 void Assembler::SRA(GPR rd, GPR lhs, GPR rhs) noexcept {
    680     EmitRType(m_buffer, 0b0100000, rhs, lhs, 0b101, rd, 0b0110011);
    681 }
    682 
    683 void Assembler::SRAI(GPR rd, GPR rs, uint32_t shift) noexcept {
    684     BISCUIT_ASSERT(shift <= 31);
    685     EmitIType(m_buffer, (0b0100000 << 5) | (shift & 0x1F), rs, 0b101, rd, 0b0010011);
    686 }
    687 
    688 void Assembler::SRL(GPR rd, GPR lhs, GPR rhs) noexcept {
    689     EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b101, rd, 0b0110011);
    690 }
    691 
    692 void Assembler::SRLI(GPR rd, GPR rs, uint32_t shift) noexcept {
    693     BISCUIT_ASSERT(shift <= 31);
    694     EmitIType(m_buffer, shift & 0x1F, rs, 0b101, rd, 0b0010011);
    695 }
    696 
    697 void Assembler::SUB(GPR rd, GPR lhs, GPR rhs) noexcept {
    698     EmitRType(m_buffer, 0b0100000, rhs, lhs, 0b000, rd, 0b0110011);
    699 }
    700 
    701 void Assembler::SW(GPR rs2, int32_t imm, GPR rs1) noexcept {
    702     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    703     EmitSType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b010, 0b0100011);
    704 }
    705 
    706 void Assembler::XOR(GPR rd, GPR lhs, GPR rhs) noexcept {
    707     EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b100, rd, 0b0110011);
    708 }
    709 
    710 void Assembler::XORI(GPR rd, GPR rs, uint32_t imm) noexcept {
    711     EmitIType(m_buffer, imm, rs, 0b100, rd, 0b0010011);
    712 }
    713 
    714 // RV64I Instructions
    715 
    716 void Assembler::ADDIW(GPR rd, GPR rs, int32_t imm) noexcept {
    717     EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b000, rd, 0b0011011);
    718 }
    719 
    720 void Assembler::ADDW(GPR rd, GPR lhs, GPR rhs) noexcept {
    721     EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b000, rd, 0b0111011);
    722 }
    723 
    724 void Assembler::LD(GPR rd, int32_t imm, GPR rs) noexcept {
    725     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    726     EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b011, rd, 0b0000011);
    727 }
    728 
    729 void Assembler::LWU(GPR rd, int32_t imm, GPR rs) noexcept {
    730     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    731     EmitIType(m_buffer, static_cast<uint32_t>(imm), rs, 0b110, rd, 0b0000011);
    732 }
    733 
    734 void Assembler::SD(GPR rs2, int32_t imm, GPR rs1) noexcept {
    735     BISCUIT_ASSERT(IsValidSigned12BitImm(imm));
    736     EmitSType(m_buffer, static_cast<uint32_t>(imm), rs2, rs1, 0b011, 0b0100011);
    737 }
    738 
    739 void Assembler::SRAI64(GPR rd, GPR rs, uint32_t shift) noexcept {
    740     BISCUIT_ASSERT(shift <= 63);
    741     EmitIType(m_buffer, (0b0100000 << 5) | (shift & 0x3F), rs, 0b101, rd, 0b0010011);
    742 }
    743 void Assembler::SLLI64(GPR rd, GPR rs, uint32_t shift) noexcept {
    744     BISCUIT_ASSERT(shift <= 63);
    745     EmitIType(m_buffer, shift & 0x3F, rs, 0b001, rd, 0b0010011);
    746 }
    747 void Assembler::SRLI64(GPR rd, GPR rs, uint32_t shift) noexcept {
    748     BISCUIT_ASSERT(shift <= 63);
    749     EmitIType(m_buffer, shift & 0x3F, rs, 0b101, rd, 0b0010011);
    750 }
    751 
    752 void Assembler::SLLIW(GPR rd, GPR rs, uint32_t shift) noexcept {
    753     BISCUIT_ASSERT(shift <= 31);
    754     EmitIType(m_buffer, shift & 0x1F, rs, 0b001, rd, 0b0011011);
    755 }
    756 void Assembler::SRAIW(GPR rd, GPR rs, uint32_t shift) noexcept {
    757     BISCUIT_ASSERT(shift <= 31);
    758     EmitIType(m_buffer, (0b0100000 << 5) | (shift & 0x1F), rs, 0b101, rd, 0b0011011);
    759 }
    760 void Assembler::SRLIW(GPR rd, GPR rs, uint32_t shift) noexcept {
    761     BISCUIT_ASSERT(shift <= 31);
    762     EmitIType(m_buffer, shift & 0x1F, rs, 0b101, rd, 0b0011011);
    763 }
    764 
    765 void Assembler::SLLW(GPR rd, GPR lhs, GPR rhs) noexcept {
    766     EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b001, rd, 0b0111011);
    767 }
    768 void Assembler::SRAW(GPR rd, GPR lhs, GPR rhs) noexcept {
    769     EmitRType(m_buffer, 0b0100000, rhs, lhs, 0b101, rd, 0b0111011);
    770 }
    771 void Assembler::SRLW(GPR rd, GPR lhs, GPR rhs) noexcept {
    772     EmitRType(m_buffer, 0b0000000, rhs, lhs, 0b101, rd, 0b0111011);
    773 }
    774 
    775 void Assembler::SUBW(GPR rd, GPR lhs, GPR rhs) noexcept {
    776     EmitRType(m_buffer, 0b0100000, rhs, lhs, 0b000, rd, 0b0111011);
    777 }
    778 
    779 // Zicsr Extension Instructions
    780 
    781 void Assembler::CSRRC(GPR rd, CSR csr, GPR rs) noexcept {
    782     EmitIType(m_buffer, static_cast<uint32_t>(csr), rs, 0b011, rd, 0b1110011);
    783 }
    784 void Assembler::CSRRCI(GPR rd, CSR csr, uint32_t imm) noexcept {
    785     BISCUIT_ASSERT(imm <= 0x1F);
    786     EmitIType(m_buffer, static_cast<uint32_t>(csr), GPR{imm & 0x1F}, 0b111, rd, 0b1110011);
    787 }
    788 void Assembler::CSRRS(GPR rd, CSR csr, GPR rs) noexcept {
    789     EmitIType(m_buffer, static_cast<uint32_t>(csr), rs, 0b010, rd, 0b1110011);
    790 }
    791 void Assembler::CSRRSI(GPR rd, CSR csr, uint32_t imm) noexcept {
    792     BISCUIT_ASSERT(imm <= 0x1F);
    793     EmitIType(m_buffer, static_cast<uint32_t>(csr), GPR{imm & 0x1F}, 0b110, rd, 0b1110011);
    794 }
    795 void Assembler::CSRRW(GPR rd, CSR csr, GPR rs) noexcept {
    796     EmitIType(m_buffer, static_cast<uint32_t>(csr), rs, 0b001, rd, 0b1110011);
    797 }
    798 void Assembler::CSRRWI(GPR rd, CSR csr, uint32_t imm) noexcept {
    799     BISCUIT_ASSERT(imm <= 0x1F);
    800     EmitIType(m_buffer, static_cast<uint32_t>(csr), GPR{imm & 0x1F}, 0b101, rd, 0b1110011);
    801 }
    802 
    803 void Assembler::CSRR(GPR rd, CSR csr) noexcept {
    804     CSRRS(rd, csr, x0);
    805 }
    806 void Assembler::CSWR(CSR csr, GPR rs) noexcept {
    807     CSRRW(x0, csr, rs);
    808 }
    809 
    810 void Assembler::CSRS(CSR csr, GPR rs) noexcept {
    811     CSRRS(x0, csr, rs);
    812 }
    813 void Assembler::CSRC(CSR csr, GPR rs) noexcept {
    814     CSRRC(x0, csr, rs);
    815 }
    816 
    817 void Assembler::CSRCI(CSR csr, uint32_t imm) noexcept {
    818     CSRRCI(x0, csr, imm);
    819 }
    820 void Assembler::CSRSI(CSR csr, uint32_t imm) noexcept {
    821     CSRRSI(x0, csr, imm);
    822 }
    823 void Assembler::CSRWI(CSR csr, uint32_t imm) noexcept {
    824     CSRRWI(x0, csr, imm);
    825 }
    826 
    827 void Assembler::FRCSR(GPR rd) noexcept {
    828     CSRRS(rd, CSR::FCSR, x0);
    829 }
    830 void Assembler::FSCSR(GPR rd, GPR rs) noexcept {
    831     CSRRW(rd, CSR::FCSR, rs);
    832 }
    833 void Assembler::FSCSR(GPR rs) noexcept {
    834     CSRRW(x0, CSR::FCSR, rs);
    835 }
    836 
    837 void Assembler::FRRM(GPR rd) noexcept {
    838     CSRRS(rd, CSR::FRM, x0);
    839 }
    840 void Assembler::FSRM(GPR rd, GPR rs) noexcept {
    841     CSRRW(rd, CSR::FRM, rs);
    842 }
    843 void Assembler::FSRM(GPR rs) noexcept {
    844     CSRRW(x0, CSR::FRM, rs);
    845 }
    846 
    847 void Assembler::FSRMI(GPR rd, uint32_t imm) noexcept {
    848     CSRRWI(rd, CSR::FRM, imm);
    849 }
    850 void Assembler::FSRMI(uint32_t imm) noexcept {
    851     CSRRWI(x0, CSR::FRM, imm);
    852 }
    853 
    854 void Assembler::FRFLAGS(GPR rd) noexcept {
    855     CSRRS(rd, CSR::FFlags, x0);
    856 }
    857 void Assembler::FSFLAGS(GPR rd, GPR rs) noexcept {
    858     CSRRW(rd, CSR::FFlags, rs);
    859 }
    860 void Assembler::FSFLAGS(GPR rs) noexcept {
    861     CSRRW(x0, CSR::FFlags, rs);
    862 }
    863 
    864 void Assembler::FSFLAGSI(GPR rd, uint32_t imm) noexcept {
    865     CSRRWI(rd, CSR::FFlags, imm);
    866 }
    867 void Assembler::FSFLAGSI(uint32_t imm) noexcept {
    868     CSRRWI(x0, CSR::FFlags, imm);
    869 }
    870 
    871 void Assembler::RDCYCLE(GPR rd) noexcept {
    872     CSRRS(rd, CSR::Cycle, x0);
    873 }
    874 void Assembler::RDCYCLEH(GPR rd) noexcept {
    875     CSRRS(rd, CSR::CycleH, x0);
    876 }
    877 
    878 void Assembler::RDINSTRET(GPR rd) noexcept {
    879     CSRRS(rd, CSR::InstRet, x0);
    880 }
    881 void Assembler::RDINSTRETH(GPR rd) noexcept {
    882     CSRRS(rd, CSR::InstRetH, x0);
    883 }
    884 
    885 void Assembler::RDTIME(GPR rd) noexcept {
    886     CSRRS(rd, CSR::Time, x0);
    887 }
    888 void Assembler::RDTIMEH(GPR rd) noexcept {
    889     CSRRS(rd, CSR::TimeH, x0);
    890 }
    891 
    892 // Zihintntl Extension Instructions
    893 
    894 void Assembler::C_NTL_ALL() noexcept {
    895     C_ADD(x0, x5);
    896 }
    897 void Assembler::C_NTL_S1() noexcept {
    898     C_ADD(x0, x4);
    899 }
    900 void Assembler::C_NTL_P1() noexcept {
    901     C_ADD(x0, x2);
    902 }
    903 void Assembler::C_NTL_PALL() noexcept {
    904     C_ADD(x0, x3);
    905 }
    906 void Assembler::NTL_ALL() noexcept {
    907     ADD(x0, x0, x5);
    908 }
    909 void Assembler::NTL_S1() noexcept {
    910     ADD(x0, x0, x4);
    911 }
    912 void Assembler::NTL_P1() noexcept {
    913     ADD(x0, x0, x2);
    914 }
    915 void Assembler::NTL_PALL() noexcept {
    916     ADD(x0, x0, x3);
    917 }
    918 
    919 // RV32M Extension Instructions
    920 
    921 void Assembler::DIV(GPR rd, GPR rs1, GPR rs2) noexcept {
    922     EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b100, rd, 0b0110011);
    923 }
    924 void Assembler::DIVU(GPR rd, GPR rs1, GPR rs2) noexcept {
    925     EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b101, rd, 0b0110011);
    926 }
    927 void Assembler::MUL(GPR rd, GPR rs1, GPR rs2) noexcept {
    928     EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b000, rd, 0b0110011);
    929 }
    930 void Assembler::MULH(GPR rd, GPR rs1, GPR rs2) noexcept {
    931     EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b001, rd, 0b0110011);
    932 }
    933 void Assembler::MULHSU(GPR rd, GPR rs1, GPR rs2) noexcept {
    934     EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b010, rd, 0b0110011);
    935 }
    936 void Assembler::MULHU(GPR rd, GPR rs1, GPR rs2) noexcept {
    937     EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b011, rd, 0b0110011);
    938 }
    939 void Assembler::REM(GPR rd, GPR rs1, GPR rs2) noexcept {
    940     EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b110, rd, 0b0110011);
    941 }
    942 void Assembler::REMU(GPR rd, GPR rs1, GPR rs2) noexcept {
    943     EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b111, rd, 0b0110011);
    944 }
    945 
    946 // RV64M Extension Instructions
    947 
    948 void Assembler::DIVW(GPR rd, GPR rs1, GPR rs2) noexcept {
    949     EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b100, rd, 0b0111011);
    950 }
    951 void Assembler::DIVUW(GPR rd, GPR rs1, GPR rs2) noexcept {
    952     EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b101, rd, 0b0111011);
    953 }
    954 void Assembler::MULW(GPR rd, GPR rs1, GPR rs2) noexcept {
    955     EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b000, rd, 0b0111011);
    956 }
    957 void Assembler::REMW(GPR rd, GPR rs1, GPR rs2) noexcept {
    958     EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b110, rd, 0b0111011);
    959 }
    960 void Assembler::REMUW(GPR rd, GPR rs1, GPR rs2) noexcept {
    961     EmitRType(m_buffer, 0b0000001, rs2, rs1, 0b111, rd, 0b0111011);
    962 }
    963 
    964 // RV32A Extension Instructions
    965 
    966 void Assembler::AMOADD_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
    967     EmitAtomic(m_buffer, 0b00000, ordering, rs2, rs1, 0b010, rd, 0b0101111);
    968 }
    969 void Assembler::AMOAND_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
    970     EmitAtomic(m_buffer, 0b01100, ordering, rs2, rs1, 0b010, rd, 0b0101111);
    971 }
    972 void Assembler::AMOMAX_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
    973     EmitAtomic(m_buffer, 0b10100, ordering, rs2, rs1, 0b010, rd, 0b0101111);
    974 }
    975 void Assembler::AMOMAXU_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
    976     EmitAtomic(m_buffer, 0b11100, ordering, rs2, rs1, 0b010, rd, 0b0101111);
    977 }
    978 void Assembler::AMOMIN_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
    979     EmitAtomic(m_buffer, 0b10000, ordering, rs2, rs1, 0b010, rd, 0b0101111);
    980 }
    981 void Assembler::AMOMINU_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
    982     EmitAtomic(m_buffer, 0b11000, ordering, rs2, rs1, 0b010, rd, 0b0101111);
    983 }
    984 void Assembler::AMOOR_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
    985     EmitAtomic(m_buffer, 0b01000, ordering, rs2, rs1, 0b010, rd, 0b0101111);
    986 }
    987 void Assembler::AMOSWAP_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
    988     EmitAtomic(m_buffer, 0b00001, ordering, rs2, rs1, 0b010, rd, 0b0101111);
    989 }
    990 void Assembler::AMOXOR_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
    991     EmitAtomic(m_buffer, 0b00100, ordering, rs2, rs1, 0b010, rd, 0b0101111);
    992 }
    993 void Assembler::LR_W(Ordering ordering, GPR rd, GPR rs) noexcept {
    994     EmitAtomic(m_buffer, 0b00010, ordering, x0, rs, 0b010, rd, 0b0101111);
    995 }
    996 void Assembler::SC_W(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
    997     EmitAtomic(m_buffer, 0b00011, ordering, rs2, rs1, 0b010, rd, 0b0101111);
    998 }
    999 
   1000 // RV64A Extension Instructions
   1001 
   1002 void Assembler::AMOADD_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
   1003     EmitAtomic(m_buffer, 0b00000, ordering, rs2, rs1, 0b011, rd, 0b0101111);
   1004 }
   1005 void Assembler::AMOAND_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
   1006     EmitAtomic(m_buffer, 0b01100, ordering, rs2, rs1, 0b011, rd, 0b0101111);
   1007 }
   1008 void Assembler::AMOMAX_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
   1009     EmitAtomic(m_buffer, 0b10100, ordering, rs2, rs1, 0b011, rd, 0b0101111);
   1010 }
   1011 void Assembler::AMOMAXU_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
   1012     EmitAtomic(m_buffer, 0b11100, ordering, rs2, rs1, 0b011, rd, 0b0101111);
   1013 }
   1014 void Assembler::AMOMIN_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
   1015     EmitAtomic(m_buffer, 0b10000, ordering, rs2, rs1, 0b011, rd, 0b0101111);
   1016 }
   1017 void Assembler::AMOMINU_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
   1018     EmitAtomic(m_buffer, 0b11000, ordering, rs2, rs1, 0b011, rd, 0b0101111);
   1019 }
   1020 void Assembler::AMOOR_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
   1021     EmitAtomic(m_buffer, 0b01000, ordering, rs2, rs1, 0b011, rd, 0b0101111);
   1022 }
   1023 void Assembler::AMOSWAP_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
   1024     EmitAtomic(m_buffer, 0b00001, ordering, rs2, rs1, 0b011, rd, 0b0101111);
   1025 }
   1026 void Assembler::AMOXOR_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
   1027     EmitAtomic(m_buffer, 0b00100, ordering, rs2, rs1, 0b011, rd, 0b0101111);
   1028 }
   1029 void Assembler::LR_D(Ordering ordering, GPR rd, GPR rs) noexcept {
   1030     EmitAtomic(m_buffer, 0b00010, ordering, x0, rs, 0b011, rd, 0b0101111);
   1031 }
   1032 void Assembler::SC_D(Ordering ordering, GPR rd, GPR rs2, GPR rs1) noexcept {
   1033     EmitAtomic(m_buffer, 0b00011, ordering, rs2, rs1, 0b011, rd, 0b0101111);
   1034 }
   1035 
   1036 // RV32F Extension Instructions
   1037 
   1038 void Assembler::FADD_S(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1039     EmitRType(m_buffer, 0b0000000, rs2, rs1, rmode, rd, 0b1010011);
   1040 }
   1041 void Assembler::FCLASS_S(GPR rd, FPR rs1) noexcept {
   1042     EmitRType(m_buffer, 0b1110000, f0, rs1, 0b001, rd, 0b1010011);
   1043 }
   1044 void Assembler::FCVT_S_W(FPR rd, GPR rs1, RMode rmode) noexcept {
   1045     EmitRType(m_buffer, 0b1101000, f0, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1046 }
   1047 void Assembler::FCVT_S_WU(FPR rd, GPR rs1, RMode rmode) noexcept {
   1048     EmitRType(m_buffer, 0b1101000, f1, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1049 }
   1050 void Assembler::FCVT_W_S(GPR rd, FPR rs1, RMode rmode) noexcept {
   1051     EmitRType(m_buffer, 0b1100000, f0, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1052 }
   1053 void Assembler::FCVT_WU_S(GPR rd, FPR rs1, RMode rmode) noexcept {
   1054     EmitRType(m_buffer, 0b1100000, f1, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1055 }
   1056 void Assembler::FDIV_S(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1057     EmitRType(m_buffer, 0b0001100, rs2, rs1, rmode, rd, 0b1010011);
   1058 }
   1059 void Assembler::FEQ_S(GPR rd, FPR rs1, FPR rs2) noexcept {
   1060     EmitRType(m_buffer, 0b1010000, rs2, rs1, 0b010, rd, 0b1010011);
   1061 }
   1062 void Assembler::FLE_S(GPR rd, FPR rs1, FPR rs2) noexcept {
   1063     EmitRType(m_buffer, 0b1010000, rs2, rs1, 0b000, rd, 0b1010011);
   1064 }
   1065 void Assembler::FLT_S(GPR rd, FPR rs1, FPR rs2) noexcept {
   1066     EmitRType(m_buffer, 0b1010000, rs2, rs1, 0b001, rd, 0b1010011);
   1067 }
   1068 void Assembler::FLW(FPR rd, int32_t offset, GPR rs) noexcept {
   1069     BISCUIT_ASSERT(IsValidSigned12BitImm(offset));
   1070     EmitIType(m_buffer, static_cast<uint32_t>(offset), rs, 0b010, rd, 0b0000111);
   1071 }
   1072 void Assembler::FMADD_S(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1073     EmitR4Type(m_buffer, rs3, 0b00, rs2, rs1, rmode, rd, 0b1000011);
   1074 }
   1075 void Assembler::FMAX_S(FPR rd, FPR rs1, FPR rs2) noexcept {
   1076     EmitRType(m_buffer, 0b0010100, rs2, rs1, 0b001, rd, 0b1010011);
   1077 }
   1078 void Assembler::FMIN_S(FPR rd, FPR rs1, FPR rs2) noexcept {
   1079     EmitRType(m_buffer, 0b0010100, rs2, rs1, 0b000, rd, 0b1010011);
   1080 }
   1081 void Assembler::FMSUB_S(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1082     EmitR4Type(m_buffer, rs3, 0b00, rs2, rs1, rmode, rd, 0b1000111);
   1083 }
   1084 void Assembler::FMUL_S(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1085     EmitRType(m_buffer, 0b0001000, rs2, rs1, rmode, rd, 0b1010011);
   1086 }
   1087 void Assembler::FMV_W_X(FPR rd, GPR rs1) noexcept {
   1088     EmitRType(m_buffer, 0b1111000, f0, rs1, 0b000, rd, 0b1010011);
   1089 }
   1090 void Assembler::FMV_X_W(GPR rd, FPR rs1) noexcept {
   1091     EmitRType(m_buffer, 0b1110000, f0, rs1, 0b000, rd, 0b1010011);
   1092 }
   1093 void Assembler::FNMADD_S(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1094     EmitR4Type(m_buffer, rs3, 0b00, rs2, rs1, rmode, rd, 0b1001111);
   1095 }
   1096 void Assembler::FNMSUB_S(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1097     EmitR4Type(m_buffer, rs3, 0b00, rs2, rs1, rmode, rd, 0b1001011);
   1098 }
   1099 void Assembler::FSGNJ_S(FPR rd, FPR rs1, FPR rs2) noexcept {
   1100     EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b000, rd, 0b1010011);
   1101 }
   1102 void Assembler::FSGNJN_S(FPR rd, FPR rs1, FPR rs2) noexcept {
   1103     EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b001, rd, 0b1010011);
   1104 }
   1105 void Assembler::FSGNJX_S(FPR rd, FPR rs1, FPR rs2) noexcept {
   1106     EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b010, rd, 0b1010011);
   1107 }
   1108 void Assembler::FSQRT_S(FPR rd, FPR rs1, RMode rmode) noexcept {
   1109     EmitRType(m_buffer, 0b0101100, f0, rs1, rmode, rd, 0b1010011);
   1110 }
   1111 void Assembler::FSUB_S(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1112     EmitRType(m_buffer, 0b0000100, rs2, rs1, rmode, rd, 0b1010011);
   1113 }
   1114 void Assembler::FSW(FPR rs2, int32_t offset, GPR rs1) noexcept {
   1115     BISCUIT_ASSERT(IsValidSigned12BitImm(offset));
   1116     EmitSType(m_buffer, static_cast<uint32_t>(offset), rs2, rs1, 0b010, 0b0100111);
   1117 }
   1118 
   1119 void Assembler::FABS_S(FPR rd, FPR rs) noexcept {
   1120     FSGNJX_S(rd, rs, rs);
   1121 }
   1122 void Assembler::FMV_S(FPR rd, FPR rs) noexcept {
   1123     FSGNJ_S(rd, rs, rs);
   1124 }
   1125 void Assembler::FNEG_S(FPR rd, FPR rs) noexcept {
   1126     FSGNJN_S(rd, rs, rs);
   1127 }
   1128 
   1129 // RV64F Extension Instructions
   1130 
   1131 void Assembler::FCVT_L_S(GPR rd, FPR rs1, RMode rmode) noexcept {
   1132     EmitRType(m_buffer, 0b1100000, f2, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1133 }
   1134 void Assembler::FCVT_LU_S(GPR rd, FPR rs1, RMode rmode) noexcept {
   1135     EmitRType(m_buffer, 0b1100000, f3, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1136 }
   1137 void Assembler::FCVT_S_L(FPR rd, GPR rs1, RMode rmode) noexcept {
   1138     EmitRType(m_buffer, 0b1101000, f2, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1139 }
   1140 void Assembler::FCVT_S_LU(FPR rd, GPR rs1, RMode rmode) noexcept {
   1141     EmitRType(m_buffer, 0b1101000, f3, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1142 }
   1143 
   1144 // RV32D Extension Instructions
   1145 
   1146 void Assembler::FADD_D(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1147     EmitRType(m_buffer, 0b0000001, rs2, rs1, rmode, rd, 0b1010011);
   1148 }
   1149 void Assembler::FCLASS_D(GPR rd, FPR rs1) noexcept {
   1150     EmitRType(m_buffer, 0b1110001, f0, rs1, 0b001, rd, 0b1010011);
   1151 }
   1152 void Assembler::FCVT_D_W(FPR rd, GPR rs1, RMode rmode) noexcept {
   1153     EmitRType(m_buffer, 0b1101001, f0, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1154 }
   1155 void Assembler::FCVT_D_WU(FPR rd, GPR rs1, RMode rmode) noexcept {
   1156     EmitRType(m_buffer, 0b1101001, f1, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1157 }
   1158 void Assembler::FCVT_W_D(GPR rd, FPR rs1, RMode rmode) noexcept {
   1159     EmitRType(m_buffer, 0b1100001, f0, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1160 }
   1161 void Assembler::FCVT_WU_D(GPR rd, FPR rs1, RMode rmode) noexcept {
   1162     EmitRType(m_buffer, 0b1100001, f1, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1163 }
   1164 void Assembler::FCVT_D_S(FPR rd, FPR rs1, RMode rmode) noexcept {
   1165     EmitRType(m_buffer, 0b0100001, f0, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1166 }
   1167 void Assembler::FCVT_S_D(FPR rd, FPR rs1, RMode rmode) noexcept {
   1168     EmitRType(m_buffer, 0b0100000, f1, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1169 }
   1170 void Assembler::FDIV_D(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1171     EmitRType(m_buffer, 0b0001101, rs2, rs1, rmode, rd, 0b1010011);
   1172 }
   1173 void Assembler::FEQ_D(GPR rd, FPR rs1, FPR rs2) noexcept {
   1174     EmitRType(m_buffer, 0b1010001, rs2, rs1, 0b010, rd, 0b1010011);
   1175 }
   1176 void Assembler::FLE_D(GPR rd, FPR rs1, FPR rs2) noexcept {
   1177     EmitRType(m_buffer, 0b1010001, rs2, rs1, 0b000, rd, 0b1010011);
   1178 }
   1179 void Assembler::FLT_D(GPR rd, FPR rs1, FPR rs2) noexcept {
   1180     EmitRType(m_buffer, 0b1010001, rs2, rs1, 0b001, rd, 0b1010011);
   1181 }
   1182 void Assembler::FLD(FPR rd, int32_t offset, GPR rs) noexcept {
   1183     BISCUIT_ASSERT(IsValidSigned12BitImm(offset));
   1184     EmitIType(m_buffer, static_cast<uint32_t>(offset), rs, 0b011, rd, 0b0000111);
   1185 }
   1186 void Assembler::FMADD_D(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1187     EmitR4Type(m_buffer, rs3, 0b01, rs2, rs1, rmode, rd, 0b1000011);
   1188 }
   1189 void Assembler::FMAX_D(FPR rd, FPR rs1, FPR rs2) noexcept {
   1190     EmitRType(m_buffer, 0b0010101, rs2, rs1, 0b001, rd, 0b1010011);
   1191 }
   1192 void Assembler::FMIN_D(FPR rd, FPR rs1, FPR rs2) noexcept {
   1193     EmitRType(m_buffer, 0b0010101, rs2, rs1, 0b000, rd, 0b1010011);
   1194 }
   1195 void Assembler::FMSUB_D(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1196     EmitR4Type(m_buffer, rs3, 0b01, rs2, rs1, rmode, rd, 0b1000111);
   1197 }
   1198 void Assembler::FMUL_D(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1199     EmitRType(m_buffer, 0b0001001, rs2, rs1, rmode, rd, 0b1010011);
   1200 }
   1201 void Assembler::FNMADD_D(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1202     EmitR4Type(m_buffer, rs3, 0b01, rs2, rs1, rmode, rd, 0b1001111);
   1203 }
   1204 void Assembler::FNMSUB_D(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1205     EmitR4Type(m_buffer, rs3, 0b01, rs2, rs1, rmode, rd, 0b1001011);
   1206 }
   1207 void Assembler::FSGNJ_D(FPR rd, FPR rs1, FPR rs2) noexcept {
   1208     EmitRType(m_buffer, 0b0010001, rs2, rs1, 0b000, rd, 0b1010011);
   1209 }
   1210 void Assembler::FSGNJN_D(FPR rd, FPR rs1, FPR rs2) noexcept {
   1211     EmitRType(m_buffer, 0b0010001, rs2, rs1, 0b001, rd, 0b1010011);
   1212 }
   1213 void Assembler::FSGNJX_D(FPR rd, FPR rs1, FPR rs2) noexcept {
   1214     EmitRType(m_buffer, 0b0010001, rs2, rs1, 0b010, rd, 0b1010011);
   1215 }
   1216 void Assembler::FSQRT_D(FPR rd, FPR rs1, RMode rmode) noexcept {
   1217     EmitRType(m_buffer, 0b0101101, f0, rs1, rmode, rd, 0b1010011);
   1218 }
   1219 void Assembler::FSUB_D(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1220     EmitRType(m_buffer, 0b0000101, rs2, rs1, rmode, rd, 0b1010011);
   1221 }
   1222 void Assembler::FSD(FPR rs2, int32_t offset, GPR rs1) noexcept {
   1223     BISCUIT_ASSERT(IsValidSigned12BitImm(offset));
   1224     EmitSType(m_buffer, static_cast<uint32_t>(offset), rs2, rs1, 0b011, 0b0100111);
   1225 }
   1226 
   1227 void Assembler::FABS_D(FPR rd, FPR rs) noexcept {
   1228     FSGNJX_D(rd, rs, rs);
   1229 }
   1230 void Assembler::FMV_D(FPR rd, FPR rs) noexcept {
   1231     FSGNJ_D(rd, rs, rs);
   1232 }
   1233 void Assembler::FNEG_D(FPR rd, FPR rs) noexcept {
   1234     FSGNJN_D(rd, rs, rs);
   1235 }
   1236 
   1237 // RV64D Extension Instructions
   1238 
   1239 void Assembler::FCVT_L_D(GPR rd, FPR rs1, RMode rmode) noexcept {
   1240     EmitRType(m_buffer, 0b1100001, f2, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1241 }
   1242 void Assembler::FCVT_LU_D(GPR rd, FPR rs1, RMode rmode) noexcept {
   1243     EmitRType(m_buffer, 0b1100001, f3, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1244 }
   1245 void Assembler::FCVT_D_L(FPR rd, GPR rs1, RMode rmode) noexcept {
   1246     EmitRType(m_buffer, 0b1101001, f2, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1247 }
   1248 void Assembler::FCVT_D_LU(FPR rd, GPR rs1, RMode rmode) noexcept {
   1249     EmitRType(m_buffer, 0b1101001, f3, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1250 }
   1251 void Assembler::FMV_D_X(FPR rd, GPR rs1) noexcept {
   1252     EmitRType(m_buffer, 0b1111001, f0, rs1, 0b000, rd, 0b1010011);
   1253 }
   1254 void Assembler::FMV_X_D(GPR rd, FPR rs1) noexcept {
   1255     EmitRType(m_buffer, 0b1110001, f0, rs1, 0b000, rd, 0b1010011);
   1256 }
   1257 
   1258 // RV32Q Extension Instructions
   1259 
   1260 void Assembler::FADD_Q(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1261     EmitRType(m_buffer, 0b0000011, rs2, rs1, rmode, rd, 0b1010011);
   1262 }
   1263 void Assembler::FCLASS_Q(GPR rd, FPR rs1) noexcept {
   1264     EmitRType(m_buffer, 0b1110011, f0, rs1, 0b001, rd, 0b1010011);
   1265 }
   1266 void Assembler::FCVT_Q_W(FPR rd, GPR rs1, RMode rmode) noexcept {
   1267     EmitRType(m_buffer, 0b1101011, f0, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1268 }
   1269 void Assembler::FCVT_Q_WU(FPR rd, GPR rs1, RMode rmode) noexcept {
   1270     EmitRType(m_buffer, 0b1101011, f1, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1271 }
   1272 void Assembler::FCVT_W_Q(GPR rd, FPR rs1, RMode rmode) noexcept {
   1273     EmitRType(m_buffer, 0b1100011, f0, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1274 }
   1275 void Assembler::FCVT_WU_Q(GPR rd, FPR rs1, RMode rmode) noexcept {
   1276     EmitRType(m_buffer, 0b1100011, f1, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1277 }
   1278 void Assembler::FCVT_Q_D(FPR rd, FPR rs1, RMode rmode) noexcept {
   1279     EmitRType(m_buffer, 0b0100011, f1, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1280 }
   1281 void Assembler::FCVT_D_Q(FPR rd, FPR rs1, RMode rmode) noexcept {
   1282     EmitRType(m_buffer, 0b0100001, f3, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1283 }
   1284 void Assembler::FCVT_Q_S(FPR rd, FPR rs1, RMode rmode) noexcept {
   1285     EmitRType(m_buffer, 0b0100011, f0, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1286 }
   1287 void Assembler::FCVT_S_Q(FPR rd, FPR rs1, RMode rmode) noexcept {
   1288     EmitRType(m_buffer, 0b0100000, f3, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1289 }
   1290 void Assembler::FDIV_Q(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1291     EmitRType(m_buffer, 0b0001111, rs2, rs1, rmode, rd, 0b1010011);
   1292 }
   1293 void Assembler::FEQ_Q(GPR rd, FPR rs1, FPR rs2) noexcept {
   1294     EmitRType(m_buffer, 0b1010011, rs2, rs1, 0b010, rd, 0b1010011);
   1295 }
   1296 void Assembler::FLE_Q(GPR rd, FPR rs1, FPR rs2) noexcept {
   1297     EmitRType(m_buffer, 0b1010011, rs2, rs1, 0b000, rd, 0b1010011);
   1298 }
   1299 void Assembler::FLT_Q(GPR rd, FPR rs1, FPR rs2) noexcept {
   1300     EmitRType(m_buffer, 0b1010011, rs2, rs1, 0b001, rd, 0b1010011);
   1301 }
   1302 void Assembler::FLQ(FPR rd, int32_t offset, GPR rs) noexcept {
   1303     BISCUIT_ASSERT(IsValidSigned12BitImm(offset));
   1304     EmitIType(m_buffer, static_cast<uint32_t>(offset), rs, 0b100, rd, 0b0000111);
   1305 }
   1306 void Assembler::FMADD_Q(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1307     EmitR4Type(m_buffer, rs3, 0b11, rs2, rs1, rmode, rd, 0b1000011);
   1308 }
   1309 void Assembler::FMAX_Q(FPR rd, FPR rs1, FPR rs2) noexcept {
   1310     EmitRType(m_buffer, 0b0010111, rs2, rs1, 0b001, rd, 0b1010011);
   1311 }
   1312 void Assembler::FMIN_Q(FPR rd, FPR rs1, FPR rs2) noexcept {
   1313     EmitRType(m_buffer, 0b0010111, rs2, rs1, 0b000, rd, 0b1010011);
   1314 }
   1315 void Assembler::FMSUB_Q(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1316     EmitR4Type(m_buffer, rs3, 0b11, rs2, rs1, rmode, rd, 0b1000111);
   1317 }
   1318 void Assembler::FMUL_Q(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1319     EmitRType(m_buffer, 0b0001011, rs2, rs1, rmode, rd, 0b1010011);
   1320 }
   1321 void Assembler::FNMADD_Q(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1322     EmitR4Type(m_buffer, rs3, 0b11, rs2, rs1, rmode, rd, 0b1001111);
   1323 }
   1324 void Assembler::FNMSUB_Q(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1325     EmitR4Type(m_buffer, rs3, 0b11, rs2, rs1, rmode, rd, 0b1001011);
   1326 }
   1327 void Assembler::FSGNJ_Q(FPR rd, FPR rs1, FPR rs2) noexcept {
   1328     EmitRType(m_buffer, 0b0010011, rs2, rs1, 0b000, rd, 0b1010011);
   1329 }
   1330 void Assembler::FSGNJN_Q(FPR rd, FPR rs1, FPR rs2) noexcept {
   1331     EmitRType(m_buffer, 0b0010011, rs2, rs1, 0b001, rd, 0b1010011);
   1332 }
   1333 void Assembler::FSGNJX_Q(FPR rd, FPR rs1, FPR rs2) noexcept {
   1334     EmitRType(m_buffer, 0b0010011, rs2, rs1, 0b010, rd, 0b1010011);
   1335 }
   1336 void Assembler::FSQRT_Q(FPR rd, FPR rs1, RMode rmode) noexcept {
   1337     EmitRType(m_buffer, 0b0101111, f0, rs1, rmode, rd, 0b1010011);
   1338 }
   1339 void Assembler::FSUB_Q(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1340     EmitRType(m_buffer, 0b0000111, rs2, rs1, rmode, rd, 0b1010011);
   1341 }
   1342 void Assembler::FSQ(FPR rs2, int32_t offset, GPR rs1) noexcept {
   1343     BISCUIT_ASSERT(IsValidSigned12BitImm(offset));
   1344     EmitSType(m_buffer, static_cast<uint32_t>(offset), rs2, rs1, 0b100, 0b0100111);
   1345 }
   1346 
   1347 void Assembler::FABS_Q(FPR rd, FPR rs) noexcept {
   1348     FSGNJX_Q(rd, rs, rs);
   1349 }
   1350 void Assembler::FMV_Q(FPR rd, FPR rs) noexcept {
   1351     FSGNJ_Q(rd, rs, rs);
   1352 }
   1353 void Assembler::FNEG_Q(FPR rd, FPR rs) noexcept {
   1354     FSGNJN_Q(rd, rs, rs);
   1355 }
   1356 
   1357 // RV64Q Extension Instructions
   1358 
   1359 void Assembler::FCVT_L_Q(GPR rd, FPR rs1, RMode rmode) noexcept {
   1360     EmitRType(m_buffer, 0b1100011, f2, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1361 }
   1362 void Assembler::FCVT_LU_Q(GPR rd, FPR rs1, RMode rmode) noexcept {
   1363     EmitRType(m_buffer, 0b1100011, f3, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1364 }
   1365 void Assembler::FCVT_Q_L(FPR rd, GPR rs1, RMode rmode) noexcept {
   1366     EmitRType(m_buffer, 0b1101011, f2, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1367 }
   1368 void Assembler::FCVT_Q_LU(FPR rd, GPR rs1, RMode rmode) noexcept {
   1369     EmitRType(m_buffer, 0b1101011, f3, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1370 }
   1371 
   1372 // RV32Zfh Extension Instructions
   1373 
   1374 void Assembler::FADD_H(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1375     EmitRType(m_buffer, 0b0000010, rs2, rs1, rmode, rd, 0b1010011);
   1376 }
   1377 void Assembler::FCLASS_H(GPR rd, FPR rs1) noexcept {
   1378     EmitRType(m_buffer, 0b1110010, f0, rs1, 0b001, rd, 0b1010011);
   1379 }
   1380 void Assembler::FCVT_D_H(FPR rd, FPR rs1, RMode rmode) noexcept {
   1381     EmitRType(m_buffer, 0b0100001, f2, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1382 }
   1383 void Assembler::FCVT_H_D(FPR rd, FPR rs1, RMode rmode) noexcept {
   1384     EmitRType(m_buffer, 0b0100010, f1, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1385 }
   1386 void Assembler::FCVT_H_Q(FPR rd, FPR rs1, RMode rmode) noexcept {
   1387     EmitRType(m_buffer, 0b0100010, f3, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1388 }
   1389 void Assembler::FCVT_H_S(FPR rd, FPR rs1, RMode rmode) noexcept {
   1390     EmitRType(m_buffer, 0b0100010, f0, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1391 }
   1392 void Assembler::FCVT_H_W(FPR rd, GPR rs1, RMode rmode) noexcept {
   1393     EmitRType(m_buffer, 0b1101010, f0, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1394 }
   1395 void Assembler::FCVT_H_WU(FPR rd, GPR rs1, RMode rmode) noexcept {
   1396     EmitRType(m_buffer, 0b1101010, f1, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1397 }
   1398 void Assembler::FCVT_Q_H(FPR rd, FPR rs1, RMode rmode) noexcept {
   1399     EmitRType(m_buffer, 0b0100011, f2, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1400 }
   1401 void Assembler::FCVT_S_H(FPR rd, FPR rs1, RMode rmode) noexcept {
   1402     EmitRType(m_buffer, 0b0100000, f2, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1403 }
   1404 void Assembler::FCVT_W_H(GPR rd, FPR rs1, RMode rmode) noexcept {
   1405     EmitRType(m_buffer, 0b1100010, f0, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1406 }
   1407 void Assembler::FCVT_WU_H(GPR rd, FPR rs1, RMode rmode) noexcept {
   1408     EmitRType(m_buffer, 0b1100010, f1, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1409 }
   1410 void Assembler::FDIV_H(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1411     EmitRType(m_buffer, 0b0001110, rs2, rs1, rmode, rd, 0b1010011);
   1412 }
   1413 void Assembler::FEQ_H(GPR rd, FPR rs1, FPR rs2) noexcept {
   1414     EmitRType(m_buffer, 0b1010010, rs2, rs1, 0b010, rd, 0b1010011);
   1415 }
   1416 void Assembler::FLE_H(GPR rd, FPR rs1, FPR rs2) noexcept {
   1417     EmitRType(m_buffer, 0b1010010, rs2, rs1, 0b000, rd, 0b1010011);
   1418 }
   1419 void Assembler::FLH(FPR rd, int32_t offset, GPR rs) noexcept {
   1420     BISCUIT_ASSERT(IsValidSigned12BitImm(offset));
   1421     EmitIType(m_buffer, static_cast<uint32_t>(offset), rs, 0b001, rd, 0b0000111);
   1422 }
   1423 void Assembler::FLT_H(GPR rd, FPR rs1, FPR rs2) noexcept {
   1424     EmitRType(m_buffer, 0b1010010, rs2, rs1, 0b001, rd, 0b1010011);
   1425 }
   1426 void Assembler::FMADD_H(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1427     EmitR4Type(m_buffer, rs3, 0b10, rs2, rs1, rmode, rd, 0b1000011);
   1428 }
   1429 void Assembler::FMAX_H(FPR rd, FPR rs1, FPR rs2) noexcept {
   1430     EmitRType(m_buffer, 0b0010110, rs2, rs1, 0b001, rd, 0b1010011);
   1431 }
   1432 void Assembler::FMIN_H(FPR rd, FPR rs1, FPR rs2) noexcept {
   1433     EmitRType(m_buffer, 0b0010110, rs2, rs1, 0b000, rd, 0b1010011);
   1434 }
   1435 void Assembler::FMSUB_H(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1436     EmitR4Type(m_buffer, rs3, 0b10, rs2, rs1, rmode, rd, 0b1000111);
   1437 }
   1438 void Assembler::FMUL_H(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1439     EmitRType(m_buffer, 0b0001010, rs2, rs1, rmode, rd, 0b1010011);
   1440 }
   1441 void Assembler::FMV_H_X(FPR rd, GPR rs1) noexcept {
   1442     EmitRType(m_buffer, 0b1111010, f0, rs1, 0b000, rd, 0b1010011);
   1443 }
   1444 void Assembler::FMV_X_H(GPR rd, FPR rs1) noexcept {
   1445     EmitRType(m_buffer, 0b1110010, f0, rs1, 0b000, rd, 0b1010011);
   1446 }
   1447 void Assembler::FNMADD_H(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1448     EmitR4Type(m_buffer, rs3, 0b10, rs2, rs1, rmode, rd, 0b1001111);
   1449 }
   1450 void Assembler::FNMSUB_H(FPR rd, FPR rs1, FPR rs2, FPR rs3, RMode rmode) noexcept {
   1451     EmitR4Type(m_buffer, rs3, 0b10, rs2, rs1, rmode, rd, 0b1001011);
   1452 }
   1453 void Assembler::FSGNJ_H(FPR rd, FPR rs1, FPR rs2) noexcept {
   1454     EmitRType(m_buffer, 0b0010010, rs2, rs1, 0b000, rd, 0b1010011);
   1455 }
   1456 void Assembler::FSGNJN_H(FPR rd, FPR rs1, FPR rs2) noexcept {
   1457     EmitRType(m_buffer, 0b0010010, rs2, rs1, 0b001, rd, 0b1010011);
   1458 }
   1459 void Assembler::FSGNJX_H(FPR rd, FPR rs1, FPR rs2) noexcept {
   1460     EmitRType(m_buffer, 0b0010010, rs2, rs1, 0b010, rd, 0b1010011);
   1461 }
   1462 void Assembler::FSH(FPR rs2, int32_t offset, GPR rs1) noexcept {
   1463     BISCUIT_ASSERT(IsValidSigned12BitImm(offset));
   1464     EmitSType(m_buffer, static_cast<uint32_t>(offset), rs2, rs1, 0b001, 0b0100111);
   1465 }
   1466 void Assembler::FSQRT_H(FPR rd, FPR rs1, RMode rmode) noexcept {
   1467     EmitRType(m_buffer, 0b0101110, f0, rs1, rmode, rd, 0b1010011);
   1468 }
   1469 void Assembler::FSUB_H(FPR rd, FPR rs1, FPR rs2, RMode rmode) noexcept {
   1470     EmitRType(m_buffer, 0b0000110, rs2, rs1, rmode, rd, 0b1010011);
   1471 }
   1472 
   1473 // RV64Zfh Extension Instructions
   1474 
   1475 void Assembler::FCVT_L_H(GPR rd, FPR rs1, RMode rmode) noexcept {
   1476     EmitRType(m_buffer, 0b1100010, f2, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1477 }
   1478 void Assembler::FCVT_LU_H(GPR rd, FPR rs1, RMode rmode) noexcept {
   1479     EmitRType(m_buffer, 0b1100010, f3, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1480 }
   1481 void Assembler::FCVT_H_L(FPR rd, GPR rs1, RMode rmode) noexcept {
   1482     EmitRType(m_buffer, 0b1101010, f2, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1483 }
   1484 void Assembler::FCVT_H_LU(FPR rd, GPR rs1, RMode rmode) noexcept {
   1485     EmitRType(m_buffer, 0b1101010, f3, rs1, static_cast<uint32_t>(rmode), rd, 0b1010011);
   1486 }
   1487 
   1488 // RVB Extension Instructions
   1489 
   1490 void Assembler::ADDUW(GPR rd, GPR rs1, GPR rs2) noexcept {
   1491     EmitRType(m_buffer, 0b0000100, rs2, rs1, 0b000, rd, 0b0111011);
   1492 }
   1493 
   1494 void Assembler::ANDN(GPR rd, GPR rs1, GPR rs2) noexcept {
   1495     EmitRType(m_buffer, 0b0100000, rs2, rs1, 0b111, rd, 0b0110011);
   1496 }
   1497 
   1498 void Assembler::BCLR(GPR rd, GPR rs1, GPR rs2) noexcept {
   1499     EmitRType(m_buffer, 0b0100100, rs2, rs1, 0b001, rd, 0b0110011);
   1500 }
   1501 
   1502 void Assembler::BCLRI(GPR rd, GPR rs, uint32_t bit) noexcept {
   1503     BISCUIT_ASSERT(bit <= 63);
   1504     const auto imm = (0b010010U << 6) | bit;
   1505     EmitIType(m_buffer, imm, rs, 0b001, rd, 0b0010011);
   1506 }
   1507 
   1508 void Assembler::BEXT(GPR rd, GPR rs1, GPR rs2) noexcept {
   1509     EmitRType(m_buffer, 0b0100100, rs2, rs1, 0b101, rd, 0b0110011);
   1510 }
   1511 
   1512 void Assembler::BEXTI(GPR rd, GPR rs, uint32_t bit) noexcept {
   1513     BISCUIT_ASSERT(bit <= 63);
   1514     const auto imm = (0b010010U << 6) | bit;
   1515     EmitIType(m_buffer, imm, rs, 0b101, rd, 0b0010011);
   1516 }
   1517 
   1518 void Assembler::BINV(GPR rd, GPR rs1, GPR rs2) noexcept {
   1519     EmitRType(m_buffer, 0b0110100, rs2, rs1, 0b001, rd, 0b0110011);
   1520 }
   1521 
   1522 void Assembler::BINVI(GPR rd, GPR rs, uint32_t bit) noexcept {
   1523     BISCUIT_ASSERT(bit <= 63);
   1524     const auto imm = (0b011010U << 6) | bit;
   1525     EmitIType(m_buffer, imm, rs, 0b001, rd, 0b0010011);
   1526 }
   1527 
   1528 void Assembler::CLMUL(GPR rd, GPR rs1, GPR rs2) noexcept {
   1529     EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b001, rd, 0b0110011);
   1530 }
   1531 
   1532 void Assembler::CLMULH(GPR rd, GPR rs1, GPR rs2) noexcept {
   1533     EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b011, rd, 0b0110011);
   1534 }
   1535 
   1536 void Assembler::CLMULR(GPR rd, GPR rs1, GPR rs2) noexcept {
   1537     EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b010, rd, 0b0110011);
   1538 }
   1539 
   1540 void Assembler::CLZ(GPR rd, GPR rs) noexcept {
   1541     EmitIType(m_buffer, 0b011000000000, rs, 0b001, rd, 0b0010011);
   1542 }
   1543 
   1544 void Assembler::CLZW(GPR rd, GPR rs) noexcept {
   1545     EmitIType(m_buffer, 0b011000000000, rs, 0b001, rd, 0b0011011);
   1546 }
   1547 
   1548 void Assembler::CPOP(GPR rd, GPR rs) noexcept {
   1549     EmitIType(m_buffer, 0b011000000010, rs, 0b001, rd, 0b0010011);
   1550 }
   1551 
   1552 void Assembler::CPOPW(GPR rd, GPR rs) noexcept {
   1553     EmitIType(m_buffer, 0b011000000010, rs, 0b001, rd, 0b0011011);
   1554 }
   1555 
   1556 void Assembler::CTZ(GPR rd, GPR rs) noexcept {
   1557     EmitIType(m_buffer, 0b011000000001, rs, 0b001, rd, 0b0010011);
   1558 }
   1559 
   1560 void Assembler::CTZW(GPR rd, GPR rs) noexcept {
   1561     EmitIType(m_buffer, 0b011000000001, rs, 0b001, rd, 0b0011011);
   1562 }
   1563 
   1564 void Assembler::MAX(GPR rd, GPR rs1, GPR rs2) noexcept {
   1565     EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b110, rd, 0b0110011);
   1566 }
   1567 
   1568 void Assembler::MAXU(GPR rd, GPR rs1, GPR rs2) noexcept {
   1569     EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b111, rd, 0b0110011);
   1570 }
   1571 
   1572 void Assembler::MIN(GPR rd, GPR rs1, GPR rs2) noexcept {
   1573     EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b100, rd, 0b0110011);
   1574 }
   1575 
   1576 void Assembler::MINU(GPR rd, GPR rs1, GPR rs2) noexcept {
   1577     EmitRType(m_buffer, 0b0000101, rs2, rs1, 0b101, rd, 0b0110011);
   1578 }
   1579 
   1580 void Assembler::ORCB(GPR rd, GPR rs) noexcept {
   1581     EmitIType(m_buffer, 0b001010000111, rs, 0b101, rd, 0b0010011);
   1582 }
   1583 
   1584 void Assembler::ORN(GPR rd, GPR rs1, GPR rs2) noexcept {
   1585     EmitRType(m_buffer, 0b0100000, rs2, rs1, 0b110, rd, 0b0110011);
   1586 }
   1587 
   1588 void Assembler::PACK(GPR rd, GPR rs1, GPR rs2) noexcept {
   1589     EmitRType(m_buffer, 0b0000100, rs2, rs1, 0b100, rd, 0b0110011);
   1590 }
   1591 
   1592 void Assembler::PACKH(GPR rd, GPR rs1, GPR rs2) noexcept {
   1593     EmitRType(m_buffer, 0b0000100, rs2, rs1, 0b111, rd, 0b0110011);
   1594 }
   1595 
   1596 void Assembler::PACKW(GPR rd, GPR rs1, GPR rs2) noexcept {
   1597     EmitRType(m_buffer, 0b0000100, rs2, rs1, 0b100, rd, 0b0111011);
   1598 }
   1599 
   1600 void Assembler::REV8_32(GPR rd, GPR rs) noexcept {
   1601     EmitIType(m_buffer, 0b011010011000, rs, 0b101, rd, 0b0010011);
   1602 }
   1603 
   1604 void Assembler::REV8_64(GPR rd, GPR rs) noexcept {
   1605     EmitIType(m_buffer, 0b011010111000, rs, 0b101, rd, 0b0010011);
   1606 }
   1607 
   1608 void Assembler::REV_B(GPR rd, GPR rs) noexcept {
   1609     EmitIType(m_buffer, 0b011010000111, rs, 0b101, rd, 0b0010011);
   1610 }
   1611 
   1612 void Assembler::ROL(GPR rd, GPR rs1, GPR rs2) noexcept {
   1613     EmitRType(m_buffer, 0b0110000, rs2, rs1, 0b001, rd, 0b0110011);
   1614 }
   1615 
   1616 void Assembler::ROLW(GPR rd, GPR rs1, GPR rs2) noexcept {
   1617     EmitRType(m_buffer, 0b0110000, rs2, rs1, 0b001, rd, 0b0111011);
   1618 }
   1619 
   1620 void Assembler::ROR(GPR rd, GPR rs1, GPR rs2) noexcept {
   1621     EmitRType(m_buffer, 0b0110000, rs2, rs1, 0b101, rd, 0b0110011);
   1622 }
   1623 
   1624 void Assembler::RORI(GPR rd, GPR rs, uint32_t rotate_amount) noexcept {
   1625     BISCUIT_ASSERT(rotate_amount <= 63);
   1626     const auto imm = (0b011000U << 6) | rotate_amount;
   1627     EmitIType(m_buffer, imm, rs, 0b101, rd, 0b0010011);
   1628 }
   1629 
   1630 void Assembler::RORIW(GPR rd, GPR rs, uint32_t rotate_amount) noexcept {
   1631     BISCUIT_ASSERT(rotate_amount <= 63);
   1632     const auto imm = (0b011000U << 6) | rotate_amount;
   1633     EmitIType(m_buffer, imm, rs, 0b101, rd, 0b0011011);
   1634 }
   1635 
   1636 void Assembler::RORW(GPR rd, GPR rs1, GPR rs2) noexcept {
   1637     EmitRType(m_buffer, 0b0110000, rs2, rs1, 0b101, rd, 0b0111011);
   1638 }
   1639 
   1640 void Assembler::SEXTB(GPR rd, GPR rs) noexcept {
   1641     EmitIType(m_buffer, 0b011000000100, rs, 0b001, rd, 0b0010011);
   1642 }
   1643 
   1644 void Assembler::SEXTH(GPR rd, GPR rs) noexcept {
   1645     EmitIType(m_buffer, 0b011000000101, rs, 0b001, rd, 0b0010011);
   1646 }
   1647 
   1648 void Assembler::SH1ADD(GPR rd, GPR rs1, GPR rs2) noexcept {
   1649     EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b010, rd, 0b0110011);
   1650 }
   1651 
   1652 void Assembler::SH1ADDUW(GPR rd, GPR rs1, GPR rs2) noexcept {
   1653     EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b010, rd, 0b0111011);
   1654 }
   1655 
   1656 void Assembler::SH2ADD(GPR rd, GPR rs1, GPR rs2) noexcept {
   1657     EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b100, rd, 0b0110011);
   1658 }
   1659 
   1660 void Assembler::SH2ADDUW(GPR rd, GPR rs1, GPR rs2) noexcept {
   1661     EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b100, rd, 0b0111011);
   1662 }
   1663 
   1664 void Assembler::SH3ADD(GPR rd, GPR rs1, GPR rs2) noexcept {
   1665     EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b110, rd, 0b0110011);
   1666 }
   1667 
   1668 void Assembler::SH3ADDUW(GPR rd, GPR rs1, GPR rs2) noexcept {
   1669     EmitRType(m_buffer, 0b0010000, rs2, rs1, 0b110, rd, 0b0111011);
   1670 }
   1671 
   1672 void Assembler::SLLIUW(GPR rd, GPR rs, uint32_t shift_amount) noexcept {
   1673     BISCUIT_ASSERT(shift_amount <= 63);
   1674     const auto imm = (0b000010U << 6) | shift_amount;
   1675     EmitIType(m_buffer, imm, rs, 0b001, rd, 0b0011011);
   1676 }
   1677 
   1678 void Assembler::UNZIP(GPR rd, GPR rs) noexcept {
   1679     EmitIType(m_buffer, 0b000010011111, rs, 0b101, rd, 0b0010011);
   1680 }
   1681 
   1682 void Assembler::XNOR(GPR rd, GPR rs1, GPR rs2) noexcept {
   1683     EmitRType(m_buffer, 0b0100000, rs2, rs1, 0b100, rd, 0b0110011);
   1684 }
   1685 
   1686 void Assembler::XPERMB(GPR rd, GPR rs1, GPR rs2) noexcept {
   1687     EmitRType(m_buffer, 0b0010100, rs2, rs1, 0b100, rd, 0b0110011);
   1688 }
   1689 
   1690 void Assembler::XPERMN(GPR rd, GPR rs1, GPR rs2) noexcept {
   1691     EmitRType(m_buffer, 0b0010100, rs2, rs1, 0b010, rd, 0b0110011);
   1692 }
   1693 
   1694 void Assembler::ZEXTH_32(GPR rd, GPR rs) noexcept {
   1695     EmitIType(m_buffer, 0b000010000000, rs, 0b100, rd, 0b0110011);
   1696 }
   1697 
   1698 void Assembler::ZEXTH_64(GPR rd, GPR rs) noexcept {
   1699     EmitIType(m_buffer, 0b000010000000, rs, 0b100, rd, 0b0111011);
   1700 }
   1701 
   1702 void Assembler::ZEXTW(GPR rd, GPR rs) noexcept {
   1703     ADDUW(rd, rs, x0);
   1704 }
   1705 
   1706 void Assembler::ZIP(GPR rd, GPR rs) noexcept {
   1707     EmitIType(m_buffer, 0b000010011110, rs, 0b001, rd, 0b0010011);
   1708 }
   1709 
   1710 void Assembler::BSET(GPR rd, GPR rs1, GPR rs2) noexcept {
   1711     EmitRType(m_buffer, 0b0010100, rs2, rs1, 0b001, rd, 0b0110011);
   1712 }
   1713 
   1714 void Assembler::BSETI(GPR rd, GPR rs, uint32_t bit) noexcept {
   1715     BISCUIT_ASSERT(bit <= 63);
   1716     const auto imm = (0b001010U << 6) | bit;
   1717     EmitIType(m_buffer, imm, rs, 0b001, rd, 0b0110011);
   1718 }
   1719 
   1720 // RVC Extension Instructions
   1721 
   1722 void Assembler::C_ADD(GPR rd, GPR rs) noexcept {
   1723     BISCUIT_ASSERT(rs != x0);
   1724     m_buffer.Emit16(0x9002 | (rd.Index() << 7) | (rs.Index() << 2));
   1725 }
   1726 
   1727 void Assembler::C_ADDI(GPR rd, int32_t imm) noexcept {
   1728     BISCUIT_ASSERT(imm != 0);
   1729     BISCUIT_ASSERT(IsValidSigned6BitImm(imm));
   1730     EmitCompressedImmediate(m_buffer, 0b000, static_cast<uint32_t>(imm), rd, 0b01);
   1731 }
   1732 
   1733 void Assembler::C_ADDIW(GPR rd, int32_t imm) noexcept {
   1734     BISCUIT_ASSERT(IsValidSigned6BitImm(imm));
   1735     EmitCompressedImmediate(m_buffer, 0b001, static_cast<uint32_t>(imm), rd, 0b01);
   1736 }
   1737 
   1738 void Assembler::C_ADDI4SPN(GPR rd, uint32_t imm) noexcept {
   1739     BISCUIT_ASSERT(imm != 0);
   1740     BISCUIT_ASSERT(imm <= 1020);
   1741     BISCUIT_ASSERT(imm % 4 == 0);
   1742 
   1743     // clang-format off
   1744     const auto new_imm = ((imm & 0x030) << 2) |
   1745                          ((imm & 0x3C0) >> 4) |
   1746                          ((imm & 0x004) >> 1) |
   1747                          ((imm & 0x008) >> 3);
   1748     // clang-format on
   1749 
   1750     EmitCompressedWideImmediate(m_buffer, 0b000, new_imm, rd, 0b00);
   1751 }
   1752 
   1753 void Assembler::C_ADDW(GPR rd, GPR rs) noexcept {
   1754     EmitCompressedRegArith(m_buffer, 0b100111, rd, 0b01, rs, 0b01);
   1755 }
   1756 
   1757 void Assembler::C_ADDI16SP(int32_t imm) noexcept {
   1758     BISCUIT_ASSERT(imm != 0);
   1759     BISCUIT_ASSERT(imm >= -512 && imm <= 496);
   1760     BISCUIT_ASSERT(imm % 16 == 0);
   1761 
   1762     // clang-format off
   1763     const auto uimm = static_cast<uint32_t>(imm);
   1764     const auto new_imm = ((uimm & 0x020) >> 3) |
   1765                          ((uimm & 0x180) >> 4) |
   1766                          ((uimm & 0x040) >> 1) |
   1767                          ((uimm & 0x010) << 2) |
   1768                          ((uimm & 0x200) << 3);
   1769     // clang-format on
   1770 
   1771     m_buffer.Emit16(0x6000U | new_imm | (x2.Index() << 7) | 0b01U);
   1772 }
   1773 
   1774 void Assembler::C_AND(GPR rd, GPR rs) noexcept {
   1775     EmitCompressedRegArith(m_buffer, 0b100011, rd, 0b11, rs, 0b01);
   1776 }
   1777 
   1778 void Assembler::C_ANDI(GPR rd, uint32_t imm) noexcept {
   1779     BISCUIT_ASSERT(IsValid3BitCompressedReg(rd));
   1780 
   1781     constexpr auto base = 0x8801U;
   1782     const auto shift_enc = ((imm & 0b11111) << 2) | ((imm & 0b100000) << 7);
   1783     const auto reg = CompressedRegTo3BitEncoding(rd);
   1784 
   1785     m_buffer.Emit16(base | shift_enc | (reg << 7));
   1786 }
   1787 
   1788 void Assembler::C_BEQZ(GPR rs, int32_t offset) noexcept {
   1789     EmitCompressedBranch(m_buffer, 0b110, offset, rs, 0b01);
   1790 }
   1791 
   1792 void Assembler::C_BEQZ(GPR rs, Label* label) noexcept {
   1793     const auto address = LinkAndGetOffset(label);
   1794     C_BEQZ(rs, static_cast<int32_t>(address));
   1795 }
   1796 
   1797 void Assembler::C_BNEZ(GPR rs, int32_t offset) noexcept {
   1798     EmitCompressedBranch(m_buffer, 0b111, offset, rs, 0b01);
   1799 }
   1800 
   1801 void Assembler::C_BNEZ(GPR rs, Label* label) noexcept {
   1802     const auto address = LinkAndGetOffset(label);
   1803     C_BNEZ(rs, static_cast<int32_t>(address));
   1804 }
   1805 
   1806 void Assembler::C_EBREAK() noexcept {
   1807     m_buffer.Emit16(0x9002);
   1808 }
   1809 
   1810 void Assembler::C_FLD(FPR rd, uint32_t imm, GPR rs) noexcept {
   1811     BISCUIT_ASSERT(imm <= 248);
   1812     BISCUIT_ASSERT(imm % 8 == 0);
   1813 
   1814     EmitCompressedLoad(m_buffer, 0b001, imm, rs, rd, 0b00);
   1815 }
   1816 
   1817 void Assembler::C_FLDSP(FPR rd, uint32_t imm) noexcept {
   1818     BISCUIT_ASSERT(imm <= 504);
   1819     BISCUIT_ASSERT(imm % 8 == 0);
   1820 
   1821     // clang-format off
   1822     const auto new_imm = ((imm & 0x018) << 2) |
   1823                          ((imm & 0x1C0) >> 4) |
   1824                          ((imm & 0x020) << 7);
   1825     // clang-format on
   1826 
   1827     m_buffer.Emit16(0x2002U | (rd.Index() << 7) | new_imm);
   1828 }
   1829 
   1830 void Assembler::C_FLW(FPR rd, uint32_t imm, GPR rs) noexcept {
   1831     BISCUIT_ASSERT(imm <= 124);
   1832     BISCUIT_ASSERT(imm % 4 == 0);
   1833 
   1834     imm &= 0x7C;
   1835     const auto new_imm = ((imm & 0b0100) << 5) | (imm & 0x78);
   1836     EmitCompressedLoad(m_buffer, 0b011, new_imm, rs, rd, 0b00);
   1837 }
   1838 
   1839 void Assembler::C_FLWSP(FPR rd, uint32_t imm) noexcept {
   1840     BISCUIT_ASSERT(imm <= 252);
   1841     BISCUIT_ASSERT(imm % 4 == 0);
   1842 
   1843     // clang-format off
   1844     const auto new_imm = ((imm & 0x020) << 7) |
   1845                          ((imm & 0x0C0) >> 4) |
   1846                          ((imm & 0x01C) << 2);
   1847     // clang-format on
   1848 
   1849     m_buffer.Emit16(0x6002U | (rd.Index() << 7) | new_imm);
   1850 }
   1851 
   1852 void Assembler::C_FSD(FPR rs2, uint32_t imm, GPR rs1) noexcept {
   1853     BISCUIT_ASSERT(imm <= 248);
   1854     BISCUIT_ASSERT(imm % 8 == 0);
   1855 
   1856     EmitCompressedStore(m_buffer, 0b101, imm, rs1, rs2, 0b00);
   1857 }
   1858 
   1859 void Assembler::C_FSDSP(FPR rs, uint32_t imm) noexcept {
   1860     BISCUIT_ASSERT(imm <= 504);
   1861     BISCUIT_ASSERT(imm % 8 == 0);
   1862 
   1863     // clang-format off
   1864     const auto new_imm = ((imm & 0x038) << 7) |
   1865                          ((imm & 0x1C0) << 1);
   1866     // clang-format on
   1867 
   1868     m_buffer.Emit16(0xA002U | (rs.Index() << 2) | new_imm);
   1869 }
   1870 
   1871 void Assembler::C_J(Label* label) noexcept {
   1872     const auto address = LinkAndGetOffset(label);
   1873     C_J(static_cast<int32_t>(address));
   1874 }
   1875 
   1876 void Assembler::C_J(int32_t offset) noexcept {
   1877     EmitCompressedJump(m_buffer, 0b101, offset, 0b01);
   1878 }
   1879 
   1880 void Assembler::C_JAL(Label* label) noexcept {
   1881     const auto address = LinkAndGetOffset(label);
   1882     C_JAL(static_cast<int32_t>(address));
   1883 }
   1884 
   1885 void Assembler::C_JAL(int32_t offset) noexcept {
   1886     EmitCompressedJump(m_buffer, 0b001, offset, 0b01);
   1887 }
   1888 
   1889 void Assembler::C_FSW(FPR rs2, uint32_t imm, GPR rs1) noexcept {
   1890     imm &= 0x7C;
   1891     const auto new_imm = ((imm & 0b0100) << 5) | (imm & 0x78);
   1892     EmitCompressedStore(m_buffer, 0b111, new_imm, rs1, rs2, 0b00);
   1893 }
   1894 
   1895 void Assembler::C_FSWSP(FPR rs, uint32_t imm) noexcept {
   1896     BISCUIT_ASSERT(imm <= 252);
   1897     BISCUIT_ASSERT(imm % 4 == 0);
   1898 
   1899     // clang-format off
   1900     const auto new_imm = ((imm & 0x0C0) << 1) |
   1901                          ((imm & 0x03C) << 7);
   1902     // clang-format on
   1903 
   1904     m_buffer.Emit16(0xE002U | (rs.Index() << 2) | new_imm);
   1905 }
   1906 
   1907 void Assembler::C_JALR(GPR rs) noexcept {
   1908     BISCUIT_ASSERT(rs != x0);
   1909     m_buffer.Emit16(0x9002 | (rs.Index() << 7));
   1910 }
   1911 
   1912 void Assembler::C_JR(GPR rs) noexcept {
   1913     BISCUIT_ASSERT(rs != x0);
   1914     m_buffer.Emit16(0x8002 | (rs.Index() << 7));
   1915 }
   1916 
   1917 void Assembler::C_LD(GPR rd, uint32_t imm, GPR rs) noexcept {
   1918     BISCUIT_ASSERT(imm <= 248);
   1919     BISCUIT_ASSERT(imm % 8 == 0);
   1920 
   1921     EmitCompressedLoad(m_buffer, 0b011, imm, rs, rd, 0b00);
   1922 }
   1923 
   1924 void Assembler::C_LDSP(GPR rd, uint32_t imm) noexcept {
   1925     BISCUIT_ASSERT(rd != x0);
   1926     BISCUIT_ASSERT(imm <= 504);
   1927     BISCUIT_ASSERT(imm % 8 == 0);
   1928 
   1929     // clang-format off
   1930     const auto new_imm = ((imm & 0x018) << 2) |
   1931                          ((imm & 0x1C0) >> 4) |
   1932                          ((imm & 0x020) << 7);
   1933     // clang-format on
   1934 
   1935     m_buffer.Emit16(0x6002U | (rd.Index() << 7) | new_imm);
   1936 }
   1937 
   1938 void Assembler::C_LI(GPR rd, int32_t imm) noexcept {
   1939     BISCUIT_ASSERT(IsValidSigned6BitImm(imm));
   1940     EmitCompressedImmediate(m_buffer, 0b010, static_cast<uint32_t>(imm), rd, 0b01);
   1941 }
   1942 
   1943 void Assembler::C_LQ(GPR rd, uint32_t imm, GPR rs) noexcept {
   1944     BISCUIT_ASSERT(imm <= 496);
   1945     BISCUIT_ASSERT(imm % 16 == 0);
   1946 
   1947     imm &= 0x1F0;
   1948     const auto new_imm = ((imm & 0x100) >> 5) | (imm & 0xF0);
   1949     EmitCompressedLoad(m_buffer, 0b001, new_imm, rs, rd, 0b00);
   1950 }
   1951 
   1952 void Assembler::C_LQSP(GPR rd, uint32_t imm) noexcept {
   1953     BISCUIT_ASSERT(rd != x0);
   1954     BISCUIT_ASSERT(imm <= 1008);
   1955     BISCUIT_ASSERT(imm % 16 == 0);
   1956 
   1957     // clang-format off
   1958     const auto new_imm = ((imm & 0x020) << 7) |
   1959                          ((imm & 0x010) << 2) |
   1960                          ((imm & 0x3C0) >> 4);
   1961     // clang-format on
   1962 
   1963     m_buffer.Emit16(0x2002U | (rd.Index() << 7) | new_imm);
   1964 }
   1965 
   1966 void Assembler::C_LUI(GPR rd, uint32_t imm) noexcept {
   1967     BISCUIT_ASSERT(imm != 0);
   1968     BISCUIT_ASSERT(rd != x0 && rd != x2);
   1969 
   1970     const auto new_imm = (imm & 0x3F000) >> 12;
   1971     EmitCompressedImmediate(m_buffer, 0b011, new_imm, rd, 0b01);
   1972 }
   1973 
   1974 void Assembler::C_LW(GPR rd, uint32_t imm, GPR rs) noexcept {
   1975     BISCUIT_ASSERT(imm <= 124);
   1976     BISCUIT_ASSERT(imm % 4 == 0);
   1977 
   1978     imm &= 0x7C;
   1979     const auto new_imm = ((imm & 0b0100) << 5) | (imm & 0x78);
   1980     EmitCompressedLoad(m_buffer, 0b010, new_imm, rs, rd, 0b00);
   1981 }
   1982 
   1983 void Assembler::C_LWSP(GPR rd, uint32_t imm) noexcept {
   1984     BISCUIT_ASSERT(rd != x0);
   1985     BISCUIT_ASSERT(imm <= 252);
   1986     BISCUIT_ASSERT(imm % 4 == 0);
   1987 
   1988     // clang-format off
   1989     const auto new_imm = ((imm & 0x020) << 7) |
   1990                          ((imm & 0x0C0) >> 4) |
   1991                          ((imm & 0x01C) << 2);
   1992     // clang-format on
   1993 
   1994     m_buffer.Emit16(0x4002U | (rd.Index() << 7) | new_imm);
   1995 }
   1996 
   1997 void Assembler::C_MV(GPR rd, GPR rs) noexcept {
   1998     BISCUIT_ASSERT(rd != x0);
   1999     BISCUIT_ASSERT(rs != x0);
   2000     m_buffer.Emit16(0x8002 | (rd.Index() << 7) | (rs.Index() << 2));
   2001 }
   2002 
   2003 void Assembler::C_NOP() noexcept {
   2004     m_buffer.Emit16(1);
   2005 }
   2006 
   2007 void Assembler::C_OR(GPR rd, GPR rs) noexcept {
   2008     EmitCompressedRegArith(m_buffer, 0b100011, rd, 0b10, rs, 0b01);
   2009 }
   2010 
   2011 void Assembler::C_SD(GPR rs2, uint32_t imm, GPR rs1) noexcept {
   2012     BISCUIT_ASSERT(imm <= 248);
   2013     BISCUIT_ASSERT(imm % 8 == 0);
   2014 
   2015     EmitCompressedLoad(m_buffer, 0b111, imm, rs1, rs2, 0b00);
   2016 }
   2017 
   2018 void Assembler::C_SDSP(GPR rs, uint32_t imm) noexcept {
   2019     BISCUIT_ASSERT(imm <= 504);
   2020     BISCUIT_ASSERT(imm % 8 == 0);
   2021 
   2022     // clang-format off
   2023     const auto new_imm = ((imm & 0x038) << 7) |
   2024                          ((imm & 0x1C0) << 1);
   2025     // clang-format on
   2026 
   2027     m_buffer.Emit16(0xE002U | (rs.Index() << 2) | new_imm);
   2028 }
   2029 
   2030 void Assembler::C_SLLI(GPR rd, uint32_t shift) noexcept {
   2031     BISCUIT_ASSERT(rd != x0);
   2032     BISCUIT_ASSERT(IsValidCompressedShiftAmount(shift));
   2033 
   2034     // RV128C encodes a 64-bit shift with an encoding of 0.
   2035     if (shift == 64) {
   2036         shift = 0;
   2037     }
   2038 
   2039     const auto shift_enc = ((shift & 0b11111) << 2) | ((shift & 0b100000) << 7);
   2040     m_buffer.Emit16(0x0002U | shift_enc | (rd.Index() << 7));
   2041 }
   2042 
   2043 void Assembler::C_SQ(GPR rs2, uint32_t imm, GPR rs1) noexcept {
   2044     BISCUIT_ASSERT(imm <= 496);
   2045     BISCUIT_ASSERT(imm % 16 == 0);
   2046 
   2047     imm &= 0x1F0;
   2048     const auto new_imm = ((imm & 0x100) >> 5) | (imm & 0xF0);
   2049     EmitCompressedStore(m_buffer, 0b101, new_imm, rs1, rs2, 0b00);
   2050 }
   2051 
   2052 void Assembler::C_SQSP(GPR rs, uint32_t imm) noexcept {
   2053     BISCUIT_ASSERT(imm <= 1008);
   2054     BISCUIT_ASSERT(imm % 16 == 0);
   2055 
   2056     // clang-format off
   2057     const auto new_imm = ((imm & 0x3C0) << 1) |
   2058                          ((imm & 0x030) << 7);
   2059     // clang-format on
   2060 
   2061     m_buffer.Emit16(0xA002U | (rs.Index() << 2) | new_imm);
   2062 }
   2063 
   2064 void Assembler::C_SRAI(GPR rd, uint32_t shift) noexcept {
   2065     BISCUIT_ASSERT(IsValid3BitCompressedReg(rd));
   2066     BISCUIT_ASSERT(IsValidCompressedShiftAmount(shift));
   2067 
   2068     // RV128C encodes a 64-bit shift with an encoding of 0.
   2069     if (shift == 64) {
   2070         shift = 0;
   2071     }
   2072 
   2073     constexpr auto base = 0x8401U;
   2074     const auto shift_enc = ((shift & 0b11111) << 2) | ((shift & 0b100000) << 7);
   2075     const auto reg = CompressedRegTo3BitEncoding(rd);
   2076 
   2077     m_buffer.Emit16(base | shift_enc | (reg << 7));
   2078 }
   2079 
   2080 void Assembler::C_SRLI(GPR rd, uint32_t shift) noexcept {
   2081     BISCUIT_ASSERT(IsValid3BitCompressedReg(rd));
   2082     BISCUIT_ASSERT(IsValidCompressedShiftAmount(shift));
   2083 
   2084     // RV128C encodes a 64-bit shift with an encoding of 0.
   2085     if (shift == 64) {
   2086         shift = 0;
   2087     }
   2088 
   2089     constexpr auto base = 0x8001U;
   2090     const auto shift_enc = ((shift & 0b11111) << 2) | ((shift & 0b100000) << 7);
   2091     const auto reg = CompressedRegTo3BitEncoding(rd);
   2092 
   2093     m_buffer.Emit16(base | shift_enc | (reg << 7));
   2094 }
   2095 
   2096 void Assembler::C_SUB(GPR rd, GPR rs) noexcept {
   2097     EmitCompressedRegArith(m_buffer, 0b100011, rd, 0b00, rs, 0b01);
   2098 }
   2099 
   2100 void Assembler::C_SUBW(GPR rd, GPR rs) noexcept {
   2101     EmitCompressedRegArith(m_buffer, 0b100111, rd, 0b00, rs, 0b01);
   2102 }
   2103 
   2104 void Assembler::C_SW(GPR rs2, uint32_t imm, GPR rs1) noexcept {
   2105     BISCUIT_ASSERT(imm <= 124);
   2106     BISCUIT_ASSERT(imm % 4 == 0);
   2107 
   2108     imm &= 0x7C;
   2109     const auto new_imm = ((imm & 0b0100) << 5) | (imm & 0x78);
   2110     EmitCompressedStore(m_buffer, 0b110, new_imm, rs1, rs2, 0b00);
   2111 }
   2112 
   2113 void Assembler::C_SWSP(GPR rs, uint32_t imm) noexcept {
   2114     BISCUIT_ASSERT(imm <= 252);
   2115     BISCUIT_ASSERT(imm % 4 == 0);
   2116 
   2117     // clang-format off
   2118     const auto new_imm = ((imm & 0x0C0) << 1) |
   2119                          ((imm & 0x03C) << 7);
   2120     // clang-format on
   2121 
   2122     m_buffer.Emit16(0xC002U | (rs.Index() << 2) | new_imm);
   2123 }
   2124 
   2125 void Assembler::C_UNDEF() noexcept {
   2126     m_buffer.Emit16(0);
   2127 }
   2128 
   2129 void Assembler::C_XOR(GPR rd, GPR rs) noexcept {
   2130     EmitCompressedRegArith(m_buffer, 0b100011, rd, 0b01, rs, 0b01);
   2131 }
   2132 
   2133 // Cache Management Operation Extension Instructions
   2134 
   2135 void Assembler::CBO_CLEAN(GPR rs) noexcept {
   2136     EmitRType(m_buffer, 0b0000000, x1, rs, 0b010, x0, 0b0001111);
   2137 }
   2138 
   2139 void Assembler::CBO_FLUSH(GPR rs) noexcept {
   2140     EmitRType(m_buffer, 0b0000000, x2, rs, 0b010, x0, 0b0001111);
   2141 }
   2142 
   2143 void Assembler::CBO_INVAL(GPR rs) noexcept {
   2144     EmitRType(m_buffer, 0b0000000, x0, rs, 0b010, x0, 0b0001111);
   2145 }
   2146 
   2147 void Assembler::CBO_ZERO(GPR rs) noexcept {
   2148     EmitRType(m_buffer, 0b0000000, x4, rs, 0b010, x0, 0b0001111);
   2149 }
   2150 
   2151 void Assembler::PREFETCH_I(GPR rs, int32_t offset) noexcept {
   2152     // Offset must be able to fit in a 12-bit signed immediate and be
   2153     // cleanly divisible by 32 since the bottom 5 bits are encoded as zero.
   2154     BISCUIT_ASSERT(IsValidSigned12BitImm(offset));
   2155     BISCUIT_ASSERT(offset % 32 == 0);
   2156     EmitIType(m_buffer, static_cast<uint32_t>(offset), rs, 0b110, x0, 0b0010011);
   2157 }
   2158 
   2159 void Assembler::PREFETCH_R(GPR rs, int32_t offset) noexcept {
   2160     // Offset must be able to fit in a 12-bit signed immediate and be
   2161     // cleanly divisible by 32 since the bottom 5 bits are encoded as zero.
   2162     BISCUIT_ASSERT(IsValidSigned12BitImm(offset));
   2163     BISCUIT_ASSERT(offset % 32 == 0);
   2164     EmitIType(m_buffer, static_cast<uint32_t>(offset) | 0b01, rs, 0b110, x0, 0b0010011);
   2165 }
   2166 
   2167 void Assembler::PREFETCH_W(GPR rs, int32_t offset) noexcept {
   2168     // Offset must be able to fit in a 12-bit signed immediate and be
   2169     // cleanly divisible by 32 since the bottom 5 bits are encoded as zero.
   2170     BISCUIT_ASSERT(IsValidSigned12BitImm(offset));
   2171     BISCUIT_ASSERT(offset % 32 == 0);
   2172     EmitIType(m_buffer, static_cast<uint32_t>(offset) | 0b11, rs, 0b110, x0, 0b0010011);
   2173 }
   2174 
   2175 // Privileged Instructions
   2176 
   2177 void Assembler::HFENCE_GVMA(GPR rs1, GPR rs2) noexcept {
   2178     EmitRType(m_buffer, 0b0110001, rs2, rs1, 0b000, x0, 0b1110011);
   2179 }
   2180 
   2181 void Assembler::HFENCE_VVMA(GPR rs1, GPR rs2) noexcept {
   2182     EmitRType(m_buffer, 0b0010001, rs2, rs1, 0b000, x0, 0b1110011);
   2183 }
   2184 
   2185 void Assembler::HINVAL_GVMA(GPR rs1, GPR rs2) noexcept {
   2186     EmitRType(m_buffer, 0b0110011, rs2, rs1, 0b000, x0, 0b1110011);
   2187 }
   2188 
   2189 void Assembler::HINVAL_VVMA(GPR rs1, GPR rs2) noexcept {
   2190     EmitRType(m_buffer, 0b0010011, rs2, rs1, 0b000, x0, 0b1110011);
   2191 }
   2192 
   2193 void Assembler::HLV_B(GPR rd, GPR rs) noexcept {
   2194     EmitRType(m_buffer, 0b0110000, x0, rs, 0b100, rd, 0b1110011);
   2195 }
   2196 
   2197 void Assembler::HLV_BU(GPR rd, GPR rs) noexcept {
   2198     EmitRType(m_buffer, 0b0110000, x1, rs, 0b100, rd, 0b1110011);
   2199 }
   2200 
   2201 void Assembler::HLV_D(GPR rd, GPR rs) noexcept {
   2202     EmitRType(m_buffer, 0b0110110, x0, rs, 0b100, rd, 0b1110011);
   2203 }
   2204 
   2205 void Assembler::HLV_H(GPR rd, GPR rs) noexcept {
   2206     EmitRType(m_buffer, 0b0110010, x0, rs, 0b100, rd, 0b1110011);
   2207 }
   2208 
   2209 void Assembler::HLV_HU(GPR rd, GPR rs) noexcept {
   2210     EmitRType(m_buffer, 0b0110010, x1, rs, 0b100, rd, 0b1110011);
   2211 }
   2212 
   2213 void Assembler::HLV_W(GPR rd, GPR rs) noexcept {
   2214     EmitRType(m_buffer, 0b0110100, x0, rs, 0b100, rd, 0b1110011);
   2215 }
   2216 
   2217 void Assembler::HLV_WU(GPR rd, GPR rs) noexcept {
   2218     EmitRType(m_buffer, 0b0110100, x1, rs, 0b100, rd, 0b1110011);
   2219 }
   2220 
   2221 void Assembler::HLVX_HU(GPR rd, GPR rs) noexcept {
   2222     EmitRType(m_buffer, 0b0110010, x3, rs, 0b100, rd, 0b1110011);
   2223 }
   2224 
   2225 void Assembler::HLVX_WU(GPR rd, GPR rs) noexcept {
   2226     EmitRType(m_buffer, 0b0110100, x3, rs, 0b100, rd, 0b1110011);
   2227 }
   2228 
   2229 void Assembler::HSV_B(GPR rs2, GPR rs1) noexcept {
   2230     EmitRType(m_buffer, 0b0110001, rs2, rs1, 0b100, x0, 0b1110011);
   2231 }
   2232 
   2233 void Assembler::HSV_D(GPR rs2, GPR rs1) noexcept {
   2234     EmitRType(m_buffer, 0b0110111, rs2, rs1, 0b100, x0, 0b1110011);
   2235 }
   2236 
   2237 void Assembler::HSV_H(GPR rs2, GPR rs1) noexcept {
   2238     EmitRType(m_buffer, 0b0110011, rs2, rs1, 0b100, x0, 0b1110011);
   2239 }
   2240 
   2241 void Assembler::HSV_W(GPR rs2, GPR rs1) noexcept {
   2242     EmitRType(m_buffer, 0b0110101, rs2, rs1, 0b100, x0, 0b1110011);
   2243 }
   2244 
   2245 void Assembler::MRET() noexcept {
   2246     m_buffer.Emit32(0x30200073);
   2247 }
   2248 
   2249 void Assembler::SFENCE_INVAL_IR() noexcept {
   2250     m_buffer.Emit32(0x18100073U);
   2251 }
   2252 
   2253 void Assembler::SFENCE_VMA(GPR rs1, GPR rs2) noexcept {
   2254     EmitRType(m_buffer, 0b0001001, rs2, rs1, 0b000, x0, 0b1110011);
   2255 }
   2256 
   2257 void Assembler::SFENCE_W_INVAL() noexcept {
   2258     m_buffer.Emit32(0x18000073U);
   2259 }
   2260 
   2261 void Assembler::SINVAL_VMA(GPR rs1, GPR rs2) noexcept {
   2262     EmitRType(m_buffer, 0b0001011, rs2, rs1, 0b000, x0, 0b1110011);
   2263 }
   2264 
   2265 void Assembler::SRET() noexcept {
   2266     m_buffer.Emit32(0x10200073);
   2267 }
   2268 
   2269 void Assembler::URET() noexcept {
   2270     m_buffer.Emit32(0x00200073);
   2271 }
   2272 
   2273 void Assembler::WFI() noexcept {
   2274     m_buffer.Emit32(0x10500073);
   2275 }
   2276 
   2277 void Assembler::BindToOffset(Label* label, Label::LocationOffset offset) {
   2278     BISCUIT_ASSERT(label != nullptr);
   2279     BISCUIT_ASSERT(offset >= 0 && offset <= m_buffer.GetCursorOffset());
   2280 
   2281     label->Bind(offset);
   2282     ResolveLabelOffsets(label);
   2283     label->ClearOffsets();
   2284 }
   2285 
   2286 ptrdiff_t Assembler::LinkAndGetOffset(Label* label) {
   2287     BISCUIT_ASSERT(label != nullptr);
   2288 
   2289     // If we have a bound label, then it's straightforward to calculate
   2290     // the offsets.
   2291     if (label->IsBound()) {
   2292         const auto cursor_address = m_buffer.GetCursorAddress();
   2293         const auto label_offset = m_buffer.GetOffsetAddress(*label->GetLocation());
   2294         return static_cast<ptrdiff_t>(label_offset - cursor_address);
   2295     }
   2296 
   2297     // If we don't have a bound location, we return an offset of zero.
   2298     // While the emitter will emit a bogus branch instruction initially,
   2299     // the offset will be patched over once the label has been properly
   2300     // bound to a location.
   2301     label->AddOffset(m_buffer.GetCursorOffset());
   2302     return 0;
   2303 }
   2304 
   2305 void Assembler::ResolveLabelOffsets(Label* label) {
   2306     // Conditional branch instructions make use of the B-type immediate encoding for offsets.
   2307     const auto is_b_type = [](uint32_t instruction) {
   2308         return (instruction & 0x7F) == 0b1100011;
   2309     };
   2310     // JAL makes use of the J-type immediate encoding for offsets.
   2311     const auto is_j_type = [](uint32_t instruction) {
   2312         return (instruction & 0x7F) == 0b1101111;
   2313     };
   2314     // C.BEQZ and C.BNEZ make use of this encoding type.
   2315     const auto is_cb_type = [](uint32_t instruction) {
   2316         const auto op = instruction & 0b11;
   2317         const auto funct3 = instruction & 0xE000;
   2318         return op == 0b01 && funct3 >= 0xC000;
   2319     };
   2320     // C.JAL and C.J make use of this encoding type.
   2321     const auto is_cj_type = [](uint32_t instruction) {
   2322         const auto op = instruction & 0b11;
   2323         const auto funct3 = instruction & 0xE000;
   2324         return op == 0b01 && (funct3 == 0x2000 || funct3 == 0xA000);
   2325     };
   2326     // If we know an instruction is a compressed branch, then it's a 16-bit instruction
   2327     // Otherwise it's a regular-sized 32-bit instruction.
   2328     const auto determine_inst_size = [&](uint32_t instruction) -> size_t {
   2329         if (is_cj_type(instruction) || is_cb_type(instruction)) {
   2330             return 2;
   2331         } else {
   2332             return 4;
   2333         }
   2334     };
   2335 
   2336     const auto label_location = *label->GetLocation();
   2337 
   2338     for (const auto offset : label->m_offsets) {
   2339         const auto address = m_buffer.GetOffsetAddress(offset);
   2340         auto* const ptr = reinterpret_cast<uint8_t*>(address);
   2341         const auto inst_size = determine_inst_size(uint32_t{*ptr} | (uint32_t{*(ptr + 1)} << 8));
   2342 
   2343         uint32_t instruction = 0;
   2344         std::memcpy(&instruction, ptr, inst_size);
   2345 
   2346         // Given all branch instructions we need to patch have 0 encoded as
   2347         // their branch offset, we don't need to worry about any masking work.
   2348         //
   2349         // It's enough to verify that the immediate is going to be valid
   2350         // and then OR it into the instruction.
   2351 
   2352         const auto encoded_offset = label_location - offset;
   2353 
   2354         if (inst_size == sizeof(uint32_t)) {
   2355             if (is_b_type(instruction)) {
   2356                 BISCUIT_ASSERT(IsValidBTypeImm(encoded_offset));
   2357                 instruction |= TransformToBTypeImm(static_cast<uint32_t>(encoded_offset));
   2358             } else if (is_j_type(instruction)) {
   2359                 BISCUIT_ASSERT(IsValidJTypeImm(encoded_offset));
   2360                 instruction |= TransformToJTypeImm(static_cast<uint32_t>(encoded_offset));
   2361             }
   2362         } else {
   2363             if (is_cb_type(instruction)) {
   2364                 BISCUIT_ASSERT(IsValidCBTypeImm(encoded_offset));
   2365                 instruction |= TransformToCBTypeImm(static_cast<uint32_t>(encoded_offset));
   2366             } else if (is_cj_type(instruction)) {
   2367                 BISCUIT_ASSERT(IsValidCJTypeImm(encoded_offset));
   2368                 instruction |= TransformToCJTypeImm(static_cast<uint32_t>(encoded_offset));
   2369             }
   2370         }
   2371 
   2372         std::memcpy(ptr, &instruction, inst_size);
   2373     }
   2374 }
   2375 
   2376 } // namespace biscuit