fastjmp.cpp (3623B)
1 // SPDX-FileCopyrightText: 2021-2024 Connor McLaughlin <stenzek@gmail.com> 2 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) 3 4 // Win32 uses Fastjmp.asm, because MSVC doesn't support inline asm. 5 #if !defined(_WIN32) || defined(_M_ARM64) 6 7 #include "fastjmp.h" 8 9 #if defined(__APPLE__) 10 #define PREFIX "_" 11 #else 12 #define PREFIX "" 13 #endif 14 15 #if defined(__x86_64__) 16 17 asm("\t.global " PREFIX "fastjmp_set\n" 18 "\t.global " PREFIX "fastjmp_jmp\n" 19 "\t.text\n" 20 "\t" PREFIX "fastjmp_set:" 21 R"( 22 movq 0(%rsp), %rax 23 movq %rsp, %rdx # fixup stack pointer, so it doesn't include the call to fastjmp_set 24 addq $8, %rdx 25 movq %rax, 0(%rdi) # actually rip 26 movq %rbx, 8(%rdi) 27 movq %rdx, 16(%rdi) # actually rsp 28 movq %rbp, 24(%rdi) 29 movq %r12, 32(%rdi) 30 movq %r13, 40(%rdi) 31 movq %r14, 48(%rdi) 32 movq %r15, 56(%rdi) 33 xorl %eax, %eax 34 ret 35 )" 36 "\t" PREFIX "fastjmp_jmp:" 37 R"( 38 movl %esi, %eax 39 movq 0(%rdi), %rdx # actually rip 40 movq 8(%rdi), %rbx 41 movq 16(%rdi), %rsp # actually rsp 42 movq 24(%rdi), %rbp 43 movq 32(%rdi), %r12 44 movq 40(%rdi), %r13 45 movq 48(%rdi), %r14 46 movq 56(%rdi), %r15 47 jmp *%rdx 48 )"); 49 50 #elif defined(__aarch64__) 51 52 asm( 53 "\t.global " PREFIX "fastjmp_set\n" 54 "\t.global " PREFIX "fastjmp_jmp\n" 55 "\t.text\n" 56 "\t.align 16\n" 57 "\t" PREFIX "fastjmp_set:" R"( 58 mov x16, sp 59 stp x16, x30, [x0] 60 stp x19, x20, [x0, #16] 61 stp x21, x22, [x0, #32] 62 stp x23, x24, [x0, #48] 63 stp x25, x26, [x0, #64] 64 stp x27, x28, [x0, #80] 65 str x29, [x0, #96] 66 stp d8, d9, [x0, #112] 67 stp d10, d11, [x0, #128] 68 stp d12, d13, [x0, #144] 69 stp d14, d15, [x0, #160] 70 mov w0, wzr 71 br x30 72 )" 73 ".align 16\n" 74 "\t" PREFIX "fastjmp_jmp:" R"( 75 ldp x16, x30, [x0] 76 mov sp, x16 77 ldp x19, x20, [x0, #16] 78 ldp x21, x22, [x0, #32] 79 ldp x23, x24, [x0, #48] 80 ldp x25, x26, [x0, #64] 81 ldp x27, x28, [x0, #80] 82 ldr x29, [x0, #96] 83 ldp d8, d9, [x0, #112] 84 ldp d10, d11, [x0, #128] 85 ldp d12, d13, [x0, #144] 86 ldp d14, d15, [x0, #160] 87 mov w0, w1 88 br x30 89 )"); 90 91 #elif defined(__arm__) 92 93 asm( 94 "\t.global " PREFIX "fastjmp_set\n" 95 "\t.global " PREFIX "fastjmp_jmp\n" 96 "\t.text\n" 97 "\t" PREFIX "fastjmp_set:" R"( 98 vstmia r0!, {d8-d15} 99 stmia r0!, {r4-r14} 100 fmrx r1, fpscr 101 str r1, [r0] 102 mov r0, #0 103 bx lr 104 )" 105 106 "\t" PREFIX "fastjmp_jmp:" R"( 107 vldmia r0!, {d8-d15} 108 ldmia r0!, {r4-r14} 109 ldr r0, [r0] 110 fmxr fpscr, r0 111 mov r0, r1 112 bx lr 113 )"); 114 115 #elif defined(__riscv) && __riscv_xlen == 64 116 117 asm( 118 "\t.global " PREFIX "fastjmp_set\n" 119 "\t.global " PREFIX "fastjmp_jmp\n" 120 "\t.text\n" 121 "\t.align 16\n" 122 "\t" PREFIX "fastjmp_set:" R"( 123 sd sp, 0(a0) 124 sd s0, 8(a0) 125 sd s1, 16(a0) 126 sd s2, 24(a0) 127 sd s3, 32(a0) 128 sd s4, 40(a0) 129 sd s5, 48(a0) 130 sd s6, 56(a0) 131 sd s7, 64(a0) 132 sd s8, 72(a0) 133 sd s9, 80(a0) 134 sd s10, 88(a0) 135 sd s11, 96(a0) 136 fsd fs0, 104(a0) 137 fsd fs1, 112(a0) 138 fsd fs2, 120(a0) 139 fsd fs3, 128(a0) 140 fsd fs4, 136(a0) 141 fsd fs5, 144(a0) 142 fsd fs6, 152(a0) 143 fsd fs7, 160(a0) 144 fsd fs8, 168(a0) 145 fsd fs9, 176(a0) 146 fsd fs10, 184(a0) 147 fsd fs11, 192(a0) 148 sd ra, 208(a0) 149 li a0, 0 150 jr ra 151 )" 152 ".align 16\n" 153 "\t" PREFIX "fastjmp_jmp:" R"( 154 ld ra, 208(a0) 155 fld fs11, 192(a0) 156 fld fs10, 184(a0) 157 fld fs9, 176(a0) 158 fld fs8, 168(a0) 159 fld fs7, 160(a0) 160 fld fs6, 152(a0) 161 fld fs5, 144(a0) 162 fld fs4, 136(a0) 163 fld fs3, 128(a0) 164 fld fs2, 120(a0) 165 fld fs1, 112(a0) 166 fld fs0, 104(a0) 167 ld s11, 96(a0) 168 ld s10, 88(a0) 169 ld s9, 80(a0) 170 ld s8, 72(a0) 171 ld s7, 64(a0) 172 ld s6, 56(a0) 173 ld s5, 48(a0) 174 ld s4, 40(a0) 175 ld s3, 32(a0) 176 ld s2, 24(a0) 177 ld s1, 16(a0) 178 ld s0, 8(a0) 179 ld sp, 0(a0) 180 mv a0, a1 181 jr ra 182 )"); 183 184 185 #else 186 187 #error Unknown platform. 188 189 #endif 190 191 #endif // __WIN32