qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

oslib-posix.c (23508B)


      1 /*
      2  * oslib-posix.c
      3  *
      4  * Copyright (c) 2003-2008 Fabrice Bellard
      5  * Copyright (c) 2010 Red Hat, Inc.
      6  *
      7  * QEMU library functions on POSIX which are shared between QEMU and
      8  * the QEMU tools.
      9  *
     10  * Permission is hereby granted, free of charge, to any person obtaining a copy
     11  * of this software and associated documentation files (the "Software"), to deal
     12  * in the Software without restriction, including without limitation the rights
     13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     14  * copies of the Software, and to permit persons to whom the Software is
     15  * furnished to do so, subject to the following conditions:
     16  *
     17  * The above copyright notice and this permission notice shall be included in
     18  * all copies or substantial portions of the Software.
     19  *
     20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     26  * THE SOFTWARE.
     27  */
     28 
     29 #include "qemu/osdep.h"
     30 #include <termios.h>
     31 
     32 #include <glib/gprintf.h>
     33 
     34 #include "sysemu/sysemu.h"
     35 #include "trace.h"
     36 #include "qapi/error.h"
     37 #include "qemu/error-report.h"
     38 #include "qemu/madvise.h"
     39 #include "qemu/sockets.h"
     40 #include "qemu/thread.h"
     41 #include <libgen.h>
     42 #include "qemu/cutils.h"
     43 #include "qemu/compiler.h"
     44 #include "qemu/units.h"
     45 #include "qemu/thread-context.h"
     46 
     47 #ifdef CONFIG_LINUX
     48 #include <sys/syscall.h>
     49 #endif
     50 
     51 #ifdef __FreeBSD__
     52 #include <sys/thr.h>
     53 #include <sys/types.h>
     54 #include <sys/user.h>
     55 #include <libutil.h>
     56 #endif
     57 
     58 #ifdef __NetBSD__
     59 #include <lwp.h>
     60 #endif
     61 
     62 #include "qemu/mmap-alloc.h"
     63 
     64 #ifdef CONFIG_DEBUG_STACK_USAGE
     65 #include "qemu/error-report.h"
     66 #endif
     67 
     68 #define MAX_MEM_PREALLOC_THREAD_COUNT 16
     69 
     70 struct MemsetThread;
     71 
     72 typedef struct MemsetContext {
     73     bool all_threads_created;
     74     bool any_thread_failed;
     75     struct MemsetThread *threads;
     76     int num_threads;
     77 } MemsetContext;
     78 
     79 struct MemsetThread {
     80     char *addr;
     81     size_t numpages;
     82     size_t hpagesize;
     83     QemuThread pgthread;
     84     sigjmp_buf env;
     85     MemsetContext *context;
     86 };
     87 typedef struct MemsetThread MemsetThread;
     88 
     89 /* used by sigbus_handler() */
     90 static MemsetContext *sigbus_memset_context;
     91 struct sigaction sigbus_oldact;
     92 static QemuMutex sigbus_mutex;
     93 
     94 static QemuMutex page_mutex;
     95 static QemuCond page_cond;
     96 
     97 int qemu_get_thread_id(void)
     98 {
     99 #if defined(__linux__)
    100     return syscall(SYS_gettid);
    101 #elif defined(__FreeBSD__)
    102     /* thread id is up to INT_MAX */
    103     long tid;
    104     thr_self(&tid);
    105     return (int)tid;
    106 #elif defined(__NetBSD__)
    107     return _lwp_self();
    108 #elif defined(__OpenBSD__)
    109     return getthrid();
    110 #else
    111     return getpid();
    112 #endif
    113 }
    114 
    115 int qemu_daemon(int nochdir, int noclose)
    116 {
    117     return daemon(nochdir, noclose);
    118 }
    119 
    120 bool qemu_write_pidfile(const char *path, Error **errp)
    121 {
    122     int fd;
    123     char pidstr[32];
    124 
    125     while (1) {
    126         struct stat a, b;
    127         struct flock lock = {
    128             .l_type = F_WRLCK,
    129             .l_whence = SEEK_SET,
    130             .l_len = 0,
    131         };
    132 
    133         fd = qemu_create(path, O_WRONLY, S_IRUSR | S_IWUSR, errp);
    134         if (fd == -1) {
    135             return false;
    136         }
    137 
    138         if (fstat(fd, &b) < 0) {
    139             error_setg_errno(errp, errno, "Cannot stat file");
    140             goto fail_close;
    141         }
    142 
    143         if (fcntl(fd, F_SETLK, &lock)) {
    144             error_setg_errno(errp, errno, "Cannot lock pid file");
    145             goto fail_close;
    146         }
    147 
    148         /*
    149          * Now make sure the path we locked is the same one that now
    150          * exists on the filesystem.
    151          */
    152         if (stat(path, &a) < 0) {
    153             /*
    154              * PID file disappeared, someone else must be racing with
    155              * us, so try again.
    156              */
    157             close(fd);
    158             continue;
    159         }
    160 
    161         if (a.st_ino == b.st_ino) {
    162             break;
    163         }
    164 
    165         /*
    166          * PID file was recreated, someone else must be racing with
    167          * us, so try again.
    168          */
    169         close(fd);
    170     }
    171 
    172     if (ftruncate(fd, 0) < 0) {
    173         error_setg_errno(errp, errno, "Failed to truncate pid file");
    174         goto fail_unlink;
    175     }
    176 
    177     snprintf(pidstr, sizeof(pidstr), FMT_pid "\n", getpid());
    178     if (qemu_write_full(fd, pidstr, strlen(pidstr)) != strlen(pidstr)) {
    179         error_setg(errp, "Failed to write pid file");
    180         goto fail_unlink;
    181     }
    182 
    183     return true;
    184 
    185 fail_unlink:
    186     unlink(path);
    187 fail_close:
    188     close(fd);
    189     return false;
    190 }
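
/*
 * Editorial sketch (not part of the original file): a plausible caller of
 * qemu_write_pidfile(). The helper name and pid-file path are hypothetical;
 * the lock-then-stat loop above makes the call safe against concurrent
 * starters recreating the file.
 */
#if 0 /* example only */
static bool daemonize_with_pidfile(Error **errp)
{
    if (qemu_daemon(0, 0) < 0) {
        error_setg_errno(errp, errno, "Failed to daemonize");
        return false;
    }
    return qemu_write_pidfile("/var/run/my-daemon.pid", errp);
}
#endif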
    191 
    192 /* alloc shared memory pages */
    193 void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment, bool shared,
    194                           bool noreserve)
    195 {
    196     const uint32_t qemu_map_flags = (shared ? QEMU_MAP_SHARED : 0) |
    197                                     (noreserve ? QEMU_MAP_NORESERVE : 0);
    198     size_t align = QEMU_VMALLOC_ALIGN;
    199     void *ptr = qemu_ram_mmap(-1, size, align, qemu_map_flags, 0);
    200 
    201     if (ptr == MAP_FAILED) {
    202         return NULL;
    203     }
    204 
    205     if (alignment) {
    206         *alignment = align;
    207     }
    208 
    209     trace_qemu_anon_ram_alloc(size, ptr);
    210     return ptr;
    211 }
    212 
    213 void qemu_anon_ram_free(void *ptr, size_t size)
    214 {
    215     trace_qemu_anon_ram_free(ptr, size);
    216     qemu_ram_munmap(-1, ptr, size);
    217 }
    218 
    219 void qemu_socket_set_block(int fd)
    220 {
    221     g_unix_set_fd_nonblocking(fd, false, NULL);
    222 }
    223 
    224 int qemu_socket_try_set_nonblock(int fd)
    225 {
    226     return g_unix_set_fd_nonblocking(fd, true, NULL) ? 0 : -errno;
    227 }
    228 
    229 void qemu_socket_set_nonblock(int fd)
    230 {
    231     int f;
    232     f = qemu_socket_try_set_nonblock(fd);
    233     assert(f == 0);
    234 }
    235 
    236 int socket_set_fast_reuse(int fd)
    237 {
    238     int val = 1, ret;
    239 
    240     ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
    241                      (const char *)&val, sizeof(val));
    242 
    243     assert(ret == 0);
    244 
    245     return ret;
    246 }
    247 
    248 void qemu_set_cloexec(int fd)
    249 {
    250     int f;
    251     f = fcntl(fd, F_GETFD);
    252     assert(f != -1);
    253     f = fcntl(fd, F_SETFD, f | FD_CLOEXEC);
    254     assert(f != -1);
    255 }
    256 
    257 int qemu_socketpair(int domain, int type, int protocol, int sv[2])
    258 {
    259     int ret;
    260 
    261 #ifdef SOCK_CLOEXEC
    262     ret = socketpair(domain, type | SOCK_CLOEXEC, protocol, sv);
    263     if (ret != -1 || errno != EINVAL) {
    264         return ret;
    265     }
    266 #endif
    267     ret = socketpair(domain, type, protocol, sv);
    268     if (ret == 0) {
    269         qemu_set_cloexec(sv[0]);
    270         qemu_set_cloexec(sv[1]);
    271     }
    272 
    273     return ret;
    274 }
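
/*
 * Editorial sketch (example only): qemu_socketpair() behaves like
 * socketpair(2) but guarantees FD_CLOEXEC on both descriptors, atomically
 * via SOCK_CLOEXEC where supported and via fcntl() otherwise. The wrapper
 * below is hypothetical.
 */
#if 0 /* example only */
static int make_cloexec_pair(int fds[2])
{
    return qemu_socketpair(AF_UNIX, SOCK_STREAM, 0, fds);
}
#endif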
    275 
    276 char *
    277 qemu_get_local_state_dir(void)
    278 {
    279     return get_relocated_path(CONFIG_QEMU_LOCALSTATEDIR);
    280 }
    281 
    282 void qemu_set_tty_echo(int fd, bool echo)
    283 {
    284     struct termios tty;
    285 
    286     tcgetattr(fd, &tty);
    287 
    288     if (echo) {
    289         tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
    290     } else {
    291         tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
    292     }
    293 
    294     tcsetattr(fd, TCSANOW, &tty);
    295 }
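
/*
 * Editorial sketch (example only): qemu_set_tty_echo(fd, false) disables
 * echo *and* canonical mode, so input is read raw. A hypothetical
 * passphrase prompt might use it like this.
 */
#if 0 /* example only */
static ssize_t read_passphrase(char *buf, size_t len)
{
    ssize_t n;

    qemu_set_tty_echo(STDIN_FILENO, false);
    n = read(STDIN_FILENO, buf, len);
    qemu_set_tty_echo(STDIN_FILENO, true);
    return n;
}
#endif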
    296 
    297 #ifdef CONFIG_LINUX
    298 static void sigbus_handler(int signal, siginfo_t *siginfo, void *ctx)
    299 #else /* CONFIG_LINUX */
    300 static void sigbus_handler(int signal)
    301 #endif /* CONFIG_LINUX */
    302 {
    303     int i;
    304 
    305     if (sigbus_memset_context) {
    306         for (i = 0; i < sigbus_memset_context->num_threads; i++) {
    307             MemsetThread *thread = &sigbus_memset_context->threads[i];
    308 
    309             if (qemu_thread_is_self(&thread->pgthread)) {
    310                 siglongjmp(thread->env, 1);
    311             }
    312         }
    313     }
    314 
    315 #ifdef CONFIG_LINUX
    316     /*
    317      * We assume that the MCE SIGBUS handler could have been registered. We
    318      * should never receive BUS_MCEERR_AO on any of our threads, but only on
    319      * the main thread registered for PR_MCE_KILL_EARLY. Further, we should not
    320      * receive BUS_MCEERR_AR triggered by action of other threads on one of
    321      * our threads. So, no need to check for unrelated SIGBUS when seeing one
    322      * for our threads.
    323      *
    324      * We will forward to the MCE handler, which will either handle the SIGBUS
    325      * or reinstall the default SIGBUS handler and reraise the SIGBUS. The
    326      * default SIGBUS handler will crash the process, so we don't care.
    327      */
    328     if (sigbus_oldact.sa_flags & SA_SIGINFO) {
    329         sigbus_oldact.sa_sigaction(signal, siginfo, ctx);
    330         return;
    331     }
    332 #endif /* CONFIG_LINUX */
    333     warn_report("qemu_prealloc_mem: unrelated SIGBUS detected and ignored");
    334 }
    335 
    336 static void *do_touch_pages(void *arg)
    337 {
    338     MemsetThread *memset_args = (MemsetThread *)arg;
    339     sigset_t set, oldset;
    340     int ret = 0;
    341 
    342     /*
    343      * On Linux, the page faults from the loop below can cause mmap_sem
    344      * contention with allocation of the thread stacks.  Do not start
    345      * clearing until all threads have been created.
    346      */
    347     qemu_mutex_lock(&page_mutex);
    348     while (!memset_args->context->all_threads_created) {
    349         qemu_cond_wait(&page_cond, &page_mutex);
    350     }
    351     qemu_mutex_unlock(&page_mutex);
    352 
    353     /* unblock SIGBUS */
    354     sigemptyset(&set);
    355     sigaddset(&set, SIGBUS);
    356     pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
    357 
    358     if (sigsetjmp(memset_args->env, 1)) {
    359         ret = -EFAULT;
    360     } else {
    361         char *addr = memset_args->addr;
    362         size_t numpages = memset_args->numpages;
    363         size_t hpagesize = memset_args->hpagesize;
    364         size_t i;
    365         for (i = 0; i < numpages; i++) {
    366             /*
    367              * Read & write back the same value, so we don't
    368              * corrupt existing user/app data that might be
    369              * stored.
    370              *
    371              * 'volatile' to stop compiler optimizing this away
    372              * to a no-op
    373              */
    374             *(volatile char *)addr = *addr;
    375             addr += hpagesize;
    376         }
    377     }
    378     pthread_sigmask(SIG_SETMASK, &oldset, NULL);
    379     return (void *)(uintptr_t)ret;
    380 }
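
/*
 * Editorial sketch (example only, hypothetical names): the recovery pattern
 * used by do_touch_pages()/sigbus_handler() in miniature. A jump buffer is
 * armed before touching memory; on SIGBUS the handler siglongjmp()s back,
 * turning a fatal fault into -EFAULT.
 */
#if 0 /* example only */
static sigjmp_buf touch_env;

static void touch_sigbus(int sig)
{
    (void)sig;
    siglongjmp(touch_env, 1);
}

static int try_touch(volatile char *p)
{
    if (sigsetjmp(touch_env, 1)) {
        return -EFAULT;     /* faulted, e.g. hugetlb pool exhausted */
    }
    *p = *p;                /* read & write back the same value */
    return 0;
}
#endif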
    381 
    382 static void *do_madv_populate_write_pages(void *arg)
    383 {
    384     MemsetThread *memset_args = (MemsetThread *)arg;
    385     const size_t size = memset_args->numpages * memset_args->hpagesize;
    386     char * const addr = memset_args->addr;
    387     int ret = 0;
    388 
    389     /* See do_touch_pages(). */
    390     qemu_mutex_lock(&page_mutex);
    391     while (!memset_args->context->all_threads_created) {
    392         qemu_cond_wait(&page_cond, &page_mutex);
    393     }
    394     qemu_mutex_unlock(&page_mutex);
    395 
    396     if (size && qemu_madvise(addr, size, QEMU_MADV_POPULATE_WRITE)) {
    397         ret = -errno;
    398     }
    399     return (void *)(uintptr_t)ret;
    400 }
    401 
    402 static inline int get_memset_num_threads(size_t hpagesize, size_t numpages,
    403                                          int max_threads)
    404 {
    405     long host_procs = sysconf(_SC_NPROCESSORS_ONLN);
    406     int ret = 1;
    407 
    408     if (host_procs > 0) {
    409         ret = MIN(MIN(host_procs, MAX_MEM_PREALLOC_THREAD_COUNT), max_threads);
    410     }
    411 
    412     /* Especially with gigantic pages, don't create more threads than pages. */
    413     ret = MIN(ret, numpages);
    414     /* Don't start threads to prealloc comparatively little memory. */
    415     ret = MIN(ret, MAX(1, hpagesize * numpages / (64 * MiB)));
    416 
    417     /* In case sysconf() fails, we fall back to single threaded */
    418     return ret;
    419 }
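
/*
 * Worked example (editorial note): on a 16-CPU host with max_threads = 8,
 * preallocating 1 GiB of 2 MiB pages (numpages = 512) yields
 * MIN(MIN(16, 16), 8) = 8, then MIN(8, 512) = 8, then
 * MIN(8, MAX(1, 2 MiB * 512 / 64 MiB)) = MIN(8, 16) = 8 threads.
 * A 64 MiB region instead collapses to MIN(8, MAX(1, 1)) = 1, so small
 * preallocations stay single-threaded.
 */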
    420 
    421 static int touch_all_pages(char *area, size_t hpagesize, size_t numpages,
    422                            int max_threads, ThreadContext *tc,
    423                            bool use_madv_populate_write)
    424 {
    425     static gsize initialized = 0;
    426     MemsetContext context = {
    427         .num_threads = get_memset_num_threads(hpagesize, numpages, max_threads),
    428     };
    429     size_t numpages_per_thread, leftover;
    430     void *(*touch_fn)(void *);
    431     int ret = 0, i = 0;
    432     char *addr = area;
    433 
    434     if (g_once_init_enter(&initialized)) {
    435         qemu_mutex_init(&page_mutex);
    436         qemu_cond_init(&page_cond);
    437         g_once_init_leave(&initialized, 1);
    438     }
    439 
    440     if (use_madv_populate_write) {
    441         /* Avoid creating a single thread for MADV_POPULATE_WRITE */
    442         if (context.num_threads == 1) {
    443             if (qemu_madvise(area, hpagesize * numpages,
    444                              QEMU_MADV_POPULATE_WRITE)) {
    445                 return -errno;
    446             }
    447             return 0;
    448         }
    449         touch_fn = do_madv_populate_write_pages;
    450     } else {
    451         touch_fn = do_touch_pages;
    452     }
    453 
    454     context.threads = g_new0(MemsetThread, context.num_threads);
    455     numpages_per_thread = numpages / context.num_threads;
    456     leftover = numpages % context.num_threads;
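    /*
     * Editorial note: 10 pages over 4 threads gives numpages_per_thread = 2
     * and leftover = 2; the (i < leftover) term below then assigns 3 pages
     * each to threads 0 and 1, and 2 pages each to threads 2 and 3.
     */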
    457     for (i = 0; i < context.num_threads; i++) {
    458         context.threads[i].addr = addr;
    459         context.threads[i].numpages = numpages_per_thread + (i < leftover);
    460         context.threads[i].hpagesize = hpagesize;
    461         context.threads[i].context = &context;
    462         if (tc) {
    463             thread_context_create_thread(tc, &context.threads[i].pgthread,
    464                                          "touch_pages",
    465                                          touch_fn, &context.threads[i],
    466                                          QEMU_THREAD_JOINABLE);
    467         } else {
    468             qemu_thread_create(&context.threads[i].pgthread, "touch_pages",
    469                                touch_fn, &context.threads[i],
    470                                QEMU_THREAD_JOINABLE);
    471         }
    472         addr += context.threads[i].numpages * hpagesize;
    473     }
    474 
    475     if (!use_madv_populate_write) {
    476         sigbus_memset_context = &context;
    477     }
    478 
    479     qemu_mutex_lock(&page_mutex);
    480     context.all_threads_created = true;
    481     qemu_cond_broadcast(&page_cond);
    482     qemu_mutex_unlock(&page_mutex);
    483 
    484     for (i = 0; i < context.num_threads; i++) {
    485         int tmp = (uintptr_t)qemu_thread_join(&context.threads[i].pgthread);
    486 
    487         if (tmp) {
    488             ret = tmp;
    489         }
    490     }
    491 
    492     if (!use_madv_populate_write) {
    493         sigbus_memset_context = NULL;
    494     }
    495     g_free(context.threads);
    496 
    497     return ret;
    498 }
    499 
    500 static bool madv_populate_write_possible(char *area, size_t pagesize)
    501 {
    502     return !qemu_madvise(area, pagesize, QEMU_MADV_POPULATE_WRITE) ||
    503            errno != EINVAL;
    504 }
    505 
    506 void qemu_prealloc_mem(int fd, char *area, size_t sz, int max_threads,
    507                        ThreadContext *tc, Error **errp)
    508 {
    509     static gsize initialized;
    510     int ret;
    511     size_t hpagesize = qemu_fd_getpagesize(fd);
    512     size_t numpages = DIV_ROUND_UP(sz, hpagesize);
    513     bool use_madv_populate_write;
    514     struct sigaction act;
    515 
    516     /*
    517      * Sense on every invocation, as MADV_POPULATE_WRITE cannot be used for
    518      * some special mappings, such as mapping /dev/mem.
    519      */
    520     use_madv_populate_write = madv_populate_write_possible(area, hpagesize);
    521 
    522     if (!use_madv_populate_write) {
    523         if (g_once_init_enter(&initialized)) {
    524             qemu_mutex_init(&sigbus_mutex);
    525             g_once_init_leave(&initialized, 1);
    526         }
    527 
    528         qemu_mutex_lock(&sigbus_mutex);
    529         memset(&act, 0, sizeof(act));
    530 #ifdef CONFIG_LINUX
    531         act.sa_sigaction = &sigbus_handler;
    532         act.sa_flags = SA_SIGINFO;
    533 #else /* CONFIG_LINUX */
    534         act.sa_handler = &sigbus_handler;
    535         act.sa_flags = 0;
    536 #endif /* CONFIG_LINUX */
    537 
    538         ret = sigaction(SIGBUS, &act, &sigbus_oldact);
    539         if (ret) {
    540             qemu_mutex_unlock(&sigbus_mutex);
    541             error_setg_errno(errp, errno,
    542                 "qemu_prealloc_mem: failed to install signal handler");
    543             return;
    544         }
    545     }
    546 
    547     /* touch pages simultaneously */
    548     ret = touch_all_pages(area, hpagesize, numpages, max_threads, tc,
    549                           use_madv_populate_write);
    550     if (ret) {
    551         error_setg_errno(errp, -ret,
    552                          "qemu_prealloc_mem: preallocating memory failed");
    553     }
    554 
    555     if (!use_madv_populate_write) {
    556         ret = sigaction(SIGBUS, &sigbus_oldact, NULL);
    557         if (ret) {
    558             /* Terminate QEMU since it can't recover from error */
    559             perror("qemu_prealloc_mem: failed to reinstall signal handler");
    560             exit(1);
    561         }
    562         qemu_mutex_unlock(&sigbus_mutex);
    563     }
    564 }
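
/*
 * Editorial sketch (example only): preallocating an anonymous RAM block so
 * that later access cannot fault. fd = -1 denotes anonymous memory, for
 * which qemu_fd_getpagesize() reports the host page size; the thread count
 * and helper name are hypothetical.
 */
#if 0 /* example only */
static void *alloc_prefaulted(size_t size, Error **errp)
{
    uint64_t align;
    void *ptr = qemu_anon_ram_alloc(size, &align, false, false);

    if (!ptr) {
        error_setg(errp, "RAM allocation failed");
        return NULL;
    }
    qemu_prealloc_mem(-1, ptr, size, 4, NULL, errp);
    return ptr;
}
#endif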
    565 
    566 char *qemu_get_pid_name(pid_t pid)
    567 {
    568     char *name = NULL;
    569 
    570 #if defined(__FreeBSD__)
    571     /* BSDs don't have /proc, but they provide a nice substitute */
    572     struct kinfo_proc *proc = kinfo_getproc(pid);
    573 
    574     if (proc) {
    575         name = g_strdup(proc->ki_comm);
    576         free(proc);
    577     }
    578 #else
    579     /* Assume a system with reasonable procfs */
    580     char *pid_path;
    581     size_t len;
    582 
    583     pid_path = g_strdup_printf("/proc/%d/cmdline", pid);
    584     g_file_get_contents(pid_path, &name, &len, NULL);
    585     g_free(pid_path);
    586 #endif
    587 
    588     return name;
    589 }
    590 
    591 
    592 pid_t qemu_fork(Error **errp)
    593 {
    594     sigset_t oldmask, newmask;
    595     struct sigaction sig_action;
    596     int saved_errno;
    597     pid_t pid;
    598 
    599     /*
    600      * Need to block signals now, so that child process can safely
    601      * kill off caller's signal handlers without a race.
    602      */
    603     sigfillset(&newmask);
    604     if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
    605         error_setg_errno(errp, errno,
    606                          "cannot block signals");
    607         return -1;
    608     }
    609 
    610     pid = fork();
    611     saved_errno = errno;
    612 
    613     if (pid < 0) {
    614         /* attempt to restore signal mask, but ignore failure, to
    615          * avoid obscuring the fork failure */
    616         (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
    617         error_setg_errno(errp, saved_errno,
    618                          "cannot fork child process");
    619         errno = saved_errno;
    620         return -1;
    621     } else if (pid) {
    622         /* parent process */
    623 
    624         /* Restore our original signal mask now that the child is
    625          * safely running. Only documented failures are EFAULT (not
    626          * possible, since we are using just-grabbed mask) or EINVAL
    627          * (not possible, since we are using correct arguments).  */
    628         (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
    629     } else {
    630         /* child process */
    631         size_t i;
    632 
    633         /* Clear out all signal handlers from parent so nothing
    634          * unexpected can happen in our child once we unblock
    635          * signals */
    636         sig_action.sa_handler = SIG_DFL;
    637         sig_action.sa_flags = 0;
    638         sigemptyset(&sig_action.sa_mask);
    639 
    640         for (i = 1; i < NSIG; i++) {
    641             /* Only possible errors are EFAULT or EINVAL. The former
    642              * won't happen, the latter we expect, so no need to check
    643              * return value */
    644             (void)sigaction(i, &sig_action, NULL);
    645         }
    646 
    647         /* Unmask all signals in child, since we've no idea what the
    648          * caller's done with their signal mask and don't want to
    649          * propagate that to children */
    650         sigemptyset(&newmask);
    651         if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
    652             Error *local_err = NULL;
    653             error_setg_errno(&local_err, errno,
    654                              "cannot unblock signals");
    655             error_report_err(local_err);
    656             _exit(1);
    657         }
    658     }
    659     return pid;
    660 }
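
/*
 * Editorial sketch (example only): typical qemu_fork() usage. The child
 * starts with default handlers and an empty signal mask, so it can exec()
 * without inheriting the parent's signal state. The helper binary is
 * hypothetical.
 */
#if 0 /* example only */
static pid_t spawn_helper(Error **errp)
{
    pid_t pid = qemu_fork(errp);

    if (pid == 0) {
        execlp("my-helper", "my-helper", (char *)NULL);
        _exit(127);     /* exec failed */
    }
    return pid;         /* parent: -1 on error with *errp set */
}
#endif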
    661 
    662 void *qemu_alloc_stack(size_t *sz)
    663 {
    664     void *ptr, *guardpage;
    665     int flags;
    666 #ifdef CONFIG_DEBUG_STACK_USAGE
    667     void *ptr2;
    668 #endif
    669     size_t pagesz = qemu_real_host_page_size();
    670 #ifdef _SC_THREAD_STACK_MIN
    671     /* avoid stacks smaller than _SC_THREAD_STACK_MIN */
    672     long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
    673     *sz = MAX(MAX(min_stack_sz, 0), *sz);
    674 #endif
    675     /* adjust stack size to a multiple of the page size */
    676     *sz = ROUND_UP(*sz, pagesz);
    677     /* allocate one extra page for the guard page */
    678     *sz += pagesz;
    679 
    680     flags = MAP_PRIVATE | MAP_ANONYMOUS;
    681 #if defined(MAP_STACK) && defined(__OpenBSD__)
    682     /* Only enable MAP_STACK on OpenBSD. Other OSes such as
    683      * Linux/FreeBSD/NetBSD have a flag with the same name
    684      * but differing functionality. OpenBSD will SEGV
    685      * if it spots execution with a stack pointer pointing
    686      * at memory that was not allocated with MAP_STACK.
    687      */
    688     flags |= MAP_STACK;
    689 #endif
    690 
    691     ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, flags, -1, 0);
    692     if (ptr == MAP_FAILED) {
    693         perror("failed to allocate memory for stack");
    694         abort();
    695     }
    696 
    697 #if defined(HOST_IA64)
    698     /* separate register stack */
    699     guardpage = ptr + (((*sz - pagesz) / 2) & ~(pagesz - 1));
    700 #elif defined(HOST_HPPA)
    701     /* stack grows up */
    702     guardpage = ptr + *sz - pagesz;
    703 #else
    704     /* stack grows down */
    705     guardpage = ptr;
    706 #endif
    707     if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
    708         perror("failed to set up stack guard page");
    709         abort();
    710     }
    711 
    712 #ifdef CONFIG_DEBUG_STACK_USAGE
    713     for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) {
    714         *(uint32_t *)ptr2 = 0xdeadbeaf;
    715     }
    716 #endif
    717 
    718     return ptr;
    719 }
    720 
    721 #ifdef CONFIG_DEBUG_STACK_USAGE
    722 static __thread unsigned int max_stack_usage;
    723 #endif
    724 
    725 void qemu_free_stack(void *stack, size_t sz)
    726 {
    727 #ifdef CONFIG_DEBUG_STACK_USAGE
    728     unsigned int usage;
    729     void *ptr;
    730 
    731     for (ptr = stack + qemu_real_host_page_size(); ptr < stack + sz;
    732          ptr += sizeof(uint32_t)) {
    733         if (*(uint32_t *)ptr != 0xdeadbeaf) {
    734             break;
    735         }
    736     }
    737     usage = sz - (uintptr_t) (ptr - stack);
    738     if (usage > max_stack_usage) {
    739         error_report("thread %d max stack usage increased from %u to %u",
    740                      qemu_get_thread_id(), max_stack_usage, usage);
    741         max_stack_usage = usage;
    742     }
    743 #endif
    744 
    745     munmap(stack, sz);
    746 }
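
/*
 * Editorial sketch (example only): pairing qemu_alloc_stack() with
 * qemu_free_stack(). *sz is rounded up in place (guard page included), so
 * the updated size must be passed back when freeing.
 */
#if 0 /* example only */
static void stack_demo(void)
{
    size_t sz = 1 * MiB;
    void *stack = qemu_alloc_stack(&sz);    /* may enlarge sz */

    /* ... run a coroutine/thread on [stack, stack + sz) ... */

    qemu_free_stack(stack, sz);
}
#endif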
    747 
    748 /*
    749  * Disable CFI checks.
    750  * We are going to call a signal handler directly. Such a handler may or may not
    751  * have been defined in our binary, so there's no guarantee that the pointer
    752  * used to set the handler is a cfi-valid pointer. Since the handlers are
    753  * stored in kernel memory, changing the handler to an attacker-defined
    754  * function requires being able to call a sigaction() syscall,
    755  * which is not as easy as overwriting a pointer in memory.
    756  */
    757 QEMU_DISABLE_CFI
    758 void sigaction_invoke(struct sigaction *action,
    759                       struct qemu_signalfd_siginfo *info)
    760 {
    761     siginfo_t si = {};
    762     si.si_signo = info->ssi_signo;
    763     si.si_errno = info->ssi_errno;
    764     si.si_code = info->ssi_code;
    765 
    766     /* Convert the minimal set of fields defined by POSIX.
    767      * Positive si_code values are reserved for kernel-generated
    768      * signals, where the valid siginfo fields are determined by
    769      * the signal number.  But according to POSIX, it is unspecified
    770      * whether SI_USER and SI_QUEUE have values less than or equal to
    771      * zero.
    772      */
    773     if (info->ssi_code == SI_USER || info->ssi_code == SI_QUEUE ||
    774         info->ssi_code <= 0) {
    775         /* SIGTERM, etc.  */
    776         si.si_pid = info->ssi_pid;
    777         si.si_uid = info->ssi_uid;
    778     } else if (info->ssi_signo == SIGILL || info->ssi_signo == SIGFPE ||
    779                info->ssi_signo == SIGSEGV || info->ssi_signo == SIGBUS) {
    780         si.si_addr = (void *)(uintptr_t)info->ssi_addr;
    781     } else if (info->ssi_signo == SIGCHLD) {
    782         si.si_pid = info->ssi_pid;
    783         si.si_status = info->ssi_status;
    784         si.si_uid = info->ssi_uid;
    785     }
    786     action->sa_sigaction(info->ssi_signo, &si, NULL);
    787 }
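
/*
 * Editorial sketch (example only, hypothetical names): draining a signalfd
 * and re-dispatching each signal through its saved handler with
 * sigaction_invoke().
 */
#if 0 /* example only */
static void dispatch_signalfd(int sigfd, struct sigaction *actions)
{
    struct qemu_signalfd_siginfo info;

    while (read(sigfd, &info, sizeof(info)) == sizeof(info)) {
        sigaction_invoke(&actions[info.ssi_signo], &info);
    }
}
#endif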
    788 
    789 size_t qemu_get_host_physmem(void)
    790 {
    791 #ifdef _SC_PHYS_PAGES
    792     long pages = sysconf(_SC_PHYS_PAGES);
    793     if (pages > 0) {
    794         if (pages > SIZE_MAX / qemu_real_host_page_size()) {
    795             return SIZE_MAX;
    796         } else {
    797             return pages * qemu_real_host_page_size();
    798         }
    799     }
    800 #endif
    801     return 0;
    802 }
    803 
    804 int qemu_msync(void *addr, size_t length, int fd)
    805 {
    806     size_t align_mask = ~(qemu_real_host_page_size() - 1);
    807 
    808     /**
    809      * There is no strict requirement on the length of the mapping
    810      * to be synced; the length only needs to track the alignment
    811      * adjustment applied to the address. Additionally, the size is
    812      * rounded up to a multiple of the host page size.
    813      */
    814     length += ((uintptr_t)addr & (qemu_real_host_page_size() - 1));
    815     length = (length + ~align_mask) & align_mask;
    816 
    817     addr = (void *)((uintptr_t)addr & align_mask);
    818 
    819     return msync(addr, length, MS_SYNC);
    820 }
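
/*
 * Worked example (editorial note): with 4 KiB host pages,
 * align_mask = ~0xfff. For addr = 0x1234 and length = 0x100:
 *   length += 0x1234 & 0xfff            -> 0x334
 *   length  = (0x334 + 0xfff) & mask    -> 0x1000
 *   addr    = 0x1234 & mask             -> 0x1000
 * so the msync() covers the full page containing the original range.
 */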