qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

osdep.c (14122B)


      1 /*
      2  * QEMU low level functions
      3  *
      4  * Copyright (c) 2003 Fabrice Bellard
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a copy
      7  * of this software and associated documentation files (the "Software"), to deal
      8  * in the Software without restriction, including without limitation the rights
      9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     10  * copies of the Software, and to permit persons to whom the Software is
     11  * furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     22  * THE SOFTWARE.
     23  */
     24 #include "qemu/osdep.h"
     25 #include "qapi/error.h"
     26 #include "qemu/cutils.h"
     27 #include "qemu/sockets.h"
     28 #include "qemu/error-report.h"
     29 #include "qemu/madvise.h"
     30 #include "qemu/mprotect.h"
     31 #include "qemu/hw-version.h"
     32 #include "monitor/monitor.h"
     33 
     /*
      * Hardware version string handed out by qemu_hw_version().  It starts as
      * QEMU_HW_VERSION and is replaced by qemu_set_hw_version().
      */
     34 static const char *hw_version = QEMU_HW_VERSION;
     35 
     /*
      * Pass @v as the TCP_CORK option value for socket @fd.
      *
      * Returns the result of setsockopt().  Hosts that do not define both
      * SOL_TCP and TCP_CORK ignore the request and report success (0).
      */
     int socket_set_cork(int fd, int v)
     {
     #if !defined(SOL_TCP) || !defined(TCP_CORK)
         return 0;
     #else
         return setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
     #endif
     }
     44 
     /*
      * Set the TCP_NODELAY option to 1 on socket @fd.
      *
      * Returns the result of setsockopt(): 0 on success, -1 with errno set
      * on failure.
      */
     int socket_set_nodelay(int fd)
     {
         const int enable = 1;

         return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(enable));
     }
     50 
     51 int qemu_madvise(void *addr, size_t len, int advice)
     52 {
     53     if (advice == QEMU_MADV_INVALID) {
     54         errno = EINVAL;
     55         return -1;
     56     }
     57 #if defined(CONFIG_MADVISE)
     58     return madvise(addr, len, advice);
     59 #elif defined(CONFIG_POSIX_MADVISE)
     60     return posix_madvise(addr, len, advice);
     61 #else
     62     errno = EINVAL;
     63     return -1;
     64 #endif
     65 }
     66 
     /*
      * Change the protection of @size bytes at @addr to the host-specific
      * value @prot (a VirtualProtect() flag on Windows, an mprotect() flag
      * elsewhere).
      *
      * Both @addr and @size must be multiples of the real host page size;
      * this is asserted.  Returns 0 on success; on failure the error is
      * reported through error_report() and -1 is returned.
      */
     static int qemu_mprotect__osdep(void *addr, size_t size, int prot)
     {
         g_assert(!((uintptr_t)addr & ~qemu_real_host_page_mask()));
         g_assert(!(size & ~qemu_real_host_page_mask()));

     #ifndef _WIN32
         if (mprotect(addr, size, prot) == 0) {
             return 0;
         }

         error_report("%s: mprotect failed: %s", __func__, strerror(errno));
         return -1;
     #else
         DWORD old_protect;

         if (VirtualProtect(addr, size, prot, &old_protect)) {
             return 0;
         }

         g_autofree gchar *emsg = g_win32_error_message(GetLastError());
         error_report("%s: VirtualProtect failed: %s", __func__, emsg);
         return -1;
     #endif
     }
     89 
     /*
      * Make @size bytes at @addr readable and writable.
      * Returns the result of qemu_mprotect__osdep(): 0 on success, -1 on error.
      */
     int qemu_mprotect_rw(void *addr, size_t size)
     {
     #ifdef _WIN32
         const int prot = PAGE_READWRITE;
     #else
         const int prot = PROT_READ | PROT_WRITE;
     #endif

         return qemu_mprotect__osdep(addr, size, prot);
     }
     98 
     /*
      * Make @size bytes at @addr readable, writable and executable.
      * Returns the result of qemu_mprotect__osdep(): 0 on success, -1 on error.
      */
     int qemu_mprotect_rwx(void *addr, size_t size)
     {
     #ifdef _WIN32
         const int prot = PAGE_EXECUTE_READWRITE;
     #else
         const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
     #endif

         return qemu_mprotect__osdep(addr, size, prot);
     }
    107 
    /*
     * Remove all access to @size bytes at @addr.
     * Returns the result of qemu_mprotect__osdep(): 0 on success, -1 on error.
     */
    int qemu_mprotect_none(void *addr, size_t size)
    {
    #ifdef _WIN32
        const int prot = PAGE_NOACCESS;
    #else
        const int prot = PROT_NONE;
    #endif

        return qemu_mprotect__osdep(addr, size, prot);
    }
    116 
    117 #ifndef _WIN32
    118 
    /*
     * fcntl() commands used for setting and querying file locks.  Both are
     * -1 until qemu_probe_lock_ops() has selected either the OFD variants
     * (F_OFD_SETLK/F_OFD_GETLK) or the classic ones (F_SETLK/F_GETLK).
     */
    119 static int fcntl_op_setlk = -1;
    120 static int fcntl_op_getlk = -1;
    121 
    /*
     * Duplicate @fd via qemu_dup() and apply @flags to the duplicate.
     *
     * The O_SYNC state requested in @flags must match that of the duplicate,
     * otherwise the call fails with EINVAL.  The remaining flags are applied
     * with fcntl(F_SETFL), and the file is truncated to length zero in the
     * cases where open() would have truncated it (O_TRUNC, or O_CREAT
     * together with O_EXCL).
     *
     * Returns the new descriptor, or -1 with errno set.  On failure the
     * duplicate is closed and errno from the failing step is preserved.
     */
    int qemu_dup_flags(int fd, int flags)
    {
        const int excl_create = O_CREAT | O_EXCL;
        int saved_errno;
        int cur_flags;
        int newfd;

        newfd = qemu_dup(fd);
        if (newfd == -1) {
            /* Nothing to clean up; errno comes straight from qemu_dup() */
            return -1;
        }

        cur_flags = fcntl(newfd, F_GETFL);
        if (cur_flags == -1) {
            goto close_and_fail;
        }

        /* O_SYNC must agree between the request and the duplicate */
        if ((flags ^ cur_flags) & O_SYNC) {
            errno = EINVAL;
            goto close_and_fail;
        }

        /* Set/unset the flags that fcntl is able to change */
        if (fcntl(newfd, F_SETFL, flags) == -1) {
            goto close_and_fail;
        }

        /* Mirror the truncation open() would have performed */
        if (((flags & O_TRUNC) || (flags & excl_create) == excl_create) &&
            ftruncate(newfd, 0) == -1) {
            goto close_and_fail;
        }

        return newfd;

    close_and_fail:
        /* close() may clobber errno, so carry the original value across it */
        saved_errno = errno;
        close(newfd);
        errno = saved_errno;
        return -1;
    }
    169 
    /*
     * Duplicate @fd with close-on-exec set on the copy.
     *
     * Uses fcntl(F_DUPFD_CLOEXEC) where the host defines it; otherwise falls
     * back to dup() followed by qemu_set_cloexec().
     *
     * Returns the new descriptor, or -1 on failure.
     */
    int qemu_dup(int fd)
    {
    #ifndef F_DUPFD_CLOEXEC
        int newfd = dup(fd);

        if (newfd != -1) {
            qemu_set_cloexec(newfd);
        }
        return newfd;
    #else
        return fcntl(fd, F_DUPFD_CLOEXEC, 0);
    #endif
    }
    183 
    /*
     * Parse the fd set id in @param by delegating to qemu_parse_fd() and
     * return its result unchanged.
     */
    static int qemu_parse_fdset(const char *param)
    {
        int fdset_id = qemu_parse_fd(param);

        return fdset_id;
    }
    188 
    189 static void qemu_probe_lock_ops(void)
    190 {
    191     if (fcntl_op_setlk == -1) {
    192 #ifdef F_OFD_SETLK
    193         int fd;
    194         int ret;
    195         struct flock fl = {
    196             .l_whence = SEEK_SET,
    197             .l_start  = 0,
    198             .l_len    = 0,
    199             .l_type   = F_WRLCK,
    200         };
    201 
    202         fd = open("/dev/null", O_RDWR);
    203         if (fd < 0) {
    204             fprintf(stderr,
    205                     "Failed to open /dev/null for OFD lock probing: %s\n",
    206                     strerror(errno));
    207             fcntl_op_setlk = F_SETLK;
    208             fcntl_op_getlk = F_GETLK;
    209             return;
    210         }
    211         ret = fcntl(fd, F_OFD_GETLK, &fl);
    212         close(fd);
    213         if (!ret) {
    214             fcntl_op_setlk = F_OFD_SETLK;
    215             fcntl_op_getlk = F_OFD_GETLK;
    216         } else {
    217             fcntl_op_setlk = F_SETLK;
    218             fcntl_op_getlk = F_GETLK;
    219         }
    220 #else
    221         fcntl_op_setlk = F_SETLK;
    222         fcntl_op_getlk = F_GETLK;
    223 #endif
    224     }
    225 }
    226 
    /*
     * Report whether the lock helpers in this file use OFD locks.
     *
     * Runs qemu_probe_lock_ops() first.  Returns true only if the host
     * defines F_OFD_SETLK and the probe selected it; false otherwise.
     */
    bool qemu_has_ofd_lock(void)
    {
        bool have_ofd = false;

        qemu_probe_lock_ops();
    #ifdef F_OFD_SETLK
        have_ofd = (fcntl_op_setlk == F_OFD_SETLK);
    #endif
        return have_ofd;
    }
    236 
    237 static int qemu_lock_fcntl(int fd, int64_t start, int64_t len, int fl_type)
    238 {
    239     int ret;
    240     struct flock fl = {
    241         .l_whence = SEEK_SET,
    242         .l_start  = start,
    243         .l_len    = len,
    244         .l_type   = fl_type,
    245     };
    246     qemu_probe_lock_ops();
    247     do {
    248         ret = fcntl(fd, fcntl_op_setlk, &fl);
    249     } while (ret == -1 && errno == EINTR);
    250     return ret == -1 ? -errno : 0;
    251 }
    252 
    /*
     * Lock @len bytes of @fd starting at offset @start: a write lock when
     * @exclusive is true, a read lock otherwise.
     *
     * Returns 0 on success or a negative errno value (see qemu_lock_fcntl()).
     */
    int qemu_lock_fd(int fd, int64_t start, int64_t len, bool exclusive)
    {
        int lock_type;

        if (exclusive) {
            lock_type = F_WRLCK;
        } else {
            lock_type = F_RDLCK;
        }

        return qemu_lock_fcntl(fd, start, len, lock_type);
    }
    257 
    /*
     * Release the lock on @len bytes of @fd starting at offset @start.
     *
     * Returns 0 on success or a negative errno value (see qemu_lock_fcntl()).
     */
    int qemu_unlock_fd(int fd, int64_t start, int64_t len)
    {
        const int lock_type = F_UNLCK;

        return qemu_lock_fcntl(fd, start, len, lock_type);
    }
    262 
    263 int qemu_lock_fd_test(int fd, int64_t start, int64_t len, bool exclusive)
    264 {
    265     int ret;
    266     struct flock fl = {
    267         .l_whence = SEEK_SET,
    268         .l_start  = start,
    269         .l_len    = len,
    270         .l_type   = exclusive ? F_WRLCK : F_RDLCK,
    271     };
    272     qemu_probe_lock_ops();
    273     ret = fcntl(fd, fcntl_op_getlk, &fl);
    274     if (ret == -1) {
    275         return -errno;
    276     } else {
    277         return fl.l_type == F_UNLCK ? 0 : -EAGAIN;
    278     }
    279 }
    280 #endif
    281 
    /*
     * open() @name with @flags and @mode, making the descriptor
     * close-on-exec.
     *
     * Where O_CLOEXEC is defined it is added to @flags; otherwise
     * qemu_set_cloexec() is applied to the descriptor after a successful
     * open().  Returns the result of open().
     */
    static int qemu_open_cloexec(const char *name, int flags, mode_t mode)
    {
    #ifndef O_CLOEXEC
        int fd = open(name, flags, mode);

        if (fd >= 0) {
            qemu_set_cloexec(fd);
        }
        return fd;
    #else
        return open(name, flags | O_CLOEXEC, mode);
    #endif
    }
    295 
    296 /*
    297  * Opens a file with FD_CLOEXEC set
    298  */
    299 static int
    300 qemu_open_internal(const char *name, int flags, mode_t mode, Error **errp)
    301 {
    302     int ret;
    303 
    304 #ifndef _WIN32
    305     const char *fdset_id_str;
    306 
    307     /* Attempt dup of fd from fd set */
    308     if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
    309         int64_t fdset_id;
    310         int dupfd;
    311 
    312         fdset_id = qemu_parse_fdset(fdset_id_str);
    313         if (fdset_id == -1) {
    314             error_setg(errp, "Could not parse fdset %s", name);
    315             errno = EINVAL;
    316             return -1;
    317         }
    318 
    319         dupfd = monitor_fdset_dup_fd_add(fdset_id, flags);
    320         if (dupfd == -1) {
    321             error_setg_errno(errp, errno, "Could not dup FD for %s flags %x",
    322                              name, flags);
    323             return -1;
    324         }
    325 
    326         return dupfd;
    327     }
    328 #endif
    329 
    330     ret = qemu_open_cloexec(name, flags, mode);
    331 
    332     if (ret == -1) {
    333         const char *action = flags & O_CREAT ? "create" : "open";
    334 #ifdef O_DIRECT
    335         /* Give more helpful error message for O_DIRECT */
    336         if (errno == EINVAL && (flags & O_DIRECT)) {
    337             ret = open(name, flags & ~O_DIRECT, mode);
    338             if (ret != -1) {
    339                 close(ret);
    340                 error_setg(errp, "Could not %s '%s': "
    341                            "filesystem does not support O_DIRECT",
    342                            action, name);
    343                 errno = EINVAL; /* restore first open()'s errno */
    344                 return -1;
    345             }
    346         }
    347 #endif /* O_DIRECT */
    348         error_setg_errno(errp, errno, "Could not %s '%s'",
    349                          action, name);
    350     }
    351 
    352     return ret;
    353 }
    354 
    355 
    356 int qemu_open(const char *name, int flags, Error **errp)
    357 {
    358     assert(!(flags & O_CREAT));
    359 
    360     return qemu_open_internal(name, flags, 0, errp);
    361 }
    362 
    363 
    364 int qemu_create(const char *name, int flags, mode_t mode, Error **errp)
    365 {
    366     assert(!(flags & O_CREAT));
    367 
    368     return qemu_open_internal(name, flags | O_CREAT, mode, errp);
    369 }
    370 
    371 
    /*
     * open()-style variadic front end to qemu_open_internal().
     *
     * When @flags contains O_CREAT, the file mode is taken from the first
     * variadic argument (read as an int); otherwise the mode is 0.  No Error
     * object is produced.  If the open fails with EINVAL while O_DIRECT was
     * requested, a hint is printed with error_report().
     *
     * Returns the descriptor, or -1 with errno set.
     */
    int qemu_open_old(const char *name, int flags, ...)
    {
        mode_t mode = 0;
        int fd;

        if (flags & O_CREAT) {
            va_list ap;

            va_start(ap, flags);
            mode = va_arg(ap, int);
            va_end(ap);
        }

        fd = qemu_open_internal(name, flags, mode, NULL);

    #ifdef O_DIRECT
        if (fd == -1 && (flags & O_DIRECT) && errno == EINVAL) {
            error_report("file system may not support O_DIRECT");
            errno = EINVAL; /* in case it was clobbered */
        }
    #endif /* O_DIRECT */

        return fd;
    }
    395 
    /*
     * Close @fd.
     *
     * If monitor_fdset_dup_fd_find() reports that @fd was dup'd from an fd
     * set and close() succeeds, the descriptor is also dropped from the set
     * with monitor_fdset_dup_fd_remove().
     *
     * Returns the result of close().
     */
    int qemu_close(int fd)
    {
        /* Look the fd up before closing it */
        bool from_fdset = monitor_fdset_dup_fd_find(fd) != -1;
        int rc = close(fd);

        if (from_fdset && rc == 0) {
            monitor_fdset_dup_fd_remove(fd);
        }

        return rc;
    }
    415 
    /*
     * Remove @name from the filesystem.  Names starting with "/dev/fdset/"
     * are left alone and reported as success.
     *
     * Returns: zero on success.  On error, -1 is returned and errno is set
     * by unlink().
     */
    int qemu_unlink(const char *name)
    {
        if (!g_str_has_prefix(name, "/dev/fdset/")) {
            return unlink(name);
        }

        /* fd set names do not refer to filesystem entries */
        return 0;
    }
    430 
    /*
     * A variant of write(2) that handles partial writes.
     *
     * Keeps calling write() until all @count bytes of @buf have been written
     * to @fd or an error other than EINTR occurs.
     *
     * Returns the number of bytes transferred.  When that is fewer than
     * @count, errno holds the error from the failing write().
     *
     * This function does not work with non-blocking fds.  Either possible
     * behaviour would be bad:
     *   - returning a short write (then the name is wrong)
     *   - busy waiting by adding (errno == EAGAIN) to the loop
     */
    ssize_t qemu_write_full(int fd, const void *buf, size_t count)
    {
        /* Byte cursor: pointer arithmetic on 'void *' is not valid ISO C */
        const char *cursor = buf;
        ssize_t total = 0;

        while (count) {
            ssize_t ret = write(fd, cursor, count);

            if (ret < 0) {
                if (errno == EINTR) {
                    continue;
                }
                break;
            }

            count -= ret;
            cursor += ret;
            total += ret;
        }

        return total;
    }
    462 
    /*
     * Create a socket with FD_CLOEXEC set.
     *
     * Where SOCK_CLOEXEC is defined, the flag is first passed to socket().
     * Only if that attempt fails with EINVAL is the socket created without
     * the flag and marked close-on-exec afterwards with qemu_set_cloexec();
     * hosts without SOCK_CLOEXEC always take that second path.
     *
     * Returns the descriptor, or -1 with errno set by socket().
     */
    int qemu_socket(int domain, int type, int protocol)
    {
        int fd;

    #ifdef SOCK_CLOEXEC
        fd = socket(domain, type | SOCK_CLOEXEC, protocol);
        if (!(fd == -1 && errno == EINVAL)) {
            /* Success, or a failure unrelated to the flag */
            return fd;
        }
    #endif

        fd = socket(domain, type, protocol);
        if (fd >= 0) {
            qemu_set_cloexec(fd);
        }

        return fd;
    }
    483 
    /*
     * Accept a connection on listening socket @s and set FD_CLOEXEC on the
     * new descriptor.
     *
     * With CONFIG_ACCEPT4, accept4(SOCK_CLOEXEC) is tried first; only an
     * ENOSYS failure falls through to plain accept() followed by
     * qemu_set_cloexec().  @addr and @addrlen are passed through unchanged.
     *
     * Returns the new descriptor, or -1 with errno set.
     */
    int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
    {
        int fd;

    #ifdef CONFIG_ACCEPT4
        fd = accept4(s, addr, addrlen, SOCK_CLOEXEC);
        if (!(fd == -1 && errno == ENOSYS)) {
            /* Success, or a failure other than "accept4 not implemented" */
            return fd;
        }
    #endif

        fd = accept(s, addr, addrlen);
        if (fd >= 0) {
            qemu_set_cloexec(fd);
        }

        return fd;
    }
    504 
    /*
     * A variant of send(2) that handles partial sends.
     *
     * Keeps calling send() (with flags 0) until all @count bytes of @buf
     * have been sent on socket @s or an error other than EINTR occurs.
     *
     * Returns the number of bytes transferred.  When that is fewer than
     * @count, errno holds the error from the failing send().
     */
    ssize_t qemu_send_full(int s, const void *buf, size_t count)
    {
        /* Byte cursor: pointer arithmetic on 'void *' is not valid ISO C */
        const char *cursor = buf;
        ssize_t total = 0;

        while (count) {
            ssize_t ret = send(s, cursor, count, 0);

            if (ret < 0) {
                if (errno == EINTR) {
                    continue;
                }
                break;
            }

            count -= ret;
            cursor += ret;
            total += ret;
        }

        return total;
    }
    526 
    527 void qemu_set_hw_version(const char *version)
    528 {
    529     hw_version = version;
    530 }
    531 
    532 const char *qemu_hw_version(void)
    533 {
    534     return hw_version;
    535 }
    536 
    #ifdef _WIN32
    /* atexit() handler registered by socket_init(): shuts Winsock down. */
    static void socket_cleanup(void)
    {
        WSACleanup();
    }
    #endif
    543 
    /*
     * Initialise the host socket layer.
     *
     * On Windows this starts Winsock 2.2 and registers socket_cleanup() to
     * run at exit; on other hosts there is nothing to do.
     *
     * Returns 0 on success, -1 if WSAStartup() fails (the error code is
     * printed to stderr).
     */
    int socket_init(void)
    {
    #ifdef _WIN32
        WSADATA Data;
        int ret;

        ret = WSAStartup(MAKEWORD(2, 2), &Data);
        if (ret != 0) {
            /*
             * WSAStartup() returns the error code directly.  Winsock is not
             * initialised at this point, so WSAGetLastError() must not be
             * used to fetch it.
             */
            fprintf(stderr, "WSAStartup: %d\n", ret);
            return -1;
        }
        atexit(socket_cleanup);
    #endif
        return 0;
    }
    560 
    561 
    562 #ifndef CONFIG_IOVEC
    /*
     * Emulate readv()/writev() with one read()/write() call per segment.
     *
     * Walks the @iov_cnt segments of @iov in order and transfers each one
     * completely before moving on; @do_write selects write() over read().
     * Calls interrupted by EINTR are retried.  Zero-length segments are
     * skipped rather than handed to read()/write(), because a 0-byte
     * transfer returns 0 and would be mistaken for end of file.  A
     * non-positive @iov_cnt transfers nothing.
     *
     * Returns the total number of bytes transferred.  The transfer stops
     * early when read()/write() returns 0 or fails; -1 is returned only if
     * the failure happened before any byte was transferred, in which case
     * errno is the one set by read()/write().
     */
    static ssize_t
    readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
    {
        ssize_t total = 0;
        size_t off = 0;     /* bytes of iov[i] already transferred */
        int i = 0;

        while (i < iov_cnt) {
            size_t remaining = iov[i].iov_len - off;
            char *pos;
            ssize_t r;

            if (remaining == 0) {
                /* Segment is empty or fully transferred: go to the next one */
                off = 0;
                i++;
                continue;
            }

            /* Byte cursor: pointer arithmetic on 'void *' is not ISO C */
            pos = (char *)iov[i].iov_base + off;
            r = do_write ? write(fd, pos, remaining)
                         : read(fd, pos, remaining);

            if (r > 0) {
                total += r;
                off += r;
            } else if (r == 0) {
                /* End of file, or nothing could be written */
                break;
            } else if (errno != EINTR) {
                /*
                 * Some "other" error: report it only if no data has been
                 * processed, otherwise return the partial count.
                 */
                if (total == 0) {
                    total = -1;
                }
                break;
            }
            /* EINTR: retry the same segment at the same offset */
        }

        return total;
    }
    596 
    /*
     * Fallback readv() for hosts without CONFIG_IOVEC: fills the @iov_cnt
     * segments of @iov from @fd through readv_writev().
     */
    ssize_t
    readv(int fd, const struct iovec *iov, int iov_cnt)
    {
        const bool do_write = false;

        return readv_writev(fd, iov, iov_cnt, do_write);
    }
    602 
    /*
     * Fallback writev() for hosts without CONFIG_IOVEC: writes the @iov_cnt
     * segments of @iov to @fd through readv_writev().
     */
    ssize_t
    writev(int fd, const struct iovec *iov, int iov_cnt)
    {
        const bool do_write = true;

        return readv_writev(fd, iov, iov_cnt, do_write);
    }
    608 #endif
    609 
    /*
     * Make sure data goes to disk, but where possible do not bother writing
     * out the inode just for timestamp updates.
     *
     * Builds with CONFIG_FDATASYNC call fdatasync(); all others fall back to
     * fsync(), since many operating systems do not provide fdatasync().
     *
     * Returns the result of the underlying call.
     */
    int qemu_fdatasync(int fd)
    {
    #ifndef CONFIG_FDATASYNC
        return fsync(fd);
    #else
        return fdatasync(fd);
    #endif
    }