passthrough_ll.c (121610B)
1 /* 2 * FUSE: Filesystem in Userspace 3 * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu> 4 * 5 * This program can be distributed under the terms of the GNU GPLv2. 6 * See the file COPYING. 7 */ 8 9 /* 10 * 11 * This file system mirrors the existing file system hierarchy of the 12 * system, starting at the root file system. This is implemented by 13 * just "passing through" all requests to the corresponding user-space 14 * libc functions. In contrast to passthrough.c and passthrough_fh.c, 15 * this implementation uses the low-level API. Its performance should 16 * be the least bad among the three, but many operations are not 17 * implemented. In particular, it is not possible to remove files (or 18 * directories) because the code necessary to defer actual removal 19 * until the file is not opened anymore would make the example much 20 * more complicated. 21 * 22 * When writeback caching is enabled (-o writeback mount option), it 23 * is only possible to write to files for which the mounting user has 24 * read permissions. This is because the writeback cache requires the 25 * kernel to be able to issue read requests for all files (which the 26 * passthrough filesystem cannot satisfy if it can't read the file in 27 * the underlying filesystem). 28 * 29 * Compile with: 30 * 31 * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o 32 * passthrough_ll 33 * 34 * ## Source code ## 35 * \include passthrough_ll.c 36 */ 37 38 #include "qemu/osdep.h" 39 #include "qemu/timer.h" 40 #include "qemu-version.h" 41 #include "qemu/help-texts.h" 42 #include "fuse_virtio.h" 43 #include "fuse_log.h" 44 #include "fuse_lowlevel.h" 45 #include "standard-headers/linux/fuse.h" 46 #include <cap-ng.h> 47 #include <dirent.h> 48 #include <pthread.h> 49 #include <sys/file.h> 50 #include <sys/mount.h> 51 #include <sys/prctl.h> 52 #include <sys/resource.h> 53 #include <sys/syscall.h> 54 #include <sys/wait.h> 55 #include <sys/xattr.h> 56 #include <syslog.h> 57 #include <grp.h> 58 59 #include "qemu/cutils.h" 60 #include "passthrough_helpers.h" 61 #include "passthrough_seccomp.h" 62 63 /* Keep track of inode posix locks for each owner. */ 64 struct lo_inode_plock { 65 uint64_t lock_owner; 66 int fd; /* fd for OFD locks */ 67 }; 68 69 struct lo_map_elem { 70 union { 71 struct lo_inode *inode; 72 struct lo_dirp *dirp; 73 int fd; 74 ssize_t freelist; 75 }; 76 bool in_use; 77 }; 78 79 /* Maps FUSE fh or ino values to internal objects */ 80 struct lo_map { 81 struct lo_map_elem *elems; 82 size_t nelems; 83 ssize_t freelist; 84 }; 85 86 struct lo_key { 87 ino_t ino; 88 dev_t dev; 89 uint64_t mnt_id; 90 }; 91 92 struct lo_inode { 93 int fd; 94 95 /* 96 * Atomic reference count for this object. The nlookup field holds a 97 * reference and release it when nlookup reaches 0. 98 */ 99 gint refcount; 100 101 struct lo_key key; 102 103 /* 104 * This counter keeps the inode alive during the FUSE session. 105 * Incremented when the FUSE inode number is sent in a reply 106 * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is 107 * released by a FUSE_FORGET request. 108 * 109 * Note that this value is untrusted because the client can manipulate 110 * it arbitrarily using FUSE_FORGET requests. 111 * 112 * Protected by lo->mutex. 113 */ 114 uint64_t nlookup; 115 116 fuse_ino_t fuse_ino; 117 pthread_mutex_t plock_mutex; 118 GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ 119 120 mode_t filetype; 121 }; 122 123 struct lo_cred { 124 uid_t euid; 125 gid_t egid; 126 mode_t umask; 127 }; 128 129 enum { 130 CACHE_NONE, 131 CACHE_AUTO, 132 CACHE_ALWAYS, 133 }; 134 135 enum { 136 SANDBOX_NAMESPACE, 137 SANDBOX_CHROOT, 138 }; 139 140 typedef struct xattr_map_entry { 141 char *key; 142 char *prepend; 143 unsigned int flags; 144 } XattrMapEntry; 145 146 struct lo_data { 147 pthread_mutex_t mutex; 148 int sandbox; 149 int debug; 150 int writeback; 151 int flock; 152 int posix_lock; 153 int xattr; 154 char *xattrmap; 155 char *xattr_security_capability; 156 char *source; 157 char *modcaps; 158 double timeout; 159 int cache; 160 int timeout_set; 161 int readdirplus_set; 162 int readdirplus_clear; 163 int allow_direct_io; 164 int announce_submounts; 165 bool use_statx; 166 struct lo_inode root; 167 GHashTable *inodes; /* protected by lo->mutex */ 168 struct lo_map ino_map; /* protected by lo->mutex */ 169 struct lo_map dirp_map; /* protected by lo->mutex */ 170 struct lo_map fd_map; /* protected by lo->mutex */ 171 XattrMapEntry *xattr_map_list; 172 size_t xattr_map_nentries; 173 174 /* An O_PATH file descriptor to /proc/self/fd/ */ 175 int proc_self_fd; 176 /* An O_PATH file descriptor to /proc/self/task/ */ 177 int proc_self_task; 178 int user_killpriv_v2, killpriv_v2; 179 /* If set, virtiofsd is responsible for setting umask during creation */ 180 bool change_umask; 181 int user_posix_acl, posix_acl; 182 /* Keeps track if /proc/<pid>/attr/fscreate should be used or not */ 183 bool use_fscreate; 184 int user_security_label; 185 }; 186 187 static const struct fuse_opt lo_opts[] = { 188 { "sandbox=namespace", 189 offsetof(struct lo_data, sandbox), 190 SANDBOX_NAMESPACE }, 191 { "sandbox=chroot", 192 offsetof(struct lo_data, sandbox), 193 SANDBOX_CHROOT }, 194 { "writeback", offsetof(struct lo_data, writeback), 1 }, 195 { "no_writeback", offsetof(struct lo_data, writeback), 0 }, 196 { "source=%s", offsetof(struct lo_data, source), 0 }, 197 { "flock", offsetof(struct lo_data, flock), 1 }, 198 { "no_flock", offsetof(struct lo_data, flock), 0 }, 199 { "posix_lock", offsetof(struct lo_data, posix_lock), 1 }, 200 { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 }, 201 { "xattr", offsetof(struct lo_data, xattr), 1 }, 202 { "no_xattr", offsetof(struct lo_data, xattr), 0 }, 203 { "xattrmap=%s", offsetof(struct lo_data, xattrmap), 0 }, 204 { "modcaps=%s", offsetof(struct lo_data, modcaps), 0 }, 205 { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, 206 { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, 207 { "cache=none", offsetof(struct lo_data, cache), CACHE_NONE }, 208 { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO }, 209 { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, 210 { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, 211 { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, 212 { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 }, 213 { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 }, 214 { "announce_submounts", offsetof(struct lo_data, announce_submounts), 1 }, 215 { "killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 1 }, 216 { "no_killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 0 }, 217 { "posix_acl", offsetof(struct lo_data, user_posix_acl), 1 }, 218 { "no_posix_acl", offsetof(struct lo_data, user_posix_acl), 0 }, 219 { "security_label", offsetof(struct lo_data, user_security_label), 1 }, 220 { "no_security_label", offsetof(struct lo_data, user_security_label), 0 }, 221 FUSE_OPT_END 222 }; 223 static bool use_syslog = false; 224 static int current_log_level; 225 static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, 226 uint64_t n); 227 228 static struct { 229 pthread_mutex_t mutex; 230 void *saved; 231 } cap; 232 /* That we loaded cap-ng in the current thread from the saved */ 233 static __thread bool cap_loaded = 0; 234 235 static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st, 236 uint64_t mnt_id); 237 static int xattr_map_client(const struct lo_data *lo, const char *client_name, 238 char **out_name); 239 240 #define FCHDIR_NOFAIL(fd) do { \ 241 int fchdir_res = fchdir(fd); \ 242 assert(fchdir_res == 0); \ 243 } while (0) 244 245 static bool is_dot_or_dotdot(const char *name) 246 { 247 return name[0] == '.' && 248 (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); 249 } 250 251 /* Is `path` a single path component that is not "." or ".."? */ 252 static bool is_safe_path_component(const char *path) 253 { 254 if (strchr(path, '/')) { 255 return false; 256 } 257 258 return !is_dot_or_dotdot(path); 259 } 260 261 static bool is_empty(const char *name) 262 { 263 return name[0] == '\0'; 264 } 265 266 static struct lo_data *lo_data(fuse_req_t req) 267 { 268 return (struct lo_data *)fuse_req_userdata(req); 269 } 270 271 /* 272 * Tries to figure out if /proc/<pid>/attr/fscreate is usable or not. With 273 * selinux=0, read from fscreate returns -EINVAL. 274 * 275 * TODO: Link with libselinux and use is_selinux_enabled() instead down 276 * the line. It probably will be more reliable indicator. 277 */ 278 static bool is_fscreate_usable(struct lo_data *lo) 279 { 280 char procname[64]; 281 int fscreate_fd; 282 size_t bytes_read; 283 284 sprintf(procname, "%ld/attr/fscreate", syscall(SYS_gettid)); 285 fscreate_fd = openat(lo->proc_self_task, procname, O_RDWR); 286 if (fscreate_fd == -1) { 287 return false; 288 } 289 290 bytes_read = read(fscreate_fd, procname, 64); 291 close(fscreate_fd); 292 if (bytes_read == -1) { 293 return false; 294 } 295 return true; 296 } 297 298 /* Helpers to set/reset fscreate */ 299 static int open_set_proc_fscreate(struct lo_data *lo, const void *ctx, 300 size_t ctxlen, int *fd) 301 { 302 char procname[64]; 303 int fscreate_fd, err = 0; 304 size_t written; 305 306 sprintf(procname, "%ld/attr/fscreate", syscall(SYS_gettid)); 307 fscreate_fd = openat(lo->proc_self_task, procname, O_WRONLY); 308 err = fscreate_fd == -1 ? errno : 0; 309 if (err) { 310 return err; 311 } 312 313 written = write(fscreate_fd, ctx, ctxlen); 314 err = written == -1 ? errno : 0; 315 if (err) { 316 goto out; 317 } 318 319 *fd = fscreate_fd; 320 return 0; 321 out: 322 close(fscreate_fd); 323 return err; 324 } 325 326 static void close_reset_proc_fscreate(int fd) 327 { 328 if ((write(fd, NULL, 0)) == -1) { 329 fuse_log(FUSE_LOG_WARNING, "Failed to reset fscreate. err=%d\n", errno); 330 } 331 close(fd); 332 return; 333 } 334 335 /* 336 * Load capng's state from our saved state if the current thread 337 * hadn't previously been loaded. 338 * returns 0 on success 339 */ 340 static int load_capng(void) 341 { 342 if (!cap_loaded) { 343 pthread_mutex_lock(&cap.mutex); 344 capng_restore_state(&cap.saved); 345 /* 346 * restore_state free's the saved copy 347 * so make another. 348 */ 349 cap.saved = capng_save_state(); 350 if (!cap.saved) { 351 pthread_mutex_unlock(&cap.mutex); 352 fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); 353 return -EINVAL; 354 } 355 pthread_mutex_unlock(&cap.mutex); 356 357 /* 358 * We want to use the loaded state for our pid, 359 * not the original 360 */ 361 capng_setpid(syscall(SYS_gettid)); 362 cap_loaded = true; 363 } 364 return 0; 365 } 366 367 /* 368 * Helpers for dropping and regaining effective capabilities. Returns 0 369 * on success, error otherwise 370 */ 371 static int drop_effective_cap(const char *cap_name, bool *cap_dropped) 372 { 373 int cap, ret; 374 375 cap = capng_name_to_capability(cap_name); 376 if (cap < 0) { 377 ret = errno; 378 fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", 379 cap_name, strerror(errno)); 380 goto out; 381 } 382 383 if (load_capng()) { 384 ret = errno; 385 fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); 386 goto out; 387 } 388 389 /* We dont have this capability in effective set already. */ 390 if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) { 391 ret = 0; 392 goto out; 393 } 394 395 if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) { 396 ret = errno; 397 fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n"); 398 goto out; 399 } 400 401 if (capng_apply(CAPNG_SELECT_CAPS)) { 402 ret = errno; 403 fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n"); 404 goto out; 405 } 406 407 ret = 0; 408 if (cap_dropped) { 409 *cap_dropped = true; 410 } 411 412 out: 413 return ret; 414 } 415 416 static int gain_effective_cap(const char *cap_name) 417 { 418 int cap; 419 int ret = 0; 420 421 cap = capng_name_to_capability(cap_name); 422 if (cap < 0) { 423 ret = errno; 424 fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", 425 cap_name, strerror(errno)); 426 goto out; 427 } 428 429 if (load_capng()) { 430 ret = errno; 431 fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); 432 goto out; 433 } 434 435 if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) { 436 ret = errno; 437 fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n"); 438 goto out; 439 } 440 441 if (capng_apply(CAPNG_SELECT_CAPS)) { 442 ret = errno; 443 fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n"); 444 goto out; 445 } 446 ret = 0; 447 448 out: 449 return ret; 450 } 451 452 /* 453 * The host kernel normally drops security.capability xattr's on 454 * any write, however if we're remapping xattr names we need to drop 455 * whatever the clients security.capability is actually stored as. 456 */ 457 static int drop_security_capability(const struct lo_data *lo, int fd) 458 { 459 if (!lo->xattr_security_capability) { 460 /* We didn't remap the name, let the host kernel do it */ 461 return 0; 462 } 463 if (!fremovexattr(fd, lo->xattr_security_capability)) { 464 /* All good */ 465 return 0; 466 } 467 468 switch (errno) { 469 case ENODATA: 470 /* Attribute didn't exist, that's fine */ 471 return 0; 472 473 case ENOTSUP: 474 /* FS didn't support attribute anyway, also fine */ 475 return 0; 476 477 default: 478 /* Hmm other error */ 479 return errno; 480 } 481 } 482 483 static void lo_map_init(struct lo_map *map) 484 { 485 map->elems = NULL; 486 map->nelems = 0; 487 map->freelist = -1; 488 } 489 490 static void lo_map_destroy(struct lo_map *map) 491 { 492 g_free(map->elems); 493 } 494 495 static int lo_map_grow(struct lo_map *map, size_t new_nelems) 496 { 497 struct lo_map_elem *new_elems; 498 size_t i; 499 500 if (new_nelems <= map->nelems) { 501 return 1; 502 } 503 504 new_elems = g_try_realloc_n(map->elems, new_nelems, sizeof(map->elems[0])); 505 if (!new_elems) { 506 return 0; 507 } 508 509 for (i = map->nelems; i < new_nelems; i++) { 510 new_elems[i].freelist = i + 1; 511 new_elems[i].in_use = false; 512 } 513 new_elems[new_nelems - 1].freelist = -1; 514 515 map->elems = new_elems; 516 map->freelist = map->nelems; 517 map->nelems = new_nelems; 518 return 1; 519 } 520 521 static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map) 522 { 523 struct lo_map_elem *elem; 524 525 if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) { 526 return NULL; 527 } 528 529 elem = &map->elems[map->freelist]; 530 map->freelist = elem->freelist; 531 532 elem->in_use = true; 533 534 return elem; 535 } 536 537 static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key) 538 { 539 ssize_t *prev; 540 541 if (!lo_map_grow(map, key + 1)) { 542 return NULL; 543 } 544 545 for (prev = &map->freelist; *prev != -1; 546 prev = &map->elems[*prev].freelist) { 547 if (*prev == key) { 548 struct lo_map_elem *elem = &map->elems[key]; 549 550 *prev = elem->freelist; 551 elem->in_use = true; 552 return elem; 553 } 554 } 555 return NULL; 556 } 557 558 static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key) 559 { 560 if (key >= map->nelems) { 561 return NULL; 562 } 563 if (!map->elems[key].in_use) { 564 return NULL; 565 } 566 return &map->elems[key]; 567 } 568 569 static void lo_map_remove(struct lo_map *map, size_t key) 570 { 571 struct lo_map_elem *elem; 572 573 if (key >= map->nelems) { 574 return; 575 } 576 577 elem = &map->elems[key]; 578 if (!elem->in_use) { 579 return; 580 } 581 582 elem->in_use = false; 583 584 elem->freelist = map->freelist; 585 map->freelist = key; 586 } 587 588 /* Assumes lo->mutex is held */ 589 static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd) 590 { 591 struct lo_map_elem *elem; 592 593 elem = lo_map_alloc_elem(&lo->fd_map); 594 if (!elem) { 595 return -1; 596 } 597 598 elem->fd = fd; 599 return elem - lo->fd_map.elems; 600 } 601 602 /* Assumes lo->mutex is held */ 603 static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) 604 { 605 struct lo_map_elem *elem; 606 607 elem = lo_map_alloc_elem(&lo_data(req)->dirp_map); 608 if (!elem) { 609 return -1; 610 } 611 612 elem->dirp = dirp; 613 return elem - lo_data(req)->dirp_map.elems; 614 } 615 616 /* Assumes lo->mutex is held */ 617 static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) 618 { 619 struct lo_map_elem *elem; 620 621 elem = lo_map_alloc_elem(&lo_data(req)->ino_map); 622 if (!elem) { 623 return -1; 624 } 625 626 elem->inode = inode; 627 return elem - lo_data(req)->ino_map.elems; 628 } 629 630 static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) 631 { 632 struct lo_inode *inode = *inodep; 633 634 if (!inode) { 635 return; 636 } 637 638 *inodep = NULL; 639 640 if (g_atomic_int_dec_and_test(&inode->refcount)) { 641 close(inode->fd); 642 free(inode); 643 } 644 } 645 646 /* Caller must release refcount using lo_inode_put() */ 647 static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) 648 { 649 struct lo_data *lo = lo_data(req); 650 struct lo_map_elem *elem; 651 652 pthread_mutex_lock(&lo->mutex); 653 elem = lo_map_get(&lo->ino_map, ino); 654 if (elem) { 655 g_atomic_int_inc(&elem->inode->refcount); 656 } 657 pthread_mutex_unlock(&lo->mutex); 658 659 if (!elem) { 660 return NULL; 661 } 662 663 return elem->inode; 664 } 665 666 /* 667 * TODO Remove this helper and force callers to hold an inode refcount until 668 * they are done with the fd. This will be done in a later patch to make 669 * review easier. 670 */ 671 static int lo_fd(fuse_req_t req, fuse_ino_t ino) 672 { 673 struct lo_inode *inode = lo_inode(req, ino); 674 int fd; 675 676 if (!inode) { 677 return -1; 678 } 679 680 fd = inode->fd; 681 lo_inode_put(lo_data(req), &inode); 682 return fd; 683 } 684 685 /* 686 * Open a file descriptor for an inode. Returns -EBADF if the inode is not a 687 * regular file or a directory. 688 * 689 * Use this helper function instead of raw openat(2) to prevent security issues 690 * when a malicious client opens special files such as block device nodes. 691 * Symlink inodes are also rejected since symlinks must already have been 692 * traversed on the client side. 693 */ 694 static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode, 695 int open_flags) 696 { 697 g_autofree char *fd_str = g_strdup_printf("%d", inode->fd); 698 int fd; 699 700 if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) { 701 return -EBADF; 702 } 703 704 /* 705 * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier 706 * that the inode is not a special file but if an external process races 707 * with us then symlinks are traversed here. It is not possible to escape 708 * the shared directory since it is mounted as "/" though. 709 */ 710 fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW); 711 if (fd < 0) { 712 return -errno; 713 } 714 return fd; 715 } 716 717 static void lo_init(void *userdata, struct fuse_conn_info *conn) 718 { 719 struct lo_data *lo = (struct lo_data *)userdata; 720 721 if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) { 722 conn->want |= FUSE_CAP_EXPORT_SUPPORT; 723 } 724 725 if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { 726 fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); 727 conn->want |= FUSE_CAP_WRITEBACK_CACHE; 728 } 729 if (conn->capable & FUSE_CAP_FLOCK_LOCKS) { 730 if (lo->flock) { 731 fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); 732 conn->want |= FUSE_CAP_FLOCK_LOCKS; 733 } else { 734 fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n"); 735 conn->want &= ~FUSE_CAP_FLOCK_LOCKS; 736 } 737 } 738 739 if (conn->capable & FUSE_CAP_POSIX_LOCKS) { 740 if (lo->posix_lock) { 741 fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n"); 742 conn->want |= FUSE_CAP_POSIX_LOCKS; 743 } else { 744 fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n"); 745 conn->want &= ~FUSE_CAP_POSIX_LOCKS; 746 } 747 } 748 749 if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || 750 lo->readdirplus_clear) { 751 fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); 752 conn->want &= ~FUSE_CAP_READDIRPLUS; 753 } 754 755 if (!(conn->capable & FUSE_CAP_SUBMOUNTS) && lo->announce_submounts) { 756 fuse_log(FUSE_LOG_WARNING, "lo_init: Cannot announce submounts, client " 757 "does not support it\n"); 758 lo->announce_submounts = false; 759 } 760 761 if (lo->user_killpriv_v2 == 1) { 762 /* 763 * User explicitly asked for this option. Enable it unconditionally. 764 * If connection does not have this capability, it should fail 765 * in fuse_lowlevel.c 766 */ 767 fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling killpriv_v2\n"); 768 conn->want |= FUSE_CAP_HANDLE_KILLPRIV_V2; 769 lo->killpriv_v2 = 1; 770 } else { 771 /* 772 * Either user specified to disable killpriv_v2, or did not 773 * specify anything. Disable killpriv_v2 in both the cases. 774 */ 775 fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling killpriv_v2\n"); 776 conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2; 777 lo->killpriv_v2 = 0; 778 } 779 780 if (lo->user_posix_acl == 1) { 781 /* 782 * User explicitly asked for this option. Enable it unconditionally. 783 * If connection does not have this capability, print error message 784 * now. It will fail later in fuse_lowlevel.c 785 */ 786 if (!(conn->capable & FUSE_CAP_POSIX_ACL) || 787 !(conn->capable & FUSE_CAP_DONT_MASK) || 788 !(conn->capable & FUSE_CAP_SETXATTR_EXT)) { 789 fuse_log(FUSE_LOG_ERR, "lo_init: Can not enable posix acl." 790 " kernel does not support FUSE_POSIX_ACL, FUSE_DONT_MASK" 791 " or FUSE_SETXATTR_EXT capability.\n"); 792 } else { 793 fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling posix acl\n"); 794 } 795 796 conn->want |= FUSE_CAP_POSIX_ACL | FUSE_CAP_DONT_MASK | 797 FUSE_CAP_SETXATTR_EXT; 798 lo->change_umask = true; 799 lo->posix_acl = true; 800 } else { 801 /* User either did not specify anything or wants it disabled */ 802 fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix_acl\n"); 803 conn->want &= ~FUSE_CAP_POSIX_ACL; 804 } 805 806 if (lo->user_security_label == 1) { 807 if (!(conn->capable & FUSE_CAP_SECURITY_CTX)) { 808 fuse_log(FUSE_LOG_ERR, "lo_init: Can not enable security label." 809 " kernel does not support FUSE_SECURITY_CTX capability.\n"); 810 } 811 conn->want |= FUSE_CAP_SECURITY_CTX; 812 } else { 813 fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling security label\n"); 814 conn->want &= ~FUSE_CAP_SECURITY_CTX; 815 } 816 } 817 818 static void lo_getattr(fuse_req_t req, fuse_ino_t ino, 819 struct fuse_file_info *fi) 820 { 821 int res; 822 struct stat buf; 823 struct lo_data *lo = lo_data(req); 824 825 (void)fi; 826 827 res = 828 fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); 829 if (res == -1) { 830 return (void)fuse_reply_err(req, errno); 831 } 832 833 fuse_reply_attr(req, &buf, lo->timeout); 834 } 835 836 static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) 837 { 838 struct lo_data *lo = lo_data(req); 839 struct lo_map_elem *elem; 840 841 pthread_mutex_lock(&lo->mutex); 842 elem = lo_map_get(&lo->fd_map, fi->fh); 843 pthread_mutex_unlock(&lo->mutex); 844 845 if (!elem) { 846 return -1; 847 } 848 849 return elem->fd; 850 } 851 852 static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, 853 int valid, struct fuse_file_info *fi) 854 { 855 int saverr; 856 char procname[64]; 857 struct lo_data *lo = lo_data(req); 858 struct lo_inode *inode; 859 int ifd; 860 int res; 861 int fd = -1; 862 863 inode = lo_inode(req, ino); 864 if (!inode) { 865 fuse_reply_err(req, EBADF); 866 return; 867 } 868 869 ifd = inode->fd; 870 871 /* If fi->fh is invalid we'll report EBADF later */ 872 if (fi) { 873 fd = lo_fi_fd(req, fi); 874 } 875 876 if (valid & FUSE_SET_ATTR_MODE) { 877 if (fi) { 878 res = fchmod(fd, attr->st_mode); 879 } else { 880 sprintf(procname, "%i", ifd); 881 res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0); 882 } 883 if (res == -1) { 884 saverr = errno; 885 goto out_err; 886 } 887 } 888 if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { 889 uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1; 890 gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : (gid_t)-1; 891 892 saverr = drop_security_capability(lo, ifd); 893 if (saverr) { 894 goto out_err; 895 } 896 897 res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); 898 if (res == -1) { 899 saverr = errno; 900 goto out_err; 901 } 902 } 903 if (valid & FUSE_SET_ATTR_SIZE) { 904 int truncfd; 905 bool kill_suidgid; 906 bool cap_fsetid_dropped = false; 907 908 kill_suidgid = lo->killpriv_v2 && (valid & FUSE_SET_ATTR_KILL_SUIDGID); 909 if (fi) { 910 truncfd = fd; 911 } else { 912 truncfd = lo_inode_open(lo, inode, O_RDWR); 913 if (truncfd < 0) { 914 saverr = -truncfd; 915 goto out_err; 916 } 917 } 918 919 saverr = drop_security_capability(lo, truncfd); 920 if (saverr) { 921 if (!fi) { 922 close(truncfd); 923 } 924 goto out_err; 925 } 926 927 if (kill_suidgid) { 928 res = drop_effective_cap("FSETID", &cap_fsetid_dropped); 929 if (res != 0) { 930 saverr = res; 931 if (!fi) { 932 close(truncfd); 933 } 934 goto out_err; 935 } 936 } 937 938 res = ftruncate(truncfd, attr->st_size); 939 saverr = res == -1 ? errno : 0; 940 941 if (cap_fsetid_dropped) { 942 if (gain_effective_cap("FSETID")) { 943 fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n"); 944 } 945 } 946 if (!fi) { 947 close(truncfd); 948 } 949 if (res == -1) { 950 goto out_err; 951 } 952 } 953 if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { 954 struct timespec tv[2]; 955 956 tv[0].tv_sec = 0; 957 tv[1].tv_sec = 0; 958 tv[0].tv_nsec = UTIME_OMIT; 959 tv[1].tv_nsec = UTIME_OMIT; 960 961 if (valid & FUSE_SET_ATTR_ATIME_NOW) { 962 tv[0].tv_nsec = UTIME_NOW; 963 } else if (valid & FUSE_SET_ATTR_ATIME) { 964 tv[0] = attr->st_atim; 965 } 966 967 if (valid & FUSE_SET_ATTR_MTIME_NOW) { 968 tv[1].tv_nsec = UTIME_NOW; 969 } else if (valid & FUSE_SET_ATTR_MTIME) { 970 tv[1] = attr->st_mtim; 971 } 972 973 if (fi) { 974 res = futimens(fd, tv); 975 } else { 976 sprintf(procname, "%i", inode->fd); 977 res = utimensat(lo->proc_self_fd, procname, tv, 0); 978 } 979 if (res == -1) { 980 saverr = errno; 981 goto out_err; 982 } 983 } 984 lo_inode_put(lo, &inode); 985 986 return lo_getattr(req, ino, fi); 987 988 out_err: 989 lo_inode_put(lo, &inode); 990 fuse_reply_err(req, saverr); 991 } 992 993 static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st, 994 uint64_t mnt_id) 995 { 996 struct lo_inode *p; 997 struct lo_key key = { 998 .ino = st->st_ino, 999 .dev = st->st_dev, 1000 .mnt_id = mnt_id, 1001 }; 1002 1003 pthread_mutex_lock(&lo->mutex); 1004 p = g_hash_table_lookup(lo->inodes, &key); 1005 if (p) { 1006 assert(p->nlookup > 0); 1007 p->nlookup++; 1008 g_atomic_int_inc(&p->refcount); 1009 } 1010 pthread_mutex_unlock(&lo->mutex); 1011 1012 return p; 1013 } 1014 1015 /* value_destroy_func for posix_locks GHashTable */ 1016 static void posix_locks_value_destroy(gpointer data) 1017 { 1018 struct lo_inode_plock *plock = data; 1019 1020 /* 1021 * We had used open() for locks and had only one fd. So 1022 * closing this fd should release all OFD locks. 1023 */ 1024 close(plock->fd); 1025 free(plock); 1026 } 1027 1028 static int do_statx(struct lo_data *lo, int dirfd, const char *pathname, 1029 struct stat *statbuf, int flags, uint64_t *mnt_id) 1030 { 1031 int res; 1032 1033 #if defined(CONFIG_STATX) && defined(CONFIG_STATX_MNT_ID) 1034 if (lo->use_statx) { 1035 struct statx statxbuf; 1036 1037 res = statx(dirfd, pathname, flags, STATX_BASIC_STATS | STATX_MNT_ID, 1038 &statxbuf); 1039 if (!res) { 1040 memset(statbuf, 0, sizeof(*statbuf)); 1041 statbuf->st_dev = makedev(statxbuf.stx_dev_major, 1042 statxbuf.stx_dev_minor); 1043 statbuf->st_ino = statxbuf.stx_ino; 1044 statbuf->st_mode = statxbuf.stx_mode; 1045 statbuf->st_nlink = statxbuf.stx_nlink; 1046 statbuf->st_uid = statxbuf.stx_uid; 1047 statbuf->st_gid = statxbuf.stx_gid; 1048 statbuf->st_rdev = makedev(statxbuf.stx_rdev_major, 1049 statxbuf.stx_rdev_minor); 1050 statbuf->st_size = statxbuf.stx_size; 1051 statbuf->st_blksize = statxbuf.stx_blksize; 1052 statbuf->st_blocks = statxbuf.stx_blocks; 1053 statbuf->st_atim.tv_sec = statxbuf.stx_atime.tv_sec; 1054 statbuf->st_atim.tv_nsec = statxbuf.stx_atime.tv_nsec; 1055 statbuf->st_mtim.tv_sec = statxbuf.stx_mtime.tv_sec; 1056 statbuf->st_mtim.tv_nsec = statxbuf.stx_mtime.tv_nsec; 1057 statbuf->st_ctim.tv_sec = statxbuf.stx_ctime.tv_sec; 1058 statbuf->st_ctim.tv_nsec = statxbuf.stx_ctime.tv_nsec; 1059 1060 if (statxbuf.stx_mask & STATX_MNT_ID) { 1061 *mnt_id = statxbuf.stx_mnt_id; 1062 } else { 1063 *mnt_id = 0; 1064 } 1065 return 0; 1066 } else if (errno != ENOSYS) { 1067 return -1; 1068 } 1069 lo->use_statx = false; 1070 /* fallback */ 1071 } 1072 #endif 1073 res = fstatat(dirfd, pathname, statbuf, flags); 1074 if (res == -1) { 1075 return -1; 1076 } 1077 *mnt_id = 0; 1078 1079 return 0; 1080 } 1081 1082 /* 1083 * Increments nlookup on the inode on success. unref_inode_lolocked() must be 1084 * called eventually to decrement nlookup again. If inodep is non-NULL, the 1085 * inode pointer is stored and the caller must call lo_inode_put(). 1086 */ 1087 static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, 1088 struct fuse_entry_param *e, 1089 struct lo_inode **inodep) 1090 { 1091 int newfd; 1092 int res; 1093 int saverr; 1094 uint64_t mnt_id; 1095 struct lo_data *lo = lo_data(req); 1096 struct lo_inode *inode = NULL; 1097 struct lo_inode *dir = lo_inode(req, parent); 1098 1099 if (inodep) { 1100 *inodep = NULL; /* in case there is an error */ 1101 } 1102 1103 /* 1104 * name_to_handle_at() and open_by_handle_at() can reach here with fuse 1105 * mount point in guest, but we don't have its inode info in the 1106 * ino_map. 1107 */ 1108 if (!dir) { 1109 return ENOENT; 1110 } 1111 1112 memset(e, 0, sizeof(*e)); 1113 e->attr_timeout = lo->timeout; 1114 e->entry_timeout = lo->timeout; 1115 1116 /* Do not allow escaping root directory */ 1117 if (dir == &lo->root && strcmp(name, "..") == 0) { 1118 name = "."; 1119 } 1120 1121 newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW); 1122 if (newfd == -1) { 1123 goto out_err; 1124 } 1125 1126 res = do_statx(lo, newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, 1127 &mnt_id); 1128 if (res == -1) { 1129 goto out_err; 1130 } 1131 1132 if (S_ISDIR(e->attr.st_mode) && lo->announce_submounts && 1133 (e->attr.st_dev != dir->key.dev || mnt_id != dir->key.mnt_id)) { 1134 e->attr_flags |= FUSE_ATTR_SUBMOUNT; 1135 } 1136 1137 inode = lo_find(lo, &e->attr, mnt_id); 1138 if (inode) { 1139 close(newfd); 1140 } else { 1141 inode = calloc(1, sizeof(struct lo_inode)); 1142 if (!inode) { 1143 goto out_err; 1144 } 1145 1146 /* cache only filetype */ 1147 inode->filetype = (e->attr.st_mode & S_IFMT); 1148 1149 /* 1150 * One for the caller and one for nlookup (released in 1151 * unref_inode_lolocked()) 1152 */ 1153 g_atomic_int_set(&inode->refcount, 2); 1154 1155 inode->nlookup = 1; 1156 inode->fd = newfd; 1157 inode->key.ino = e->attr.st_ino; 1158 inode->key.dev = e->attr.st_dev; 1159 inode->key.mnt_id = mnt_id; 1160 if (lo->posix_lock) { 1161 pthread_mutex_init(&inode->plock_mutex, NULL); 1162 inode->posix_locks = g_hash_table_new_full( 1163 g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); 1164 } 1165 pthread_mutex_lock(&lo->mutex); 1166 inode->fuse_ino = lo_add_inode_mapping(req, inode); 1167 g_hash_table_insert(lo->inodes, &inode->key, inode); 1168 pthread_mutex_unlock(&lo->mutex); 1169 } 1170 e->ino = inode->fuse_ino; 1171 1172 /* Transfer ownership of inode pointer to caller or drop it */ 1173 if (inodep) { 1174 *inodep = inode; 1175 } else { 1176 lo_inode_put(lo, &inode); 1177 } 1178 1179 lo_inode_put(lo, &dir); 1180 1181 fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, 1182 name, (unsigned long long)e->ino); 1183 1184 return 0; 1185 1186 out_err: 1187 saverr = errno; 1188 if (newfd != -1) { 1189 close(newfd); 1190 } 1191 lo_inode_put(lo, &inode); 1192 lo_inode_put(lo, &dir); 1193 return saverr; 1194 } 1195 1196 static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) 1197 { 1198 struct fuse_entry_param e; 1199 int err; 1200 1201 fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent, 1202 name); 1203 1204 if (is_empty(name)) { 1205 fuse_reply_err(req, ENOENT); 1206 return; 1207 } 1208 1209 /* 1210 * Don't use is_safe_path_component(), allow "." and ".." for NFS export 1211 * support. 1212 */ 1213 if (strchr(name, '/')) { 1214 fuse_reply_err(req, EINVAL); 1215 return; 1216 } 1217 1218 err = lo_do_lookup(req, parent, name, &e, NULL); 1219 if (err) { 1220 fuse_reply_err(req, err); 1221 } else { 1222 fuse_reply_entry(req, &e); 1223 } 1224 } 1225 1226 /* 1227 * On some archs, setres*id is limited to 2^16 but they 1228 * provide setres*id32 variants that allow 2^32. 1229 * Others just let setres*id do 2^32 anyway. 1230 */ 1231 #ifdef SYS_setresgid32 1232 #define OURSYS_setresgid SYS_setresgid32 1233 #else 1234 #define OURSYS_setresgid SYS_setresgid 1235 #endif 1236 1237 #ifdef SYS_setresuid32 1238 #define OURSYS_setresuid SYS_setresuid32 1239 #else 1240 #define OURSYS_setresuid SYS_setresuid 1241 #endif 1242 1243 static void drop_supplementary_groups(void) 1244 { 1245 int ret; 1246 1247 ret = getgroups(0, NULL); 1248 if (ret == -1) { 1249 fuse_log(FUSE_LOG_ERR, "getgroups() failed with error=%d:%s\n", 1250 errno, strerror(errno)); 1251 exit(1); 1252 } 1253 1254 if (!ret) { 1255 return; 1256 } 1257 1258 /* Drop all supplementary groups. We should not need it */ 1259 ret = setgroups(0, NULL); 1260 if (ret == -1) { 1261 fuse_log(FUSE_LOG_ERR, "setgroups() failed with error=%d:%s\n", 1262 errno, strerror(errno)); 1263 exit(1); 1264 } 1265 } 1266 1267 /* 1268 * Change to uid/gid of caller so that file is created with 1269 * ownership of caller. 1270 * TODO: What about selinux context? 1271 */ 1272 static int lo_change_cred(fuse_req_t req, struct lo_cred *old, 1273 bool change_umask) 1274 { 1275 int res; 1276 1277 old->euid = geteuid(); 1278 old->egid = getegid(); 1279 1280 res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1); 1281 if (res == -1) { 1282 return errno; 1283 } 1284 1285 res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1); 1286 if (res == -1) { 1287 int errno_save = errno; 1288 1289 syscall(OURSYS_setresgid, -1, old->egid, -1); 1290 return errno_save; 1291 } 1292 1293 if (change_umask) { 1294 old->umask = umask(req->ctx.umask); 1295 } 1296 return 0; 1297 } 1298 1299 /* Regain Privileges */ 1300 static void lo_restore_cred(struct lo_cred *old, bool restore_umask) 1301 { 1302 int res; 1303 1304 res = syscall(OURSYS_setresuid, -1, old->euid, -1); 1305 if (res == -1) { 1306 fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid); 1307 exit(1); 1308 } 1309 1310 res = syscall(OURSYS_setresgid, -1, old->egid, -1); 1311 if (res == -1) { 1312 fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid); 1313 exit(1); 1314 } 1315 1316 if (restore_umask) 1317 umask(old->umask); 1318 } 1319 1320 /* 1321 * A helper to change cred and drop capability. Returns 0 on success and 1322 * errno on error 1323 */ 1324 static int lo_drop_cap_change_cred(fuse_req_t req, struct lo_cred *old, 1325 bool change_umask, const char *cap_name, 1326 bool *cap_dropped) 1327 { 1328 int ret; 1329 bool __cap_dropped; 1330 1331 assert(cap_name); 1332 1333 ret = drop_effective_cap(cap_name, &__cap_dropped); 1334 if (ret) { 1335 return ret; 1336 } 1337 1338 ret = lo_change_cred(req, old, change_umask); 1339 if (ret) { 1340 if (__cap_dropped) { 1341 if (gain_effective_cap(cap_name)) { 1342 fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_%s\n", cap_name); 1343 } 1344 } 1345 } 1346 1347 if (cap_dropped) { 1348 *cap_dropped = __cap_dropped; 1349 } 1350 return ret; 1351 } 1352 1353 static void lo_restore_cred_gain_cap(struct lo_cred *old, bool restore_umask, 1354 const char *cap_name) 1355 { 1356 assert(cap_name); 1357 1358 lo_restore_cred(old, restore_umask); 1359 1360 if (gain_effective_cap(cap_name)) { 1361 fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_%s\n", cap_name); 1362 } 1363 } 1364 1365 static int do_mknod_symlink_secctx(fuse_req_t req, struct lo_inode *dir, 1366 const char *name, const char *secctx_name) 1367 { 1368 int path_fd, err; 1369 char procname[64]; 1370 struct lo_data *lo = lo_data(req); 1371 1372 if (!req->secctx.ctxlen) { 1373 return 0; 1374 } 1375 1376 /* Open newly created element with O_PATH */ 1377 path_fd = openat(dir->fd, name, O_PATH | O_NOFOLLOW); 1378 err = path_fd == -1 ? errno : 0; 1379 if (err) { 1380 return err; 1381 } 1382 sprintf(procname, "%i", path_fd); 1383 FCHDIR_NOFAIL(lo->proc_self_fd); 1384 /* Set security context. This is not atomic w.r.t file creation */ 1385 err = setxattr(procname, secctx_name, req->secctx.ctx, req->secctx.ctxlen, 1386 0); 1387 if (err) { 1388 err = errno; 1389 } 1390 FCHDIR_NOFAIL(lo->root.fd); 1391 close(path_fd); 1392 return err; 1393 } 1394 1395 static int do_mknod_symlink(fuse_req_t req, struct lo_inode *dir, 1396 const char *name, mode_t mode, dev_t rdev, 1397 const char *link) 1398 { 1399 int err, fscreate_fd = -1; 1400 const char *secctx_name = req->secctx.name; 1401 struct lo_cred old = {}; 1402 struct lo_data *lo = lo_data(req); 1403 char *mapped_name = NULL; 1404 bool secctx_enabled = req->secctx.ctxlen; 1405 bool do_fscreate = false; 1406 1407 if (secctx_enabled && lo->xattrmap) { 1408 err = xattr_map_client(lo, req->secctx.name, &mapped_name); 1409 if (err < 0) { 1410 return -err; 1411 } 1412 secctx_name = mapped_name; 1413 } 1414 1415 /* 1416 * If security xattr has not been remapped and selinux is enabled on 1417 * host, set fscreate and no need to do a setxattr() after file creation 1418 */ 1419 if (secctx_enabled && !mapped_name && lo->use_fscreate) { 1420 do_fscreate = true; 1421 err = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen, 1422 &fscreate_fd); 1423 if (err) { 1424 goto out; 1425 } 1426 } 1427 1428 err = lo_change_cred(req, &old, lo->change_umask && !S_ISLNK(mode)); 1429 if (err) { 1430 goto out; 1431 } 1432 1433 err = mknod_wrapper(dir->fd, name, link, mode, rdev); 1434 err = err == -1 ? errno : 0; 1435 lo_restore_cred(&old, lo->change_umask && !S_ISLNK(mode)); 1436 if (err) { 1437 goto out; 1438 } 1439 1440 if (!do_fscreate) { 1441 err = do_mknod_symlink_secctx(req, dir, name, secctx_name); 1442 if (err) { 1443 unlinkat(dir->fd, name, S_ISDIR(mode) ? AT_REMOVEDIR : 0); 1444 } 1445 } 1446 out: 1447 if (fscreate_fd != -1) { 1448 close_reset_proc_fscreate(fscreate_fd); 1449 } 1450 g_free(mapped_name); 1451 return err; 1452 } 1453 1454 static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, 1455 const char *name, mode_t mode, dev_t rdev, 1456 const char *link) 1457 { 1458 int saverr; 1459 struct lo_data *lo = lo_data(req); 1460 struct lo_inode *dir; 1461 struct fuse_entry_param e; 1462 1463 if (is_empty(name)) { 1464 fuse_reply_err(req, ENOENT); 1465 return; 1466 } 1467 1468 if (!is_safe_path_component(name)) { 1469 fuse_reply_err(req, EINVAL); 1470 return; 1471 } 1472 1473 dir = lo_inode(req, parent); 1474 if (!dir) { 1475 fuse_reply_err(req, EBADF); 1476 return; 1477 } 1478 1479 saverr = do_mknod_symlink(req, dir, name, mode, rdev, link); 1480 if (saverr) { 1481 goto out; 1482 } 1483 1484 saverr = lo_do_lookup(req, parent, name, &e, NULL); 1485 if (saverr) { 1486 goto out; 1487 } 1488 1489 fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, 1490 name, (unsigned long long)e.ino); 1491 1492 fuse_reply_entry(req, &e); 1493 lo_inode_put(lo, &dir); 1494 return; 1495 1496 out: 1497 lo_inode_put(lo, &dir); 1498 fuse_reply_err(req, saverr); 1499 } 1500 1501 static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, 1502 mode_t mode, dev_t rdev) 1503 { 1504 lo_mknod_symlink(req, parent, name, mode, rdev, NULL); 1505 } 1506 1507 static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, 1508 mode_t mode) 1509 { 1510 lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL); 1511 } 1512 1513 static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, 1514 const char *name) 1515 { 1516 lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link); 1517 } 1518 1519 static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, 1520 const char *name) 1521 { 1522 int res; 1523 struct lo_data *lo = lo_data(req); 1524 struct lo_inode *parent_inode; 1525 struct lo_inode *inode; 1526 struct fuse_entry_param e; 1527 char procname[64]; 1528 int saverr; 1529 1530 if (is_empty(name)) { 1531 fuse_reply_err(req, ENOENT); 1532 return; 1533 } 1534 1535 if (!is_safe_path_component(name)) { 1536 fuse_reply_err(req, EINVAL); 1537 return; 1538 } 1539 1540 parent_inode = lo_inode(req, parent); 1541 inode = lo_inode(req, ino); 1542 if (!parent_inode || !inode) { 1543 errno = EBADF; 1544 goto out_err; 1545 } 1546 1547 memset(&e, 0, sizeof(struct fuse_entry_param)); 1548 e.attr_timeout = lo->timeout; 1549 e.entry_timeout = lo->timeout; 1550 1551 sprintf(procname, "%i", inode->fd); 1552 res = linkat(lo->proc_self_fd, procname, parent_inode->fd, name, 1553 AT_SYMLINK_FOLLOW); 1554 if (res == -1) { 1555 goto out_err; 1556 } 1557 1558 res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); 1559 if (res == -1) { 1560 goto out_err; 1561 } 1562 1563 pthread_mutex_lock(&lo->mutex); 1564 inode->nlookup++; 1565 pthread_mutex_unlock(&lo->mutex); 1566 e.ino = inode->fuse_ino; 1567 1568 fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, 1569 name, (unsigned long long)e.ino); 1570 1571 fuse_reply_entry(req, &e); 1572 lo_inode_put(lo, &parent_inode); 1573 lo_inode_put(lo, &inode); 1574 return; 1575 1576 out_err: 1577 saverr = errno; 1578 lo_inode_put(lo, &parent_inode); 1579 lo_inode_put(lo, &inode); 1580 fuse_reply_err(req, saverr); 1581 } 1582 1583 /* Increments nlookup and caller must release refcount using lo_inode_put() */ 1584 static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, 1585 const char *name) 1586 { 1587 int res; 1588 uint64_t mnt_id; 1589 struct stat attr; 1590 struct lo_data *lo = lo_data(req); 1591 struct lo_inode *dir = lo_inode(req, parent); 1592 1593 if (!dir) { 1594 return NULL; 1595 } 1596 1597 res = do_statx(lo, dir->fd, name, &attr, AT_SYMLINK_NOFOLLOW, &mnt_id); 1598 lo_inode_put(lo, &dir); 1599 if (res == -1) { 1600 return NULL; 1601 } 1602 1603 return lo_find(lo, &attr, mnt_id); 1604 } 1605 1606 static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) 1607 { 1608 int res; 1609 struct lo_inode *inode; 1610 struct lo_data *lo = lo_data(req); 1611 1612 if (is_empty(name)) { 1613 fuse_reply_err(req, ENOENT); 1614 return; 1615 } 1616 1617 if (!is_safe_path_component(name)) { 1618 fuse_reply_err(req, EINVAL); 1619 return; 1620 } 1621 1622 inode = lookup_name(req, parent, name); 1623 if (!inode) { 1624 fuse_reply_err(req, EIO); 1625 return; 1626 } 1627 1628 res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); 1629 1630 fuse_reply_err(req, res == -1 ? errno : 0); 1631 unref_inode_lolocked(lo, inode, 1); 1632 lo_inode_put(lo, &inode); 1633 } 1634 1635 static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, 1636 fuse_ino_t newparent, const char *newname, 1637 unsigned int flags) 1638 { 1639 int res; 1640 struct lo_inode *parent_inode; 1641 struct lo_inode *newparent_inode; 1642 struct lo_inode *oldinode = NULL; 1643 struct lo_inode *newinode = NULL; 1644 struct lo_data *lo = lo_data(req); 1645 1646 if (is_empty(name) || is_empty(newname)) { 1647 fuse_reply_err(req, ENOENT); 1648 return; 1649 } 1650 1651 if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { 1652 fuse_reply_err(req, EINVAL); 1653 return; 1654 } 1655 1656 parent_inode = lo_inode(req, parent); 1657 newparent_inode = lo_inode(req, newparent); 1658 if (!parent_inode || !newparent_inode) { 1659 fuse_reply_err(req, EBADF); 1660 goto out; 1661 } 1662 1663 oldinode = lookup_name(req, parent, name); 1664 newinode = lookup_name(req, newparent, newname); 1665 1666 if (!oldinode) { 1667 fuse_reply_err(req, EIO); 1668 goto out; 1669 } 1670 1671 if (flags) { 1672 #ifndef SYS_renameat2 1673 fuse_reply_err(req, EINVAL); 1674 #else 1675 res = syscall(SYS_renameat2, parent_inode->fd, name, 1676 newparent_inode->fd, newname, flags); 1677 if (res == -1 && errno == ENOSYS) { 1678 fuse_reply_err(req, EINVAL); 1679 } else { 1680 fuse_reply_err(req, res == -1 ? errno : 0); 1681 } 1682 #endif 1683 goto out; 1684 } 1685 1686 res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); 1687 1688 fuse_reply_err(req, res == -1 ? errno : 0); 1689 out: 1690 unref_inode_lolocked(lo, oldinode, 1); 1691 unref_inode_lolocked(lo, newinode, 1); 1692 lo_inode_put(lo, &oldinode); 1693 lo_inode_put(lo, &newinode); 1694 lo_inode_put(lo, &parent_inode); 1695 lo_inode_put(lo, &newparent_inode); 1696 } 1697 1698 static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) 1699 { 1700 int res; 1701 struct lo_inode *inode; 1702 struct lo_data *lo = lo_data(req); 1703 1704 if (is_empty(name)) { 1705 fuse_reply_err(req, ENOENT); 1706 return; 1707 } 1708 1709 if (!is_safe_path_component(name)) { 1710 fuse_reply_err(req, EINVAL); 1711 return; 1712 } 1713 1714 inode = lookup_name(req, parent, name); 1715 if (!inode) { 1716 fuse_reply_err(req, EIO); 1717 return; 1718 } 1719 1720 res = unlinkat(lo_fd(req, parent), name, 0); 1721 1722 fuse_reply_err(req, res == -1 ? errno : 0); 1723 unref_inode_lolocked(lo, inode, 1); 1724 lo_inode_put(lo, &inode); 1725 } 1726 1727 /* To be called with lo->mutex held */ 1728 static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) 1729 { 1730 if (!inode) { 1731 return; 1732 } 1733 1734 assert(inode->nlookup >= n); 1735 inode->nlookup -= n; 1736 if (!inode->nlookup) { 1737 lo_map_remove(&lo->ino_map, inode->fuse_ino); 1738 g_hash_table_remove(lo->inodes, &inode->key); 1739 if (lo->posix_lock) { 1740 if (g_hash_table_size(inode->posix_locks)) { 1741 fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); 1742 } 1743 g_hash_table_destroy(inode->posix_locks); 1744 pthread_mutex_destroy(&inode->plock_mutex); 1745 } 1746 /* Drop our refcount from lo_do_lookup() */ 1747 lo_inode_put(lo, &inode); 1748 } 1749 } 1750 1751 static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, 1752 uint64_t n) 1753 { 1754 if (!inode) { 1755 return; 1756 } 1757 1758 pthread_mutex_lock(&lo->mutex); 1759 unref_inode(lo, inode, n); 1760 pthread_mutex_unlock(&lo->mutex); 1761 } 1762 1763 static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) 1764 { 1765 struct lo_data *lo = lo_data(req); 1766 struct lo_inode *inode; 1767 1768 inode = lo_inode(req, ino); 1769 if (!inode) { 1770 return; 1771 } 1772 1773 fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", 1774 (unsigned long long)ino, (unsigned long long)inode->nlookup, 1775 (unsigned long long)nlookup); 1776 1777 unref_inode_lolocked(lo, inode, nlookup); 1778 lo_inode_put(lo, &inode); 1779 } 1780 1781 static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) 1782 { 1783 lo_forget_one(req, ino, nlookup); 1784 fuse_reply_none(req); 1785 } 1786 1787 static void lo_forget_multi(fuse_req_t req, size_t count, 1788 struct fuse_forget_data *forgets) 1789 { 1790 int i; 1791 1792 for (i = 0; i < count; i++) { 1793 lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); 1794 } 1795 fuse_reply_none(req); 1796 } 1797 1798 static void lo_readlink(fuse_req_t req, fuse_ino_t ino) 1799 { 1800 char buf[PATH_MAX + 1]; 1801 int res; 1802 1803 res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); 1804 if (res == -1) { 1805 return (void)fuse_reply_err(req, errno); 1806 } 1807 1808 if (res == sizeof(buf)) { 1809 return (void)fuse_reply_err(req, ENAMETOOLONG); 1810 } 1811 1812 buf[res] = '\0'; 1813 1814 fuse_reply_readlink(req, buf); 1815 } 1816 1817 struct lo_dirp { 1818 gint refcount; 1819 DIR *dp; 1820 struct dirent *entry; 1821 off_t offset; 1822 }; 1823 1824 static void lo_dirp_put(struct lo_dirp **dp) 1825 { 1826 struct lo_dirp *d = *dp; 1827 1828 if (!d) { 1829 return; 1830 } 1831 *dp = NULL; 1832 1833 if (g_atomic_int_dec_and_test(&d->refcount)) { 1834 closedir(d->dp); 1835 free(d); 1836 } 1837 } 1838 1839 /* Call lo_dirp_put() on the return value when no longer needed */ 1840 static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) 1841 { 1842 struct lo_data *lo = lo_data(req); 1843 struct lo_map_elem *elem; 1844 1845 pthread_mutex_lock(&lo->mutex); 1846 elem = lo_map_get(&lo->dirp_map, fi->fh); 1847 if (elem) { 1848 g_atomic_int_inc(&elem->dirp->refcount); 1849 } 1850 pthread_mutex_unlock(&lo->mutex); 1851 if (!elem) { 1852 return NULL; 1853 } 1854 1855 return elem->dirp; 1856 } 1857 1858 static void lo_opendir(fuse_req_t req, fuse_ino_t ino, 1859 struct fuse_file_info *fi) 1860 { 1861 int error = ENOMEM; 1862 struct lo_data *lo = lo_data(req); 1863 struct lo_dirp *d; 1864 int fd; 1865 ssize_t fh; 1866 1867 d = calloc(1, sizeof(struct lo_dirp)); 1868 if (d == NULL) { 1869 goto out_err; 1870 } 1871 1872 fd = openat(lo_fd(req, ino), ".", O_RDONLY); 1873 if (fd == -1) { 1874 goto out_errno; 1875 } 1876 1877 d->dp = fdopendir(fd); 1878 if (d->dp == NULL) { 1879 goto out_errno; 1880 } 1881 1882 d->offset = 0; 1883 d->entry = NULL; 1884 1885 g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */ 1886 pthread_mutex_lock(&lo->mutex); 1887 fh = lo_add_dirp_mapping(req, d); 1888 pthread_mutex_unlock(&lo->mutex); 1889 if (fh == -1) { 1890 goto out_err; 1891 } 1892 1893 fi->fh = fh; 1894 if (lo->cache == CACHE_ALWAYS) { 1895 fi->cache_readdir = 1; 1896 } 1897 fuse_reply_open(req, fi); 1898 return; 1899 1900 out_errno: 1901 error = errno; 1902 out_err: 1903 if (d) { 1904 if (d->dp) { 1905 closedir(d->dp); 1906 } else if (fd != -1) { 1907 close(fd); 1908 } 1909 free(d); 1910 } 1911 fuse_reply_err(req, error); 1912 } 1913 1914 static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, 1915 off_t offset, struct fuse_file_info *fi, int plus) 1916 { 1917 struct lo_data *lo = lo_data(req); 1918 struct lo_dirp *d = NULL; 1919 struct lo_inode *dinode; 1920 g_autofree char *buf = NULL; 1921 char *p; 1922 size_t rem = size; 1923 int err = EBADF; 1924 1925 dinode = lo_inode(req, ino); 1926 if (!dinode) { 1927 goto error; 1928 } 1929 1930 d = lo_dirp(req, fi); 1931 if (!d) { 1932 goto error; 1933 } 1934 1935 err = ENOMEM; 1936 buf = g_try_malloc0(size); 1937 if (!buf) { 1938 goto error; 1939 } 1940 p = buf; 1941 1942 if (offset != d->offset) { 1943 seekdir(d->dp, offset); 1944 d->entry = NULL; 1945 d->offset = offset; 1946 } 1947 while (1) { 1948 size_t entsize; 1949 off_t nextoff; 1950 const char *name; 1951 1952 if (!d->entry) { 1953 errno = 0; 1954 d->entry = readdir(d->dp); 1955 if (!d->entry) { 1956 if (errno) { /* Error */ 1957 err = errno; 1958 goto error; 1959 } else { /* End of stream */ 1960 break; 1961 } 1962 } 1963 } 1964 nextoff = d->entry->d_off; 1965 name = d->entry->d_name; 1966 1967 fuse_ino_t entry_ino = 0; 1968 struct fuse_entry_param e = (struct fuse_entry_param){ 1969 .attr.st_ino = d->entry->d_ino, 1970 .attr.st_mode = d->entry->d_type << 12, 1971 }; 1972 1973 /* Hide root's parent directory */ 1974 if (dinode == &lo->root && strcmp(name, "..") == 0) { 1975 e.attr.st_ino = lo->root.key.ino; 1976 e.attr.st_mode = DT_DIR << 12; 1977 } 1978 1979 if (plus) { 1980 if (!is_dot_or_dotdot(name)) { 1981 err = lo_do_lookup(req, ino, name, &e, NULL); 1982 if (err) { 1983 goto error; 1984 } 1985 entry_ino = e.ino; 1986 } 1987 1988 entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); 1989 } else { 1990 entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff); 1991 } 1992 if (entsize > rem) { 1993 if (entry_ino != 0) { 1994 lo_forget_one(req, entry_ino, 1); 1995 } 1996 break; 1997 } 1998 1999 p += entsize; 2000 rem -= entsize; 2001 2002 d->entry = NULL; 2003 d->offset = nextoff; 2004 } 2005 2006 err = 0; 2007 error: 2008 lo_dirp_put(&d); 2009 lo_inode_put(lo, &dinode); 2010 2011 /* 2012 * If there's an error, we can only signal it if we haven't stored 2013 * any entries yet - otherwise we'd end up with wrong lookup 2014 * counts for the entries that are already in the buffer. So we 2015 * return what we've collected until that point. 2016 */ 2017 if (err && rem == size) { 2018 fuse_reply_err(req, err); 2019 } else { 2020 fuse_reply_buf(req, buf, size - rem); 2021 } 2022 } 2023 2024 static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, 2025 off_t offset, struct fuse_file_info *fi) 2026 { 2027 lo_do_readdir(req, ino, size, offset, fi, 0); 2028 } 2029 2030 static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, 2031 off_t offset, struct fuse_file_info *fi) 2032 { 2033 lo_do_readdir(req, ino, size, offset, fi, 1); 2034 } 2035 2036 static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, 2037 struct fuse_file_info *fi) 2038 { 2039 struct lo_data *lo = lo_data(req); 2040 struct lo_map_elem *elem; 2041 struct lo_dirp *d; 2042 2043 (void)ino; 2044 2045 pthread_mutex_lock(&lo->mutex); 2046 elem = lo_map_get(&lo->dirp_map, fi->fh); 2047 if (!elem) { 2048 pthread_mutex_unlock(&lo->mutex); 2049 fuse_reply_err(req, EBADF); 2050 return; 2051 } 2052 2053 d = elem->dirp; 2054 lo_map_remove(&lo->dirp_map, fi->fh); 2055 pthread_mutex_unlock(&lo->mutex); 2056 2057 lo_dirp_put(&d); /* paired with lo_opendir() */ 2058 2059 fuse_reply_err(req, 0); 2060 } 2061 2062 static void update_open_flags(int writeback, int allow_direct_io, 2063 struct fuse_file_info *fi) 2064 { 2065 /* 2066 * With writeback cache, kernel may send read requests even 2067 * when userspace opened write-only 2068 */ 2069 if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { 2070 fi->flags &= ~O_ACCMODE; 2071 fi->flags |= O_RDWR; 2072 } 2073 2074 /* 2075 * With writeback cache, O_APPEND is handled by the kernel. 2076 * This breaks atomicity (since the file may change in the 2077 * underlying filesystem, so that the kernel's idea of the 2078 * end of the file isn't accurate anymore). In this example, 2079 * we just accept that. A more rigorous filesystem may want 2080 * to return an error here 2081 */ 2082 if (writeback && (fi->flags & O_APPEND)) { 2083 fi->flags &= ~O_APPEND; 2084 } 2085 2086 /* 2087 * O_DIRECT in guest should not necessarily mean bypassing page 2088 * cache on host as well. Therefore, we discard it by default 2089 * ('-o no_allow_direct_io'). If somebody needs that behavior, 2090 * the '-o allow_direct_io' option should be set. 2091 */ 2092 if (!allow_direct_io) { 2093 fi->flags &= ~O_DIRECT; 2094 } 2095 } 2096 2097 /* 2098 * Open a regular file, set up an fd mapping, and fill out the struct 2099 * fuse_file_info for it. If existing_fd is not negative, use that fd instead 2100 * opening a new one. Takes ownership of existing_fd. 2101 * 2102 * Returns 0 on success or a positive errno. 2103 */ 2104 static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, 2105 int existing_fd, struct fuse_file_info *fi) 2106 { 2107 ssize_t fh; 2108 int fd = existing_fd; 2109 int err; 2110 bool cap_fsetid_dropped = false; 2111 bool kill_suidgid = lo->killpriv_v2 && fi->kill_priv; 2112 2113 update_open_flags(lo->writeback, lo->allow_direct_io, fi); 2114 2115 if (fd < 0) { 2116 if (kill_suidgid) { 2117 err = drop_effective_cap("FSETID", &cap_fsetid_dropped); 2118 if (err) { 2119 return err; 2120 } 2121 } 2122 2123 fd = lo_inode_open(lo, inode, fi->flags); 2124 2125 if (cap_fsetid_dropped) { 2126 if (gain_effective_cap("FSETID")) { 2127 fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n"); 2128 } 2129 } 2130 if (fd < 0) { 2131 return -fd; 2132 } 2133 if (fi->flags & (O_TRUNC)) { 2134 int err = drop_security_capability(lo, fd); 2135 if (err) { 2136 close(fd); 2137 return err; 2138 } 2139 } 2140 } 2141 2142 pthread_mutex_lock(&lo->mutex); 2143 fh = lo_add_fd_mapping(lo, fd); 2144 pthread_mutex_unlock(&lo->mutex); 2145 if (fh == -1) { 2146 close(fd); 2147 return ENOMEM; 2148 } 2149 2150 fi->fh = fh; 2151 if (lo->cache == CACHE_NONE) { 2152 fi->direct_io = 1; 2153 } else if (lo->cache == CACHE_ALWAYS) { 2154 fi->keep_cache = 1; 2155 } 2156 return 0; 2157 } 2158 2159 static int do_create_nosecctx(fuse_req_t req, struct lo_inode *parent_inode, 2160 const char *name, mode_t mode, 2161 struct fuse_file_info *fi, int *open_fd, 2162 bool tmpfile) 2163 { 2164 int err, fd; 2165 struct lo_cred old = {}; 2166 struct lo_data *lo = lo_data(req); 2167 int flags; 2168 2169 if (tmpfile) { 2170 flags = fi->flags | O_TMPFILE; 2171 /* 2172 * Don't use O_EXCL as we want to link file later. Also reset O_CREAT 2173 * otherwise openat() returns -EINVAL. 2174 */ 2175 flags &= ~(O_CREAT | O_EXCL); 2176 2177 /* O_TMPFILE needs either O_RDWR or O_WRONLY */ 2178 if ((flags & O_ACCMODE) == O_RDONLY) { 2179 flags |= O_RDWR; 2180 } 2181 } else { 2182 flags = fi->flags | O_CREAT | O_EXCL; 2183 } 2184 2185 err = lo_change_cred(req, &old, lo->change_umask); 2186 if (err) { 2187 return err; 2188 } 2189 2190 /* Try to create a new file but don't open existing files */ 2191 fd = openat(parent_inode->fd, name, flags, mode); 2192 err = fd == -1 ? errno : 0; 2193 lo_restore_cred(&old, lo->change_umask); 2194 if (!err) { 2195 *open_fd = fd; 2196 } 2197 return err; 2198 } 2199 2200 static int do_create_secctx_fscreate(fuse_req_t req, 2201 struct lo_inode *parent_inode, 2202 const char *name, mode_t mode, 2203 struct fuse_file_info *fi, int *open_fd) 2204 { 2205 int err = 0, fd = -1, fscreate_fd = -1; 2206 struct lo_data *lo = lo_data(req); 2207 2208 err = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen, 2209 &fscreate_fd); 2210 if (err) { 2211 return err; 2212 } 2213 2214 err = do_create_nosecctx(req, parent_inode, name, mode, fi, &fd, false); 2215 2216 close_reset_proc_fscreate(fscreate_fd); 2217 if (!err) { 2218 *open_fd = fd; 2219 } 2220 return err; 2221 } 2222 2223 static int do_create_secctx_tmpfile(fuse_req_t req, 2224 struct lo_inode *parent_inode, 2225 const char *name, mode_t mode, 2226 struct fuse_file_info *fi, 2227 const char *secctx_name, int *open_fd) 2228 { 2229 int err, fd = -1; 2230 struct lo_data *lo = lo_data(req); 2231 char procname[64]; 2232 2233 err = do_create_nosecctx(req, parent_inode, ".", mode, fi, &fd, true); 2234 if (err) { 2235 return err; 2236 } 2237 2238 err = fsetxattr(fd, secctx_name, req->secctx.ctx, req->secctx.ctxlen, 0); 2239 if (err) { 2240 err = errno; 2241 goto out; 2242 } 2243 2244 /* Security context set on file. Link it in place */ 2245 sprintf(procname, "%d", fd); 2246 FCHDIR_NOFAIL(lo->proc_self_fd); 2247 err = linkat(AT_FDCWD, procname, parent_inode->fd, name, 2248 AT_SYMLINK_FOLLOW); 2249 err = err == -1 ? errno : 0; 2250 FCHDIR_NOFAIL(lo->root.fd); 2251 2252 out: 2253 if (!err) { 2254 *open_fd = fd; 2255 } else if (fd != -1) { 2256 close(fd); 2257 } 2258 return err; 2259 } 2260 2261 static int do_create_secctx_noatomic(fuse_req_t req, 2262 struct lo_inode *parent_inode, 2263 const char *name, mode_t mode, 2264 struct fuse_file_info *fi, 2265 const char *secctx_name, int *open_fd) 2266 { 2267 int err = 0, fd = -1; 2268 2269 err = do_create_nosecctx(req, parent_inode, name, mode, fi, &fd, false); 2270 if (err) { 2271 goto out; 2272 } 2273 2274 /* Set security context. This is not atomic w.r.t file creation */ 2275 err = fsetxattr(fd, secctx_name, req->secctx.ctx, req->secctx.ctxlen, 0); 2276 err = err == -1 ? errno : 0; 2277 out: 2278 if (!err) { 2279 *open_fd = fd; 2280 } else { 2281 if (fd != -1) { 2282 close(fd); 2283 unlinkat(parent_inode->fd, name, 0); 2284 } 2285 } 2286 return err; 2287 } 2288 2289 static int do_lo_create(fuse_req_t req, struct lo_inode *parent_inode, 2290 const char *name, mode_t mode, 2291 struct fuse_file_info *fi, int *open_fd) 2292 { 2293 struct lo_data *lo = lo_data(req); 2294 char *mapped_name = NULL; 2295 int err; 2296 const char *ctxname = req->secctx.name; 2297 bool secctx_enabled = req->secctx.ctxlen; 2298 2299 if (secctx_enabled && lo->xattrmap) { 2300 err = xattr_map_client(lo, req->secctx.name, &mapped_name); 2301 if (err < 0) { 2302 return -err; 2303 } 2304 2305 ctxname = mapped_name; 2306 } 2307 2308 if (secctx_enabled) { 2309 /* 2310 * If security.selinux has not been remapped and selinux is enabled, 2311 * use fscreate to set context before file creation. If not, use 2312 * tmpfile method for regular files. Otherwise fallback to 2313 * non-atomic method of file creation and xattr setting. 2314 */ 2315 if (!mapped_name && lo->use_fscreate) { 2316 err = do_create_secctx_fscreate(req, parent_inode, name, mode, fi, 2317 open_fd); 2318 goto out; 2319 } else if (S_ISREG(mode)) { 2320 err = do_create_secctx_tmpfile(req, parent_inode, name, mode, fi, 2321 ctxname, open_fd); 2322 /* 2323 * If filesystem does not support O_TMPFILE, fallback to non-atomic 2324 * method. 2325 */ 2326 if (!err || err != EOPNOTSUPP) { 2327 goto out; 2328 } 2329 } 2330 2331 err = do_create_secctx_noatomic(req, parent_inode, name, mode, fi, 2332 ctxname, open_fd); 2333 } else { 2334 err = do_create_nosecctx(req, parent_inode, name, mode, fi, open_fd, 2335 false); 2336 } 2337 2338 out: 2339 g_free(mapped_name); 2340 return err; 2341 } 2342 2343 static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, 2344 mode_t mode, struct fuse_file_info *fi) 2345 { 2346 int fd = -1; 2347 struct lo_data *lo = lo_data(req); 2348 struct lo_inode *parent_inode; 2349 struct lo_inode *inode = NULL; 2350 struct fuse_entry_param e; 2351 int err; 2352 2353 fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)" 2354 " kill_priv=%d\n", parent, name, fi->kill_priv); 2355 2356 if (!is_safe_path_component(name)) { 2357 fuse_reply_err(req, EINVAL); 2358 return; 2359 } 2360 2361 parent_inode = lo_inode(req, parent); 2362 if (!parent_inode) { 2363 fuse_reply_err(req, EBADF); 2364 return; 2365 } 2366 2367 update_open_flags(lo->writeback, lo->allow_direct_io, fi); 2368 2369 err = do_lo_create(req, parent_inode, name, mode, fi, &fd); 2370 2371 /* Ignore the error if file exists and O_EXCL was not given */ 2372 if (err && (err != EEXIST || (fi->flags & O_EXCL))) { 2373 goto out; 2374 } 2375 2376 err = lo_do_lookup(req, parent, name, &e, &inode); 2377 if (err) { 2378 goto out; 2379 } 2380 2381 err = lo_do_open(lo, inode, fd, fi); 2382 fd = -1; /* lo_do_open() takes ownership of fd */ 2383 if (err) { 2384 /* Undo lo_do_lookup() nlookup ref */ 2385 unref_inode_lolocked(lo, inode, 1); 2386 } 2387 2388 out: 2389 lo_inode_put(lo, &inode); 2390 lo_inode_put(lo, &parent_inode); 2391 2392 if (err) { 2393 if (fd >= 0) { 2394 close(fd); 2395 } 2396 2397 fuse_reply_err(req, err); 2398 } else { 2399 fuse_reply_create(req, &e, fi); 2400 } 2401 } 2402 2403 /* Should be called with inode->plock_mutex held */ 2404 static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, 2405 struct lo_inode *inode, 2406 uint64_t lock_owner, 2407 pid_t pid, int *err) 2408 { 2409 struct lo_inode_plock *plock; 2410 int fd; 2411 2412 plock = 2413 g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner)); 2414 2415 if (plock) { 2416 return plock; 2417 } 2418 2419 plock = malloc(sizeof(struct lo_inode_plock)); 2420 if (!plock) { 2421 *err = ENOMEM; 2422 return NULL; 2423 } 2424 2425 /* Open another instance of file which can be used for ofd locks. */ 2426 /* TODO: What if file is not writable? */ 2427 fd = lo_inode_open(lo, inode, O_RDWR); 2428 if (fd < 0) { 2429 *err = -fd; 2430 free(plock); 2431 return NULL; 2432 } 2433 2434 plock->lock_owner = lock_owner; 2435 plock->fd = fd; 2436 g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner), 2437 plock); 2438 return plock; 2439 } 2440 2441 static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, 2442 struct flock *lock) 2443 { 2444 struct lo_data *lo = lo_data(req); 2445 struct lo_inode *inode; 2446 struct lo_inode_plock *plock; 2447 int ret, saverr = 0; 2448 2449 fuse_log(FUSE_LOG_DEBUG, 2450 "lo_getlk(ino=%" PRIu64 ", flags=%d)" 2451 " owner=0x%" PRIx64 ", l_type=%d l_start=0x%" PRIx64 2452 " l_len=0x%" PRIx64 "\n", 2453 ino, fi->flags, fi->lock_owner, lock->l_type, 2454 (uint64_t)lock->l_start, (uint64_t)lock->l_len); 2455 2456 if (!lo->posix_lock) { 2457 fuse_reply_err(req, ENOSYS); 2458 return; 2459 } 2460 2461 inode = lo_inode(req, ino); 2462 if (!inode) { 2463 fuse_reply_err(req, EBADF); 2464 return; 2465 } 2466 2467 pthread_mutex_lock(&inode->plock_mutex); 2468 plock = 2469 lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); 2470 if (!plock) { 2471 saverr = ret; 2472 goto out; 2473 } 2474 2475 ret = fcntl(plock->fd, F_OFD_GETLK, lock); 2476 if (ret == -1) { 2477 saverr = errno; 2478 } 2479 2480 out: 2481 pthread_mutex_unlock(&inode->plock_mutex); 2482 lo_inode_put(lo, &inode); 2483 2484 if (saverr) { 2485 fuse_reply_err(req, saverr); 2486 } else { 2487 fuse_reply_lock(req, lock); 2488 } 2489 } 2490 2491 static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, 2492 struct flock *lock, int sleep) 2493 { 2494 struct lo_data *lo = lo_data(req); 2495 struct lo_inode *inode; 2496 struct lo_inode_plock *plock; 2497 int ret, saverr = 0; 2498 2499 fuse_log(FUSE_LOG_DEBUG, 2500 "lo_setlk(ino=%" PRIu64 ", flags=%d)" 2501 " cmd=%d pid=%d owner=0x%" PRIx64 " sleep=%d l_whence=%d" 2502 " l_start=0x%" PRIx64 " l_len=0x%" PRIx64 "\n", 2503 ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep, 2504 lock->l_whence, (uint64_t)lock->l_start, (uint64_t)lock->l_len); 2505 2506 if (!lo->posix_lock) { 2507 fuse_reply_err(req, ENOSYS); 2508 return; 2509 } 2510 2511 if (sleep) { 2512 fuse_reply_err(req, EOPNOTSUPP); 2513 return; 2514 } 2515 2516 inode = lo_inode(req, ino); 2517 if (!inode) { 2518 fuse_reply_err(req, EBADF); 2519 return; 2520 } 2521 2522 pthread_mutex_lock(&inode->plock_mutex); 2523 plock = 2524 lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); 2525 2526 if (!plock) { 2527 saverr = ret; 2528 goto out; 2529 } 2530 2531 /* TODO: Is it alright to modify flock? */ 2532 lock->l_pid = 0; 2533 ret = fcntl(plock->fd, F_OFD_SETLK, lock); 2534 if (ret == -1) { 2535 saverr = errno; 2536 } 2537 2538 out: 2539 pthread_mutex_unlock(&inode->plock_mutex); 2540 lo_inode_put(lo, &inode); 2541 2542 fuse_reply_err(req, saverr); 2543 } 2544 2545 static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, 2546 struct fuse_file_info *fi) 2547 { 2548 int res; 2549 struct lo_dirp *d; 2550 int fd; 2551 2552 (void)ino; 2553 2554 d = lo_dirp(req, fi); 2555 if (!d) { 2556 fuse_reply_err(req, EBADF); 2557 return; 2558 } 2559 2560 fd = dirfd(d->dp); 2561 if (datasync) { 2562 res = fdatasync(fd); 2563 } else { 2564 res = fsync(fd); 2565 } 2566 2567 lo_dirp_put(&d); 2568 2569 fuse_reply_err(req, res == -1 ? errno : 0); 2570 } 2571 2572 static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) 2573 { 2574 struct lo_data *lo = lo_data(req); 2575 struct lo_inode *inode = lo_inode(req, ino); 2576 int err; 2577 2578 fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d, kill_priv=%d)" 2579 "\n", ino, fi->flags, fi->kill_priv); 2580 2581 if (!inode) { 2582 fuse_reply_err(req, EBADF); 2583 return; 2584 } 2585 2586 err = lo_do_open(lo, inode, -1, fi); 2587 lo_inode_put(lo, &inode); 2588 if (err) { 2589 fuse_reply_err(req, err); 2590 } else { 2591 fuse_reply_open(req, fi); 2592 } 2593 } 2594 2595 static void lo_release(fuse_req_t req, fuse_ino_t ino, 2596 struct fuse_file_info *fi) 2597 { 2598 struct lo_data *lo = lo_data(req); 2599 struct lo_map_elem *elem; 2600 int fd = -1; 2601 2602 (void)ino; 2603 2604 pthread_mutex_lock(&lo->mutex); 2605 elem = lo_map_get(&lo->fd_map, fi->fh); 2606 if (elem) { 2607 fd = elem->fd; 2608 elem = NULL; 2609 lo_map_remove(&lo->fd_map, fi->fh); 2610 } 2611 pthread_mutex_unlock(&lo->mutex); 2612 2613 close(fd); 2614 fuse_reply_err(req, 0); 2615 } 2616 2617 static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) 2618 { 2619 int res; 2620 (void)ino; 2621 struct lo_inode *inode; 2622 struct lo_data *lo = lo_data(req); 2623 2624 inode = lo_inode(req, ino); 2625 if (!inode) { 2626 fuse_reply_err(req, EBADF); 2627 return; 2628 } 2629 2630 if (!S_ISREG(inode->filetype)) { 2631 lo_inode_put(lo, &inode); 2632 fuse_reply_err(req, EBADF); 2633 return; 2634 } 2635 2636 /* An fd is going away. Cleanup associated posix locks */ 2637 if (lo->posix_lock) { 2638 pthread_mutex_lock(&inode->plock_mutex); 2639 g_hash_table_remove(inode->posix_locks, 2640 GUINT_TO_POINTER(fi->lock_owner)); 2641 pthread_mutex_unlock(&inode->plock_mutex); 2642 } 2643 res = close(dup(lo_fi_fd(req, fi))); 2644 lo_inode_put(lo, &inode); 2645 fuse_reply_err(req, res == -1 ? errno : 0); 2646 } 2647 2648 static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, 2649 struct fuse_file_info *fi) 2650 { 2651 struct lo_inode *inode = lo_inode(req, ino); 2652 struct lo_data *lo = lo_data(req); 2653 int res; 2654 int fd; 2655 2656 fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, 2657 (void *)fi); 2658 2659 if (!inode) { 2660 fuse_reply_err(req, EBADF); 2661 return; 2662 } 2663 2664 if (!fi) { 2665 fd = lo_inode_open(lo, inode, O_RDWR); 2666 if (fd < 0) { 2667 res = -fd; 2668 goto out; 2669 } 2670 } else { 2671 fd = lo_fi_fd(req, fi); 2672 } 2673 2674 if (datasync) { 2675 res = fdatasync(fd) == -1 ? errno : 0; 2676 } else { 2677 res = fsync(fd) == -1 ? errno : 0; 2678 } 2679 if (!fi) { 2680 close(fd); 2681 } 2682 out: 2683 lo_inode_put(lo, &inode); 2684 fuse_reply_err(req, res); 2685 } 2686 2687 static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, 2688 struct fuse_file_info *fi) 2689 { 2690 struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); 2691 2692 fuse_log(FUSE_LOG_DEBUG, 2693 "lo_read(ino=%" PRIu64 ", size=%zd, " 2694 "off=%lu)\n", 2695 ino, size, (unsigned long)offset); 2696 2697 buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; 2698 buf.buf[0].fd = lo_fi_fd(req, fi); 2699 buf.buf[0].pos = offset; 2700 2701 fuse_reply_data(req, &buf); 2702 } 2703 2704 static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, 2705 struct fuse_bufvec *in_buf, off_t off, 2706 struct fuse_file_info *fi) 2707 { 2708 (void)ino; 2709 ssize_t res; 2710 struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); 2711 bool cap_fsetid_dropped = false; 2712 2713 out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; 2714 out_buf.buf[0].fd = lo_fi_fd(req, fi); 2715 out_buf.buf[0].pos = off; 2716 2717 fuse_log(FUSE_LOG_DEBUG, 2718 "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu kill_priv=%d)\n", 2719 ino, out_buf.buf[0].size, (unsigned long)off, fi->kill_priv); 2720 2721 res = drop_security_capability(lo_data(req), out_buf.buf[0].fd); 2722 if (res) { 2723 fuse_reply_err(req, res); 2724 return; 2725 } 2726 2727 /* 2728 * If kill_priv is set, drop CAP_FSETID which should lead to kernel 2729 * clearing setuid/setgid on file. Note, for WRITE, we need to do 2730 * this even if killpriv_v2 is not enabled. fuse direct write path 2731 * relies on this. 2732 */ 2733 if (fi->kill_priv) { 2734 res = drop_effective_cap("FSETID", &cap_fsetid_dropped); 2735 if (res != 0) { 2736 fuse_reply_err(req, res); 2737 return; 2738 } 2739 } 2740 2741 res = fuse_buf_copy(&out_buf, in_buf); 2742 if (res < 0) { 2743 fuse_reply_err(req, -res); 2744 } else { 2745 fuse_reply_write(req, (size_t)res); 2746 } 2747 2748 if (cap_fsetid_dropped) { 2749 res = gain_effective_cap("FSETID"); 2750 if (res) { 2751 fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n"); 2752 } 2753 } 2754 } 2755 2756 static void lo_statfs(fuse_req_t req, fuse_ino_t ino) 2757 { 2758 int res; 2759 struct statvfs stbuf; 2760 2761 res = fstatvfs(lo_fd(req, ino), &stbuf); 2762 if (res == -1) { 2763 fuse_reply_err(req, errno); 2764 } else { 2765 fuse_reply_statfs(req, &stbuf); 2766 } 2767 } 2768 2769 static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, 2770 off_t length, struct fuse_file_info *fi) 2771 { 2772 int err = EOPNOTSUPP; 2773 (void)ino; 2774 2775 #ifdef CONFIG_FALLOCATE 2776 err = fallocate(lo_fi_fd(req, fi), mode, offset, length); 2777 if (err < 0) { 2778 err = errno; 2779 } 2780 2781 #elif defined(CONFIG_POSIX_FALLOCATE) 2782 if (mode) { 2783 fuse_reply_err(req, EOPNOTSUPP); 2784 return; 2785 } 2786 2787 err = posix_fallocate(lo_fi_fd(req, fi), offset, length); 2788 #endif 2789 2790 fuse_reply_err(req, err); 2791 } 2792 2793 static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, 2794 int op) 2795 { 2796 int res; 2797 (void)ino; 2798 2799 if (!(op & LOCK_NB)) { 2800 /* 2801 * Blocking flock can deadlock as there is only one thread 2802 * serving the queue. 2803 */ 2804 fuse_reply_err(req, EOPNOTSUPP); 2805 return; 2806 } 2807 2808 res = flock(lo_fi_fd(req, fi), op); 2809 2810 fuse_reply_err(req, res == -1 ? errno : 0); 2811 } 2812 2813 /* types */ 2814 /* 2815 * Exit; process attribute unmodified if matched. 2816 * An empty key applies to all. 2817 */ 2818 #define XATTR_MAP_FLAG_OK (1 << 0) 2819 /* 2820 * The attribute is unwanted; 2821 * EPERM on write, hidden on read. 2822 */ 2823 #define XATTR_MAP_FLAG_BAD (1 << 1) 2824 /* 2825 * For attr that start with 'key' prepend 'prepend' 2826 * 'key' may be empty to prepend for all attrs 2827 * key is defined from set/remove point of view. 2828 * Automatically reversed on read 2829 */ 2830 #define XATTR_MAP_FLAG_PREFIX (1 << 2) 2831 /* 2832 * The attribute is unsupported; 2833 * ENOTSUP on write, hidden on read. 2834 */ 2835 #define XATTR_MAP_FLAG_UNSUPPORTED (1 << 3) 2836 2837 /* scopes */ 2838 /* Apply rule to get/set/remove */ 2839 #define XATTR_MAP_FLAG_CLIENT (1 << 16) 2840 /* Apply rule to list */ 2841 #define XATTR_MAP_FLAG_SERVER (1 << 17) 2842 /* Apply rule to all */ 2843 #define XATTR_MAP_FLAG_ALL (XATTR_MAP_FLAG_SERVER | XATTR_MAP_FLAG_CLIENT) 2844 2845 static void add_xattrmap_entry(struct lo_data *lo, 2846 const XattrMapEntry *new_entry) 2847 { 2848 XattrMapEntry *res = g_realloc_n(lo->xattr_map_list, 2849 lo->xattr_map_nentries + 1, 2850 sizeof(XattrMapEntry)); 2851 res[lo->xattr_map_nentries++] = *new_entry; 2852 2853 lo->xattr_map_list = res; 2854 } 2855 2856 static void free_xattrmap(struct lo_data *lo) 2857 { 2858 XattrMapEntry *map = lo->xattr_map_list; 2859 size_t i; 2860 2861 if (!map) { 2862 return; 2863 } 2864 2865 for (i = 0; i < lo->xattr_map_nentries; i++) { 2866 g_free(map[i].key); 2867 g_free(map[i].prepend); 2868 }; 2869 2870 g_free(map); 2871 lo->xattr_map_list = NULL; 2872 lo->xattr_map_nentries = -1; 2873 } 2874 2875 /* 2876 * Handle the 'map' type, which is sugar for a set of commands 2877 * for the common case of prefixing a subset or everything, 2878 * and allowing anything not prefixed through. 2879 * It must be the last entry in the stream, although there 2880 * can be other entries before it. 2881 * The form is: 2882 * :map:key:prefix: 2883 * 2884 * key maybe empty in which case all entries are prefixed. 2885 */ 2886 static void parse_xattrmap_map(struct lo_data *lo, 2887 const char *rule, char sep) 2888 { 2889 const char *tmp; 2890 char *key; 2891 char *prefix; 2892 XattrMapEntry tmp_entry; 2893 2894 if (*rule != sep) { 2895 fuse_log(FUSE_LOG_ERR, 2896 "%s: Expecting '%c' after 'map' keyword, found '%c'\n", 2897 __func__, sep, *rule); 2898 exit(1); 2899 } 2900 2901 rule++; 2902 2903 /* At start of 'key' field */ 2904 tmp = strchr(rule, sep); 2905 if (!tmp) { 2906 fuse_log(FUSE_LOG_ERR, 2907 "%s: Missing '%c' at end of key field in map rule\n", 2908 __func__, sep); 2909 exit(1); 2910 } 2911 2912 key = g_strndup(rule, tmp - rule); 2913 rule = tmp + 1; 2914 2915 /* At start of prefix field */ 2916 tmp = strchr(rule, sep); 2917 if (!tmp) { 2918 fuse_log(FUSE_LOG_ERR, 2919 "%s: Missing '%c' at end of prefix field in map rule\n", 2920 __func__, sep); 2921 exit(1); 2922 } 2923 2924 prefix = g_strndup(rule, tmp - rule); 2925 rule = tmp + 1; 2926 2927 /* 2928 * This should be the end of the string, we don't allow 2929 * any more commands after 'map'. 2930 */ 2931 if (*rule) { 2932 fuse_log(FUSE_LOG_ERR, 2933 "%s: Expecting end of command after map, found '%c'\n", 2934 __func__, *rule); 2935 exit(1); 2936 } 2937 2938 /* 1st: Prefix matches/everything */ 2939 tmp_entry.flags = XATTR_MAP_FLAG_PREFIX | XATTR_MAP_FLAG_ALL; 2940 tmp_entry.key = g_strdup(key); 2941 tmp_entry.prepend = g_strdup(prefix); 2942 add_xattrmap_entry(lo, &tmp_entry); 2943 2944 if (!*key) { 2945 /* Prefix all case */ 2946 2947 /* 2nd: Hide any non-prefixed entries on the host */ 2948 tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_ALL; 2949 tmp_entry.key = g_strdup(""); 2950 tmp_entry.prepend = g_strdup(""); 2951 add_xattrmap_entry(lo, &tmp_entry); 2952 } else { 2953 /* Prefix matching case */ 2954 2955 /* 2nd: Hide non-prefixed but matching entries on the host */ 2956 tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_SERVER; 2957 tmp_entry.key = g_strdup(""); /* Not used */ 2958 tmp_entry.prepend = g_strdup(key); 2959 add_xattrmap_entry(lo, &tmp_entry); 2960 2961 /* 3rd: Stop the client accessing prefixed attributes directly */ 2962 tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_CLIENT; 2963 tmp_entry.key = g_strdup(prefix); 2964 tmp_entry.prepend = g_strdup(""); /* Not used */ 2965 add_xattrmap_entry(lo, &tmp_entry); 2966 2967 /* 4th: Everything else is OK */ 2968 tmp_entry.flags = XATTR_MAP_FLAG_OK | XATTR_MAP_FLAG_ALL; 2969 tmp_entry.key = g_strdup(""); 2970 tmp_entry.prepend = g_strdup(""); 2971 add_xattrmap_entry(lo, &tmp_entry); 2972 } 2973 2974 g_free(key); 2975 g_free(prefix); 2976 } 2977 2978 static void parse_xattrmap(struct lo_data *lo) 2979 { 2980 const char *map = lo->xattrmap; 2981 const char *tmp; 2982 int ret; 2983 2984 lo->xattr_map_nentries = 0; 2985 while (*map) { 2986 XattrMapEntry tmp_entry; 2987 char sep; 2988 2989 if (isspace(*map)) { 2990 map++; 2991 continue; 2992 } 2993 /* The separator is the first non-space of the rule */ 2994 sep = *map++; 2995 if (!sep) { 2996 break; 2997 } 2998 2999 tmp_entry.flags = 0; 3000 /* Start of 'type' */ 3001 if (strstart(map, "prefix", &map)) { 3002 tmp_entry.flags |= XATTR_MAP_FLAG_PREFIX; 3003 } else if (strstart(map, "ok", &map)) { 3004 tmp_entry.flags |= XATTR_MAP_FLAG_OK; 3005 } else if (strstart(map, "bad", &map)) { 3006 tmp_entry.flags |= XATTR_MAP_FLAG_BAD; 3007 } else if (strstart(map, "unsupported", &map)) { 3008 tmp_entry.flags |= XATTR_MAP_FLAG_UNSUPPORTED; 3009 } else if (strstart(map, "map", &map)) { 3010 /* 3011 * map is sugar that adds a number of rules, and must be 3012 * the last entry. 3013 */ 3014 parse_xattrmap_map(lo, map, sep); 3015 break; 3016 } else { 3017 fuse_log(FUSE_LOG_ERR, 3018 "%s: Unexpected type;" 3019 "Expecting 'prefix', 'ok', 'bad', 'unsupported' or 'map'" 3020 " in rule %zu\n", __func__, lo->xattr_map_nentries); 3021 exit(1); 3022 } 3023 3024 if (*map++ != sep) { 3025 fuse_log(FUSE_LOG_ERR, 3026 "%s: Missing '%c' at end of type field of rule %zu\n", 3027 __func__, sep, lo->xattr_map_nentries); 3028 exit(1); 3029 } 3030 3031 /* Start of 'scope' */ 3032 if (strstart(map, "client", &map)) { 3033 tmp_entry.flags |= XATTR_MAP_FLAG_CLIENT; 3034 } else if (strstart(map, "server", &map)) { 3035 tmp_entry.flags |= XATTR_MAP_FLAG_SERVER; 3036 } else if (strstart(map, "all", &map)) { 3037 tmp_entry.flags |= XATTR_MAP_FLAG_ALL; 3038 } else { 3039 fuse_log(FUSE_LOG_ERR, 3040 "%s: Unexpected scope;" 3041 " Expecting 'client', 'server', or 'all', in rule %zu\n", 3042 __func__, lo->xattr_map_nentries); 3043 exit(1); 3044 } 3045 3046 if (*map++ != sep) { 3047 fuse_log(FUSE_LOG_ERR, 3048 "%s: Expecting '%c' found '%c'" 3049 " after scope in rule %zu\n", 3050 __func__, sep, *map, lo->xattr_map_nentries); 3051 exit(1); 3052 } 3053 3054 /* At start of 'key' field */ 3055 tmp = strchr(map, sep); 3056 if (!tmp) { 3057 fuse_log(FUSE_LOG_ERR, 3058 "%s: Missing '%c' at end of key field of rule %zu", 3059 __func__, sep, lo->xattr_map_nentries); 3060 exit(1); 3061 } 3062 tmp_entry.key = g_strndup(map, tmp - map); 3063 map = tmp + 1; 3064 3065 /* At start of 'prepend' field */ 3066 tmp = strchr(map, sep); 3067 if (!tmp) { 3068 fuse_log(FUSE_LOG_ERR, 3069 "%s: Missing '%c' at end of prepend field of rule %zu", 3070 __func__, sep, lo->xattr_map_nentries); 3071 exit(1); 3072 } 3073 tmp_entry.prepend = g_strndup(map, tmp - map); 3074 map = tmp + 1; 3075 3076 add_xattrmap_entry(lo, &tmp_entry); 3077 /* End of rule - go around again for another rule */ 3078 } 3079 3080 if (!lo->xattr_map_nentries) { 3081 fuse_log(FUSE_LOG_ERR, "Empty xattr map\n"); 3082 exit(1); 3083 } 3084 3085 ret = xattr_map_client(lo, "security.capability", 3086 &lo->xattr_security_capability); 3087 if (ret) { 3088 fuse_log(FUSE_LOG_ERR, "Failed to map security.capability: %s\n", 3089 strerror(ret)); 3090 exit(1); 3091 } 3092 if (!lo->xattr_security_capability || 3093 !strcmp(lo->xattr_security_capability, "security.capability")) { 3094 /* 1-1 mapping, don't need to do anything */ 3095 free(lo->xattr_security_capability); 3096 lo->xattr_security_capability = NULL; 3097 } 3098 } 3099 3100 /* 3101 * For use with getxattr/setxattr/removexattr, where the client 3102 * gives us a name and we may need to choose a different one. 3103 * Allocates a buffer for the result placing it in *out_name. 3104 * If there's no change then *out_name is not set. 3105 * Returns 0 on success 3106 * Can return -EPERM to indicate we block a given attribute 3107 * (in which case out_name is not allocated) 3108 * Can return -ENOMEM to indicate out_name couldn't be allocated. 3109 */ 3110 static int xattr_map_client(const struct lo_data *lo, const char *client_name, 3111 char **out_name) 3112 { 3113 size_t i; 3114 for (i = 0; i < lo->xattr_map_nentries; i++) { 3115 const XattrMapEntry *cur_entry = lo->xattr_map_list + i; 3116 3117 if ((cur_entry->flags & XATTR_MAP_FLAG_CLIENT) && 3118 (strstart(client_name, cur_entry->key, NULL))) { 3119 if (cur_entry->flags & XATTR_MAP_FLAG_BAD) { 3120 return -EPERM; 3121 } 3122 if (cur_entry->flags & XATTR_MAP_FLAG_UNSUPPORTED) { 3123 return -ENOTSUP; 3124 } 3125 if (cur_entry->flags & XATTR_MAP_FLAG_OK) { 3126 /* Unmodified name */ 3127 return 0; 3128 } 3129 if (cur_entry->flags & XATTR_MAP_FLAG_PREFIX) { 3130 *out_name = g_try_malloc(strlen(client_name) + 3131 strlen(cur_entry->prepend) + 1); 3132 if (!*out_name) { 3133 return -ENOMEM; 3134 } 3135 sprintf(*out_name, "%s%s", cur_entry->prepend, client_name); 3136 return 0; 3137 } 3138 } 3139 } 3140 3141 return -EPERM; 3142 } 3143 3144 /* 3145 * For use with listxattr where the server fs gives us a name and we may need 3146 * to sanitize this for the client. 3147 * Returns a pointer to the result in *out_name 3148 * This is always the original string or the current string with some prefix 3149 * removed; no reallocation is done. 3150 * Returns 0 on success 3151 * Can return -ENODATA to indicate the name should be dropped from the list. 3152 */ 3153 static int xattr_map_server(const struct lo_data *lo, const char *server_name, 3154 const char **out_name) 3155 { 3156 size_t i; 3157 const char *end; 3158 3159 for (i = 0; i < lo->xattr_map_nentries; i++) { 3160 const XattrMapEntry *cur_entry = lo->xattr_map_list + i; 3161 3162 if ((cur_entry->flags & XATTR_MAP_FLAG_SERVER) && 3163 (strstart(server_name, cur_entry->prepend, &end))) { 3164 if (cur_entry->flags & XATTR_MAP_FLAG_BAD || 3165 cur_entry->flags & XATTR_MAP_FLAG_UNSUPPORTED) { 3166 return -ENODATA; 3167 } 3168 if (cur_entry->flags & XATTR_MAP_FLAG_OK) { 3169 *out_name = server_name; 3170 return 0; 3171 } 3172 if (cur_entry->flags & XATTR_MAP_FLAG_PREFIX) { 3173 /* Remove prefix */ 3174 *out_name = end; 3175 return 0; 3176 } 3177 } 3178 } 3179 3180 return -ENODATA; 3181 } 3182 3183 static bool block_xattr(struct lo_data *lo, const char *name) 3184 { 3185 /* 3186 * If user explicitly enabled posix_acl or did not provide any option, 3187 * do not block acl. Otherwise block system.posix_acl_access and 3188 * system.posix_acl_default xattrs. 3189 */ 3190 if (lo->user_posix_acl) { 3191 return false; 3192 } 3193 if (!strcmp(name, "system.posix_acl_access") || 3194 !strcmp(name, "system.posix_acl_default")) 3195 return true; 3196 3197 return false; 3198 } 3199 3200 /* 3201 * Returns number of bytes in xattr_list after filtering on success. This 3202 * could be zero as well if nothing is left after filtering. 3203 * 3204 * Returns negative error code on failure. 3205 * xattr_list is modified in place. 3206 */ 3207 static int remove_blocked_xattrs(struct lo_data *lo, char *xattr_list, 3208 unsigned in_size) 3209 { 3210 size_t out_index, in_index; 3211 3212 /* 3213 * As of now we only filter out acl xattrs. If acls are enabled or 3214 * they have not been explicitly disabled, there is nothing to 3215 * filter. 3216 */ 3217 if (lo->user_posix_acl) { 3218 return in_size; 3219 } 3220 3221 out_index = 0; 3222 in_index = 0; 3223 while (in_index < in_size) { 3224 char *in_ptr = xattr_list + in_index; 3225 3226 /* Length of current attribute name */ 3227 size_t in_len = strlen(xattr_list + in_index) + 1; 3228 3229 if (!block_xattr(lo, in_ptr)) { 3230 if (in_index != out_index) { 3231 memmove(xattr_list + out_index, xattr_list + in_index, in_len); 3232 } 3233 out_index += in_len; 3234 } 3235 in_index += in_len; 3236 } 3237 return out_index; 3238 } 3239 3240 static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name, 3241 size_t size) 3242 { 3243 struct lo_data *lo = lo_data(req); 3244 g_autofree char *value = NULL; 3245 char procname[64]; 3246 const char *name; 3247 char *mapped_name; 3248 struct lo_inode *inode; 3249 ssize_t ret; 3250 int saverr; 3251 int fd = -1; 3252 3253 if (block_xattr(lo, in_name)) { 3254 fuse_reply_err(req, EOPNOTSUPP); 3255 return; 3256 } 3257 3258 mapped_name = NULL; 3259 name = in_name; 3260 if (lo->xattrmap) { 3261 ret = xattr_map_client(lo, in_name, &mapped_name); 3262 if (ret < 0) { 3263 if (ret == -EPERM) { 3264 ret = -ENODATA; 3265 } 3266 fuse_reply_err(req, -ret); 3267 return; 3268 } 3269 if (mapped_name) { 3270 name = mapped_name; 3271 } 3272 } 3273 3274 inode = lo_inode(req, ino); 3275 if (!inode) { 3276 fuse_reply_err(req, EBADF); 3277 g_free(mapped_name); 3278 return; 3279 } 3280 3281 saverr = ENOSYS; 3282 if (!lo_data(req)->xattr) { 3283 goto out; 3284 } 3285 3286 fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", 3287 ino, name, size); 3288 3289 if (size) { 3290 value = g_try_malloc(size); 3291 if (!value) { 3292 goto out_err; 3293 } 3294 } 3295 3296 sprintf(procname, "%i", inode->fd); 3297 /* 3298 * It is not safe to open() non-regular/non-dir files in file server 3299 * unless O_PATH is used, so use that method for regular files/dir 3300 * only (as it seems giving less performance overhead). 3301 * Otherwise, call fchdir() to avoid open(). 3302 */ 3303 if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { 3304 fd = openat(lo->proc_self_fd, procname, O_RDONLY); 3305 if (fd < 0) { 3306 goto out_err; 3307 } 3308 ret = fgetxattr(fd, name, value, size); 3309 saverr = ret == -1 ? errno : 0; 3310 } else { 3311 /* fchdir should not fail here */ 3312 FCHDIR_NOFAIL(lo->proc_self_fd); 3313 ret = getxattr(procname, name, value, size); 3314 saverr = ret == -1 ? errno : 0; 3315 FCHDIR_NOFAIL(lo->root.fd); 3316 } 3317 3318 if (ret == -1) { 3319 goto out; 3320 } 3321 if (size) { 3322 saverr = 0; 3323 if (ret == 0) { 3324 goto out; 3325 } 3326 fuse_reply_buf(req, value, ret); 3327 } else { 3328 fuse_reply_xattr(req, ret); 3329 } 3330 out_free: 3331 if (fd >= 0) { 3332 close(fd); 3333 } 3334 3335 lo_inode_put(lo, &inode); 3336 return; 3337 3338 out_err: 3339 saverr = errno; 3340 out: 3341 fuse_reply_err(req, saverr); 3342 g_free(mapped_name); 3343 goto out_free; 3344 } 3345 3346 static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) 3347 { 3348 struct lo_data *lo = lo_data(req); 3349 g_autofree char *value = NULL; 3350 char procname[64]; 3351 struct lo_inode *inode; 3352 ssize_t ret; 3353 int saverr; 3354 int fd = -1; 3355 3356 inode = lo_inode(req, ino); 3357 if (!inode) { 3358 fuse_reply_err(req, EBADF); 3359 return; 3360 } 3361 3362 saverr = ENOSYS; 3363 if (!lo_data(req)->xattr) { 3364 goto out; 3365 } 3366 3367 fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, 3368 size); 3369 3370 if (size) { 3371 value = g_try_malloc(size); 3372 if (!value) { 3373 goto out_err; 3374 } 3375 } 3376 3377 sprintf(procname, "%i", inode->fd); 3378 if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { 3379 fd = openat(lo->proc_self_fd, procname, O_RDONLY); 3380 if (fd < 0) { 3381 goto out_err; 3382 } 3383 ret = flistxattr(fd, value, size); 3384 saverr = ret == -1 ? errno : 0; 3385 } else { 3386 /* fchdir should not fail here */ 3387 FCHDIR_NOFAIL(lo->proc_self_fd); 3388 ret = listxattr(procname, value, size); 3389 saverr = ret == -1 ? errno : 0; 3390 FCHDIR_NOFAIL(lo->root.fd); 3391 } 3392 3393 if (ret == -1) { 3394 goto out; 3395 } 3396 if (size) { 3397 saverr = 0; 3398 if (ret == 0) { 3399 goto out; 3400 } 3401 3402 if (lo->xattr_map_list) { 3403 /* 3404 * Map the names back, some attributes might be dropped, 3405 * some shortened, but not increased, so we shouldn't 3406 * run out of room. 3407 */ 3408 size_t out_index, in_index; 3409 out_index = 0; 3410 in_index = 0; 3411 while (in_index < ret) { 3412 const char *map_out; 3413 char *in_ptr = value + in_index; 3414 /* Length of current attribute name */ 3415 size_t in_len = strlen(value + in_index) + 1; 3416 3417 int mapret = xattr_map_server(lo, in_ptr, &map_out); 3418 if (mapret != -ENODATA && mapret != 0) { 3419 /* Shouldn't happen */ 3420 saverr = -mapret; 3421 goto out; 3422 } 3423 if (mapret == 0) { 3424 /* Either unchanged, or truncated */ 3425 size_t out_len; 3426 if (map_out != in_ptr) { 3427 /* +1 copies the NIL */ 3428 out_len = strlen(map_out) + 1; 3429 } else { 3430 /* No change */ 3431 out_len = in_len; 3432 } 3433 /* 3434 * Move result along, may still be needed for an unchanged 3435 * entry if a previous entry was changed. 3436 */ 3437 memmove(value + out_index, map_out, out_len); 3438 3439 out_index += out_len; 3440 } 3441 in_index += in_len; 3442 } 3443 ret = out_index; 3444 if (ret == 0) { 3445 goto out; 3446 } 3447 } 3448 3449 ret = remove_blocked_xattrs(lo, value, ret); 3450 if (ret <= 0) { 3451 saverr = -ret; 3452 goto out; 3453 } 3454 fuse_reply_buf(req, value, ret); 3455 } else { 3456 /* 3457 * xattrmap only ever shortens the result, 3458 * so we don't need to do anything clever with the 3459 * allocation length here. 3460 */ 3461 fuse_reply_xattr(req, ret); 3462 } 3463 out_free: 3464 if (fd >= 0) { 3465 close(fd); 3466 } 3467 3468 lo_inode_put(lo, &inode); 3469 return; 3470 3471 out_err: 3472 saverr = errno; 3473 out: 3474 fuse_reply_err(req, saverr); 3475 goto out_free; 3476 } 3477 3478 static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name, 3479 const char *value, size_t size, int flags, 3480 uint32_t extra_flags) 3481 { 3482 char procname[64]; 3483 const char *name; 3484 char *mapped_name; 3485 struct lo_data *lo = lo_data(req); 3486 struct lo_inode *inode; 3487 ssize_t ret; 3488 int saverr; 3489 int fd = -1; 3490 bool switched_creds = false; 3491 bool cap_fsetid_dropped = false; 3492 struct lo_cred old = {}; 3493 3494 if (block_xattr(lo, in_name)) { 3495 fuse_reply_err(req, EOPNOTSUPP); 3496 return; 3497 } 3498 3499 mapped_name = NULL; 3500 name = in_name; 3501 if (lo->xattrmap) { 3502 ret = xattr_map_client(lo, in_name, &mapped_name); 3503 if (ret < 0) { 3504 fuse_reply_err(req, -ret); 3505 return; 3506 } 3507 if (mapped_name) { 3508 name = mapped_name; 3509 } 3510 } 3511 3512 inode = lo_inode(req, ino); 3513 if (!inode) { 3514 fuse_reply_err(req, EBADF); 3515 g_free(mapped_name); 3516 return; 3517 } 3518 3519 saverr = ENOSYS; 3520 if (!lo_data(req)->xattr) { 3521 goto out; 3522 } 3523 3524 fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 3525 ", name=%s value=%s size=%zd)\n", ino, name, value, size); 3526 3527 sprintf(procname, "%i", inode->fd); 3528 /* 3529 * If we are setting posix access acl and if SGID needs to be 3530 * cleared, then switch to caller's gid and drop CAP_FSETID 3531 * and that should make sure host kernel clears SGID. 3532 * 3533 * This probably will not work when we support idmapped mounts. 3534 * In that case we will need to find a non-root gid and switch 3535 * to it. (Instead of gid in request). Fix it when we support 3536 * idmapped mounts. 3537 */ 3538 if (lo->posix_acl && !strcmp(name, "system.posix_acl_access") 3539 && (extra_flags & FUSE_SETXATTR_ACL_KILL_SGID)) { 3540 ret = lo_drop_cap_change_cred(req, &old, false, "FSETID", 3541 &cap_fsetid_dropped); 3542 if (ret) { 3543 saverr = ret; 3544 goto out; 3545 } 3546 switched_creds = true; 3547 } 3548 if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { 3549 fd = openat(lo->proc_self_fd, procname, O_RDONLY); 3550 if (fd < 0) { 3551 saverr = errno; 3552 goto out; 3553 } 3554 ret = fsetxattr(fd, name, value, size, flags); 3555 saverr = ret == -1 ? errno : 0; 3556 } else { 3557 /* fchdir should not fail here */ 3558 FCHDIR_NOFAIL(lo->proc_self_fd); 3559 ret = setxattr(procname, name, value, size, flags); 3560 saverr = ret == -1 ? errno : 0; 3561 FCHDIR_NOFAIL(lo->root.fd); 3562 } 3563 if (switched_creds) { 3564 if (cap_fsetid_dropped) 3565 lo_restore_cred_gain_cap(&old, false, "FSETID"); 3566 else 3567 lo_restore_cred(&old, false); 3568 } 3569 3570 out: 3571 if (fd >= 0) { 3572 close(fd); 3573 } 3574 3575 lo_inode_put(lo, &inode); 3576 g_free(mapped_name); 3577 fuse_reply_err(req, saverr); 3578 } 3579 3580 static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *in_name) 3581 { 3582 char procname[64]; 3583 const char *name; 3584 char *mapped_name; 3585 struct lo_data *lo = lo_data(req); 3586 struct lo_inode *inode; 3587 ssize_t ret; 3588 int saverr; 3589 int fd = -1; 3590 3591 if (block_xattr(lo, in_name)) { 3592 fuse_reply_err(req, EOPNOTSUPP); 3593 return; 3594 } 3595 3596 mapped_name = NULL; 3597 name = in_name; 3598 if (lo->xattrmap) { 3599 ret = xattr_map_client(lo, in_name, &mapped_name); 3600 if (ret < 0) { 3601 fuse_reply_err(req, -ret); 3602 return; 3603 } 3604 if (mapped_name) { 3605 name = mapped_name; 3606 } 3607 } 3608 3609 inode = lo_inode(req, ino); 3610 if (!inode) { 3611 fuse_reply_err(req, EBADF); 3612 g_free(mapped_name); 3613 return; 3614 } 3615 3616 saverr = ENOSYS; 3617 if (!lo_data(req)->xattr) { 3618 goto out; 3619 } 3620 3621 fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, 3622 name); 3623 3624 sprintf(procname, "%i", inode->fd); 3625 if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { 3626 fd = openat(lo->proc_self_fd, procname, O_RDONLY); 3627 if (fd < 0) { 3628 saverr = errno; 3629 goto out; 3630 } 3631 ret = fremovexattr(fd, name); 3632 saverr = ret == -1 ? errno : 0; 3633 } else { 3634 /* fchdir should not fail here */ 3635 FCHDIR_NOFAIL(lo->proc_self_fd); 3636 ret = removexattr(procname, name); 3637 saverr = ret == -1 ? errno : 0; 3638 FCHDIR_NOFAIL(lo->root.fd); 3639 } 3640 3641 out: 3642 if (fd >= 0) { 3643 close(fd); 3644 } 3645 3646 lo_inode_put(lo, &inode); 3647 g_free(mapped_name); 3648 fuse_reply_err(req, saverr); 3649 } 3650 3651 #ifdef HAVE_COPY_FILE_RANGE 3652 static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, 3653 struct fuse_file_info *fi_in, fuse_ino_t ino_out, 3654 off_t off_out, struct fuse_file_info *fi_out, 3655 size_t len, int flags) 3656 { 3657 int in_fd, out_fd; 3658 ssize_t res; 3659 3660 in_fd = lo_fi_fd(req, fi_in); 3661 out_fd = lo_fi_fd(req, fi_out); 3662 3663 fuse_log(FUSE_LOG_DEBUG, 3664 "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, " 3665 "off=%ju, ino=%" PRIu64 "/fd=%d, " 3666 "off=%ju, size=%zd, flags=0x%x)\n", 3667 ino_in, in_fd, (intmax_t)off_in, 3668 ino_out, out_fd, (intmax_t)off_out, len, flags); 3669 3670 res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); 3671 if (res < 0) { 3672 fuse_reply_err(req, errno); 3673 } else { 3674 fuse_reply_write(req, res); 3675 } 3676 } 3677 #endif 3678 3679 static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, 3680 struct fuse_file_info *fi) 3681 { 3682 off_t res; 3683 3684 (void)ino; 3685 res = lseek(lo_fi_fd(req, fi), off, whence); 3686 if (res != -1) { 3687 fuse_reply_lseek(req, res); 3688 } else { 3689 fuse_reply_err(req, errno); 3690 } 3691 } 3692 3693 static int lo_do_syncfs(struct lo_data *lo, struct lo_inode *inode) 3694 { 3695 int fd, ret = 0; 3696 3697 fuse_log(FUSE_LOG_DEBUG, "lo_do_syncfs(ino=%" PRIu64 ")\n", 3698 inode->fuse_ino); 3699 3700 fd = lo_inode_open(lo, inode, O_RDONLY); 3701 if (fd < 0) { 3702 return -fd; 3703 } 3704 3705 if (syncfs(fd) < 0) { 3706 ret = errno; 3707 } 3708 3709 close(fd); 3710 return ret; 3711 } 3712 3713 static void lo_syncfs(fuse_req_t req, fuse_ino_t ino) 3714 { 3715 struct lo_data *lo = lo_data(req); 3716 struct lo_inode *inode = lo_inode(req, ino); 3717 int err; 3718 3719 if (!inode) { 3720 fuse_reply_err(req, EBADF); 3721 return; 3722 } 3723 3724 err = lo_do_syncfs(lo, inode); 3725 lo_inode_put(lo, &inode); 3726 3727 /* 3728 * If submounts aren't announced, the client only sends a request to 3729 * sync the root inode. TODO: Track submounts internally and iterate 3730 * over them as well. 3731 */ 3732 3733 fuse_reply_err(req, err); 3734 } 3735 3736 static void lo_destroy(void *userdata) 3737 { 3738 struct lo_data *lo = (struct lo_data *)userdata; 3739 3740 pthread_mutex_lock(&lo->mutex); 3741 while (true) { 3742 GHashTableIter iter; 3743 gpointer key, value; 3744 3745 g_hash_table_iter_init(&iter, lo->inodes); 3746 if (!g_hash_table_iter_next(&iter, &key, &value)) { 3747 break; 3748 } 3749 3750 struct lo_inode *inode = value; 3751 unref_inode(lo, inode, inode->nlookup); 3752 } 3753 pthread_mutex_unlock(&lo->mutex); 3754 } 3755 3756 static struct fuse_lowlevel_ops lo_oper = { 3757 .init = lo_init, 3758 .lookup = lo_lookup, 3759 .mkdir = lo_mkdir, 3760 .mknod = lo_mknod, 3761 .symlink = lo_symlink, 3762 .link = lo_link, 3763 .unlink = lo_unlink, 3764 .rmdir = lo_rmdir, 3765 .rename = lo_rename, 3766 .forget = lo_forget, 3767 .forget_multi = lo_forget_multi, 3768 .getattr = lo_getattr, 3769 .setattr = lo_setattr, 3770 .readlink = lo_readlink, 3771 .opendir = lo_opendir, 3772 .readdir = lo_readdir, 3773 .readdirplus = lo_readdirplus, 3774 .releasedir = lo_releasedir, 3775 .fsyncdir = lo_fsyncdir, 3776 .create = lo_create, 3777 .getlk = lo_getlk, 3778 .setlk = lo_setlk, 3779 .open = lo_open, 3780 .release = lo_release, 3781 .flush = lo_flush, 3782 .fsync = lo_fsync, 3783 .read = lo_read, 3784 .write_buf = lo_write_buf, 3785 .statfs = lo_statfs, 3786 .fallocate = lo_fallocate, 3787 .flock = lo_flock, 3788 .getxattr = lo_getxattr, 3789 .listxattr = lo_listxattr, 3790 .setxattr = lo_setxattr, 3791 .removexattr = lo_removexattr, 3792 #ifdef HAVE_COPY_FILE_RANGE 3793 .copy_file_range = lo_copy_file_range, 3794 #endif 3795 .lseek = lo_lseek, 3796 .syncfs = lo_syncfs, 3797 .destroy = lo_destroy, 3798 }; 3799 3800 /* Print vhost-user.json backend program capabilities */ 3801 static void print_capabilities(void) 3802 { 3803 printf("{\n"); 3804 printf(" \"type\": \"fs\"\n"); 3805 printf("}\n"); 3806 } 3807 3808 /* 3809 * Drop all Linux capabilities because the wait parent process only needs to 3810 * sit in waitpid(2) and terminate. 3811 */ 3812 static void setup_wait_parent_capabilities(void) 3813 { 3814 capng_setpid(syscall(SYS_gettid)); 3815 capng_clear(CAPNG_SELECT_BOTH); 3816 capng_apply(CAPNG_SELECT_BOTH); 3817 } 3818 3819 /* 3820 * Move to a new mount, net, and pid namespaces to isolate this process. 3821 */ 3822 static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) 3823 { 3824 pid_t child; 3825 3826 /* 3827 * Create a new pid namespace for *child* processes. We'll have to 3828 * fork in order to enter the new pid namespace. A new mount namespace 3829 * is also needed so that we can remount /proc for the new pid 3830 * namespace. 3831 * 3832 * Our UNIX domain sockets have been created. Now we can move to 3833 * an empty network namespace to prevent TCP/IP and other network 3834 * activity in case this process is compromised. 3835 */ 3836 if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) { 3837 fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n"); 3838 exit(1); 3839 } 3840 3841 child = fork(); 3842 if (child < 0) { 3843 fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n"); 3844 exit(1); 3845 } 3846 if (child > 0) { 3847 pid_t waited; 3848 int wstatus; 3849 3850 setup_wait_parent_capabilities(); 3851 3852 /* The parent waits for the child */ 3853 do { 3854 waited = waitpid(child, &wstatus, 0); 3855 } while (waited < 0 && errno == EINTR && !se->exited); 3856 3857 /* We were terminated by a signal, see fuse_signals.c */ 3858 if (se->exited) { 3859 exit(0); 3860 } 3861 3862 if (WIFEXITED(wstatus)) { 3863 exit(WEXITSTATUS(wstatus)); 3864 } 3865 3866 exit(1); 3867 } 3868 3869 /* Send us SIGTERM when the parent thread terminates, see prctl(2) */ 3870 prctl(PR_SET_PDEATHSIG, SIGTERM); 3871 3872 /* 3873 * If the mounts have shared propagation then we want to opt out so our 3874 * mount changes don't affect the parent mount namespace. 3875 */ 3876 if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { 3877 fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n"); 3878 exit(1); 3879 } 3880 3881 /* The child must remount /proc to use the new pid namespace */ 3882 if (mount("proc", "/proc", "proc", 3883 MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) { 3884 fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n"); 3885 exit(1); 3886 } 3887 3888 /* Get the /proc/self/task descriptor */ 3889 lo->proc_self_task = open("/proc/self/task/", O_PATH); 3890 if (lo->proc_self_task == -1) { 3891 fuse_log(FUSE_LOG_ERR, "open(/proc/self/task, O_PATH): %m\n"); 3892 exit(1); 3893 } 3894 3895 lo->use_fscreate = is_fscreate_usable(lo); 3896 3897 /* 3898 * We only need /proc/self/fd. Prevent ".." from accessing parent 3899 * directories of /proc/self/fd by bind-mounting it over /proc. Since / was 3900 * previously remounted with MS_REC | MS_SLAVE this mount change only 3901 * affects our process. 3902 */ 3903 if (mount("/proc/self/fd", "/proc", NULL, MS_BIND, NULL) < 0) { 3904 fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, MS_BIND): %m\n"); 3905 exit(1); 3906 } 3907 3908 /* Get the /proc (actually /proc/self/fd, see above) file descriptor */ 3909 lo->proc_self_fd = open("/proc", O_PATH); 3910 if (lo->proc_self_fd == -1) { 3911 fuse_log(FUSE_LOG_ERR, "open(/proc, O_PATH): %m\n"); 3912 exit(1); 3913 } 3914 } 3915 3916 /* 3917 * Capture the capability state, we'll need to restore this for individual 3918 * threads later; see load_capng. 3919 */ 3920 static void setup_capng(void) 3921 { 3922 /* Note this accesses /proc so has to happen before the sandbox */ 3923 if (capng_get_caps_process()) { 3924 fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n"); 3925 exit(1); 3926 } 3927 pthread_mutex_init(&cap.mutex, NULL); 3928 pthread_mutex_lock(&cap.mutex); 3929 cap.saved = capng_save_state(); 3930 if (!cap.saved) { 3931 fuse_log(FUSE_LOG_ERR, "capng_save_state\n"); 3932 exit(1); 3933 } 3934 pthread_mutex_unlock(&cap.mutex); 3935 } 3936 3937 static void cleanup_capng(void) 3938 { 3939 free(cap.saved); 3940 cap.saved = NULL; 3941 pthread_mutex_destroy(&cap.mutex); 3942 } 3943 3944 3945 /* 3946 * Make the source directory our root so symlinks cannot escape and no other 3947 * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. 3948 */ 3949 static void setup_mounts(const char *source) 3950 { 3951 int oldroot; 3952 int newroot; 3953 3954 if (mount(source, source, NULL, MS_BIND | MS_REC, NULL) < 0) { 3955 fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); 3956 exit(1); 3957 } 3958 3959 /* This magic is based on lxc's lxc_pivot_root() */ 3960 oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); 3961 if (oldroot < 0) { 3962 fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); 3963 exit(1); 3964 } 3965 3966 newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC); 3967 if (newroot < 0) { 3968 fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source); 3969 exit(1); 3970 } 3971 3972 if (fchdir(newroot) < 0) { 3973 fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); 3974 exit(1); 3975 } 3976 3977 if (syscall(__NR_pivot_root, ".", ".") < 0) { 3978 fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n"); 3979 exit(1); 3980 } 3981 3982 if (fchdir(oldroot) < 0) { 3983 fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n"); 3984 exit(1); 3985 } 3986 3987 if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) { 3988 fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n"); 3989 exit(1); 3990 } 3991 3992 if (umount2(".", MNT_DETACH) < 0) { 3993 fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n"); 3994 exit(1); 3995 } 3996 3997 if (fchdir(newroot) < 0) { 3998 fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); 3999 exit(1); 4000 } 4001 4002 close(newroot); 4003 close(oldroot); 4004 } 4005 4006 /* 4007 * Only keep capabilities in allowlist that are needed for file system operation 4008 * The (possibly NULL) modcaps_in string passed in is free'd before exit. 4009 */ 4010 static void setup_capabilities(char *modcaps_in) 4011 { 4012 char *modcaps = modcaps_in; 4013 pthread_mutex_lock(&cap.mutex); 4014 capng_restore_state(&cap.saved); 4015 4016 /* 4017 * Add to allowlist file system-related capabilities that are needed for a 4018 * file server to act like root. Drop everything else like networking and 4019 * sysadmin capabilities. 4020 * 4021 * Exclusions: 4022 * 1. CAP_LINUX_IMMUTABLE is not included because it's only used via ioctl 4023 * and we don't support that. 4024 * 2. CAP_MAC_OVERRIDE is not included because it only seems to be 4025 * used by the Smack LSM. Omit it until there is demand for it. 4026 */ 4027 capng_setpid(syscall(SYS_gettid)); 4028 capng_clear(CAPNG_SELECT_BOTH); 4029 if (capng_updatev(CAPNG_ADD, CAPNG_PERMITTED | CAPNG_EFFECTIVE, 4030 CAP_CHOWN, 4031 CAP_DAC_OVERRIDE, 4032 CAP_FOWNER, 4033 CAP_FSETID, 4034 CAP_SETGID, 4035 CAP_SETUID, 4036 CAP_MKNOD, 4037 CAP_SETFCAP, 4038 -1)) { 4039 fuse_log(FUSE_LOG_ERR, "%s: capng_updatev failed\n", __func__); 4040 exit(1); 4041 } 4042 4043 /* 4044 * The modcaps option is a colon separated list of caps, 4045 * each preceded by either + or -. 4046 */ 4047 while (modcaps) { 4048 capng_act_t action; 4049 int cap; 4050 4051 char *next = strchr(modcaps, ':'); 4052 if (next) { 4053 *next = '\0'; 4054 next++; 4055 } 4056 4057 switch (modcaps[0]) { 4058 case '+': 4059 action = CAPNG_ADD; 4060 break; 4061 4062 case '-': 4063 action = CAPNG_DROP; 4064 break; 4065 4066 default: 4067 fuse_log(FUSE_LOG_ERR, 4068 "%s: Expecting '+'/'-' in modcaps but found '%c'\n", 4069 __func__, modcaps[0]); 4070 exit(1); 4071 } 4072 cap = capng_name_to_capability(modcaps + 1); 4073 if (cap < 0) { 4074 fuse_log(FUSE_LOG_ERR, "%s: Unknown capability '%s'\n", __func__, 4075 modcaps); 4076 exit(1); 4077 } 4078 if (capng_update(action, CAPNG_PERMITTED | CAPNG_EFFECTIVE, cap)) { 4079 fuse_log(FUSE_LOG_ERR, "%s: capng_update failed for '%s'\n", 4080 __func__, modcaps); 4081 exit(1); 4082 } 4083 4084 modcaps = next; 4085 } 4086 g_free(modcaps_in); 4087 4088 if (capng_apply(CAPNG_SELECT_BOTH)) { 4089 fuse_log(FUSE_LOG_ERR, "%s: capng_apply failed\n", __func__); 4090 exit(1); 4091 } 4092 4093 cap.saved = capng_save_state(); 4094 if (!cap.saved) { 4095 fuse_log(FUSE_LOG_ERR, "%s: capng_save_state failed\n", __func__); 4096 exit(1); 4097 } 4098 pthread_mutex_unlock(&cap.mutex); 4099 } 4100 4101 /* 4102 * Use chroot as a weaker sandbox for environments where the process is 4103 * launched without CAP_SYS_ADMIN. 4104 */ 4105 static void setup_chroot(struct lo_data *lo) 4106 { 4107 lo->proc_self_fd = open("/proc/self/fd", O_PATH); 4108 if (lo->proc_self_fd == -1) { 4109 fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/fd\", O_PATH): %m\n"); 4110 exit(1); 4111 } 4112 4113 lo->proc_self_task = open("/proc/self/task", O_PATH); 4114 if (lo->proc_self_fd == -1) { 4115 fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/task\", O_PATH): %m\n"); 4116 exit(1); 4117 } 4118 4119 lo->use_fscreate = is_fscreate_usable(lo); 4120 4121 /* 4122 * Make the shared directory the file system root so that FUSE_OPEN 4123 * (lo_open()) cannot escape the shared directory by opening a symlink. 4124 * 4125 * The chroot(2) syscall is later disabled by seccomp and the 4126 * CAP_SYS_CHROOT capability is dropped so that tampering with the chroot 4127 * is not possible. 4128 * 4129 * However, it's still possible to escape the chroot via lo->proc_self_fd 4130 * but that requires first gaining control of the process. 4131 */ 4132 if (chroot(lo->source) != 0) { 4133 fuse_log(FUSE_LOG_ERR, "chroot(\"%s\"): %m\n", lo->source); 4134 exit(1); 4135 } 4136 4137 /* Move into the chroot */ 4138 if (chdir("/") != 0) { 4139 fuse_log(FUSE_LOG_ERR, "chdir(\"/\"): %m\n"); 4140 exit(1); 4141 } 4142 } 4143 4144 /* 4145 * Lock down this process to prevent access to other processes or files outside 4146 * source directory. This reduces the impact of arbitrary code execution bugs. 4147 */ 4148 static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, 4149 bool enable_syslog) 4150 { 4151 if (lo->sandbox == SANDBOX_NAMESPACE) { 4152 setup_namespaces(lo, se); 4153 setup_mounts(lo->source); 4154 } else { 4155 setup_chroot(lo); 4156 } 4157 4158 setup_seccomp(enable_syslog); 4159 setup_capabilities(g_strdup(lo->modcaps)); 4160 } 4161 4162 /* Set the maximum number of open file descriptors */ 4163 static void setup_nofile_rlimit(unsigned long rlimit_nofile) 4164 { 4165 struct rlimit rlim = { 4166 .rlim_cur = rlimit_nofile, 4167 .rlim_max = rlimit_nofile, 4168 }; 4169 4170 if (rlimit_nofile == 0) { 4171 return; /* nothing to do */ 4172 } 4173 4174 if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { 4175 /* Ignore SELinux denials */ 4176 if (errno == EPERM) { 4177 return; 4178 } 4179 4180 fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n"); 4181 exit(1); 4182 } 4183 } 4184 4185 static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) 4186 { 4187 g_autofree char *localfmt = NULL; 4188 char buf[64]; 4189 4190 if (current_log_level < level) { 4191 return; 4192 } 4193 4194 if (current_log_level == FUSE_LOG_DEBUG) { 4195 if (use_syslog) { 4196 /* no timestamp needed */ 4197 localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), 4198 fmt); 4199 } else { 4200 g_autoptr(GDateTime) now = g_date_time_new_now_utc(); 4201 g_autofree char *nowstr = g_date_time_format(now, 4202 "%Y-%m-%d %H:%M:%S.%%06d%z"); 4203 snprintf(buf, 64, nowstr, g_date_time_get_microsecond(now)); 4204 localfmt = g_strdup_printf("[%s] [ID: %08ld] %s", 4205 buf, syscall(__NR_gettid), fmt); 4206 } 4207 fmt = localfmt; 4208 } 4209 4210 if (use_syslog) { 4211 int priority = LOG_ERR; 4212 switch (level) { 4213 case FUSE_LOG_EMERG: 4214 priority = LOG_EMERG; 4215 break; 4216 case FUSE_LOG_ALERT: 4217 priority = LOG_ALERT; 4218 break; 4219 case FUSE_LOG_CRIT: 4220 priority = LOG_CRIT; 4221 break; 4222 case FUSE_LOG_ERR: 4223 priority = LOG_ERR; 4224 break; 4225 case FUSE_LOG_WARNING: 4226 priority = LOG_WARNING; 4227 break; 4228 case FUSE_LOG_NOTICE: 4229 priority = LOG_NOTICE; 4230 break; 4231 case FUSE_LOG_INFO: 4232 priority = LOG_INFO; 4233 break; 4234 case FUSE_LOG_DEBUG: 4235 priority = LOG_DEBUG; 4236 break; 4237 } 4238 vsyslog(priority, fmt, ap); 4239 } else { 4240 vfprintf(stderr, fmt, ap); 4241 } 4242 } 4243 4244 static void setup_root(struct lo_data *lo, struct lo_inode *root) 4245 { 4246 int fd, res; 4247 struct stat stat; 4248 uint64_t mnt_id; 4249 4250 fd = open("/", O_PATH); 4251 if (fd == -1) { 4252 fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source); 4253 exit(1); 4254 } 4255 4256 res = do_statx(lo, fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, 4257 &mnt_id); 4258 if (res == -1) { 4259 fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source); 4260 exit(1); 4261 } 4262 4263 root->filetype = S_IFDIR; 4264 root->fd = fd; 4265 root->key.ino = stat.st_ino; 4266 root->key.dev = stat.st_dev; 4267 root->key.mnt_id = mnt_id; 4268 root->nlookup = 2; 4269 g_atomic_int_set(&root->refcount, 2); 4270 if (lo->posix_lock) { 4271 pthread_mutex_init(&root->plock_mutex, NULL); 4272 root->posix_locks = g_hash_table_new_full( 4273 g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); 4274 } 4275 } 4276 4277 static guint lo_key_hash(gconstpointer key) 4278 { 4279 const struct lo_key *lkey = key; 4280 4281 return (guint)lkey->ino + (guint)lkey->dev + (guint)lkey->mnt_id; 4282 } 4283 4284 static gboolean lo_key_equal(gconstpointer a, gconstpointer b) 4285 { 4286 const struct lo_key *la = a; 4287 const struct lo_key *lb = b; 4288 4289 return la->ino == lb->ino && la->dev == lb->dev && la->mnt_id == lb->mnt_id; 4290 } 4291 4292 static void fuse_lo_data_cleanup(struct lo_data *lo) 4293 { 4294 if (lo->inodes) { 4295 g_hash_table_destroy(lo->inodes); 4296 } 4297 4298 if (lo->root.posix_locks) { 4299 g_hash_table_destroy(lo->root.posix_locks); 4300 } 4301 lo_map_destroy(&lo->fd_map); 4302 lo_map_destroy(&lo->dirp_map); 4303 lo_map_destroy(&lo->ino_map); 4304 4305 if (lo->proc_self_fd >= 0) { 4306 close(lo->proc_self_fd); 4307 } 4308 4309 if (lo->proc_self_task >= 0) { 4310 close(lo->proc_self_task); 4311 } 4312 4313 if (lo->root.fd >= 0) { 4314 close(lo->root.fd); 4315 } 4316 4317 free(lo->xattrmap); 4318 free_xattrmap(lo); 4319 free(lo->xattr_security_capability); 4320 free(lo->source); 4321 } 4322 4323 static void qemu_version(void) 4324 { 4325 printf("virtiofsd version " QEMU_FULL_VERSION "\n" QEMU_COPYRIGHT "\n"); 4326 } 4327 4328 int main(int argc, char *argv[]) 4329 { 4330 struct fuse_args args = FUSE_ARGS_INIT(argc, argv); 4331 struct fuse_session *se; 4332 struct fuse_cmdline_opts opts; 4333 struct lo_data lo = { 4334 .sandbox = SANDBOX_NAMESPACE, 4335 .debug = 0, 4336 .writeback = 0, 4337 .posix_lock = 0, 4338 .allow_direct_io = 0, 4339 .proc_self_fd = -1, 4340 .proc_self_task = -1, 4341 .user_killpriv_v2 = -1, 4342 .user_posix_acl = -1, 4343 .user_security_label = -1, 4344 }; 4345 struct lo_map_elem *root_elem; 4346 struct lo_map_elem *reserve_elem; 4347 int ret = -1; 4348 4349 /* Initialize time conversion information for localtime_r(). */ 4350 tzset(); 4351 4352 /* Don't mask creation mode, kernel already did that */ 4353 umask(0); 4354 4355 qemu_init_exec_dir(argv[0]); 4356 4357 drop_supplementary_groups(); 4358 4359 pthread_mutex_init(&lo.mutex, NULL); 4360 lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); 4361 lo.root.fd = -1; 4362 lo.root.fuse_ino = FUSE_ROOT_ID; 4363 lo.cache = CACHE_AUTO; 4364 4365 /* 4366 * Set up the ino map like this: 4367 * [0] Reserved (will not be used) 4368 * [1] Root inode 4369 */ 4370 lo_map_init(&lo.ino_map); 4371 reserve_elem = lo_map_reserve(&lo.ino_map, 0); 4372 if (!reserve_elem) { 4373 fuse_log(FUSE_LOG_ERR, "failed to alloc reserve_elem.\n"); 4374 goto err_out1; 4375 } 4376 reserve_elem->in_use = false; 4377 root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); 4378 if (!root_elem) { 4379 fuse_log(FUSE_LOG_ERR, "failed to alloc root_elem.\n"); 4380 goto err_out1; 4381 } 4382 root_elem->inode = &lo.root; 4383 4384 lo_map_init(&lo.dirp_map); 4385 lo_map_init(&lo.fd_map); 4386 4387 if (fuse_parse_cmdline(&args, &opts) != 0) { 4388 goto err_out1; 4389 } 4390 fuse_set_log_func(log_func); 4391 use_syslog = opts.syslog; 4392 if (use_syslog) { 4393 openlog("virtiofsd", LOG_PID, LOG_DAEMON); 4394 } 4395 4396 if (opts.show_help) { 4397 printf("usage: %s [options]\n\n", argv[0]); 4398 fuse_cmdline_help(); 4399 printf(" -o source=PATH shared directory tree\n"); 4400 fuse_lowlevel_help(); 4401 ret = 0; 4402 goto err_out1; 4403 } else if (opts.show_version) { 4404 qemu_version(); 4405 fuse_lowlevel_version(); 4406 ret = 0; 4407 goto err_out1; 4408 } else if (opts.print_capabilities) { 4409 print_capabilities(); 4410 ret = 0; 4411 goto err_out1; 4412 } 4413 4414 if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { 4415 goto err_out1; 4416 } 4417 4418 if (opts.log_level != 0) { 4419 current_log_level = opts.log_level; 4420 } else { 4421 /* default log level is INFO */ 4422 current_log_level = FUSE_LOG_INFO; 4423 } 4424 lo.debug = opts.debug; 4425 if (lo.debug) { 4426 current_log_level = FUSE_LOG_DEBUG; 4427 } 4428 if (lo.source) { 4429 struct stat stat; 4430 int res; 4431 4432 res = lstat(lo.source, &stat); 4433 if (res == -1) { 4434 fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", 4435 lo.source); 4436 exit(1); 4437 } 4438 if (!S_ISDIR(stat.st_mode)) { 4439 fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); 4440 exit(1); 4441 } 4442 } else { 4443 lo.source = strdup("/"); 4444 if (!lo.source) { 4445 fuse_log(FUSE_LOG_ERR, "failed to strdup source\n"); 4446 goto err_out1; 4447 } 4448 } 4449 4450 if (lo.xattrmap) { 4451 lo.xattr = 1; 4452 parse_xattrmap(&lo); 4453 } 4454 4455 if (!lo.timeout_set) { 4456 switch (lo.cache) { 4457 case CACHE_NONE: 4458 lo.timeout = 0.0; 4459 break; 4460 4461 case CACHE_AUTO: 4462 lo.timeout = 1.0; 4463 break; 4464 4465 case CACHE_ALWAYS: 4466 lo.timeout = 86400.0; 4467 break; 4468 } 4469 } else if (lo.timeout < 0) { 4470 fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout); 4471 exit(1); 4472 } 4473 4474 if (lo.user_posix_acl == 1 && !lo.xattr) { 4475 fuse_log(FUSE_LOG_ERR, "Can't enable posix ACLs. xattrs are disabled." 4476 "\n"); 4477 exit(1); 4478 } 4479 4480 lo.use_statx = true; 4481 4482 se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); 4483 if (se == NULL) { 4484 goto err_out1; 4485 } 4486 4487 if (fuse_set_signal_handlers(se) != 0) { 4488 goto err_out2; 4489 } 4490 4491 if (fuse_session_mount(se) != 0) { 4492 goto err_out3; 4493 } 4494 4495 fuse_daemonize(opts.foreground); 4496 4497 setup_nofile_rlimit(opts.rlimit_nofile); 4498 4499 /* Must be before sandbox since it wants /proc */ 4500 setup_capng(); 4501 4502 setup_sandbox(&lo, se, opts.syslog); 4503 4504 setup_root(&lo, &lo.root); 4505 /* Block until ctrl+c or fusermount -u */ 4506 ret = virtio_loop(se); 4507 4508 fuse_session_unmount(se); 4509 cleanup_capng(); 4510 err_out3: 4511 fuse_remove_signal_handlers(se); 4512 err_out2: 4513 fuse_session_destroy(se); 4514 err_out1: 4515 fuse_opt_free_args(&args); 4516 4517 fuse_lo_data_cleanup(&lo); 4518 4519 return ret ? 1 : 0; 4520 }