qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git
Log | Files | Refs | Submodules | LICENSE

client.c (48932B)


      1 /*
      2  *  Copyright (C) 2016-2019 Red Hat, Inc.
      3  *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
      4  *
      5  *  Network Block Device Client Side
      6  *
      7  *  This program is free software; you can redistribute it and/or modify
      8  *  it under the terms of the GNU General Public License as published by
      9  *  the Free Software Foundation; under version 2 of the License.
     10  *
     11  *  This program is distributed in the hope that it will be useful,
     12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14  *  GNU General Public License for more details.
     15  *
     16  *  You should have received a copy of the GNU General Public License
     17  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
     18  */
     19 
     20 #include "qemu/osdep.h"
     21 #include "qapi/error.h"
     22 #include "qemu/queue.h"
     23 #include "trace.h"
     24 #include "nbd-internal.h"
     25 #include "qemu/cutils.h"
     26 
     27 /* Definitions for opaque data types */
     28 
     29 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
     30 
     31 /* That's all folks */
     32 
     33 /* Basic flow for negotiation
     34 
     35    Server         Client
     36    Negotiate
     37 
     38    or
     39 
     40    Server         Client
     41    Negotiate #1
     42                   Option
     43    Negotiate #2
     44 
     45    ----
     46 
     47    followed by
     48 
     49    Server         Client
     50                   Request
     51    Response
     52                   Request
     53    Response
     54                   ...
     55    ...
     56                   Request (type == 2)
     57 
     58 */
     59 
     60 /* Send an option request.
     61  *
     62  * The request is for option @opt, with @data containing @len bytes of
     63  * additional payload for the request (@len may be -1 to treat @data as
     64  * a C string; and @data may be NULL if @len is 0).
     65  * Return 0 if successful, -1 with errp set if it is impossible to
     66  * continue. */
     67 static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
     68                                    uint32_t len, const char *data,
     69                                    Error **errp)
     70 {
     71     ERRP_GUARD();
     72     NBDOption req;
     73     QEMU_BUILD_BUG_ON(sizeof(req) != 16);
     74 
     75     if (len == -1) {
     76         req.length = len = strlen(data);
     77     }
     78     trace_nbd_send_option_request(opt, nbd_opt_lookup(opt), len);
     79 
     80     stq_be_p(&req.magic, NBD_OPTS_MAGIC);
     81     stl_be_p(&req.option, opt);
     82     stl_be_p(&req.length, len);
     83 
     84     if (nbd_write(ioc, &req, sizeof(req), errp) < 0) {
     85         error_prepend(errp, "Failed to send option request header: ");
     86         return -1;
     87     }
     88 
     89     if (len && nbd_write(ioc, (char *) data, len, errp) < 0) {
     90         error_prepend(errp, "Failed to send option request data: ");
     91         return -1;
     92     }
     93 
     94     return 0;
     95 }
     96 
     97 /* Send NBD_OPT_ABORT as a courtesy to let the server know that we are
     98  * not going to attempt further negotiation. */
     99 static void nbd_send_opt_abort(QIOChannel *ioc)
    100 {
    101     /* Technically, a compliant server is supposed to reply to us; but
    102      * older servers disconnected instead. At any rate, we're allowed
    103      * to disconnect without waiting for the server reply, so we don't
    104      * even care if the request makes it to the server, let alone
    105      * waiting around for whether the server replies. */
    106     nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL);
    107 }
    108 
    109 
    110 /* Receive the header of an option reply, which should match the given
    111  * opt.  Read through the length field, but NOT the length bytes of
    112  * payload. Return 0 if successful, -1 with errp set if it is
    113  * impossible to continue. */
    114 static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
    115                                     NBDOptionReply *reply, Error **errp)
    116 {
    117     QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
    118     if (nbd_read(ioc, reply, sizeof(*reply), "option reply", errp) < 0) {
    119         nbd_send_opt_abort(ioc);
    120         return -1;
    121     }
    122     reply->magic = be64_to_cpu(reply->magic);
    123     reply->option = be32_to_cpu(reply->option);
    124     reply->type = be32_to_cpu(reply->type);
    125     reply->length = be32_to_cpu(reply->length);
    126 
    127     trace_nbd_receive_option_reply(reply->option, nbd_opt_lookup(reply->option),
    128                                    reply->type, nbd_rep_lookup(reply->type),
    129                                    reply->length);
    130 
    131     if (reply->magic != NBD_REP_MAGIC) {
    132         error_setg(errp, "Unexpected option reply magic");
    133         nbd_send_opt_abort(ioc);
    134         return -1;
    135     }
    136     if (reply->option != opt) {
    137         error_setg(errp, "Unexpected option type %u (%s), expected %u (%s)",
    138                    reply->option, nbd_opt_lookup(reply->option),
    139                    opt, nbd_opt_lookup(opt));
    140         nbd_send_opt_abort(ioc);
    141         return -1;
    142     }
    143     return 0;
    144 }
    145 
    146 /*
    147  * If reply represents success, return 1 without further action.  If
    148  * reply represents an error, consume the optional payload of the
    149  * packet on ioc.  Then return 0 for unsupported (so the client can
    150  * fall back to other approaches), where @strict determines if only
    151  * ERR_UNSUP or all errors fit that category, or -1 with errp set for
    152  * other errors.
    153  */
    154 static int nbd_handle_reply_err(QIOChannel *ioc, NBDOptionReply *reply,
    155                                 bool strict, Error **errp)
    156 {
    157     ERRP_GUARD();
    158     g_autofree char *msg = NULL;
    159 
    160     if (!(reply->type & (1 << 31))) {
    161         return 1;
    162     }
    163 
    164     if (reply->length) {
    165         if (reply->length > NBD_MAX_BUFFER_SIZE) {
    166             error_setg(errp, "server error %" PRIu32
    167                        " (%s) message is too long",
    168                        reply->type, nbd_rep_lookup(reply->type));
    169             goto err;
    170         }
    171         msg = g_malloc(reply->length + 1);
    172         if (nbd_read(ioc, msg, reply->length, NULL, errp) < 0) {
    173             error_prepend(errp, "Failed to read option error %" PRIu32
    174                           " (%s) message: ",
    175                           reply->type, nbd_rep_lookup(reply->type));
    176             goto err;
    177         }
    178         msg[reply->length] = '\0';
    179         trace_nbd_server_error_msg(reply->type,
    180                                    nbd_reply_type_lookup(reply->type), msg);
    181     }
    182 
    183     if (reply->type == NBD_REP_ERR_UNSUP || !strict) {
    184         trace_nbd_reply_err_ignored(reply->option,
    185                                     nbd_opt_lookup(reply->option),
    186                                     reply->type, nbd_rep_lookup(reply->type));
    187         return 0;
    188     }
    189 
    190     switch (reply->type) {
    191     case NBD_REP_ERR_POLICY:
    192         error_setg(errp, "Denied by server for option %" PRIu32 " (%s)",
    193                    reply->option, nbd_opt_lookup(reply->option));
    194         break;
    195 
    196     case NBD_REP_ERR_INVALID:
    197         error_setg(errp, "Invalid parameters for option %" PRIu32 " (%s)",
    198                    reply->option, nbd_opt_lookup(reply->option));
    199         break;
    200 
    201     case NBD_REP_ERR_PLATFORM:
    202         error_setg(errp, "Server lacks support for option %" PRIu32 " (%s)",
    203                    reply->option, nbd_opt_lookup(reply->option));
    204         break;
    205 
    206     case NBD_REP_ERR_TLS_REQD:
    207         error_setg(errp, "TLS negotiation required before option %" PRIu32
    208                    " (%s)", reply->option, nbd_opt_lookup(reply->option));
    209         error_append_hint(errp, "Did you forget a valid tls-creds?\n");
    210         break;
    211 
    212     case NBD_REP_ERR_UNKNOWN:
    213         error_setg(errp, "Requested export not available");
    214         break;
    215 
    216     case NBD_REP_ERR_SHUTDOWN:
    217         error_setg(errp, "Server shutting down before option %" PRIu32 " (%s)",
    218                    reply->option, nbd_opt_lookup(reply->option));
    219         break;
    220 
    221     case NBD_REP_ERR_BLOCK_SIZE_REQD:
    222         error_setg(errp, "Server requires INFO_BLOCK_SIZE for option %" PRIu32
    223                    " (%s)", reply->option, nbd_opt_lookup(reply->option));
    224         break;
    225 
    226     default:
    227         error_setg(errp, "Unknown error code when asking for option %" PRIu32
    228                    " (%s)", reply->option, nbd_opt_lookup(reply->option));
    229         break;
    230     }
    231 
    232     if (msg) {
    233         error_append_hint(errp, "server reported: %s\n", msg);
    234     }
    235 
    236  err:
    237     nbd_send_opt_abort(ioc);
    238     return -1;
    239 }
    240 
    241 /* nbd_receive_list:
    242  * Process another portion of the NBD_OPT_LIST reply, populating any
    243  * name received into *@name. If @description is non-NULL, and the
    244  * server provided a description, that is also populated. The caller
    245  * must eventually call g_free() on success.
    246  * Returns 1 if name and description were set and iteration must continue,
    247  *         0 if iteration is complete (including if OPT_LIST unsupported),
    248  *         -1 with @errp set if an unrecoverable error occurred.
    249  */
    250 static int nbd_receive_list(QIOChannel *ioc, char **name, char **description,
    251                             Error **errp)
    252 {
    253     NBDOptionReply reply;
    254     uint32_t len;
    255     uint32_t namelen;
    256     g_autofree char *local_name = NULL;
    257     g_autofree char *local_desc = NULL;
    258     int error;
    259 
    260     if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) {
    261         return -1;
    262     }
    263     error = nbd_handle_reply_err(ioc, &reply, true, errp);
    264     if (error <= 0) {
    265         return error;
    266     }
    267     len = reply.length;
    268 
    269     if (reply.type == NBD_REP_ACK) {
    270         if (len != 0) {
    271             error_setg(errp, "length too long for option end");
    272             nbd_send_opt_abort(ioc);
    273             return -1;
    274         }
    275         return 0;
    276     } else if (reply.type != NBD_REP_SERVER) {
    277         error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)",
    278                    reply.type, nbd_rep_lookup(reply.type),
    279                    NBD_REP_SERVER, nbd_rep_lookup(NBD_REP_SERVER));
    280         nbd_send_opt_abort(ioc);
    281         return -1;
    282     }
    283 
    284     if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
    285         error_setg(errp, "incorrect option length %" PRIu32, len);
    286         nbd_send_opt_abort(ioc);
    287         return -1;
    288     }
    289     if (nbd_read32(ioc, &namelen, "option name length", errp) < 0) {
    290         nbd_send_opt_abort(ioc);
    291         return -1;
    292     }
    293     len -= sizeof(namelen);
    294     if (len < namelen || namelen > NBD_MAX_STRING_SIZE) {
    295         error_setg(errp, "incorrect name length in server's list response");
    296         nbd_send_opt_abort(ioc);
    297         return -1;
    298     }
    299 
    300     local_name = g_malloc(namelen + 1);
    301     if (nbd_read(ioc, local_name, namelen, "export name", errp) < 0) {
    302         nbd_send_opt_abort(ioc);
    303         return -1;
    304     }
    305     local_name[namelen] = '\0';
    306     len -= namelen;
    307     if (len) {
    308         if (len > NBD_MAX_STRING_SIZE) {
    309             error_setg(errp, "incorrect description length in server's "
    310                        "list response");
    311             nbd_send_opt_abort(ioc);
    312             return -1;
    313         }
    314         local_desc = g_malloc(len + 1);
    315         if (nbd_read(ioc, local_desc, len, "export description", errp) < 0) {
    316             nbd_send_opt_abort(ioc);
    317             return -1;
    318         }
    319         local_desc[len] = '\0';
    320     }
    321 
    322     trace_nbd_receive_list(local_name, local_desc ?: "");
    323     *name = g_steal_pointer(&local_name);
    324     if (description) {
    325         *description = g_steal_pointer(&local_desc);
    326     }
    327     return 1;
    328 }
    329 
    330 
    331 /*
    332  * nbd_opt_info_or_go:
    333  * Send option for NBD_OPT_INFO or NBD_OPT_GO and parse the reply.
    334  * Returns -1 if the option proves the export @info->name cannot be
    335  * used, 0 if the option is unsupported (fall back to NBD_OPT_LIST and
    336  * NBD_OPT_EXPORT_NAME in that case), and > 0 if the export is good to
    337  * go (with the rest of @info populated).
    338  */
    339 static int nbd_opt_info_or_go(QIOChannel *ioc, uint32_t opt,
    340                               NBDExportInfo *info, Error **errp)
    341 {
    342     ERRP_GUARD();
    343     NBDOptionReply reply;
    344     uint32_t len = strlen(info->name);
    345     uint16_t type;
    346     int error;
    347     char *buf;
    348 
    349     /* The protocol requires that the server send NBD_INFO_EXPORT with
    350      * a non-zero flags (at least NBD_FLAG_HAS_FLAGS must be set); so
    351      * flags still 0 is a witness of a broken server. */
    352     info->flags = 0;
    353 
    354     assert(opt == NBD_OPT_GO || opt == NBD_OPT_INFO);
    355     trace_nbd_opt_info_go_start(nbd_opt_lookup(opt), info->name);
    356     buf = g_malloc(4 + len + 2 + 2 * info->request_sizes + 1);
    357     stl_be_p(buf, len);
    358     memcpy(buf + 4, info->name, len);
    359     /* At most one request, everything else up to server */
    360     stw_be_p(buf + 4 + len, info->request_sizes);
    361     if (info->request_sizes) {
    362         stw_be_p(buf + 4 + len + 2, NBD_INFO_BLOCK_SIZE);
    363     }
    364     error = nbd_send_option_request(ioc, opt,
    365                                     4 + len + 2 + 2 * info->request_sizes,
    366                                     buf, errp);
    367     g_free(buf);
    368     if (error < 0) {
    369         return -1;
    370     }
    371 
    372     while (1) {
    373         if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
    374             return -1;
    375         }
    376         error = nbd_handle_reply_err(ioc, &reply, true, errp);
    377         if (error <= 0) {
    378             return error;
    379         }
    380         len = reply.length;
    381 
    382         if (reply.type == NBD_REP_ACK) {
    383             /*
    384              * Server is done sending info, and moved into transmission
    385              * phase for NBD_OPT_GO, but make sure it sent flags
    386              */
    387             if (len) {
    388                 error_setg(errp, "server sent invalid NBD_REP_ACK");
    389                 return -1;
    390             }
    391             if (!info->flags) {
    392                 error_setg(errp, "broken server omitted NBD_INFO_EXPORT");
    393                 return -1;
    394             }
    395             trace_nbd_opt_info_go_success(nbd_opt_lookup(opt));
    396             return 1;
    397         }
    398         if (reply.type != NBD_REP_INFO) {
    399             error_setg(errp, "unexpected reply type %u (%s), expected %u (%s)",
    400                        reply.type, nbd_rep_lookup(reply.type),
    401                        NBD_REP_INFO, nbd_rep_lookup(NBD_REP_INFO));
    402             nbd_send_opt_abort(ioc);
    403             return -1;
    404         }
    405         if (len < sizeof(type)) {
    406             error_setg(errp, "NBD_REP_INFO length %" PRIu32 " is too short",
    407                        len);
    408             nbd_send_opt_abort(ioc);
    409             return -1;
    410         }
    411         if (nbd_read16(ioc, &type, "info type", errp) < 0) {
    412             nbd_send_opt_abort(ioc);
    413             return -1;
    414         }
    415         len -= sizeof(type);
    416         switch (type) {
    417         case NBD_INFO_EXPORT:
    418             if (len != sizeof(info->size) + sizeof(info->flags)) {
    419                 error_setg(errp, "remaining export info len %" PRIu32
    420                            " is unexpected size", len);
    421                 nbd_send_opt_abort(ioc);
    422                 return -1;
    423             }
    424             if (nbd_read64(ioc, &info->size, "info size", errp) < 0) {
    425                 nbd_send_opt_abort(ioc);
    426                 return -1;
    427             }
    428             if (nbd_read16(ioc, &info->flags, "info flags", errp) < 0) {
    429                 nbd_send_opt_abort(ioc);
    430                 return -1;
    431             }
    432             if (info->min_block &&
    433                 !QEMU_IS_ALIGNED(info->size, info->min_block)) {
    434                 error_setg(errp, "export size %" PRIu64 " is not multiple of "
    435                            "minimum block size %" PRIu32, info->size,
    436                            info->min_block);
    437                 nbd_send_opt_abort(ioc);
    438                 return -1;
    439             }
    440             trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
    441             break;
    442 
    443         case NBD_INFO_BLOCK_SIZE:
    444             if (len != sizeof(info->min_block) * 3) {
    445                 error_setg(errp, "remaining export info len %" PRIu32
    446                            " is unexpected size", len);
    447                 nbd_send_opt_abort(ioc);
    448                 return -1;
    449             }
    450             if (nbd_read32(ioc, &info->min_block, "info minimum block size",
    451                            errp) < 0) {
    452                 nbd_send_opt_abort(ioc);
    453                 return -1;
    454             }
    455             if (!is_power_of_2(info->min_block)) {
    456                 error_setg(errp, "server minimum block size %" PRIu32
    457                            " is not a power of two", info->min_block);
    458                 nbd_send_opt_abort(ioc);
    459                 return -1;
    460             }
    461             if (nbd_read32(ioc, &info->opt_block, "info preferred block size",
    462                            errp) < 0)
    463             {
    464                 nbd_send_opt_abort(ioc);
    465                 return -1;
    466             }
    467             if (!is_power_of_2(info->opt_block) ||
    468                 info->opt_block < info->min_block) {
    469                 error_setg(errp, "server preferred block size %" PRIu32
    470                            " is not valid", info->opt_block);
    471                 nbd_send_opt_abort(ioc);
    472                 return -1;
    473             }
    474             if (nbd_read32(ioc, &info->max_block, "info maximum block size",
    475                            errp) < 0)
    476             {
    477                 nbd_send_opt_abort(ioc);
    478                 return -1;
    479             }
    480             if (info->max_block < info->min_block) {
    481                 error_setg(errp, "server maximum block size %" PRIu32
    482                            " is not valid", info->max_block);
    483                 nbd_send_opt_abort(ioc);
    484                 return -1;
    485             }
    486             trace_nbd_opt_info_block_size(info->min_block, info->opt_block,
    487                                           info->max_block);
    488             break;
    489 
    490         default:
    491             /*
    492              * Not worth the bother to check if NBD_INFO_NAME or
    493              * NBD_INFO_DESCRIPTION exceed NBD_MAX_STRING_SIZE.
    494              */
    495             trace_nbd_opt_info_unknown(type, nbd_info_lookup(type));
    496             if (nbd_drop(ioc, len, errp) < 0) {
    497                 error_prepend(errp, "Failed to read info payload: ");
    498                 nbd_send_opt_abort(ioc);
    499                 return -1;
    500             }
    501             break;
    502         }
    503     }
    504 }
    505 
    506 /* Return -1 on failure, 0 if wantname is an available export. */
    507 static int nbd_receive_query_exports(QIOChannel *ioc,
    508                                      const char *wantname,
    509                                      Error **errp)
    510 {
    511     bool list_empty = true;
    512     bool found_export = false;
    513 
    514     trace_nbd_receive_query_exports_start(wantname);
    515     if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
    516         return -1;
    517     }
    518 
    519     while (1) {
    520         char *name;
    521         int ret = nbd_receive_list(ioc, &name, NULL, errp);
    522 
    523         if (ret < 0) {
    524             /* Server gave unexpected reply */
    525             return -1;
    526         } else if (ret == 0) {
    527             /* Done iterating. */
    528             if (list_empty) {
    529                 /*
    530                  * We don't have enough context to tell a server that
    531                  * sent an empty list apart from a server that does
    532                  * not support the list command; but as this function
    533                  * is just used to trigger a nicer error message
    534                  * before trying NBD_OPT_EXPORT_NAME, assume the
    535                  * export is available.
    536                  */
    537                 return 0;
    538             } else if (!found_export) {
    539                 error_setg(errp, "No export with name '%s' available",
    540                            wantname);
    541                 nbd_send_opt_abort(ioc);
    542                 return -1;
    543             }
    544             trace_nbd_receive_query_exports_success(wantname);
    545             return 0;
    546         }
    547         list_empty = false;
    548         if (!strcmp(name, wantname)) {
    549             found_export = true;
    550         }
    551         g_free(name);
    552     }
    553 }
    554 
    555 /*
    556  * nbd_request_simple_option: Send an option request, and parse the reply.
    557  * @strict controls whether ERR_UNSUP or all errors produce 0 status.
    558  * return 1 for successful negotiation,
    559  *        0 if operation is unsupported,
    560  *        -1 with errp set for any other error
    561  */
    562 static int nbd_request_simple_option(QIOChannel *ioc, int opt, bool strict,
    563                                      Error **errp)
    564 {
    565     NBDOptionReply reply;
    566     int error;
    567 
    568     if (nbd_send_option_request(ioc, opt, 0, NULL, errp) < 0) {
    569         return -1;
    570     }
    571 
    572     if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
    573         return -1;
    574     }
    575     error = nbd_handle_reply_err(ioc, &reply, strict, errp);
    576     if (error <= 0) {
    577         return error;
    578     }
    579 
    580     if (reply.type != NBD_REP_ACK) {
    581         error_setg(errp, "Server answered option %d (%s) with unexpected "
    582                    "reply %" PRIu32 " (%s)", opt, nbd_opt_lookup(opt),
    583                    reply.type, nbd_rep_lookup(reply.type));
    584         nbd_send_opt_abort(ioc);
    585         return -1;
    586     }
    587 
    588     if (reply.length != 0) {
    589         error_setg(errp, "Option %d ('%s') response length is %" PRIu32
    590                    " (it should be zero)", opt, nbd_opt_lookup(opt),
    591                    reply.length);
    592         nbd_send_opt_abort(ioc);
    593         return -1;
    594     }
    595 
    596     return 1;
    597 }
    598 
    599 static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
    600                                         QCryptoTLSCreds *tlscreds,
    601                                         const char *hostname, Error **errp)
    602 {
    603     int ret;
    604     QIOChannelTLS *tioc;
    605     struct NBDTLSHandshakeData data = { 0 };
    606 
    607     ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, true, errp);
    608     if (ret <= 0) {
    609         if (ret == 0) {
    610             error_setg(errp, "Server don't support STARTTLS option");
    611             nbd_send_opt_abort(ioc);
    612         }
    613         return NULL;
    614     }
    615 
    616     trace_nbd_receive_starttls_new_client();
    617     tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp);
    618     if (!tioc) {
    619         return NULL;
    620     }
    621     qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
    622     data.loop = g_main_loop_new(g_main_context_default(), FALSE);
    623     trace_nbd_receive_starttls_tls_handshake();
    624     qio_channel_tls_handshake(tioc,
    625                               nbd_tls_handshake,
    626                               &data,
    627                               NULL,
    628                               NULL);
    629 
    630     if (!data.complete) {
    631         g_main_loop_run(data.loop);
    632     }
    633     g_main_loop_unref(data.loop);
    634     if (data.error) {
    635         error_propagate(errp, data.error);
    636         object_unref(OBJECT(tioc));
    637         return NULL;
    638     }
    639 
    640     return QIO_CHANNEL(tioc);
    641 }
    642 
    643 /*
    644  * nbd_send_meta_query:
    645  * Send 0 or 1 set/list meta context queries.
    646  * Return 0 on success, -1 with errp set for any error
    647  */
    648 static int nbd_send_meta_query(QIOChannel *ioc, uint32_t opt,
    649                                const char *export, const char *query,
    650                                Error **errp)
    651 {
    652     int ret;
    653     uint32_t export_len = strlen(export);
    654     uint32_t queries = !!query;
    655     uint32_t query_len = 0;
    656     uint32_t data_len;
    657     char *data;
    658     char *p;
    659 
    660     data_len = sizeof(export_len) + export_len + sizeof(queries);
    661     assert(export_len <= NBD_MAX_STRING_SIZE);
    662     if (query) {
    663         query_len = strlen(query);
    664         data_len += sizeof(query_len) + query_len;
    665         assert(query_len <= NBD_MAX_STRING_SIZE);
    666     } else {
    667         assert(opt == NBD_OPT_LIST_META_CONTEXT);
    668     }
    669     p = data = g_malloc(data_len);
    670 
    671     trace_nbd_opt_meta_request(nbd_opt_lookup(opt), query ?: "(all)", export);
    672     stl_be_p(p, export_len);
    673     memcpy(p += sizeof(export_len), export, export_len);
    674     stl_be_p(p += export_len, queries);
    675     if (query) {
    676         stl_be_p(p += sizeof(queries), query_len);
    677         memcpy(p += sizeof(query_len), query, query_len);
    678     }
    679 
    680     ret = nbd_send_option_request(ioc, opt, data_len, data, errp);
    681     g_free(data);
    682     return ret;
    683 }
    684 
    685 /*
    686  * nbd_receive_one_meta_context:
    687  * Called in a loop to receive and trace one set/list meta context reply.
    688  * Pass non-NULL @name or @id to collect results back to the caller, which
    689  * must eventually call g_free().
    690  * return 1 if name is set and iteration must continue,
    691  *        0 if iteration is complete (including if option is unsupported),
    692  *        -1 with errp set for any error
    693  */
    694 static int nbd_receive_one_meta_context(QIOChannel *ioc,
    695                                         uint32_t opt,
    696                                         char **name,
    697                                         uint32_t *id,
    698                                         Error **errp)
    699 {
    700     int ret;
    701     NBDOptionReply reply;
    702     char *local_name = NULL;
    703     uint32_t local_id;
    704 
    705     if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
    706         return -1;
    707     }
    708 
    709     ret = nbd_handle_reply_err(ioc, &reply, false, errp);
    710     if (ret <= 0) {
    711         return ret;
    712     }
    713 
    714     if (reply.type == NBD_REP_ACK) {
    715         if (reply.length != 0) {
    716             error_setg(errp, "Unexpected length to ACK response");
    717             nbd_send_opt_abort(ioc);
    718             return -1;
    719         }
    720         return 0;
    721     } else if (reply.type != NBD_REP_META_CONTEXT) {
    722         error_setg(errp, "Unexpected reply type %u (%s), expected %u (%s)",
    723                    reply.type, nbd_rep_lookup(reply.type),
    724                    NBD_REP_META_CONTEXT, nbd_rep_lookup(NBD_REP_META_CONTEXT));
    725         nbd_send_opt_abort(ioc);
    726         return -1;
    727     }
    728 
    729     if (reply.length <= sizeof(local_id) ||
    730         reply.length > NBD_MAX_BUFFER_SIZE) {
    731         error_setg(errp, "Failed to negotiate meta context, server "
    732                    "answered with unexpected length %" PRIu32,
    733                    reply.length);
    734         nbd_send_opt_abort(ioc);
    735         return -1;
    736     }
    737 
    738     if (nbd_read32(ioc, &local_id, "context id", errp) < 0) {
    739         return -1;
    740     }
    741 
    742     reply.length -= sizeof(local_id);
    743     local_name = g_malloc(reply.length + 1);
    744     if (nbd_read(ioc, local_name, reply.length, "context name", errp) < 0) {
    745         g_free(local_name);
    746         return -1;
    747     }
    748     local_name[reply.length] = '\0';
    749     trace_nbd_opt_meta_reply(nbd_opt_lookup(opt), local_name, local_id);
    750 
    751     if (name) {
    752         *name = local_name;
    753     } else {
    754         g_free(local_name);
    755     }
    756     if (id) {
    757         *id = local_id;
    758     }
    759     return 1;
    760 }
    761 
    762 /*
    763  * nbd_negotiate_simple_meta_context:
    764  * Request the server to set the meta context for export @info->name
    765  * using @info->x_dirty_bitmap with a fallback to "base:allocation",
    766  * setting @info->context_id to the resulting id. Fail if the server
    767  * responds with more than one context or with a context different
    768  * than the query.
    769  * return 1 for successful negotiation,
    770  *        0 if operation is unsupported,
    771  *        -1 with errp set for any other error
    772  */
    773 static int nbd_negotiate_simple_meta_context(QIOChannel *ioc,
    774                                              NBDExportInfo *info,
    775                                              Error **errp)
    776 {
    777     /*
    778      * TODO: Removing the x_dirty_bitmap hack will mean refactoring
    779      * this function to request and store ids for multiple contexts
    780      * (both base:allocation and a dirty bitmap), at which point this
    781      * function should lose the term _simple.
    782      */
    783     int ret;
    784     const char *context = info->x_dirty_bitmap ?: "base:allocation";
    785     bool received = false;
    786     char *name = NULL;
    787 
    788     if (nbd_send_meta_query(ioc, NBD_OPT_SET_META_CONTEXT,
    789                             info->name, context, errp) < 0) {
    790         return -1;
    791     }
    792 
    793     ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT,
    794                                        &name, &info->context_id, errp);
    795     if (ret < 0) {
    796         return -1;
    797     }
    798     if (ret == 1) {
    799         if (strcmp(context, name)) {
    800             error_setg(errp, "Failed to negotiate meta context '%s', server "
    801                        "answered with different context '%s'", context,
    802                        name);
    803             g_free(name);
    804             nbd_send_opt_abort(ioc);
    805             return -1;
    806         }
    807         g_free(name);
    808         received = true;
    809 
    810         ret = nbd_receive_one_meta_context(ioc, NBD_OPT_SET_META_CONTEXT,
    811                                            NULL, NULL, errp);
    812         if (ret < 0) {
    813             return -1;
    814         }
    815     }
    816     if (ret != 0) {
    817         error_setg(errp, "Server answered with more than one context");
    818         nbd_send_opt_abort(ioc);
    819         return -1;
    820     }
    821     return received;
    822 }
    823 
    824 /*
    825  * nbd_list_meta_contexts:
    826  * Request the server to list all meta contexts for export @info->name.
    827  * return 0 if list is complete (even if empty),
    828  *        -1 with errp set for any error
    829  */
    830 static int nbd_list_meta_contexts(QIOChannel *ioc,
    831                                   NBDExportInfo *info,
    832                                   Error **errp)
    833 {
    834     int ret;
    835     int seen_any = false;
    836     int seen_qemu = false;
    837 
    838     if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT,
    839                             info->name, NULL, errp) < 0) {
    840         return -1;
    841     }
    842 
    843     while (1) {
    844         char *context;
    845 
    846         ret = nbd_receive_one_meta_context(ioc, NBD_OPT_LIST_META_CONTEXT,
    847                                            &context, NULL, errp);
    848         if (ret == 0 && seen_any && !seen_qemu) {
    849             /*
    850              * Work around qemu 3.0 bug: the server forgot to send
    851              * "qemu:" replies to 0 queries. If we saw at least one
    852              * reply (probably base:allocation), but none of them were
    853              * qemu:, then run a more specific query to make sure.
    854              */
    855             seen_qemu = true;
    856             if (nbd_send_meta_query(ioc, NBD_OPT_LIST_META_CONTEXT,
    857                                     info->name, "qemu:", errp) < 0) {
    858                 return -1;
    859             }
    860             continue;
    861         }
    862         if (ret <= 0) {
    863             return ret;
    864         }
    865         seen_any = true;
    866         seen_qemu |= strstart(context, "qemu:", NULL);
    867         info->contexts = g_renew(char *, info->contexts, ++info->n_contexts);
    868         info->contexts[info->n_contexts - 1] = context;
    869     }
    870 }
    871 
    872 /*
    873  * nbd_start_negotiate:
    874  * Start the handshake to the server.  After a positive return, the server
    875  * is ready to accept additional NBD_OPT requests.
    876  * Returns: negative errno: failure talking to server
    877  *          0: server is oldstyle, must call nbd_negotiate_finish_oldstyle
    878  *          1: server is newstyle, but can only accept EXPORT_NAME
    879  *          2: server is newstyle, but lacks structured replies
    880  *          3: server is newstyle and set up for structured replies
    881  */
    882 static int nbd_start_negotiate(AioContext *aio_context, QIOChannel *ioc,
    883                                QCryptoTLSCreds *tlscreds,
    884                                const char *hostname, QIOChannel **outioc,
    885                                bool structured_reply, bool *zeroes,
    886                                Error **errp)
    887 {
    888     ERRP_GUARD();
    889     uint64_t magic;
    890 
    891     trace_nbd_start_negotiate(tlscreds, hostname ? hostname : "<null>");
    892 
    893     if (zeroes) {
    894         *zeroes = true;
    895     }
    896     if (outioc) {
    897         *outioc = NULL;
    898     }
    899     if (tlscreds && !outioc) {
    900         error_setg(errp, "Output I/O channel required for TLS");
    901         return -EINVAL;
    902     }
    903 
    904     if (nbd_read64(ioc, &magic, "initial magic", errp) < 0) {
    905         return -EINVAL;
    906     }
    907     trace_nbd_receive_negotiate_magic(magic);
    908 
    909     if (magic != NBD_INIT_MAGIC) {
    910         error_setg(errp, "Bad initial magic received: 0x%" PRIx64, magic);
    911         return -EINVAL;
    912     }
    913 
    914     if (nbd_read64(ioc, &magic, "server magic", errp) < 0) {
    915         return -EINVAL;
    916     }
    917     trace_nbd_receive_negotiate_magic(magic);
    918 
    919     if (magic == NBD_OPTS_MAGIC) {
    920         uint32_t clientflags = 0;
    921         uint16_t globalflags;
    922         bool fixedNewStyle = false;
    923 
    924         if (nbd_read16(ioc, &globalflags, "server flags", errp) < 0) {
    925             return -EINVAL;
    926         }
    927         trace_nbd_receive_negotiate_server_flags(globalflags);
    928         if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
    929             fixedNewStyle = true;
    930             clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
    931         }
    932         if (globalflags & NBD_FLAG_NO_ZEROES) {
    933             if (zeroes) {
    934                 *zeroes = false;
    935             }
    936             clientflags |= NBD_FLAG_C_NO_ZEROES;
    937         }
    938         /* client requested flags */
    939         clientflags = cpu_to_be32(clientflags);
    940         if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
    941             error_prepend(errp, "Failed to send clientflags field: ");
    942             return -EINVAL;
    943         }
    944         if (tlscreds) {
    945             if (fixedNewStyle) {
    946                 *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp);
    947                 if (!*outioc) {
    948                     return -EINVAL;
    949                 }
    950                 ioc = *outioc;
    951                 if (aio_context) {
    952                     qio_channel_set_blocking(ioc, false, NULL);
    953                     qio_channel_attach_aio_context(ioc, aio_context);
    954                 }
    955             } else {
    956                 error_setg(errp, "Server does not support STARTTLS");
    957                 return -EINVAL;
    958             }
    959         }
    960         if (fixedNewStyle) {
    961             int result = 0;
    962 
    963             if (structured_reply) {
    964                 result = nbd_request_simple_option(ioc,
    965                                                    NBD_OPT_STRUCTURED_REPLY,
    966                                                    false, errp);
    967                 if (result < 0) {
    968                     return -EINVAL;
    969                 }
    970             }
    971             return 2 + result;
    972         } else {
    973             return 1;
    974         }
    975     } else if (magic == NBD_CLIENT_MAGIC) {
    976         if (tlscreds) {
    977             error_setg(errp, "Server does not support STARTTLS");
    978             return -EINVAL;
    979         }
    980         return 0;
    981     } else {
    982         error_setg(errp, "Bad server magic received: 0x%" PRIx64, magic);
    983         return -EINVAL;
    984     }
    985 }
    986 
    987 /*
    988  * nbd_negotiate_finish_oldstyle:
    989  * Populate @info with the size and export flags from an oldstyle server,
    990  * but does not consume 124 bytes of reserved zero padding.
    991  * Returns 0 on success, -1 with @errp set on failure
    992  */
    993 static int nbd_negotiate_finish_oldstyle(QIOChannel *ioc, NBDExportInfo *info,
    994                                          Error **errp)
    995 {
    996     uint32_t oldflags;
    997 
    998     if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
    999         return -EINVAL;
   1000     }
   1001 
   1002     if (nbd_read32(ioc, &oldflags, "export flags", errp) < 0) {
   1003         return -EINVAL;
   1004     }
   1005     if (oldflags & ~0xffff) {
   1006         error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags);
   1007         return -EINVAL;
   1008     }
   1009     info->flags = oldflags;
   1010     return 0;
   1011 }
   1012 
   1013 /*
   1014  * nbd_receive_negotiate:
   1015  * Connect to server, complete negotiation, and move into transmission phase.
   1016  * Returns: negative errno: failure talking to server
   1017  *          0: server is connected
   1018  */
   1019 int nbd_receive_negotiate(AioContext *aio_context, QIOChannel *ioc,
   1020                           QCryptoTLSCreds *tlscreds,
   1021                           const char *hostname, QIOChannel **outioc,
   1022                           NBDExportInfo *info, Error **errp)
   1023 {
   1024     ERRP_GUARD();
   1025     int result;
   1026     bool zeroes;
   1027     bool base_allocation = info->base_allocation;
   1028 
   1029     assert(info->name && strlen(info->name) <= NBD_MAX_STRING_SIZE);
   1030     trace_nbd_receive_negotiate_name(info->name);
   1031 
   1032     result = nbd_start_negotiate(aio_context, ioc, tlscreds, hostname, outioc,
   1033                                  info->structured_reply, &zeroes, errp);
   1034 
   1035     info->structured_reply = false;
   1036     info->base_allocation = false;
   1037     if (tlscreds && *outioc) {
   1038         ioc = *outioc;
   1039     }
   1040 
   1041     switch (result) {
   1042     case 3: /* newstyle, with structured replies */
   1043         info->structured_reply = true;
   1044         if (base_allocation) {
   1045             result = nbd_negotiate_simple_meta_context(ioc, info, errp);
   1046             if (result < 0) {
   1047                 return -EINVAL;
   1048             }
   1049             info->base_allocation = result == 1;
   1050         }
   1051         /* fall through */
   1052     case 2: /* newstyle, try OPT_GO */
   1053         /* Try NBD_OPT_GO first - if it works, we are done (it
   1054          * also gives us a good message if the server requires
   1055          * TLS).  If it is not available, fall back to
   1056          * NBD_OPT_LIST for nicer error messages about a missing
   1057          * export, then use NBD_OPT_EXPORT_NAME.  */
   1058         result = nbd_opt_info_or_go(ioc, NBD_OPT_GO, info, errp);
   1059         if (result < 0) {
   1060             return -EINVAL;
   1061         }
   1062         if (result > 0) {
   1063             return 0;
   1064         }
   1065         /* Check our desired export is present in the
   1066          * server export list. Since NBD_OPT_EXPORT_NAME
   1067          * cannot return an error message, running this
   1068          * query gives us better error reporting if the
   1069          * export name is not available.
   1070          */
   1071         if (nbd_receive_query_exports(ioc, info->name, errp) < 0) {
   1072             return -EINVAL;
   1073         }
   1074         /* fall through */
   1075     case 1: /* newstyle, but limited to EXPORT_NAME */
   1076         /* write the export name request */
   1077         if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, info->name,
   1078                                     errp) < 0) {
   1079             return -EINVAL;
   1080         }
   1081 
   1082         /* Read the response */
   1083         if (nbd_read64(ioc, &info->size, "export length", errp) < 0) {
   1084             return -EINVAL;
   1085         }
   1086 
   1087         if (nbd_read16(ioc, &info->flags, "export flags", errp) < 0) {
   1088             return -EINVAL;
   1089         }
   1090         break;
   1091     case 0: /* oldstyle, parse length and flags */
   1092         if (*info->name) {
   1093             error_setg(errp, "Server does not support non-empty export names");
   1094             return -EINVAL;
   1095         }
   1096         if (nbd_negotiate_finish_oldstyle(ioc, info, errp) < 0) {
   1097             return -EINVAL;
   1098         }
   1099         break;
   1100     default:
   1101         return result;
   1102     }
   1103 
   1104     trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
   1105     if (zeroes && nbd_drop(ioc, 124, errp) < 0) {
   1106         error_prepend(errp, "Failed to read reserved block: ");
   1107         return -EINVAL;
   1108     }
   1109     return 0;
   1110 }
   1111 
   1112 /* Clean up result of nbd_receive_export_list */
   1113 void nbd_free_export_list(NBDExportInfo *info, int count)
   1114 {
   1115     int i, j;
   1116 
   1117     if (!info) {
   1118         return;
   1119     }
   1120 
   1121     for (i = 0; i < count; i++) {
   1122         g_free(info[i].name);
   1123         g_free(info[i].description);
   1124         for (j = 0; j < info[i].n_contexts; j++) {
   1125             g_free(info[i].contexts[j]);
   1126         }
   1127         g_free(info[i].contexts);
   1128     }
   1129     g_free(info);
   1130 }
   1131 
   1132 /*
   1133  * nbd_receive_export_list:
   1134  * Query details about a server's exports, then disconnect without
   1135  * going into transmission phase. Return a count of the exports listed
   1136  * in @info by the server, or -1 on error. Caller must free @info using
   1137  * nbd_free_export_list().
   1138  */
   1139 int nbd_receive_export_list(QIOChannel *ioc, QCryptoTLSCreds *tlscreds,
   1140                             const char *hostname, NBDExportInfo **info,
   1141                             Error **errp)
   1142 {
   1143     int result;
   1144     int count = 0;
   1145     int i;
   1146     int rc;
   1147     int ret = -1;
   1148     NBDExportInfo *array = NULL;
   1149     QIOChannel *sioc = NULL;
   1150 
   1151     *info = NULL;
   1152     result = nbd_start_negotiate(NULL, ioc, tlscreds, hostname, &sioc, true,
   1153                                  NULL, errp);
   1154     if (tlscreds && sioc) {
   1155         ioc = sioc;
   1156     }
   1157 
   1158     switch (result) {
   1159     case 2:
   1160     case 3:
   1161         /* newstyle - use NBD_OPT_LIST to populate array, then try
   1162          * NBD_OPT_INFO on each array member. If structured replies
   1163          * are enabled, also try NBD_OPT_LIST_META_CONTEXT. */
   1164         if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
   1165             goto out;
   1166         }
   1167         while (1) {
   1168             char *name;
   1169             char *desc;
   1170 
   1171             rc = nbd_receive_list(ioc, &name, &desc, errp);
   1172             if (rc < 0) {
   1173                 goto out;
   1174             } else if (rc == 0) {
   1175                 break;
   1176             }
   1177             array = g_renew(NBDExportInfo, array, ++count);
   1178             memset(&array[count - 1], 0, sizeof(*array));
   1179             array[count - 1].name = name;
   1180             array[count - 1].description = desc;
   1181             array[count - 1].structured_reply = result == 3;
   1182         }
   1183 
   1184         for (i = 0; i < count; i++) {
   1185             array[i].request_sizes = true;
   1186             rc = nbd_opt_info_or_go(ioc, NBD_OPT_INFO, &array[i], errp);
   1187             if (rc < 0) {
   1188                 goto out;
   1189             } else if (rc == 0) {
   1190                 /*
   1191                  * Pointless to try rest of loop. If OPT_INFO doesn't work,
   1192                  * it's unlikely that meta contexts work either
   1193                  */
   1194                 break;
   1195             }
   1196 
   1197             if (result == 3 &&
   1198                 nbd_list_meta_contexts(ioc, &array[i], errp) < 0) {
   1199                 goto out;
   1200             }
   1201         }
   1202 
   1203         /* Send NBD_OPT_ABORT as a courtesy before hanging up */
   1204         nbd_send_opt_abort(ioc);
   1205         break;
   1206     case 1: /* newstyle, but limited to EXPORT_NAME */
   1207         error_setg(errp, "Server does not support export lists");
   1208         /* We can't even send NBD_OPT_ABORT, so merely hang up */
   1209         goto out;
   1210     case 0: /* oldstyle, parse length and flags */
   1211         array = g_new0(NBDExportInfo, 1);
   1212         array->name = g_strdup("");
   1213         count = 1;
   1214 
   1215         if (nbd_negotiate_finish_oldstyle(ioc, array, errp) < 0) {
   1216             goto out;
   1217         }
   1218 
   1219         /* Send NBD_CMD_DISC as a courtesy to the server, but ignore all
   1220          * errors now that we have the information we wanted. */
   1221         if (nbd_drop(ioc, 124, NULL) == 0) {
   1222             NBDRequest request = { .type = NBD_CMD_DISC };
   1223 
   1224             nbd_send_request(ioc, &request);
   1225         }
   1226         break;
   1227     default:
   1228         goto out;
   1229     }
   1230 
   1231     *info = array;
   1232     array = NULL;
   1233     ret = count;
   1234 
   1235  out:
   1236     qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
   1237     qio_channel_close(ioc, NULL);
   1238     object_unref(OBJECT(sioc));
   1239     nbd_free_export_list(array, count);
   1240     return ret;
   1241 }
   1242 
   1243 #ifdef __linux__
   1244 int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
   1245              Error **errp)
   1246 {
   1247     unsigned long sector_size = MAX(BDRV_SECTOR_SIZE, info->min_block);
   1248     unsigned long sectors = info->size / sector_size;
   1249 
   1250     /* FIXME: Once the kernel module is patched to honor block sizes,
   1251      * and to advertise that fact to user space, we should update the
   1252      * hand-off to the kernel to use any block sizes we learned. */
   1253     assert(!info->request_sizes);
   1254     if (info->size / sector_size != sectors) {
   1255         error_setg(errp, "Export size %" PRIu64 " too large for 32-bit kernel",
   1256                    info->size);
   1257         return -E2BIG;
   1258     }
   1259 
   1260     trace_nbd_init_set_socket();
   1261 
   1262     if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) {
   1263         int serrno = errno;
   1264         error_setg(errp, "Failed to set NBD socket");
   1265         return -serrno;
   1266     }
   1267 
   1268     trace_nbd_init_set_block_size(sector_size);
   1269 
   1270     if (ioctl(fd, NBD_SET_BLKSIZE, sector_size) < 0) {
   1271         int serrno = errno;
   1272         error_setg(errp, "Failed setting NBD block size");
   1273         return -serrno;
   1274     }
   1275 
   1276     trace_nbd_init_set_size(sectors);
   1277     if (info->size % sector_size) {
   1278         trace_nbd_init_trailing_bytes(info->size % sector_size);
   1279     }
   1280 
   1281     if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
   1282         int serrno = errno;
   1283         error_setg(errp, "Failed setting size (in blocks)");
   1284         return -serrno;
   1285     }
   1286 
   1287     if (ioctl(fd, NBD_SET_FLAGS, (unsigned long) info->flags) < 0) {
   1288         if (errno == ENOTTY) {
   1289             int read_only = (info->flags & NBD_FLAG_READ_ONLY) != 0;
   1290             trace_nbd_init_set_readonly();
   1291 
   1292             if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
   1293                 int serrno = errno;
   1294                 error_setg(errp, "Failed setting read-only attribute");
   1295                 return -serrno;
   1296             }
   1297         } else {
   1298             int serrno = errno;
   1299             error_setg(errp, "Failed setting flags");
   1300             return -serrno;
   1301         }
   1302     }
   1303 
   1304     trace_nbd_init_finish();
   1305 
   1306     return 0;
   1307 }
   1308 
   1309 int nbd_client(int fd)
   1310 {
   1311     int ret;
   1312     int serrno;
   1313 
   1314     trace_nbd_client_loop();
   1315 
   1316     ret = ioctl(fd, NBD_DO_IT);
   1317     if (ret < 0 && errno == EPIPE) {
   1318         /* NBD_DO_IT normally returns EPIPE when someone has disconnected
   1319          * the socket via NBD_DISCONNECT.  We do not want to return 1 in
   1320          * that case.
   1321          */
   1322         ret = 0;
   1323     }
   1324     serrno = errno;
   1325 
   1326     trace_nbd_client_loop_ret(ret, strerror(serrno));
   1327 
   1328     trace_nbd_client_clear_queue();
   1329     ioctl(fd, NBD_CLEAR_QUE);
   1330 
   1331     trace_nbd_client_clear_socket();
   1332     ioctl(fd, NBD_CLEAR_SOCK);
   1333 
   1334     errno = serrno;
   1335     return ret;
   1336 }
   1337 
   1338 int nbd_disconnect(int fd)
   1339 {
   1340     ioctl(fd, NBD_CLEAR_QUE);
   1341     ioctl(fd, NBD_DISCONNECT);
   1342     ioctl(fd, NBD_CLEAR_SOCK);
   1343     return 0;
   1344 }
   1345 
   1346 #endif /* __linux__ */
   1347 
   1348 int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
   1349 {
   1350     uint8_t buf[NBD_REQUEST_SIZE];
   1351 
   1352     trace_nbd_send_request(request->from, request->len, request->handle,
   1353                            request->flags, request->type,
   1354                            nbd_cmd_lookup(request->type));
   1355 
   1356     stl_be_p(buf, NBD_REQUEST_MAGIC);
   1357     stw_be_p(buf + 4, request->flags);
   1358     stw_be_p(buf + 6, request->type);
   1359     stq_be_p(buf + 8, request->handle);
   1360     stq_be_p(buf + 16, request->from);
   1361     stl_be_p(buf + 24, request->len);
   1362 
   1363     return nbd_write(ioc, buf, sizeof(buf), NULL);
   1364 }
   1365 
   1366 /* nbd_receive_simple_reply
   1367  * Read simple reply except magic field (which should be already read).
   1368  * Payload is not read (payload is possible for CMD_READ, but here we even
   1369  * don't know whether it take place or not).
   1370  */
   1371 static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
   1372                                     Error **errp)
   1373 {
   1374     int ret;
   1375 
   1376     assert(reply->magic == NBD_SIMPLE_REPLY_MAGIC);
   1377 
   1378     ret = nbd_read(ioc, (uint8_t *)reply + sizeof(reply->magic),
   1379                    sizeof(*reply) - sizeof(reply->magic), "reply", errp);
   1380     if (ret < 0) {
   1381         return ret;
   1382     }
   1383 
   1384     reply->error = be32_to_cpu(reply->error);
   1385     reply->handle = be64_to_cpu(reply->handle);
   1386 
   1387     return 0;
   1388 }
   1389 
   1390 /* nbd_receive_structured_reply_chunk
   1391  * Read structured reply chunk except magic field (which should be already
   1392  * read).
   1393  * Payload is not read.
   1394  */
   1395 static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
   1396                                               NBDStructuredReplyChunk *chunk,
   1397                                               Error **errp)
   1398 {
   1399     int ret;
   1400 
   1401     assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
   1402 
   1403     ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
   1404                    sizeof(*chunk) - sizeof(chunk->magic), "structured chunk",
   1405                    errp);
   1406     if (ret < 0) {
   1407         return ret;
   1408     }
   1409 
   1410     chunk->flags = be16_to_cpu(chunk->flags);
   1411     chunk->type = be16_to_cpu(chunk->type);
   1412     chunk->handle = be64_to_cpu(chunk->handle);
   1413     chunk->length = be32_to_cpu(chunk->length);
   1414 
   1415     return 0;
   1416 }
   1417 
   1418 /* nbd_read_eof
   1419  * Tries to read @size bytes from @ioc.
   1420  * Returns 1 on success
   1421  *         0 on eof, when no data was read (errp is not set)
   1422  *         negative errno on failure (errp is set)
   1423  */
   1424 static inline int coroutine_fn
   1425 nbd_read_eof(BlockDriverState *bs, QIOChannel *ioc, void *buffer, size_t size,
   1426              Error **errp)
   1427 {
   1428     bool partial = false;
   1429 
   1430     assert(size);
   1431     while (size > 0) {
   1432         struct iovec iov = { .iov_base = buffer, .iov_len = size };
   1433         ssize_t len;
   1434 
   1435         len = qio_channel_readv(ioc, &iov, 1, errp);
   1436         if (len == QIO_CHANNEL_ERR_BLOCK) {
   1437             qio_channel_yield(ioc, G_IO_IN);
   1438             continue;
   1439         } else if (len < 0) {
   1440             return -EIO;
   1441         } else if (len == 0) {
   1442             if (partial) {
   1443                 error_setg(errp,
   1444                            "Unexpected end-of-file before all bytes were read");
   1445                 return -EIO;
   1446             } else {
   1447                 return 0;
   1448             }
   1449         }
   1450 
   1451         partial = true;
   1452         size -= len;
   1453         buffer = (uint8_t*) buffer + len;
   1454     }
   1455     return 1;
   1456 }
   1457 
   1458 /* nbd_receive_reply
   1459  *
   1460  * Decreases bs->in_flight while waiting for a new reply. This yield is where
   1461  * we wait indefinitely and the coroutine must be able to be safely reentered
   1462  * for nbd_client_attach_aio_context().
   1463  *
   1464  * Returns 1 on success
   1465  *         0 on eof, when no data was read (errp is not set)
   1466  *         negative errno on failure (errp is set)
   1467  */
   1468 int coroutine_fn nbd_receive_reply(BlockDriverState *bs, QIOChannel *ioc,
   1469                                    NBDReply *reply, Error **errp)
   1470 {
   1471     int ret;
   1472     const char *type;
   1473 
   1474     ret = nbd_read_eof(bs, ioc, &reply->magic, sizeof(reply->magic), errp);
   1475     if (ret <= 0) {
   1476         return ret;
   1477     }
   1478 
   1479     reply->magic = be32_to_cpu(reply->magic);
   1480 
   1481     switch (reply->magic) {
   1482     case NBD_SIMPLE_REPLY_MAGIC:
   1483         ret = nbd_receive_simple_reply(ioc, &reply->simple, errp);
   1484         if (ret < 0) {
   1485             break;
   1486         }
   1487         trace_nbd_receive_simple_reply(reply->simple.error,
   1488                                        nbd_err_lookup(reply->simple.error),
   1489                                        reply->handle);
   1490         break;
   1491     case NBD_STRUCTURED_REPLY_MAGIC:
   1492         ret = nbd_receive_structured_reply_chunk(ioc, &reply->structured, errp);
   1493         if (ret < 0) {
   1494             break;
   1495         }
   1496         type = nbd_reply_type_lookup(reply->structured.type);
   1497         trace_nbd_receive_structured_reply_chunk(reply->structured.flags,
   1498                                                  reply->structured.type, type,
   1499                                                  reply->structured.handle,
   1500                                                  reply->structured.length);
   1501         break;
   1502     default:
   1503         error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", reply->magic);
   1504         return -EINVAL;
   1505     }
   1506     if (ret < 0) {
   1507         return ret;
   1508     }
   1509 
   1510     return 1;
   1511 }
   1512