qemu

FORK: QEMU emulator
git clone https://git.neptards.moe/neptards/qemu.git

iscsi.c (81143B)


      1 /*
      2  * QEMU Block driver for iSCSI images
      3  *
      4  * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
      5  * Copyright (c) 2012-2017 Peter Lieven <pl@kamp.de>
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a copy
      8  * of this software and associated documentation files (the "Software"), to deal
      9  * in the Software without restriction, including without limitation the rights
     10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     11  * copies of the Software, and to permit persons to whom the Software is
     12  * furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included in
     15  * all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     23  * THE SOFTWARE.
     24  */
     25 
     26 #include "qemu/osdep.h"
     27 
     28 #include <poll.h>
     29 #include <math.h>
     30 #include <arpa/inet.h>
     31 #include "sysemu/sysemu.h"
     32 #include "qemu/config-file.h"
     33 #include "qemu/error-report.h"
     34 #include "qemu/bitops.h"
     35 #include "qemu/bitmap.h"
     36 #include "block/block_int.h"
     37 #include "block/qdict.h"
     38 #include "scsi/constants.h"
     39 #include "qemu/iov.h"
     40 #include "qemu/module.h"
     41 #include "qemu/option.h"
     42 #include "qemu/uuid.h"
     43 #include "sysemu/replay.h"
     44 #include "qapi/error.h"
     45 #include "qapi/qapi-commands-machine.h"
     46 #include "qapi/qmp/qdict.h"
     47 #include "qapi/qmp/qstring.h"
     48 #include "crypto/secret.h"
     49 #include "scsi/utils.h"
     50 #include "trace.h"
     51 
     52 /* Conflict between scsi/utils.h and libiscsi! :( */
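        /*
         * The #define below renames libiscsi's SCSI_XFER_NONE enumerator to
         * ISCSI_XFER_NONE while the libiscsi headers are preprocessed, so it
         * cannot clash with the SCSI_XFER_NONE already defined by QEMU's
         * scsi/utils.h.  The QEMU_BUILD_BUG_ON() below checks that both
         * constants have the same value, so the two names stay interchangeable.
         */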
     53 #define SCSI_XFER_NONE ISCSI_XFER_NONE
     54 #include <iscsi/iscsi.h>
     55 #define inline __attribute__((gnu_inline))  /* required for libiscsi v1.9.0 */
     56 #include <iscsi/scsi-lowlevel.h>
     57 #undef inline
     58 #undef SCSI_XFER_NONE
     59 QEMU_BUILD_BUG_ON((int)SCSI_XFER_NONE != (int)ISCSI_XFER_NONE);
     60 
     61 #ifdef __linux__
     62 #include <scsi/sg.h>
     63 #endif
     64 
     65 typedef struct IscsiLun {
     66     struct iscsi_context *iscsi;
     67     AioContext *aio_context;
     68     int lun;
     69     enum scsi_inquiry_peripheral_device_type type;
     70     int block_size;
     71     uint64_t num_blocks;
     72     int events;
     73     QEMUTimer *nop_timer;
     74     QEMUTimer *event_timer;
     75     QemuMutex mutex;
     76     struct scsi_inquiry_logical_block_provisioning lbp;
     77     struct scsi_inquiry_block_limits bl;
     78     struct scsi_inquiry_device_designator *dd;
     79     unsigned char *zeroblock;
     80     /* The allocmap tracks which clusters (pages) on the iSCSI target are
     81      * allocated and which are not. If the target returns zeros for
     82      * unallocated pages (iscsilun->lbprz) we can directly return zeros
     83      * instead of reading them over the wire when a read request falls
     84      * within an unallocated area. As there are 3 possible states we need
     85      * 2 bitmaps: allocmap_valid tracks whether QEMU's information about a
     86      * page is valid, and allocmap tracks whether a page is allocated. If
     87      * QEMU has no valid information about a page, the corresponding
     88      * allocmap entry should also be switched to unallocated, to force a
     89      * new lookup of the allocation status, because lookups are generally
     90      * skipped if a page is suspected to be allocated. If an iSCSI
     91      * target is opened with cache.direct = on, allocmap_valid is not
     92      * allocated; all cached information is treated as invalid so that a
     93      * fresh lookup is made even for pages that allocmap reports unallocated. */
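            /*
             * Summary of the two bitmaps (per cluster):
             *
             *   allocmap_valid  allocmap   meaning
             *        1             1       known to be allocated
             *        1             0       known to be unallocated (zeros if lbprz)
             *        0             -       unknown, needs a GET LBA STATUS lookup
             */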
     94     unsigned long *allocmap;
     95     unsigned long *allocmap_valid;
     96     long allocmap_size;
     97     int cluster_size;
     98     bool use_16_for_rw;
     99     bool write_protected;
    100     bool lbpme;
    101     bool lbprz;
    102     bool dpofua;
    103     bool has_write_same;
    104     bool request_timed_out;
    105 } IscsiLun;
    106 
    107 typedef struct IscsiTask {
    108     int status;
    109     int complete;
    110     int retries;
    111     int do_retry;
    112     struct scsi_task *task;
    113     Coroutine *co;
    114     IscsiLun *iscsilun;
    115     QEMUTimer retry_timer;
    116     int err_code;
    117     char *err_str;
    118 } IscsiTask;
    119 
    120 typedef struct IscsiAIOCB {
    121     BlockAIOCB common;
    122     QEMUBH *bh;
    123     IscsiLun *iscsilun;
    124     struct scsi_task *task;
    125     int status;
    126     int64_t sector_num;
    127     int nb_sectors;
    128     int ret;
    129 #ifdef __linux__
    130     sg_io_hdr_t *ioh;
    131 #endif
    132     bool cancelled;
    133 } IscsiAIOCB;
    134 
    135 /* libiscsi uses time_t so it is enough to process events every second */
    136 #define EVENT_INTERVAL 1000
    137 #define NOP_INTERVAL 5000
    138 #define MAX_NOP_FAILURES 3
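        /*
         * iscsi_nop_timed_event() fires every NOP_INTERVAL ms and sends a
         * NOP-Out as a keepalive; if MAX_NOP_FAILURES NOPs are still
         * unanswered, the connection is treated as timed out and
         * iscsi_timed_check_events(), which runs every EVENT_INTERVAL ms,
         * triggers a reconnect.
         */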
    139 #define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
    140 static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
    141 
    142 /* this threshold is a trade-off knob to choose between
    143  * the potential additional overhead of an extra GET_LBA_STATUS request
    144  * vs. unnecessarily reading a lot of zero sectors over the wire.
    145  * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
    146  * sectors, we first check the allocation status of the area covered by
    147  * the request if the allocation map indicates that the area might be
    148  * unallocated. */
    149 #define ISCSI_CHECKALLOC_THRES 64
    150 
    151 #ifdef __linux__
    152 
    153 static void
    154 iscsi_bh_cb(void *p)
    155 {
    156     IscsiAIOCB *acb = p;
    157 
    158     qemu_bh_delete(acb->bh);
    159 
    160     acb->common.cb(acb->common.opaque, acb->status);
    161 
    162     if (acb->task != NULL) {
    163         scsi_free_scsi_task(acb->task);
    164         acb->task = NULL;
    165     }
    166 
    167     qemu_aio_unref(acb);
    168 }
    169 
    170 static void
    171 iscsi_schedule_bh(IscsiAIOCB *acb)
    172 {
    173     if (acb->bh) {
    174         return;
    175     }
    176     acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
    177     qemu_bh_schedule(acb->bh);
    178 }
    179 
    180 #endif
    181 
    182 static void iscsi_co_generic_bh_cb(void *opaque)
    183 {
    184     struct IscsiTask *iTask = opaque;
    185 
    186     iTask->complete = 1;
    187     aio_co_wake(iTask->co);
    188 }
    189 
    190 static void iscsi_retry_timer_expired(void *opaque)
    191 {
    192     struct IscsiTask *iTask = opaque;
    193     iTask->complete = 1;
    194     if (iTask->co) {
    195         aio_co_wake(iTask->co);
    196     }
    197 }
    198 
    199 static inline unsigned exp_random(double mean)
    200 {
    201     return -mean * log((double)rand() / RAND_MAX);
    202 }
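        /*
         * iscsi_co_generic_cb() below uses exp_random() to draw a randomized
         * retry delay whose mean is iscsi_retry_times[iTask->retries - 1]
         * milliseconds, i.e. on average 8 ms for the first retry up to
         * 32768 ms for the seventh and last one (timeouts are instead
         * rescheduled after a fixed 2 * EVENT_INTERVAL).
         */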
    203 
    204 /* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
    205  * libiscsi 1.10.0, together with other constants we need.  Use it as
    206  * a hint that we have to define them ourselves if needed, to keep the
    207  * minimum required libiscsi version at 1.9.0.  We use an ASCQ macro for
    208  * the test because SCSI_STATUS_* is an enum.
    209  *
    210  * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
    211  * an enum, check against the LIBISCSI_API_VERSION macro, which was
    212  * introduced in 1.11.0.  If it is present, there is no need to define
    213  * anything.
    214  */
    215 #if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
    216     !defined(LIBISCSI_API_VERSION)
    217 #define SCSI_STATUS_TASK_SET_FULL                          0x28
    218 #define SCSI_STATUS_TIMEOUT                                0x0f000002
    219 #define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST    0x2600
    220 #define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR        0x1a00
    221 #endif
    222 
    223 #ifndef LIBISCSI_API_VERSION
    224 #define LIBISCSI_API_VERSION 20130701
    225 #endif
    226 
    227 static int iscsi_translate_sense(struct scsi_sense *sense)
    228 {
    229     return scsi_sense_to_errno(sense->key,
    230                                (sense->ascq & 0xFF00) >> 8,
    231                                sense->ascq & 0xFF);
    232 }
    233 
    234 /* Called (via iscsi_service) with QemuMutex held.  */
    235 static void
    236 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
    237                         void *command_data, void *opaque)
    238 {
    239     struct IscsiTask *iTask = opaque;
    240     struct scsi_task *task = command_data;
    241 
    242     iTask->status = status;
    243     iTask->do_retry = 0;
    244     iTask->err_code = 0;
    245     iTask->task = task;
    246 
    247     if (status != SCSI_STATUS_GOOD) {
    248         iTask->err_code = -EIO;
    249         if (iTask->retries++ < ISCSI_CMD_RETRIES) {
    250             if (status == SCSI_STATUS_BUSY ||
    251                 status == SCSI_STATUS_TIMEOUT ||
    252                 status == SCSI_STATUS_TASK_SET_FULL) {
    253                 unsigned retry_time =
    254                     exp_random(iscsi_retry_times[iTask->retries - 1]);
    255                 if (status == SCSI_STATUS_TIMEOUT) {
    256                     /* make sure the request is rescheduled AFTER the
    257                      * reconnect is initiated */
    258                     retry_time = EVENT_INTERVAL * 2;
    259                     iTask->iscsilun->request_timed_out = true;
    260                 }
    261                 error_report("iSCSI Busy/TaskSetFull/TimeOut"
    262                              " (retry #%u in %u ms): %s",
    263                              iTask->retries, retry_time,
    264                              iscsi_get_error(iscsi));
    265                 aio_timer_init(iTask->iscsilun->aio_context,
    266                                &iTask->retry_timer, QEMU_CLOCK_REALTIME,
    267                                SCALE_MS, iscsi_retry_timer_expired, iTask);
    268                 timer_mod(&iTask->retry_timer,
    269                           qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
    270                 iTask->do_retry = 1;
    271             } else if (status == SCSI_STATUS_CHECK_CONDITION) {
    272                 int error = iscsi_translate_sense(&task->sense);
    273                 if (error == EAGAIN) {
    274                     error_report("iSCSI CheckCondition: %s",
    275                                  iscsi_get_error(iscsi));
    276                     iTask->do_retry = 1;
    277                 } else {
    278                     iTask->err_code = -error;
    279                     iTask->err_str = g_strdup(iscsi_get_error(iscsi));
    280                 }
    281             }
    282         }
    283     }
    284 
    285     if (iTask->co) {
    286         replay_bh_schedule_oneshot_event(iTask->iscsilun->aio_context,
    287                                          iscsi_co_generic_bh_cb, iTask);
    288     } else {
    289         iTask->complete = 1;
    290     }
    291 }
    292 
    293 static void coroutine_fn
    294 iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
    295 {
    296     *iTask = (struct IscsiTask) {
    297         .co         = qemu_coroutine_self(),
    298         .iscsilun   = iscsilun,
    299     };
    300 }
    301 
    302 #ifdef __linux__
    303 
    304 /* Called (via iscsi_service) with QemuMutex held. */
    305 static void
    306 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
    307                     void *private_data)
    308 {
    309     IscsiAIOCB *acb = private_data;
    310 
    311     /* If the command callback hasn't been called yet, drop the task */
    312     if (!acb->bh) {
    313         /* Call iscsi_aio_ioctl_cb() with SCSI_STATUS_CANCELLED */
    314         iscsi_scsi_cancel_task(iscsi, acb->task);
    315     }
    316 
    317     qemu_aio_unref(acb); /* acquired in iscsi_aio_cancel() */
    318 }
    319 
    320 static void
    321 iscsi_aio_cancel(BlockAIOCB *blockacb)
    322 {
    323     IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
    324     IscsiLun *iscsilun = acb->iscsilun;
    325 
    326     WITH_QEMU_LOCK_GUARD(&iscsilun->mutex) {
    327 
    328         /* If it was cancelled or completed already, our work is done here */
    329         if (acb->cancelled || acb->status != -EINPROGRESS) {
    330             return;
    331         }
    332 
    333         acb->cancelled = true;
    334 
    335         qemu_aio_ref(acb); /* released in iscsi_abort_task_cb() */
    336 
    337         /* send a task mgmt call to the target to cancel the task on the target */
    338         if (iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
    339                                              iscsi_abort_task_cb, acb) < 0) {
    340             qemu_aio_unref(acb); /* since iscsi_abort_task_cb() won't be called */
    341         }
    342     }
    343 }
    344 
    345 static const AIOCBInfo iscsi_aiocb_info = {
    346     .aiocb_size         = sizeof(IscsiAIOCB),
    347     .cancel_async       = iscsi_aio_cancel,
    348 };
    349 
    350 #endif
    351 
    352 static void iscsi_process_read(void *arg);
    353 static void iscsi_process_write(void *arg);
    354 
    355 /* Called with QemuMutex held.  */
    356 static void
    357 iscsi_set_events(IscsiLun *iscsilun)
    358 {
    359     struct iscsi_context *iscsi = iscsilun->iscsi;
    360     int ev = iscsi_which_events(iscsi);
    361 
    362     if (ev != iscsilun->events) {
    363         aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
    364                            false,
    365                            (ev & POLLIN) ? iscsi_process_read : NULL,
    366                            (ev & POLLOUT) ? iscsi_process_write : NULL,
    367                            NULL, NULL,
    368                            iscsilun);
    369         iscsilun->events = ev;
    370     }
    371 }
    372 
    373 static void iscsi_timed_check_events(void *opaque)
    374 {
    375     IscsiLun *iscsilun = opaque;
    376 
    377     WITH_QEMU_LOCK_GUARD(&iscsilun->mutex) {
    378         /* check for timed out requests */
    379         iscsi_service(iscsilun->iscsi, 0);
    380 
    381         if (iscsilun->request_timed_out) {
    382             iscsilun->request_timed_out = false;
    383             iscsi_reconnect(iscsilun->iscsi);
    384         }
    385 
    386         /*
    387          * newer versions of libiscsi may return zero events. Ensure we are
    388          * able to return to service once this situation changes.
    389          */
    390         iscsi_set_events(iscsilun);
    391     }
    392 
    393     timer_mod(iscsilun->event_timer,
    394               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
    395 }
    396 
    397 static void
    398 iscsi_process_read(void *arg)
    399 {
    400     IscsiLun *iscsilun = arg;
    401     struct iscsi_context *iscsi = iscsilun->iscsi;
    402 
    403     qemu_mutex_lock(&iscsilun->mutex);
    404     iscsi_service(iscsi, POLLIN);
    405     iscsi_set_events(iscsilun);
    406     qemu_mutex_unlock(&iscsilun->mutex);
    407 }
    408 
    409 static void
    410 iscsi_process_write(void *arg)
    411 {
    412     IscsiLun *iscsilun = arg;
    413     struct iscsi_context *iscsi = iscsilun->iscsi;
    414 
    415     qemu_mutex_lock(&iscsilun->mutex);
    416     iscsi_service(iscsi, POLLOUT);
    417     iscsi_set_events(iscsilun);
    418     qemu_mutex_unlock(&iscsilun->mutex);
    419 }
    420 
    421 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
    422 {
    423     return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
    424 }
    425 
    426 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
    427 {
    428     return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
    429 }
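        /*
         * The two helpers above convert between QEMU's fixed 512-byte sectors
         * (BDRV_SECTOR_SIZE) and the LUN's native blocks.  For example, with
         * a 4096-byte block size, one LUN block corresponds to 8 QEMU sectors.
         */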
    430 
    431 static bool is_byte_request_lun_aligned(int64_t offset, int64_t bytes,
    432                                         IscsiLun *iscsilun)
    433 {
    434     if (offset % iscsilun->block_size || bytes % iscsilun->block_size) {
    435         error_report("iSCSI misaligned request: "
    436                      "iscsilun->block_size %u, offset %" PRIi64
    437                      ", bytes %" PRIi64,
    438                      iscsilun->block_size, offset, bytes);
    439         return false;
    440     }
    441     return true;
    442 }
    443 
    444 static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
    445                                           IscsiLun *iscsilun)
    446 {
    447     assert(nb_sectors <= BDRV_REQUEST_MAX_SECTORS);
    448     return is_byte_request_lun_aligned(sector_num << BDRV_SECTOR_BITS,
    449                                        nb_sectors << BDRV_SECTOR_BITS,
    450                                        iscsilun);
    451 }
    452 
    453 static void iscsi_allocmap_free(IscsiLun *iscsilun)
    454 {
    455     g_free(iscsilun->allocmap);
    456     g_free(iscsilun->allocmap_valid);
    457     iscsilun->allocmap = NULL;
    458     iscsilun->allocmap_valid = NULL;
    459 }
    460 
    461 
    462 static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
    463 {
    464     iscsi_allocmap_free(iscsilun);
    465 
    466     assert(iscsilun->cluster_size);
    467     iscsilun->allocmap_size =
    468         DIV_ROUND_UP(iscsilun->num_blocks * iscsilun->block_size,
    469                      iscsilun->cluster_size);
    470 
    471     iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
    472     if (!iscsilun->allocmap) {
    473         return -ENOMEM;
    474     }
    475 
    476     if (open_flags & BDRV_O_NOCACHE) {
    477         /* when cache.direct = on all allocmap entries are
    478          * treated as invalid to force a relookup of the block
    479          * status on every read request */
    480         return 0;
    481     }
    482 
    483     iscsilun->allocmap_valid = bitmap_try_new(iscsilun->allocmap_size);
    484     if (!iscsilun->allocmap_valid) {
    485         /* if we are under memory pressure free the allocmap as well */
    486         iscsi_allocmap_free(iscsilun);
    487         return -ENOMEM;
    488     }
    489 
    490     return 0;
    491 }
    492 
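        /*
         * Update the allocation bitmaps for a byte range.  The range is
         * expanded to every cluster it touches when setting "allocated" or
         * clearing "valid" (over-approximating those is safe), and shrunk to
         * the clusters it covers completely when clearing "allocated" or
         * setting "valid" (those must not be over-approximated).  E.g. with a
         * 64 KiB cluster size, offset 100 KiB and 200 KiB bytes touch
         * clusters 1-4 (expanded) but only clusters 2-3 are fully covered
         * (shrunk).
         */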
    493 static void
    494 iscsi_allocmap_update(IscsiLun *iscsilun, int64_t offset,
    495                       int64_t bytes, bool allocated, bool valid)
    496 {
    497     int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
    498 
    499     if (iscsilun->allocmap == NULL) {
    500         return;
    501     }
    502     /* expand to entirely contain all affected clusters */
    503     assert(iscsilun->cluster_size);
    504     cl_num_expanded = offset / iscsilun->cluster_size;
    505     nb_cls_expanded = DIV_ROUND_UP(offset + bytes,
    506                                    iscsilun->cluster_size) - cl_num_expanded;
    507     /* shrink to touch only completely contained clusters */
    508     cl_num_shrunk = DIV_ROUND_UP(offset, iscsilun->cluster_size);
    509     nb_cls_shrunk = (offset + bytes) / iscsilun->cluster_size - cl_num_shrunk;
    510     if (allocated) {
    511         bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
    512     } else {
    513         if (nb_cls_shrunk > 0) {
    514             bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
    515         }
    516     }
    517 
    518     if (iscsilun->allocmap_valid == NULL) {
    519         return;
    520     }
    521     if (valid) {
    522         if (nb_cls_shrunk > 0) {
    523             bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
    524         }
    525     } else {
    526         bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
    527                      nb_cls_expanded);
    528     }
    529 }
    530 
    531 static void
    532 iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t offset,
    533                              int64_t bytes)
    534 {
    535     iscsi_allocmap_update(iscsilun, offset, bytes, true, true);
    536 }
    537 
    538 static void
    539 iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t offset,
    540                                int64_t bytes)
    541 {
    542     /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
    543      * is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
    544      */
    545     iscsi_allocmap_update(iscsilun, offset, bytes, false, true);
    546 }
    547 
    548 static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t offset,
    549                                        int64_t bytes)
    550 {
    551     iscsi_allocmap_update(iscsilun, offset, bytes, false, false);
    552 }
    553 
    554 static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
    555 {
    556     if (iscsilun->allocmap) {
    557         bitmap_zero(iscsilun->allocmap, iscsilun->allocmap_size);
    558     }
    559     if (iscsilun->allocmap_valid) {
    560         bitmap_zero(iscsilun->allocmap_valid, iscsilun->allocmap_size);
    561     }
    562 }
    563 
    564 static inline bool
    565 iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t offset,
    566                             int64_t bytes)
    567 {
    568     unsigned long size;
    569     if (iscsilun->allocmap == NULL) {
    570         return true;
    571     }
    572     assert(iscsilun->cluster_size);
    573     size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size);
    574     return !(find_next_bit(iscsilun->allocmap, size,
    575                            offset / iscsilun->cluster_size) == size);
    576 }
    577 
    578 static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
    579                                            int64_t offset, int64_t bytes)
    580 {
    581     unsigned long size;
    582     if (iscsilun->allocmap_valid == NULL) {
    583         return false;
    584     }
    585     assert(iscsilun->cluster_size);
    586     size = DIV_ROUND_UP(offset + bytes, iscsilun->cluster_size);
    587     return (find_next_zero_bit(iscsilun->allocmap_valid, size,
    588                                offset / iscsilun->cluster_size) == size);
    589 }
    590 
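        /* Called with iscsilun->mutex held; drops it while the coroutine
         * yields and re-acquires it before returning.  iscsi_co_generic_bh_cb()
         * (or the retry timer) wakes the coroutine once the task completed. */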
    591 static void coroutine_fn iscsi_co_wait_for_task(IscsiTask *iTask,
    592                                                 IscsiLun *iscsilun)
    593 {
    594     while (!iTask->complete) {
    595         iscsi_set_events(iscsilun);
    596         qemu_mutex_unlock(&iscsilun->mutex);
    597         qemu_coroutine_yield();
    598         qemu_mutex_lock(&iscsilun->mutex);
    599     }
    600 }
    601 
    602 static int coroutine_fn
    603 iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
    604                 QEMUIOVector *iov, int flags)
    605 {
    606     IscsiLun *iscsilun = bs->opaque;
    607     struct IscsiTask iTask;
    608     uint64_t lba;
    609     uint32_t num_sectors;
    610     bool fua = flags & BDRV_REQ_FUA;
    611     int r = 0;
    612 
    613     if (fua) {
    614         assert(iscsilun->dpofua);
    615     }
    616     if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
    617         return -EINVAL;
    618     }
    619 
    620     if (bs->bl.max_transfer) {
    621         assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
    622     }
    623 
    624     lba = sector_qemu2lun(sector_num, iscsilun);
    625     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
    626     iscsi_co_init_iscsitask(iscsilun, &iTask);
    627     qemu_mutex_lock(&iscsilun->mutex);
    628 retry:
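            /*
             * Note on the preprocessor layout below: with libiscsi API
             * 20160603 or newer the scatter/gather list is handed to the
             * *_iov_task() variants directly, while older versions create the
             * task first and attach the iovec with scsi_task_set_iov_out()
             * afterwards.  The "} else {" lines appear in both branches of
             * the #if because it spans the whole if/else statement.
             */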
    629     if (iscsilun->use_16_for_rw) {
    630 #if LIBISCSI_API_VERSION >= (20160603)
    631         iTask.task = iscsi_write16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
    632                                             NULL, num_sectors * iscsilun->block_size,
    633                                             iscsilun->block_size, 0, 0, fua, 0, 0,
    634                                             iscsi_co_generic_cb, &iTask,
    635                                             (struct scsi_iovec *)iov->iov, iov->niov);
    636     } else {
    637         iTask.task = iscsi_write10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
    638                                             NULL, num_sectors * iscsilun->block_size,
    639                                             iscsilun->block_size, 0, 0, fua, 0, 0,
    640                                             iscsi_co_generic_cb, &iTask,
    641                                             (struct scsi_iovec *)iov->iov, iov->niov);
    642     }
    643 #else
    644         iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
    645                                         NULL, num_sectors * iscsilun->block_size,
    646                                         iscsilun->block_size, 0, 0, fua, 0, 0,
    647                                         iscsi_co_generic_cb, &iTask);
    648     } else {
    649         iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
    650                                         NULL, num_sectors * iscsilun->block_size,
    651                                         iscsilun->block_size, 0, 0, fua, 0, 0,
    652                                         iscsi_co_generic_cb, &iTask);
    653     }
    654 #endif
    655     if (iTask.task == NULL) {
    656         qemu_mutex_unlock(&iscsilun->mutex);
    657         return -ENOMEM;
    658     }
    659 #if LIBISCSI_API_VERSION < (20160603)
    660     scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
    661                           iov->niov);
    662 #endif
    663     iscsi_co_wait_for_task(&iTask, iscsilun);
    664 
    665     if (iTask.task != NULL) {
    666         scsi_free_scsi_task(iTask.task);
    667         iTask.task = NULL;
    668     }
    669 
    670     if (iTask.do_retry) {
    671         iTask.complete = 0;
    672         goto retry;
    673     }
    674 
    675     if (iTask.status != SCSI_STATUS_GOOD) {
    676         iscsi_allocmap_set_invalid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
    677                                    nb_sectors * BDRV_SECTOR_SIZE);
    678         error_report("iSCSI WRITE10/16 failed at lba %" PRIu64 ": %s", lba,
    679                      iTask.err_str);
    680         r = iTask.err_code;
    681         goto out_unlock;
    682     }
    683 
    684     iscsi_allocmap_set_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
    685                                  nb_sectors * BDRV_SECTOR_SIZE);
    686 
    687 out_unlock:
    688     qemu_mutex_unlock(&iscsilun->mutex);
    689     g_free(iTask.err_str);
    690     return r;
    691 }
    692 
    693 
    694 
    695 static int coroutine_fn iscsi_co_block_status(BlockDriverState *bs,
    696                                               bool want_zero, int64_t offset,
    697                                               int64_t bytes, int64_t *pnum,
    698                                               int64_t *map,
    699                                               BlockDriverState **file)
    700 {
    701     IscsiLun *iscsilun = bs->opaque;
    702     struct scsi_get_lba_status *lbas = NULL;
    703     struct scsi_lba_status_descriptor *lbasd = NULL;
    704     struct IscsiTask iTask;
    705     uint64_t lba, max_bytes;
    706     int ret;
    707 
    708     iscsi_co_init_iscsitask(iscsilun, &iTask);
    709 
    710     assert(QEMU_IS_ALIGNED(offset | bytes, iscsilun->block_size));
    711 
    712     /* default to all sectors allocated */
    713     ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
    714     if (map) {
    715         *map = offset;
    716     }
    717     *pnum = bytes;
    718 
    719     /* LUN does not support logical block provisioning */
    720     if (!iscsilun->lbpme) {
    721         goto out;
    722     }
    723 
    724     lba = offset / iscsilun->block_size;
    725     max_bytes = (iscsilun->num_blocks - lba) * iscsilun->block_size;
    726 
    727     qemu_mutex_lock(&iscsilun->mutex);
    728 retry:
    729     if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
    730                                   lba, 8 + 16, iscsi_co_generic_cb,
    731                                   &iTask) == NULL) {
    732         ret = -ENOMEM;
    733         goto out_unlock;
    734     }
    735     iscsi_co_wait_for_task(&iTask, iscsilun);
    736 
    737     if (iTask.do_retry) {
    738         if (iTask.task != NULL) {
    739             scsi_free_scsi_task(iTask.task);
    740             iTask.task = NULL;
    741         }
    742         iTask.complete = 0;
    743         goto retry;
    744     }
    745 
    746     if (iTask.status != SCSI_STATUS_GOOD) {
    747         /* in case the get_lba_status_callout fails (e.g.
    748          * because the device is busy or the cmd is not
    749          * supported) we pretend all blocks are allocated
    750          * for backwards compatibility */
    751         error_report("iSCSI GET_LBA_STATUS failed at lba %" PRIu64 ": %s",
    752                      lba, iTask.err_str);
    753         goto out_unlock;
    754     }
    755 
    756     lbas = scsi_datain_unmarshall(iTask.task);
    757     if (lbas == NULL || lbas->num_descriptors == 0) {
    758         ret = -EIO;
    759         goto out_unlock;
    760     }
    761 
    762     lbasd = &lbas->descriptors[0];
    763 
    764     if (lba != lbasd->lba) {
    765         ret = -EIO;
    766         goto out_unlock;
    767     }
    768 
    769     *pnum = MIN((int64_t) lbasd->num_blocks * iscsilun->block_size, max_bytes);
    770 
    771     if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
    772         lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
    773         ret &= ~BDRV_BLOCK_DATA;
    774         if (iscsilun->lbprz) {
    775             ret |= BDRV_BLOCK_ZERO;
    776         }
    777     }
    778 
    779     if (ret & BDRV_BLOCK_ZERO) {
    780         iscsi_allocmap_set_unallocated(iscsilun, offset, *pnum);
    781     } else {
    782         iscsi_allocmap_set_allocated(iscsilun, offset, *pnum);
    783     }
    784 
    785 out_unlock:
    786     qemu_mutex_unlock(&iscsilun->mutex);
    787     g_free(iTask.err_str);
    788 out:
    789     if (iTask.task != NULL) {
    790         scsi_free_scsi_task(iTask.task);
    791     }
    792     if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID && file) {
    793         *file = bs;
    794     }
    795     return ret;
    796 }
    797 
    798 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
    799                                        int64_t sector_num, int nb_sectors,
    800                                        QEMUIOVector *iov)
    801 {
    802     IscsiLun *iscsilun = bs->opaque;
    803     struct IscsiTask iTask;
    804     uint64_t lba;
    805     uint32_t num_sectors;
    806     int r = 0;
    807 
    808     if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
    809         return -EINVAL;
    810     }
    811 
    812     if (bs->bl.max_transfer) {
    813         assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
    814     }
    815 
    816     /* if cache.direct is off and we have a valid entry in our allocation map
    817      * we can skip checking the block status and directly return zeroes if
    818      * the request falls within an unallocated area */
    819     if (iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
    820                                 nb_sectors * BDRV_SECTOR_SIZE) &&
    821         !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
    822                                      nb_sectors * BDRV_SECTOR_SIZE)) {
    823             qemu_iovec_memset(iov, 0, 0x00, iov->size);
    824             return 0;
    825     }
    826 
    827     if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
    828         !iscsi_allocmap_is_valid(iscsilun, sector_num * BDRV_SECTOR_SIZE,
    829                                  nb_sectors * BDRV_SECTOR_SIZE) &&
    830         !iscsi_allocmap_is_allocated(iscsilun, sector_num * BDRV_SECTOR_SIZE,
    831                                      nb_sectors * BDRV_SECTOR_SIZE)) {
    832         int64_t pnum;
    833         /* check the block status from the beginning of the cluster
    834          * containing the start sector */
    835         int64_t head;
    836         int ret;
    837 
    838         assert(iscsilun->cluster_size);
    839         head = (sector_num * BDRV_SECTOR_SIZE) % iscsilun->cluster_size;
    840         ret = iscsi_co_block_status(bs, true,
    841                                     sector_num * BDRV_SECTOR_SIZE - head,
    842                                     BDRV_REQUEST_MAX_BYTES, &pnum, NULL, NULL);
    843         if (ret < 0) {
    844             return ret;
    845         }
    846         /* if the whole request falls into an unallocated area we can avoid
    847          * reading and directly return zeroes instead */
    848         if (ret & BDRV_BLOCK_ZERO &&
    849             pnum >= nb_sectors * BDRV_SECTOR_SIZE + head) {
    850             qemu_iovec_memset(iov, 0, 0x00, iov->size);
    851             return 0;
    852         }
    853     }
    854 
    855     lba = sector_qemu2lun(sector_num, iscsilun);
    856     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
    857 
    858     iscsi_co_init_iscsitask(iscsilun, &iTask);
    859     qemu_mutex_lock(&iscsilun->mutex);
    860 retry:
    861     if (iscsilun->use_16_for_rw) {
    862 #if LIBISCSI_API_VERSION >= (20160603)
    863         iTask.task = iscsi_read16_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
    864                                            num_sectors * iscsilun->block_size,
    865                                            iscsilun->block_size, 0, 0, 0, 0, 0,
    866                                            iscsi_co_generic_cb, &iTask,
    867                                            (struct scsi_iovec *)iov->iov, iov->niov);
    868     } else {
    869         iTask.task = iscsi_read10_iov_task(iscsilun->iscsi, iscsilun->lun, lba,
    870                                            num_sectors * iscsilun->block_size,
    871                                            iscsilun->block_size,
    872                                            0, 0, 0, 0, 0,
    873                                            iscsi_co_generic_cb, &iTask,
    874                                            (struct scsi_iovec *)iov->iov, iov->niov);
    875     }
    876 #else
    877         iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
    878                                        num_sectors * iscsilun->block_size,
    879                                        iscsilun->block_size, 0, 0, 0, 0, 0,
    880                                        iscsi_co_generic_cb, &iTask);
    881     } else {
    882         iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
    883                                        num_sectors * iscsilun->block_size,
    884                                        iscsilun->block_size,
    885                                        0, 0, 0, 0, 0,
    886                                        iscsi_co_generic_cb, &iTask);
    887     }
    888 #endif
    889     if (iTask.task == NULL) {
    890         qemu_mutex_unlock(&iscsilun->mutex);
    891         return -ENOMEM;
    892     }
    893 #if LIBISCSI_API_VERSION < (20160603)
    894     scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
    895 #endif
    896 
    897     iscsi_co_wait_for_task(&iTask, iscsilun);
    898     if (iTask.task != NULL) {
    899         scsi_free_scsi_task(iTask.task);
    900         iTask.task = NULL;
    901     }
    902 
    903     if (iTask.do_retry) {
    904         iTask.complete = 0;
    905         goto retry;
    906     }
    907 
    908     if (iTask.status != SCSI_STATUS_GOOD) {
    909         error_report("iSCSI READ10/16 failed at lba %" PRIu64 ": %s",
    910                      lba, iTask.err_str);
    911         r = iTask.err_code;
    912     }
    913 
    914     qemu_mutex_unlock(&iscsilun->mutex);
    915     g_free(iTask.err_str);
    916     return r;
    917 }
    918 
    919 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
    920 {
    921     IscsiLun *iscsilun = bs->opaque;
    922     struct IscsiTask iTask;
    923     int r = 0;
    924 
    925     iscsi_co_init_iscsitask(iscsilun, &iTask);
    926     qemu_mutex_lock(&iscsilun->mutex);
    927 retry:
    928     if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
    929                                       0, iscsi_co_generic_cb, &iTask) == NULL) {
    930         qemu_mutex_unlock(&iscsilun->mutex);
    931         return -ENOMEM;
    932     }
    933 
    934     iscsi_co_wait_for_task(&iTask, iscsilun);
    935 
    936     if (iTask.task != NULL) {
    937         scsi_free_scsi_task(iTask.task);
    938         iTask.task = NULL;
    939     }
    940 
    941     if (iTask.do_retry) {
    942         iTask.complete = 0;
    943         goto retry;
    944     }
    945 
    946     if (iTask.status != SCSI_STATUS_GOOD) {
    947         error_report("iSCSI SYNCHRONIZECACHE10 failed: %s", iTask.err_str);
    948         r = iTask.err_code;
    949     }
    950 
    951     qemu_mutex_unlock(&iscsilun->mutex);
    952     g_free(iTask.err_str);
    953     return r;
    954 }
    955 
    956 #ifdef __linux__
    957 /* Called (via iscsi_service) with QemuMutex held.  */
    958 static void
    959 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
    960                      void *command_data, void *opaque)
    961 {
    962     IscsiAIOCB *acb = opaque;
    963 
    964     if (status == SCSI_STATUS_CANCELLED) {
    965         if (!acb->bh) {
    966             acb->status = -ECANCELED;
    967             iscsi_schedule_bh(acb);
    968         }
    969         return;
    970     }
    971 
    972     acb->status = 0;
    973     if (status < 0) {
    974         error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
    975                      iscsi_get_error(iscsi));
    976         acb->status = -iscsi_translate_sense(&acb->task->sense);
    977     }
    978 
    979     acb->ioh->driver_status = 0;
    980     acb->ioh->host_status   = 0;
    981     acb->ioh->resid         = 0;
    982     acb->ioh->status        = status;
    983 
    984 #define SG_ERR_DRIVER_SENSE    0x08
    985 
    986     if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
    987         int ss;
    988 
    989         acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
    990 
    991         acb->ioh->sb_len_wr = acb->task->datain.size - 2;
    992         ss = MIN(acb->ioh->mx_sb_len, acb->ioh->sb_len_wr);
    993         memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
    994     }
    995 
    996     iscsi_schedule_bh(acb);
    997 }
    998 
    999 static void iscsi_ioctl_bh_completion(void *opaque)
   1000 {
   1001     IscsiAIOCB *acb = opaque;
   1002 
   1003     qemu_bh_delete(acb->bh);
   1004     acb->common.cb(acb->common.opaque, acb->ret);
   1005     qemu_aio_unref(acb);
   1006 }
   1007 
   1008 static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
   1009 {
   1010     BlockDriverState *bs = acb->common.bs;
   1011     IscsiLun *iscsilun = bs->opaque;
   1012     int ret = 0;
   1013 
   1014     switch (req) {
   1015     case SG_GET_VERSION_NUM:
   1016         *(int *)buf = 30000;
   1017         break;
   1018     case SG_GET_SCSI_ID:
   1019         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
   1020         break;
   1021     default:
   1022         ret = -EINVAL;
   1023     }
   1024     assert(!acb->bh);
   1025     acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
   1026                          iscsi_ioctl_bh_completion, acb);
   1027     acb->ret = ret;
   1028     qemu_bh_schedule(acb->bh);
   1029 }
   1030 
   1031 static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
   1032         unsigned long int req, void *buf,
   1033         BlockCompletionFunc *cb, void *opaque)
   1034 {
   1035     IscsiLun *iscsilun = bs->opaque;
   1036     struct iscsi_context *iscsi = iscsilun->iscsi;
   1037     struct iscsi_data data;
   1038     IscsiAIOCB *acb;
   1039 
   1040     acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
   1041 
   1042     acb->iscsilun = iscsilun;
   1043     acb->bh          = NULL;
   1044     acb->status      = -EINPROGRESS;
   1045     acb->ioh         = buf;
   1046     acb->cancelled   = false;
   1047 
   1048     if (req != SG_IO) {
   1049         iscsi_ioctl_handle_emulated(acb, req, buf);
   1050         return &acb->common;
   1051     }
   1052 
   1053     if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
   1054         error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
   1055                      acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
   1056         qemu_aio_unref(acb);
   1057         return NULL;
   1058     }
   1059 
   1060     acb->task = malloc(sizeof(struct scsi_task));
   1061     if (acb->task == NULL) {
   1062         error_report("iSCSI: Failed to allocate task for scsi command. %s",
   1063                      iscsi_get_error(iscsi));
   1064         qemu_aio_unref(acb);
   1065         return NULL;
   1066     }
   1067     memset(acb->task, 0, sizeof(struct scsi_task));
   1068 
   1069     switch (acb->ioh->dxfer_direction) {
   1070     case SG_DXFER_TO_DEV:
   1071         acb->task->xfer_dir = SCSI_XFER_WRITE;
   1072         break;
   1073     case SG_DXFER_FROM_DEV:
   1074         acb->task->xfer_dir = SCSI_XFER_READ;
   1075         break;
   1076     default:
   1077         acb->task->xfer_dir = SCSI_XFER_NONE;
   1078         break;
   1079     }
   1080 
   1081     acb->task->cdb_size = acb->ioh->cmd_len;
   1082     memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
   1083     acb->task->expxferlen = acb->ioh->dxfer_len;
   1084 
   1085     data.size = 0;
   1086     qemu_mutex_lock(&iscsilun->mutex);
   1087     if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
   1088         if (acb->ioh->iovec_count == 0) {
   1089             data.data = acb->ioh->dxferp;
   1090             data.size = acb->ioh->dxfer_len;
   1091         } else {
   1092             scsi_task_set_iov_out(acb->task,
   1093                                  (struct scsi_iovec *) acb->ioh->dxferp,
   1094                                  acb->ioh->iovec_count);
   1095         }
   1096     }
   1097 
   1098     if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
   1099                                  iscsi_aio_ioctl_cb,
   1100                                  (data.size > 0) ? &data : NULL,
   1101                                  acb) != 0) {
   1102         qemu_mutex_unlock(&iscsilun->mutex);
   1103         scsi_free_scsi_task(acb->task);
   1104         qemu_aio_unref(acb);
   1105         return NULL;
   1106     }
   1107 
   1108     /* tell libiscsi to read straight into the buffer we got from ioctl */
   1109     if (acb->task->xfer_dir == SCSI_XFER_READ) {
   1110         if (acb->ioh->iovec_count == 0) {
   1111             scsi_task_add_data_in_buffer(acb->task,
   1112                                          acb->ioh->dxfer_len,
   1113                                          acb->ioh->dxferp);
   1114         } else {
   1115             scsi_task_set_iov_in(acb->task,
   1116                                  (struct scsi_iovec *) acb->ioh->dxferp,
   1117                                  acb->ioh->iovec_count);
   1118         }
   1119     }
   1120 
   1121     iscsi_set_events(iscsilun);
   1122     qemu_mutex_unlock(&iscsilun->mutex);
   1123 
   1124     return &acb->common;
   1125 }
   1126 
   1127 #endif
   1128 
   1129 static int64_t
   1130 iscsi_getlength(BlockDriverState *bs)
   1131 {
   1132     IscsiLun *iscsilun = bs->opaque;
   1133     int64_t len;
   1134 
   1135     len  = iscsilun->num_blocks;
   1136     len *= iscsilun->block_size;
   1137 
   1138     return len;
   1139 }
   1140 
   1141 static int
   1142 coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset,
   1143                                int64_t bytes)
   1144 {
   1145     IscsiLun *iscsilun = bs->opaque;
   1146     struct IscsiTask iTask;
   1147     struct unmap_list list;
   1148     int r = 0;
   1149 
   1150     if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
   1151         return -ENOTSUP;
   1152     }
   1153 
   1154     if (!iscsilun->lbp.lbpu) {
   1155         /* UNMAP is not supported by the target */
   1156         return 0;
   1157     }
   1158 
   1159     /*
   1160      * We don't want to overflow list.num which is uint32_t.
   1161      * We rely on our max_pdiscard.
   1162      */
   1163     assert(bytes / iscsilun->block_size <= UINT32_MAX);
   1164 
   1165     list.lba = offset / iscsilun->block_size;
   1166     list.num = bytes / iscsilun->block_size;
   1167 
   1168     iscsi_co_init_iscsitask(iscsilun, &iTask);
   1169     qemu_mutex_lock(&iscsilun->mutex);
   1170 retry:
   1171     if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
   1172                          iscsi_co_generic_cb, &iTask) == NULL) {
   1173         r = -ENOMEM;
   1174         goto out_unlock;
   1175     }
   1176 
   1177     iscsi_co_wait_for_task(&iTask, iscsilun);
   1178 
   1179     if (iTask.task != NULL) {
   1180         scsi_free_scsi_task(iTask.task);
   1181         iTask.task = NULL;
   1182     }
   1183 
   1184     if (iTask.do_retry) {
   1185         iTask.complete = 0;
   1186         goto retry;
   1187     }
   1188 
   1189     iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
   1190 
   1191     if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
   1192         /* the target might fail with a check condition if it
   1193            is not happy with the alignment of the UNMAP request;
   1194            we silently fail in this case */
   1195         goto out_unlock;
   1196     }
   1197 
   1198     if (iTask.status != SCSI_STATUS_GOOD) {
   1199         error_report("iSCSI UNMAP failed at lba %" PRIu64 ": %s",
   1200                      list.lba, iTask.err_str);
   1201         r = iTask.err_code;
   1202         goto out_unlock;
   1203     }
   1204 
   1205 out_unlock:
   1206     qemu_mutex_unlock(&iscsilun->mutex);
   1207     g_free(iTask.err_str);
   1208     return r;
   1209 }
   1210 
   1211 static int
   1212 coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
   1213                                     int64_t bytes, BdrvRequestFlags flags)
   1214 {
   1215     IscsiLun *iscsilun = bs->opaque;
   1216     struct IscsiTask iTask;
   1217     uint64_t lba;
   1218     uint64_t nb_blocks;
   1219     bool use_16_for_ws = iscsilun->use_16_for_rw;
   1220     int r = 0;
   1221 
   1222     if (!is_byte_request_lun_aligned(offset, bytes, iscsilun)) {
   1223         return -ENOTSUP;
   1224     }
   1225 
   1226     if (flags & BDRV_REQ_MAY_UNMAP) {
   1227         if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
   1228             /* WRITESAME10 with UNMAP is unsupported, try WRITESAME16 */
   1229             use_16_for_ws = true;
   1230         }
   1231         if (use_16_for_ws && !iscsilun->lbp.lbpws) {
   1232             /* WRITESAME16 with UNMAP is not supported by the target,
   1233              * fall back and try WRITESAME10/16 without UNMAP */
   1234             flags &= ~BDRV_REQ_MAY_UNMAP;
   1235             use_16_for_ws = iscsilun->use_16_for_rw;
   1236         }
   1237     }
   1238 
   1239     if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
   1240         /* WRITESAME without UNMAP is not supported by the target */
   1241         return -ENOTSUP;
   1242     }
   1243 
   1244     lba = offset / iscsilun->block_size;
   1245     nb_blocks = bytes / iscsilun->block_size;
   1246 
   1247     if (iscsilun->zeroblock == NULL) {
   1248         iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
   1249         if (iscsilun->zeroblock == NULL) {
   1250             return -ENOMEM;
   1251         }
   1252     }
   1253 
   1254     qemu_mutex_lock(&iscsilun->mutex);
   1255     iscsi_co_init_iscsitask(iscsilun, &iTask);
   1256 retry:
   1257     if (use_16_for_ws) {
   1258         /*
   1259          * iscsi_writesame16_task num_blocks argument is uint32_t. We rely here
   1260          * on our max_pwrite_zeroes limit.
   1261          */
   1262         assert(nb_blocks <= UINT32_MAX);
   1263         iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
   1264                                             iscsilun->zeroblock, iscsilun->block_size,
   1265                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
   1266                                             0, 0, iscsi_co_generic_cb, &iTask);
   1267     } else {
   1268         /*
   1269          * iscsi_writesame10_task num_blocks argument is uint16_t. We rely here
   1270          * on our max_pwrite_zeroes limit.
   1271          */
   1272         assert(nb_blocks <= UINT16_MAX);
   1273         iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
   1274                                             iscsilun->zeroblock, iscsilun->block_size,
   1275                                             nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
   1276                                             0, 0, iscsi_co_generic_cb, &iTask);
   1277     }
   1278     if (iTask.task == NULL) {
   1279         qemu_mutex_unlock(&iscsilun->mutex);
   1280         return -ENOMEM;
   1281     }
   1282 
   1283     iscsi_co_wait_for_task(&iTask, iscsilun);
   1284 
   1285     if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
   1286         iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
   1287         (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
   1288          iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
   1289         /* WRITE SAME is not supported by the target */
   1290         iscsilun->has_write_same = false;
   1291         scsi_free_scsi_task(iTask.task);
   1292         r = -ENOTSUP;
   1293         goto out_unlock;
   1294     }
   1295 
   1296     if (iTask.task != NULL) {
   1297         scsi_free_scsi_task(iTask.task);
   1298         iTask.task = NULL;
   1299     }
   1300 
   1301     if (iTask.do_retry) {
   1302         iTask.complete = 0;
   1303         goto retry;
   1304     }
   1305 
   1306     if (iTask.status != SCSI_STATUS_GOOD) {
   1307         iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
   1308         error_report("iSCSI WRITESAME10/16 failed at lba %" PRIu64 ": %s",
   1309                      lba, iTask.err_str);
   1310         r = iTask.err_code;
   1311         goto out_unlock;
   1312     }
   1313 
   1314     if (flags & BDRV_REQ_MAY_UNMAP) {
   1315         iscsi_allocmap_set_invalid(iscsilun, offset, bytes);
   1316     } else {
   1317         iscsi_allocmap_set_allocated(iscsilun, offset, bytes);
   1318     }
   1319 
   1320 out_unlock:
   1321     qemu_mutex_unlock(&iscsilun->mutex);
   1322     g_free(iTask.err_str);
   1323     return r;
   1324 }
   1325 
   1326 static void apply_chap(struct iscsi_context *iscsi, QemuOpts *opts,
   1327                        Error **errp)
   1328 {
   1329     const char *user = NULL;
   1330     const char *password = NULL;
   1331     const char *secretid;
   1332     char *secret = NULL;
   1333 
   1334     user = qemu_opt_get(opts, "user");
   1335     if (!user) {
   1336         return;
   1337     }
   1338 
   1339     secretid = qemu_opt_get(opts, "password-secret");
   1340     password = qemu_opt_get(opts, "password");
   1341     if (secretid && password) {
   1342         error_setg(errp, "'password' and 'password-secret' properties are "
   1343                    "mutually exclusive");
   1344         return;
   1345     }
   1346     if (secretid) {
   1347         secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
   1348         if (!secret) {
   1349             return;
   1350         }
   1351         password = secret;
   1352     } else if (!password) {
   1353         error_setg(errp, "CHAP username specified but no password was given");
   1354         return;
   1355     }
   1356 
   1357     if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
   1358         error_setg(errp, "Failed to set initiator username and password");
   1359     }
   1360 
   1361     g_free(secret);
   1362 }
   1363 
   1364 static void apply_header_digest(struct iscsi_context *iscsi, QemuOpts *opts,
   1365                                 Error **errp)
   1366 {
   1367     const char *digest = NULL;
   1368 
   1369     digest = qemu_opt_get(opts, "header-digest");
   1370     if (!digest) {
   1371         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
   1372     } else if (!strcmp(digest, "crc32c")) {
   1373         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
   1374     } else if (!strcmp(digest, "none")) {
   1375         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
   1376     } else if (!strcmp(digest, "crc32c-none")) {
   1377         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
   1378     } else if (!strcmp(digest, "none-crc32c")) {
   1379         iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
   1380     } else {
   1381         error_setg(errp, "Invalid header-digest setting : %s", digest);
   1382     }
   1383 }
   1384 
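        /*
         * If no initiator-name option is given, derive one from the VM UUID
         * (if set) or the VM name, e.g. "iqn.2008-11.org.linux-kvm:<uuid>".
         */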
   1385 static char *get_initiator_name(QemuOpts *opts)
   1386 {
   1387     const char *name;
   1388     char *iscsi_name;
   1389     UuidInfo *uuid_info;
   1390 
   1391     name = qemu_opt_get(opts, "initiator-name");
   1392     if (name) {
   1393         return g_strdup(name);
   1394     }
   1395 
   1396     uuid_info = qmp_query_uuid(NULL);
   1397     if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
   1398         name = qemu_get_vm_name();
   1399     } else {
   1400         name = uuid_info->UUID;
   1401     }
   1402     iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
   1403                                  name ? ":" : "", name ? name : "");
   1404     qapi_free_UuidInfo(uuid_info);
   1405     return iscsi_name;
   1406 }
   1407 
   1408 static void iscsi_nop_timed_event(void *opaque)
   1409 {
   1410     IscsiLun *iscsilun = opaque;
   1411 
   1412     QEMU_LOCK_GUARD(&iscsilun->mutex);
   1413     if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
   1414         error_report("iSCSI: NOP timeout. Reconnecting...");
   1415         iscsilun->request_timed_out = true;
   1416     } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
    1417         error_report("iSCSI: failed to send NOP-Out. Disabling NOP messages.");
   1418         return;
   1419     }
   1420 
   1421     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
   1422     iscsi_set_events(iscsilun);
   1423 }
   1424 
   1425 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
   1426 {
   1427     struct scsi_task *task = NULL;
   1428     struct scsi_readcapacity10 *rc10 = NULL;
   1429     struct scsi_readcapacity16 *rc16 = NULL;
    1430     int retries = ISCSI_CMD_RETRIES;
   1431 
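             /* Retry while the target reports a UNIT ATTENTION check
              * condition, up to ISCSI_CMD_RETRIES times. */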
   1432     do {
   1433         if (task != NULL) {
   1434             scsi_free_scsi_task(task);
   1435             task = NULL;
   1436         }
   1437 
   1438         switch (iscsilun->type) {
   1439         case TYPE_DISK:
   1440             task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
   1441             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
   1442                 rc16 = scsi_datain_unmarshall(task);
   1443                 if (rc16 == NULL) {
   1444                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
   1445                 } else {
   1446                     iscsilun->block_size = rc16->block_length;
   1447                     iscsilun->num_blocks = rc16->returned_lba + 1;
   1448                     iscsilun->lbpme = !!rc16->lbpme;
   1449                     iscsilun->lbprz = !!rc16->lbprz;
   1450                     iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
   1451                 }
   1452                 break;
   1453             }
   1454             if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
   1455                 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
   1456                 break;
   1457             }
    1458             /* Fall through and try READ CAPACITY(10) instead. */
   1459         case TYPE_ROM:
   1460             task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
   1461             if (task != NULL && task->status == SCSI_STATUS_GOOD) {
   1462                 rc10 = scsi_datain_unmarshall(task);
   1463                 if (rc10 == NULL) {
   1464                     error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
   1465                 } else {
   1466                     iscsilun->block_size = rc10->block_size;
   1467                     if (rc10->lba == 0) {
   1468                         /* blank disk loaded */
   1469                         iscsilun->num_blocks = 0;
   1470                     } else {
   1471                         iscsilun->num_blocks = rc10->lba + 1;
   1472                     }
   1473                 }
   1474             }
   1475             break;
   1476         default:
   1477             return;
   1478         }
   1479     } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
   1480              && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
   1481              && retries-- > 0);
   1482 
   1483     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
   1484         error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
   1485     } else if (!iscsilun->block_size ||
   1486                iscsilun->block_size % BDRV_SECTOR_SIZE) {
   1487         error_setg(errp, "iSCSI: the target returned an invalid "
   1488                    "block size of %d.", iscsilun->block_size);
   1489     }
   1490     if (task) {
   1491         scsi_free_scsi_task(task);
   1492     }
   1493 }
   1494 
   1495 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
   1496                                           int evpd, int pc, void **inq, Error **errp)
   1497 {
   1498     int full_size;
   1499     struct scsi_task *task = NULL;
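             /* Issue the INQUIRY with a 64-byte allocation length first and
              * reissue it with the full size if the response was truncated. */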
   1500     task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
   1501     if (task == NULL || task->status != SCSI_STATUS_GOOD) {
   1502         goto fail;
   1503     }
   1504     full_size = scsi_datain_getfullsize(task);
   1505     if (full_size > task->datain.size) {
   1506         scsi_free_scsi_task(task);
   1507 
   1508         /* we need more data for the full list */
   1509         task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
   1510         if (task == NULL || task->status != SCSI_STATUS_GOOD) {
   1511             goto fail;
   1512         }
   1513     }
   1514 
   1515     *inq = scsi_datain_unmarshall(task);
   1516     if (*inq == NULL) {
   1517         error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
   1518         goto fail_with_err;
   1519     }
   1520 
   1521     return task;
   1522 
   1523 fail:
    1524     error_setg(errp, "iSCSI: Inquiry command failed: %s",
   1525                iscsi_get_error(iscsi));
   1526 fail_with_err:
   1527     if (task != NULL) {
   1528         scsi_free_scsi_task(task);
   1529     }
   1530     return NULL;
   1531 }
   1532 
   1533 static void iscsi_detach_aio_context(BlockDriverState *bs)
   1534 {
   1535     IscsiLun *iscsilun = bs->opaque;
   1536 
   1537     aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
   1538                        false, NULL, NULL, NULL, NULL, NULL);
   1539     iscsilun->events = 0;
   1540 
   1541     if (iscsilun->nop_timer) {
   1542         timer_free(iscsilun->nop_timer);
   1543         iscsilun->nop_timer = NULL;
   1544     }
   1545     if (iscsilun->event_timer) {
   1546         timer_free(iscsilun->event_timer);
   1547         iscsilun->event_timer = NULL;
   1548     }
   1549 }
   1550 
   1551 static void iscsi_attach_aio_context(BlockDriverState *bs,
   1552                                      AioContext *new_context)
   1553 {
   1554     IscsiLun *iscsilun = bs->opaque;
   1555 
   1556     iscsilun->aio_context = new_context;
   1557     iscsi_set_events(iscsilun);
   1558 
   1559     /* Set up a timer for sending out iSCSI NOPs */
   1560     iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
   1561                                         QEMU_CLOCK_REALTIME, SCALE_MS,
   1562                                         iscsi_nop_timed_event, iscsilun);
   1563     timer_mod(iscsilun->nop_timer,
   1564               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
   1565 
   1566     /* Set up a timer for periodic calls to iscsi_set_events and to
    1567      * scan for command timeouts */
   1568     iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
   1569                                           QEMU_CLOCK_REALTIME, SCALE_MS,
   1570                                           iscsi_timed_check_events, iscsilun);
   1571     timer_mod(iscsilun->event_timer,
   1572               qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
   1573 }
   1574 
   1575 static void iscsi_modesense_sync(IscsiLun *iscsilun)
   1576 {
   1577     struct scsi_task *task;
   1578     struct scsi_mode_sense *ms = NULL;
   1579     iscsilun->write_protected = false;
   1580     iscsilun->dpofua = false;
   1581 
   1582     task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
   1583                                  1, SCSI_MODESENSE_PC_CURRENT,
   1584                                  0x3F, 0, 255);
   1585     if (task == NULL) {
   1586         error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
   1587                      iscsi_get_error(iscsilun->iscsi));
   1588         goto out;
   1589     }
   1590 
   1591     if (task->status != SCSI_STATUS_GOOD) {
   1592         error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
   1593         goto out;
   1594     }
   1595     ms = scsi_datain_unmarshall(task);
   1596     if (!ms) {
   1597         error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
   1598                      iscsi_get_error(iscsilun->iscsi));
   1599         goto out;
   1600     }
   1601     iscsilun->write_protected = ms->device_specific_parameter & 0x80;
   1602     iscsilun->dpofua          = ms->device_specific_parameter & 0x10;
   1603 
   1604 out:
   1605     if (task) {
   1606         scsi_free_scsi_task(task);
   1607     }
   1608 }
   1609 
   1610 static void iscsi_parse_iscsi_option(const char *target, QDict *options)
   1611 {
   1612     QemuOptsList *list;
   1613     QemuOpts *opts;
   1614     const char *user, *password, *password_secret, *initiator_name,
   1615                *header_digest, *timeout;
   1616 
   1617     list = qemu_find_opts("iscsi");
   1618     if (!list) {
   1619         return;
   1620     }
   1621 
   1622     opts = qemu_opts_find(list, target);
   1623     if (opts == NULL) {
   1624         opts = QTAILQ_FIRST(&list->head);
   1625         if (!opts) {
   1626             return;
   1627         }
   1628     }
   1629 
   1630     user = qemu_opt_get(opts, "user");
   1631     if (user) {
   1632         qdict_set_default_str(options, "user", user);
   1633     }
   1634 
   1635     password = qemu_opt_get(opts, "password");
   1636     if (password) {
   1637         qdict_set_default_str(options, "password", password);
   1638     }
   1639 
   1640     password_secret = qemu_opt_get(opts, "password-secret");
   1641     if (password_secret) {
   1642         qdict_set_default_str(options, "password-secret", password_secret);
   1643     }
   1644 
   1645     initiator_name = qemu_opt_get(opts, "initiator-name");
   1646     if (initiator_name) {
   1647         qdict_set_default_str(options, "initiator-name", initiator_name);
   1648     }
   1649 
   1650     header_digest = qemu_opt_get(opts, "header-digest");
   1651     if (header_digest) {
   1652         /* -iscsi takes upper case values, but QAPI only supports lower case
   1653          * enum constant names, so we have to convert here. */
   1654         char *qapi_value = g_ascii_strdown(header_digest, -1);
   1655         qdict_set_default_str(options, "header-digest", qapi_value);
   1656         g_free(qapi_value);
   1657     }
   1658 
   1659     timeout = qemu_opt_get(opts, "timeout");
   1660     if (timeout) {
   1661         qdict_set_default_str(options, "timeout", timeout);
   1662     }
   1663 }
   1664 
   1665 /*
    1666  * We support iSCSI URLs of the form
   1667  * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
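          * e.g. (placeholder values):
          * iscsi://chapuser%chapsecret@192.0.2.10:3260/iqn.2001-04.com.example:disk1/1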
   1668  */
   1669 static void iscsi_parse_filename(const char *filename, QDict *options,
   1670                                  Error **errp)
   1671 {
   1672     struct iscsi_url *iscsi_url;
   1673     const char *transport_name;
   1674     char *lun_str;
   1675 
   1676     iscsi_url = iscsi_parse_full_url(NULL, filename);
   1677     if (iscsi_url == NULL) {
    1678         error_setg(errp, "Failed to parse URL: %s", filename);
   1679         return;
   1680     }
   1681 
   1682 #if LIBISCSI_API_VERSION >= (20160603)
   1683     switch (iscsi_url->transport) {
   1684     case TCP_TRANSPORT:
   1685         transport_name = "tcp";
   1686         break;
   1687     case ISER_TRANSPORT:
   1688         transport_name = "iser";
   1689         break;
   1690     default:
   1691         error_setg(errp, "Unknown transport type (%d)",
   1692                    iscsi_url->transport);
                 iscsi_destroy_url(iscsi_url);
    1693         return;
   1694     }
   1695 #else
   1696     transport_name = "tcp";
   1697 #endif
   1698 
   1699     qdict_set_default_str(options, "transport", transport_name);
   1700     qdict_set_default_str(options, "portal", iscsi_url->portal);
   1701     qdict_set_default_str(options, "target", iscsi_url->target);
   1702 
   1703     lun_str = g_strdup_printf("%d", iscsi_url->lun);
   1704     qdict_set_default_str(options, "lun", lun_str);
   1705     g_free(lun_str);
   1706 
   1707     /* User/password from -iscsi take precedence over those from the URL */
   1708     iscsi_parse_iscsi_option(iscsi_url->target, options);
   1709 
   1710     if (iscsi_url->user[0] != '\0') {
   1711         qdict_set_default_str(options, "user", iscsi_url->user);
   1712         qdict_set_default_str(options, "password", iscsi_url->passwd);
   1713     }
   1714 
   1715     iscsi_destroy_url(iscsi_url);
   1716 }
   1717 
   1718 static QemuOptsList runtime_opts = {
   1719     .name = "iscsi",
   1720     .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
   1721     .desc = {
   1722         {
   1723             .name = "transport",
   1724             .type = QEMU_OPT_STRING,
   1725         },
   1726         {
   1727             .name = "portal",
   1728             .type = QEMU_OPT_STRING,
   1729         },
   1730         {
   1731             .name = "target",
   1732             .type = QEMU_OPT_STRING,
   1733         },
   1734         {
   1735             .name = "user",
   1736             .type = QEMU_OPT_STRING,
   1737         },
   1738         {
   1739             .name = "password",
   1740             .type = QEMU_OPT_STRING,
   1741         },
   1742         {
   1743             .name = "password-secret",
   1744             .type = QEMU_OPT_STRING,
   1745         },
   1746         {
   1747             .name = "lun",
   1748             .type = QEMU_OPT_NUMBER,
   1749         },
   1750         {
   1751             .name = "initiator-name",
   1752             .type = QEMU_OPT_STRING,
   1753         },
   1754         {
   1755             .name = "header-digest",
   1756             .type = QEMU_OPT_STRING,
   1757         },
   1758         {
   1759             .name = "timeout",
   1760             .type = QEMU_OPT_NUMBER,
   1761         },
   1762         { /* end of list */ }
   1763     },
   1764 };
   1765 
   1766 static void iscsi_save_designator(IscsiLun *lun,
   1767                                   struct scsi_inquiry_device_identification *inq_di)
   1768 {
   1769     struct scsi_inquiry_device_designator *desig, *copy = NULL;
   1770 
   1771     for (desig = inq_di->designators; desig; desig = desig->next) {
   1772         if (desig->association ||
   1773             desig->designator_type > SCSI_DESIGNATOR_TYPE_NAA) {
   1774             continue;
   1775         }
   1776         /* NAA works better than T10 vendor ID based designator. */
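                 /* Types are numbered vendor-specific (0) < T10 (1) <
                  * EUI-64 (2) < NAA (3), so keep the highest-numbered one. */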
   1777         if (!copy || copy->designator_type < desig->designator_type) {
   1778             copy = desig;
   1779         }
   1780     }
   1781     if (copy) {
   1782         lun->dd = g_new(struct scsi_inquiry_device_designator, 1);
   1783         *lun->dd = *copy;
   1784         lun->dd->next = NULL;
   1785         lun->dd->designator = g_malloc(copy->designator_length);
   1786         memcpy(lun->dd->designator, copy->designator, copy->designator_length);
   1787     }
   1788 }
   1789 
   1790 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
   1791                       Error **errp)
   1792 {
   1793     IscsiLun *iscsilun = bs->opaque;
   1794     struct iscsi_context *iscsi = NULL;
   1795     struct scsi_task *task = NULL;
   1796     struct scsi_inquiry_standard *inq = NULL;
   1797     struct scsi_inquiry_supported_pages *inq_vpd;
   1798     char *initiator_name = NULL;
   1799     QemuOpts *opts;
   1800     Error *local_err = NULL;
   1801     const char *transport_name, *portal, *target;
   1802 #if LIBISCSI_API_VERSION >= (20160603)
   1803     enum iscsi_transport_type transport;
   1804 #endif
   1805     int i, ret = 0, timeout = 0, lun;
   1806 
   1807     opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
   1808     if (!qemu_opts_absorb_qdict(opts, options, errp)) {
   1809         ret = -EINVAL;
   1810         goto out;
   1811     }
   1812 
   1813     transport_name = qemu_opt_get(opts, "transport");
   1814     portal = qemu_opt_get(opts, "portal");
   1815     target = qemu_opt_get(opts, "target");
   1816     lun = qemu_opt_get_number(opts, "lun", 0);
   1817 
   1818     if (!transport_name || !portal || !target) {
   1819         error_setg(errp, "Need all of transport, portal and target options");
   1820         ret = -EINVAL;
   1821         goto out;
   1822     }
   1823 
   1824     if (!strcmp(transport_name, "tcp")) {
   1825 #if LIBISCSI_API_VERSION >= (20160603)
   1826         transport = TCP_TRANSPORT;
   1827     } else if (!strcmp(transport_name, "iser")) {
   1828         transport = ISER_TRANSPORT;
   1829 #else
   1830         /* TCP is what older libiscsi versions always use */
   1831 #endif
   1832     } else {
   1833         error_setg(errp, "Unknown transport: %s", transport_name);
   1834         ret = -EINVAL;
   1835         goto out;
   1836     }
   1837 
   1838     memset(iscsilun, 0, sizeof(IscsiLun));
   1839 
   1840     initiator_name = get_initiator_name(opts);
   1841 
   1842     iscsi = iscsi_create_context(initiator_name);
   1843     if (iscsi == NULL) {
   1844         error_setg(errp, "iSCSI: Failed to create iSCSI context.");
   1845         ret = -ENOMEM;
   1846         goto out;
   1847     }
   1848 #if LIBISCSI_API_VERSION >= (20160603)
   1849     if (iscsi_init_transport(iscsi, transport)) {
    1850         error_setg(errp, "Error initializing transport.");
   1851         ret = -EINVAL;
   1852         goto out;
   1853     }
   1854 #endif
   1855     if (iscsi_set_targetname(iscsi, target)) {
   1856         error_setg(errp, "iSCSI: Failed to set target name.");
   1857         ret = -EINVAL;
   1858         goto out;
   1859     }
   1860 
   1861     /* check if we got CHAP username/password via the options */
   1862     apply_chap(iscsi, opts, &local_err);
   1863     if (local_err != NULL) {
   1864         error_propagate(errp, local_err);
   1865         ret = -EINVAL;
   1866         goto out;
   1867     }
   1868 
   1869     if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
   1870         error_setg(errp, "iSCSI: Failed to set session type to normal.");
   1871         ret = -EINVAL;
   1872         goto out;
   1873     }
   1874 
   1875     /* check if we got HEADER_DIGEST via the options */
   1876     apply_header_digest(iscsi, opts, &local_err);
   1877     if (local_err != NULL) {
   1878         error_propagate(errp, local_err);
   1879         ret = -EINVAL;
   1880         goto out;
   1881     }
   1882 
   1883     /* timeout handling is broken in libiscsi before 1.15.0 */
   1884     timeout = qemu_opt_get_number(opts, "timeout", 0);
   1885 #if LIBISCSI_API_VERSION >= 20150621
   1886     iscsi_set_timeout(iscsi, timeout);
   1887 #else
   1888     if (timeout) {
   1889         warn_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
   1890     }
   1891 #endif
   1892 
   1893     if (iscsi_full_connect_sync(iscsi, portal, lun) != 0) {
    1894         error_setg(errp, "iSCSI: Failed to connect to LUN: %s",
   1895             iscsi_get_error(iscsi));
   1896         ret = -EINVAL;
   1897         goto out;
   1898     }
   1899 
   1900     iscsilun->iscsi = iscsi;
   1901     iscsilun->aio_context = bdrv_get_aio_context(bs);
   1902     iscsilun->lun = lun;
   1903     iscsilun->has_write_same = true;
   1904 
   1905     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
   1906                             (void **) &inq, errp);
   1907     if (task == NULL) {
   1908         ret = -EINVAL;
   1909         goto out;
   1910     }
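             /* "periperal" matches the field name spelling in libiscsi's
              * struct scsi_inquiry_standard. */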
   1911     iscsilun->type = inq->periperal_device_type;
   1912     scsi_free_scsi_task(task);
   1913     task = NULL;
   1914 
   1915     iscsi_modesense_sync(iscsilun);
   1916     if (iscsilun->dpofua) {
   1917         bs->supported_write_flags = BDRV_REQ_FUA;
   1918     }
   1919 
   1920     /* Check the write protect flag of the LUN if we want to write */
   1921     if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
   1922         iscsilun->write_protected) {
   1923         ret = bdrv_apply_auto_read_only(bs, "LUN is write protected", errp);
   1924         if (ret < 0) {
   1925             goto out;
   1926         }
   1927         flags &= ~BDRV_O_RDWR;
   1928     }
   1929 
   1930     iscsi_readcapacity_sync(iscsilun, &local_err);
   1931     if (local_err != NULL) {
   1932         error_propagate(errp, local_err);
   1933         ret = -EINVAL;
   1934         goto out;
   1935     }
   1936     bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
   1937 
   1938     /* We don't have any emulation for devices other than disks and CD-ROMs, so
   1939      * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
   1940      * will try to read from the device to guess the image format.
   1941      */
   1942     if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
   1943         bs->sg = true;
   1944     }
   1945 
   1946     task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
   1947                             SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
   1948                             (void **) &inq_vpd, errp);
   1949     if (task == NULL) {
   1950         ret = -EINVAL;
   1951         goto out;
   1952     }
   1953     for (i = 0; i < inq_vpd->num_pages; i++) {
   1954         struct scsi_task *inq_task;
   1955         struct scsi_inquiry_logical_block_provisioning *inq_lbp;
   1956         struct scsi_inquiry_block_limits *inq_bl;
   1957         struct scsi_inquiry_device_identification *inq_di;
   1958         switch (inq_vpd->pages[i]) {
   1959         case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
   1960             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
   1961                                         SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
   1962                                         (void **) &inq_lbp, errp);
   1963             if (inq_task == NULL) {
   1964                 ret = -EINVAL;
   1965                 goto out;
   1966             }
   1967             memcpy(&iscsilun->lbp, inq_lbp,
   1968                    sizeof(struct scsi_inquiry_logical_block_provisioning));
   1969             scsi_free_scsi_task(inq_task);
   1970             break;
   1971         case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
   1972             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
   1973                                     SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
   1974                                     (void **) &inq_bl, errp);
   1975             if (inq_task == NULL) {
   1976                 ret = -EINVAL;
   1977                 goto out;
   1978             }
   1979             memcpy(&iscsilun->bl, inq_bl,
   1980                    sizeof(struct scsi_inquiry_block_limits));
   1981             scsi_free_scsi_task(inq_task);
   1982             break;
   1983         case SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION:
   1984             inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
   1985                                     SCSI_INQUIRY_PAGECODE_DEVICE_IDENTIFICATION,
   1986                                     (void **) &inq_di, errp);
   1987             if (inq_task == NULL) {
   1988                 ret = -EINVAL;
   1989                 goto out;
   1990             }
   1991             iscsi_save_designator(iscsilun, inq_di);
   1992             scsi_free_scsi_task(inq_task);
   1993             break;
   1994         default:
   1995             break;
   1996         }
   1997     }
   1998     scsi_free_scsi_task(task);
   1999     task = NULL;
   2000 
   2001     qemu_mutex_init(&iscsilun->mutex);
   2002     iscsi_attach_aio_context(bs, iscsilun->aio_context);
   2003 
    2004     /* Guess the internal cluster (page) size of the iSCSI target from
    2005      * opt_unmap_gran. Use the unmap granularity only if it has a
    2006      * reasonable size. */
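             /* e.g. opt_unmap_gran = 2048 with a 512-byte block size gives a
              * 1 MiB cluster size. */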
   2007     if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
   2008         iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
   2009         iscsilun->cluster_size = iscsilun->bl.opt_unmap_gran *
   2010             iscsilun->block_size;
   2011         if (iscsilun->lbprz) {
   2012             ret = iscsi_allocmap_init(iscsilun, flags);
   2013         }
   2014     }
   2015 
   2016     if (iscsilun->lbprz && iscsilun->lbp.lbpws) {
   2017         bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
   2018     }
   2019 
   2020 out:
   2021     qemu_opts_del(opts);
   2022     g_free(initiator_name);
   2023     if (task != NULL) {
   2024         scsi_free_scsi_task(task);
   2025     }
   2026 
   2027     if (ret) {
   2028         if (iscsi != NULL) {
   2029             if (iscsi_is_logged_in(iscsi)) {
   2030                 iscsi_logout_sync(iscsi);
   2031             }
   2032             iscsi_destroy_context(iscsi);
   2033         }
   2034         memset(iscsilun, 0, sizeof(IscsiLun));
   2035     }
   2036 
   2037     return ret;
   2038 }
   2039 
   2040 static void iscsi_close(BlockDriverState *bs)
   2041 {
   2042     IscsiLun *iscsilun = bs->opaque;
   2043     struct iscsi_context *iscsi = iscsilun->iscsi;
   2044 
   2045     iscsi_detach_aio_context(bs);
   2046     if (iscsi_is_logged_in(iscsi)) {
   2047         iscsi_logout_sync(iscsi);
   2048     }
   2049     iscsi_destroy_context(iscsi);
   2050     if (iscsilun->dd) {
   2051         g_free(iscsilun->dd->designator);
   2052         g_free(iscsilun->dd);
   2053     }
   2054     g_free(iscsilun->zeroblock);
   2055     iscsi_allocmap_free(iscsilun);
   2056     qemu_mutex_destroy(&iscsilun->mutex);
   2057     memset(iscsilun, 0, sizeof(IscsiLun));
   2058 }
   2059 
   2060 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
   2061 {
   2062     /* We don't actually refresh here, but just return data queried in
   2063      * iscsi_open(): iscsi targets don't change their limits. */
   2064 
   2065     IscsiLun *iscsilun = bs->opaque;
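             /* READ/WRITE(10) carry a 16-bit transfer length (in blocks), the
              * 16-byte variants a 32-bit one. */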
   2066     uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
   2067     unsigned int block_size = MAX(BDRV_SECTOR_SIZE, iscsilun->block_size);
   2068 
   2069     assert(iscsilun->block_size >= BDRV_SECTOR_SIZE || bdrv_is_sg(bs));
   2070 
   2071     bs->bl.request_alignment = block_size;
   2072 
   2073     if (iscsilun->bl.max_xfer_len) {
   2074         max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
   2075     }
   2076 
   2077     if (max_xfer_len * block_size < INT_MAX) {
   2078         bs->bl.max_transfer = max_xfer_len * iscsilun->block_size;
   2079     }
   2080 
   2081     if (iscsilun->lbp.lbpu) {
   2082         bs->bl.max_pdiscard =
   2083             MIN_NON_ZERO(iscsilun->bl.max_unmap * iscsilun->block_size,
   2084                          (uint64_t)UINT32_MAX * iscsilun->block_size);
   2085         bs->bl.pdiscard_alignment =
   2086             iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
   2087     } else {
   2088         bs->bl.pdiscard_alignment = iscsilun->block_size;
   2089     }
   2090 
   2091     bs->bl.max_pwrite_zeroes =
   2092         MIN_NON_ZERO(iscsilun->bl.max_ws_len * iscsilun->block_size,
   2093                      max_xfer_len * iscsilun->block_size);
   2094 
   2095     if (iscsilun->lbp.lbpws) {
   2096         bs->bl.pwrite_zeroes_alignment =
   2097             iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
   2098     } else {
   2099         bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
   2100     }
   2101     if (iscsilun->bl.opt_xfer_len &&
   2102         iscsilun->bl.opt_xfer_len < INT_MAX / block_size) {
   2103         bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len *
   2104                                         iscsilun->block_size);
   2105     }
   2106 }
   2107 
   2108 /* Note that this will not re-establish a connection with an iSCSI target - it
   2109  * is effectively a NOP.  */
   2110 static int iscsi_reopen_prepare(BDRVReopenState *state,
   2111                                 BlockReopenQueue *queue, Error **errp)
   2112 {
   2113     IscsiLun *iscsilun = state->bs->opaque;
   2114 
   2115     if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
   2116         error_setg(errp, "Cannot open a write protected LUN as read-write");
   2117         return -EACCES;
   2118     }
   2119     return 0;
   2120 }
   2121 
   2122 static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
   2123 {
   2124     IscsiLun *iscsilun = reopen_state->bs->opaque;
   2125 
   2126     /* the cache.direct status might have changed */
   2127     if (iscsilun->allocmap != NULL) {
   2128         iscsi_allocmap_init(iscsilun, reopen_state->flags);
   2129     }
   2130 }
   2131 
   2132 static int coroutine_fn iscsi_co_truncate(BlockDriverState *bs, int64_t offset,
   2133                                           bool exact, PreallocMode prealloc,
   2134                                           BdrvRequestFlags flags, Error **errp)
   2135 {
   2136     IscsiLun *iscsilun = bs->opaque;
   2137     int64_t cur_length;
   2138     Error *local_err = NULL;
   2139 
   2140     if (prealloc != PREALLOC_MODE_OFF) {
   2141         error_setg(errp, "Unsupported preallocation mode '%s'",
   2142                    PreallocMode_str(prealloc));
   2143         return -ENOTSUP;
   2144     }
   2145 
   2146     if (iscsilun->type != TYPE_DISK) {
   2147         error_setg(errp, "Cannot resize non-disk iSCSI devices");
   2148         return -ENOTSUP;
   2149     }
   2150 
   2151     iscsi_readcapacity_sync(iscsilun, &local_err);
   2152     if (local_err != NULL) {
   2153         error_propagate(errp, local_err);
   2154         return -EIO;
   2155     }
   2156 
   2157     cur_length = iscsi_getlength(bs);
   2158     if (offset != cur_length && exact) {
   2159         error_setg(errp, "Cannot resize iSCSI devices");
   2160         return -ENOTSUP;
   2161     } else if (offset > cur_length) {
   2162         error_setg(errp, "Cannot grow iSCSI devices");
   2163         return -EINVAL;
   2164     }
   2165 
   2166     if (iscsilun->allocmap != NULL) {
   2167         iscsi_allocmap_init(iscsilun, bs->open_flags);
   2168     }
   2169 
   2170     return 0;
   2171 }
   2172 
   2173 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
   2174 {
   2175     IscsiLun *iscsilun = bs->opaque;
   2176     bdi->cluster_size = iscsilun->cluster_size;
   2177     return 0;
   2178 }
   2179 
   2180 static void coroutine_fn iscsi_co_invalidate_cache(BlockDriverState *bs,
   2181                                                    Error **errp)
   2182 {
   2183     IscsiLun *iscsilun = bs->opaque;
   2184     iscsi_allocmap_invalidate(iscsilun);
   2185 }
   2186 
   2187 static int coroutine_fn iscsi_co_copy_range_from(BlockDriverState *bs,
   2188                                                  BdrvChild *src,
   2189                                                  int64_t src_offset,
   2190                                                  BdrvChild *dst,
   2191                                                  int64_t dst_offset,
   2192                                                  int64_t bytes,
   2193                                                  BdrvRequestFlags read_flags,
   2194                                                  BdrvRequestFlags write_flags)
   2195 {
   2196     return bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
   2197                                  read_flags, write_flags);
   2198 }
   2199 
   2200 static struct scsi_task *iscsi_xcopy_task(int param_len)
   2201 {
   2202     struct scsi_task *task;
   2203 
   2204     task = g_new0(struct scsi_task, 1);
   2205 
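             /* 16-byte EXTENDED COPY CDB; bytes 10-13 hold the parameter list
              * length, big-endian. */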
   2206     task->cdb[0]     = EXTENDED_COPY;
   2207     task->cdb[10]    = (param_len >> 24) & 0xFF;
   2208     task->cdb[11]    = (param_len >> 16) & 0xFF;
   2209     task->cdb[12]    = (param_len >> 8) & 0xFF;
   2210     task->cdb[13]    = param_len & 0xFF;
   2211     task->cdb_size   = 16;
   2212     task->xfer_dir   = SCSI_XFER_WRITE;
   2213     task->expxferlen = param_len;
   2214 
   2215     return task;
   2216 }
   2217 
   2218 static void iscsi_populate_target_desc(unsigned char *desc, IscsiLun *lun)
   2219 {
   2220     struct scsi_inquiry_device_designator *dd = lun->dd;
   2221 
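             /* 32-byte identification descriptor CSCD: descriptor code 0xE4,
              * the designator saved from the device identification VPD page,
              * and the block size in the last four bytes (big-endian). */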
   2222     memset(desc, 0, 32);
   2223     desc[0] = 0xE4; /* IDENT_DESCR_TGT_DESCR */
   2224     desc[4] = dd->code_set;
   2225     desc[5] = (dd->designator_type & 0xF)
   2226         | ((dd->association & 3) << 4);
   2227     desc[7] = dd->designator_length;
   2228     memcpy(desc + 8, dd->designator, MIN(dd->designator_length, 20));
   2229 
   2230     desc[28] = 0;
   2231     desc[29] = (lun->block_size >> 16) & 0xFF;
   2232     desc[30] = (lun->block_size >> 8) & 0xFF;
   2233     desc[31] = lun->block_size & 0xFF;
   2234 }
   2235 
   2236 static void iscsi_xcopy_desc_hdr(uint8_t *hdr, int dc, int cat, int src_index,
   2237                                  int dst_index)
   2238 {
   2239     hdr[0] = 0x02; /* BLK_TO_BLK_SEG_DESCR */
   2240     hdr[1] = ((dc << 1) | cat) & 0xFF;
   2241     hdr[2] = (XCOPY_BLK2BLK_SEG_DESC_SIZE >> 8) & 0xFF;
    2242     /* don't account for the first 4 bytes of the descriptor header */
   2243     hdr[3] = (XCOPY_BLK2BLK_SEG_DESC_SIZE - 4 /* SEG_DESC_SRC_INDEX_OFFSET */) & 0xFF;
   2244     hdr[4] = (src_index >> 8) & 0xFF;
   2245     hdr[5] = src_index & 0xFF;
   2246     hdr[6] = (dst_index >> 8) & 0xFF;
   2247     hdr[7] = dst_index & 0xFF;
   2248 }
   2249 
   2250 static void iscsi_xcopy_populate_desc(uint8_t *desc, int dc, int cat,
   2251                                       int src_index, int dst_index, int num_blks,
   2252                                       uint64_t src_lba, uint64_t dst_lba)
   2253 {
   2254     iscsi_xcopy_desc_hdr(desc, dc, cat, src_index, dst_index);
   2255 
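             /* Block-to-block segment descriptor: block count in bytes 10-11,
              * 64-bit source and destination LBAs in bytes 12-19 and 20-27,
              * big-endian. */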
   2256     /* The caller should verify the request size */
   2257     assert(num_blks < 65536);
   2258     desc[10] = (num_blks >> 8) & 0xFF;
   2259     desc[11] = num_blks & 0xFF;
   2260     desc[12] = (src_lba >> 56) & 0xFF;
   2261     desc[13] = (src_lba >> 48) & 0xFF;
   2262     desc[14] = (src_lba >> 40) & 0xFF;
   2263     desc[15] = (src_lba >> 32) & 0xFF;
   2264     desc[16] = (src_lba >> 24) & 0xFF;
   2265     desc[17] = (src_lba >> 16) & 0xFF;
   2266     desc[18] = (src_lba >> 8) & 0xFF;
   2267     desc[19] = src_lba & 0xFF;
   2268     desc[20] = (dst_lba >> 56) & 0xFF;
   2269     desc[21] = (dst_lba >> 48) & 0xFF;
   2270     desc[22] = (dst_lba >> 40) & 0xFF;
   2271     desc[23] = (dst_lba >> 32) & 0xFF;
   2272     desc[24] = (dst_lba >> 24) & 0xFF;
   2273     desc[25] = (dst_lba >> 16) & 0xFF;
   2274     desc[26] = (dst_lba >> 8) & 0xFF;
   2275     desc[27] = dst_lba & 0xFF;
   2276 }
   2277 
   2278 static void iscsi_xcopy_populate_header(unsigned char *buf, int list_id, int str,
   2279                                         int list_id_usage, int prio,
   2280                                         int tgt_desc_len,
   2281                                         int seg_desc_len, int inline_data_len)
   2282 {
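             /* EXTENDED COPY parameter list header: byte 0 list id, byte 1
              * flags, bytes 2-3 target descriptor list length, bytes 8-11
              * segment descriptor list length, bytes 12-15 inline data
              * length, all big-endian. */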
   2283     buf[0] = list_id;
   2284     buf[1] = ((str & 1) << 5) | ((list_id_usage & 3) << 3) | (prio & 7);
   2285     buf[2] = (tgt_desc_len >> 8) & 0xFF;
   2286     buf[3] = tgt_desc_len & 0xFF;
   2287     buf[8] = (seg_desc_len >> 24) & 0xFF;
   2288     buf[9] = (seg_desc_len >> 16) & 0xFF;
   2289     buf[10] = (seg_desc_len >> 8) & 0xFF;
   2290     buf[11] = seg_desc_len & 0xFF;
   2291     buf[12] = (inline_data_len >> 24) & 0xFF;
   2292     buf[13] = (inline_data_len >> 16) & 0xFF;
   2293     buf[14] = (inline_data_len >> 8) & 0xFF;
   2294     buf[15] = inline_data_len & 0xFF;
   2295 }
   2296 
   2297 static void iscsi_xcopy_data(struct iscsi_data *data,
   2298                              IscsiLun *src, int64_t src_lba,
   2299                              IscsiLun *dst, int64_t dst_lba,
   2300                              uint16_t num_blocks)
   2301 {
   2302     uint8_t *buf;
   2303     const int src_offset = XCOPY_DESC_OFFSET;
   2304     const int dst_offset = XCOPY_DESC_OFFSET + IDENT_DESCR_TGT_DESCR_SIZE;
   2305     const int seg_offset = dst_offset + IDENT_DESCR_TGT_DESCR_SIZE;
   2306 
   2307     data->size = XCOPY_DESC_OFFSET +
   2308                  IDENT_DESCR_TGT_DESCR_SIZE * 2 +
   2309                  XCOPY_BLK2BLK_SEG_DESC_SIZE;
   2310     data->data = g_malloc0(data->size);
   2311     buf = data->data;
   2312 
   2313     /* Initialise the parameter list header */
   2314     iscsi_xcopy_populate_header(buf, 1, 0, 2 /* LIST_ID_USAGE_DISCARD */,
   2315                                 0, 2 * IDENT_DESCR_TGT_DESCR_SIZE,
   2316                                 XCOPY_BLK2BLK_SEG_DESC_SIZE,
   2317                                 0);
   2318 
   2319     /* Initialise CSCD list with one src + one dst descriptor */
   2320     iscsi_populate_target_desc(&buf[src_offset], src);
   2321     iscsi_populate_target_desc(&buf[dst_offset], dst);
   2322 
   2323     /* Initialise one segment descriptor */
   2324     iscsi_xcopy_populate_desc(&buf[seg_offset], 0, 0, 0, 1, num_blocks,
   2325                               src_lba, dst_lba);
   2326 }
   2327 
   2328 static int coroutine_fn iscsi_co_copy_range_to(BlockDriverState *bs,
   2329                                                BdrvChild *src,
   2330                                                int64_t src_offset,
   2331                                                BdrvChild *dst,
   2332                                                int64_t dst_offset,
   2333                                                int64_t bytes,
   2334                                                BdrvRequestFlags read_flags,
   2335                                                BdrvRequestFlags write_flags)
   2336 {
   2337     IscsiLun *dst_lun = dst->bs->opaque;
   2338     IscsiLun *src_lun;
   2339     struct IscsiTask iscsi_task;
   2340     struct iscsi_data data;
   2341     int r = 0;
   2342     int block_size;
   2343 
   2344     if (src->bs->drv->bdrv_co_copy_range_to != iscsi_co_copy_range_to) {
   2345         return -ENOTSUP;
   2346     }
   2347     src_lun = src->bs->opaque;
   2348 
   2349     if (!src_lun->dd || !dst_lun->dd) {
   2350         return -ENOTSUP;
   2351     }
   2352     if (!is_byte_request_lun_aligned(dst_offset, bytes, dst_lun)) {
   2353         return -ENOTSUP;
   2354     }
   2355     if (!is_byte_request_lun_aligned(src_offset, bytes, src_lun)) {
   2356         return -ENOTSUP;
   2357     }
   2358     if (dst_lun->block_size != src_lun->block_size ||
   2359         !dst_lun->block_size) {
   2360         return -ENOTSUP;
   2361     }
   2362 
   2363     block_size = dst_lun->block_size;
   2364     if (bytes / block_size > 65535) {
   2365         return -ENOTSUP;
   2366     }
   2367 
   2368     iscsi_xcopy_data(&data,
   2369                      src_lun, src_offset / block_size,
   2370                      dst_lun, dst_offset / block_size,
   2371                      bytes / block_size);
   2372 
   2373     iscsi_co_init_iscsitask(dst_lun, &iscsi_task);
   2374 
   2375     qemu_mutex_lock(&dst_lun->mutex);
   2376     iscsi_task.task = iscsi_xcopy_task(data.size);
   2377 retry:
   2378     if (iscsi_scsi_command_async(dst_lun->iscsi, dst_lun->lun,
   2379                                  iscsi_task.task, iscsi_co_generic_cb,
   2380                                  &data,
   2381                                  &iscsi_task) != 0) {
   2382         r = -EIO;
   2383         goto out_unlock;
   2384     }
   2385 
   2386     iscsi_co_wait_for_task(&iscsi_task, dst_lun);
   2387 
   2388     if (iscsi_task.do_retry) {
   2389         iscsi_task.complete = 0;
   2390         goto retry;
   2391     }
   2392 
   2393     if (iscsi_task.status != SCSI_STATUS_GOOD) {
   2394         r = iscsi_task.err_code;
   2395         goto out_unlock;
   2396     }
   2397 
   2398 out_unlock:
   2399 
   2400     trace_iscsi_xcopy(src_lun, src_offset, dst_lun, dst_offset, bytes, r);
   2401     g_free(iscsi_task.task);
   2402     qemu_mutex_unlock(&dst_lun->mutex);
   2403     g_free(iscsi_task.err_str);
   2404     return r;
   2405 }
   2406 
   2407 
   2408 static const char *const iscsi_strong_runtime_opts[] = {
   2409     "transport",
   2410     "portal",
   2411     "target",
   2412     "user",
   2413     "password",
   2414     "password-secret",
   2415     "lun",
   2416     "initiator-name",
   2417     "header-digest",
   2418 
   2419     NULL
   2420 };
   2421 
   2422 static BlockDriver bdrv_iscsi = {
   2423     .format_name     = "iscsi",
   2424     .protocol_name   = "iscsi",
   2425 
   2426     .instance_size          = sizeof(IscsiLun),
   2427     .bdrv_parse_filename    = iscsi_parse_filename,
   2428     .bdrv_file_open         = iscsi_open,
   2429     .bdrv_close             = iscsi_close,
   2430     .bdrv_co_create_opts    = bdrv_co_create_opts_simple,
   2431     .create_opts            = &bdrv_create_opts_simple,
   2432     .bdrv_reopen_prepare    = iscsi_reopen_prepare,
   2433     .bdrv_reopen_commit     = iscsi_reopen_commit,
   2434     .bdrv_co_invalidate_cache = iscsi_co_invalidate_cache,
   2435 
   2436     .bdrv_getlength  = iscsi_getlength,
   2437     .bdrv_get_info   = iscsi_get_info,
   2438     .bdrv_co_truncate    = iscsi_co_truncate,
   2439     .bdrv_refresh_limits = iscsi_refresh_limits,
   2440 
   2441     .bdrv_co_block_status  = iscsi_co_block_status,
   2442     .bdrv_co_pdiscard      = iscsi_co_pdiscard,
   2443     .bdrv_co_copy_range_from = iscsi_co_copy_range_from,
   2444     .bdrv_co_copy_range_to  = iscsi_co_copy_range_to,
   2445     .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
   2446     .bdrv_co_readv         = iscsi_co_readv,
   2447     .bdrv_co_writev        = iscsi_co_writev,
   2448     .bdrv_co_flush_to_disk = iscsi_co_flush,
   2449 
   2450 #ifdef __linux__
   2451     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
   2452 #endif
   2453 
   2454     .bdrv_detach_aio_context = iscsi_detach_aio_context,
   2455     .bdrv_attach_aio_context = iscsi_attach_aio_context,
   2456 
   2457     .strong_runtime_opts = iscsi_strong_runtime_opts,
   2458 };
   2459 
   2460 #if LIBISCSI_API_VERSION >= (20160603)
   2461 static BlockDriver bdrv_iser = {
   2462     .format_name     = "iser",
   2463     .protocol_name   = "iser",
   2464 
   2465     .instance_size          = sizeof(IscsiLun),
   2466     .bdrv_parse_filename    = iscsi_parse_filename,
   2467     .bdrv_file_open         = iscsi_open,
   2468     .bdrv_close             = iscsi_close,
   2469     .bdrv_co_create_opts    = bdrv_co_create_opts_simple,
   2470     .create_opts            = &bdrv_create_opts_simple,
   2471     .bdrv_reopen_prepare    = iscsi_reopen_prepare,
   2472     .bdrv_reopen_commit     = iscsi_reopen_commit,
   2473     .bdrv_co_invalidate_cache  = iscsi_co_invalidate_cache,
   2474 
   2475     .bdrv_getlength  = iscsi_getlength,
   2476     .bdrv_get_info   = iscsi_get_info,
   2477     .bdrv_co_truncate    = iscsi_co_truncate,
   2478     .bdrv_refresh_limits = iscsi_refresh_limits,
   2479 
   2480     .bdrv_co_block_status  = iscsi_co_block_status,
   2481     .bdrv_co_pdiscard      = iscsi_co_pdiscard,
   2482     .bdrv_co_copy_range_from = iscsi_co_copy_range_from,
   2483     .bdrv_co_copy_range_to  = iscsi_co_copy_range_to,
   2484     .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
   2485     .bdrv_co_readv         = iscsi_co_readv,
   2486     .bdrv_co_writev        = iscsi_co_writev,
   2487     .bdrv_co_flush_to_disk = iscsi_co_flush,
   2488 
   2489 #ifdef __linux__
   2490     .bdrv_aio_ioctl   = iscsi_aio_ioctl,
   2491 #endif
   2492 
   2493     .bdrv_detach_aio_context = iscsi_detach_aio_context,
   2494     .bdrv_attach_aio_context = iscsi_attach_aio_context,
   2495 
   2496     .strong_runtime_opts = iscsi_strong_runtime_opts,
   2497 };
   2498 #endif
   2499 
   2500 static void iscsi_block_init(void)
   2501 {
   2502     bdrv_register(&bdrv_iscsi);
   2503 #if LIBISCSI_API_VERSION >= (20160603)
   2504     bdrv_register(&bdrv_iser);
   2505 #endif
   2506 }
   2507 
   2508 block_init(iscsi_block_init);