forked from mirror/qemu
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
354 lines
9.9 KiB
C
354 lines
9.9 KiB
C
/*
|
|
* DMA helper functions
|
|
*
|
|
* Copyright (c) 2009,2020 Red Hat
|
|
*
|
|
* This work is licensed under the terms of the GNU General Public License
|
|
* (GNU GPL), version 2 or later.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "sysemu/block-backend.h"
|
|
#include "sysemu/dma.h"
|
|
#include "trace/trace-root.h"
|
|
#include "qemu/thread.h"
|
|
#include "qemu/main-loop.h"
|
|
#include "sysemu/cpu-timers.h"
|
|
#include "qemu/range.h"
|
|
|
|
/* #define DEBUG_IOMMU */
|
|
|
|
MemTxResult dma_memory_set(AddressSpace *as, dma_addr_t addr,
|
|
uint8_t c, dma_addr_t len, MemTxAttrs attrs)
|
|
{
|
|
dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);
|
|
|
|
return address_space_set(as, addr, c, len, attrs);
|
|
}
|
|
|
|
void qemu_sglist_init(QEMUSGList *qsg, DeviceState *dev, int alloc_hint,
|
|
AddressSpace *as)
|
|
{
|
|
qsg->sg = g_new(ScatterGatherEntry, alloc_hint);
|
|
qsg->nsg = 0;
|
|
qsg->nalloc = alloc_hint;
|
|
qsg->size = 0;
|
|
qsg->as = as;
|
|
qsg->dev = dev;
|
|
object_ref(OBJECT(dev));
|
|
}
|
|
|
|
void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
|
|
{
|
|
if (qsg->nsg == qsg->nalloc) {
|
|
qsg->nalloc = 2 * qsg->nalloc + 1;
|
|
qsg->sg = g_renew(ScatterGatherEntry, qsg->sg, qsg->nalloc);
|
|
}
|
|
qsg->sg[qsg->nsg].base = base;
|
|
qsg->sg[qsg->nsg].len = len;
|
|
qsg->size += len;
|
|
++qsg->nsg;
|
|
}
|
|
|
|
void qemu_sglist_destroy(QEMUSGList *qsg)
|
|
{
|
|
object_unref(OBJECT(qsg->dev));
|
|
g_free(qsg->sg);
|
|
memset(qsg, 0, sizeof(*qsg));
|
|
}
|
|
|
|
typedef struct {
|
|
BlockAIOCB common;
|
|
AioContext *ctx;
|
|
BlockAIOCB *acb;
|
|
QEMUSGList *sg;
|
|
uint32_t align;
|
|
uint64_t offset;
|
|
DMADirection dir;
|
|
int sg_cur_index;
|
|
dma_addr_t sg_cur_byte;
|
|
QEMUIOVector iov;
|
|
QEMUBH *bh;
|
|
DMAIOFunc *io_func;
|
|
void *io_func_opaque;
|
|
} DMAAIOCB;
|
|
|
|
static void dma_blk_cb(void *opaque, int ret);
|
|
|
|
static void reschedule_dma(void *opaque)
|
|
{
|
|
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
|
|
|
|
assert(!dbs->acb && dbs->bh);
|
|
qemu_bh_delete(dbs->bh);
|
|
dbs->bh = NULL;
|
|
dma_blk_cb(dbs, 0);
|
|
}
|
|
|
|
static void dma_blk_unmap(DMAAIOCB *dbs)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < dbs->iov.niov; ++i) {
|
|
dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
|
|
dbs->iov.iov[i].iov_len, dbs->dir,
|
|
dbs->iov.iov[i].iov_len);
|
|
}
|
|
qemu_iovec_reset(&dbs->iov);
|
|
}
|
|
|
|
static void dma_complete(DMAAIOCB *dbs, int ret)
|
|
{
|
|
trace_dma_complete(dbs, ret, dbs->common.cb);
|
|
|
|
assert(!dbs->acb && !dbs->bh);
|
|
dma_blk_unmap(dbs);
|
|
if (dbs->common.cb) {
|
|
dbs->common.cb(dbs->common.opaque, ret);
|
|
}
|
|
qemu_iovec_destroy(&dbs->iov);
|
|
qemu_aio_unref(dbs);
|
|
}
|
|
|
|
static void dma_blk_cb(void *opaque, int ret)
|
|
{
|
|
DMAAIOCB *dbs = (DMAAIOCB *)opaque;
|
|
dma_addr_t cur_addr, cur_len;
|
|
void *mem;
|
|
|
|
trace_dma_blk_cb(dbs, ret);
|
|
|
|
dbs->acb = NULL;
|
|
dbs->offset += dbs->iov.size;
|
|
|
|
if (dbs->sg_cur_index == dbs->sg->nsg || ret < 0) {
|
|
dma_complete(dbs, ret);
|
|
return;
|
|
}
|
|
dma_blk_unmap(dbs);
|
|
|
|
while (dbs->sg_cur_index < dbs->sg->nsg) {
|
|
cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
|
|
cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
|
|
mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir,
|
|
MEMTXATTRS_UNSPECIFIED);
|
|
/*
|
|
* Make reads deterministic in icount mode. Windows sometimes issues
|
|
* disk read requests with overlapping SGs. It leads
|
|
* to non-determinism, because resulting buffer contents may be mixed
|
|
* from several sectors. This code splits all SGs into several
|
|
* groups. SGs in every group do not overlap.
|
|
*/
|
|
if (mem && icount_enabled() && dbs->dir == DMA_DIRECTION_FROM_DEVICE) {
|
|
int i;
|
|
for (i = 0 ; i < dbs->iov.niov ; ++i) {
|
|
if (ranges_overlap((intptr_t)dbs->iov.iov[i].iov_base,
|
|
dbs->iov.iov[i].iov_len, (intptr_t)mem,
|
|
cur_len)) {
|
|
dma_memory_unmap(dbs->sg->as, mem, cur_len,
|
|
dbs->dir, cur_len);
|
|
mem = NULL;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (!mem)
|
|
break;
|
|
qemu_iovec_add(&dbs->iov, mem, cur_len);
|
|
dbs->sg_cur_byte += cur_len;
|
|
if (dbs->sg_cur_byte == dbs->sg->sg[dbs->sg_cur_index].len) {
|
|
dbs->sg_cur_byte = 0;
|
|
++dbs->sg_cur_index;
|
|
}
|
|
}
|
|
|
|
if (dbs->iov.size == 0) {
|
|
trace_dma_map_wait(dbs);
|
|
dbs->bh = aio_bh_new(dbs->ctx, reschedule_dma, dbs);
|
|
cpu_register_map_client(dbs->bh);
|
|
return;
|
|
}
|
|
|
|
if (!QEMU_IS_ALIGNED(dbs->iov.size, dbs->align)) {
|
|
qemu_iovec_discard_back(&dbs->iov,
|
|
QEMU_ALIGN_DOWN(dbs->iov.size, dbs->align));
|
|
}
|
|
|
|
aio_context_acquire(dbs->ctx);
|
|
dbs->acb = dbs->io_func(dbs->offset, &dbs->iov,
|
|
dma_blk_cb, dbs, dbs->io_func_opaque);
|
|
aio_context_release(dbs->ctx);
|
|
assert(dbs->acb);
|
|
}
|
|
|
|
static void dma_aio_cancel(BlockAIOCB *acb)
|
|
{
|
|
DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
|
|
|
|
trace_dma_aio_cancel(dbs);
|
|
|
|
assert(!(dbs->acb && dbs->bh));
|
|
if (dbs->acb) {
|
|
/* This will invoke dma_blk_cb. */
|
|
blk_aio_cancel_async(dbs->acb);
|
|
return;
|
|
}
|
|
|
|
if (dbs->bh) {
|
|
cpu_unregister_map_client(dbs->bh);
|
|
qemu_bh_delete(dbs->bh);
|
|
dbs->bh = NULL;
|
|
}
|
|
if (dbs->common.cb) {
|
|
dbs->common.cb(dbs->common.opaque, -ECANCELED);
|
|
}
|
|
}
|
|
|
|
static AioContext *dma_get_aio_context(BlockAIOCB *acb)
|
|
{
|
|
DMAAIOCB *dbs = container_of(acb, DMAAIOCB, common);
|
|
|
|
return dbs->ctx;
|
|
}
|
|
|
|
static const AIOCBInfo dma_aiocb_info = {
|
|
.aiocb_size = sizeof(DMAAIOCB),
|
|
.cancel_async = dma_aio_cancel,
|
|
.get_aio_context = dma_get_aio_context,
|
|
};
|
|
|
|
BlockAIOCB *dma_blk_io(AioContext *ctx,
|
|
QEMUSGList *sg, uint64_t offset, uint32_t align,
|
|
DMAIOFunc *io_func, void *io_func_opaque,
|
|
BlockCompletionFunc *cb,
|
|
void *opaque, DMADirection dir)
|
|
{
|
|
DMAAIOCB *dbs = qemu_aio_get(&dma_aiocb_info, NULL, cb, opaque);
|
|
|
|
trace_dma_blk_io(dbs, io_func_opaque, offset, (dir == DMA_DIRECTION_TO_DEVICE));
|
|
|
|
dbs->acb = NULL;
|
|
dbs->sg = sg;
|
|
dbs->ctx = ctx;
|
|
dbs->offset = offset;
|
|
dbs->align = align;
|
|
dbs->sg_cur_index = 0;
|
|
dbs->sg_cur_byte = 0;
|
|
dbs->dir = dir;
|
|
dbs->io_func = io_func;
|
|
dbs->io_func_opaque = io_func_opaque;
|
|
dbs->bh = NULL;
|
|
qemu_iovec_init(&dbs->iov, sg->nsg);
|
|
dma_blk_cb(dbs, 0);
|
|
return &dbs->common;
|
|
}
|
|
|
|
|
|
static
|
|
BlockAIOCB *dma_blk_read_io_func(int64_t offset, QEMUIOVector *iov,
|
|
BlockCompletionFunc *cb, void *cb_opaque,
|
|
void *opaque)
|
|
{
|
|
BlockBackend *blk = opaque;
|
|
return blk_aio_preadv(blk, offset, iov, 0, cb, cb_opaque);
|
|
}
|
|
|
|
BlockAIOCB *dma_blk_read(BlockBackend *blk,
|
|
QEMUSGList *sg, uint64_t offset, uint32_t align,
|
|
void (*cb)(void *opaque, int ret), void *opaque)
|
|
{
|
|
return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
|
|
dma_blk_read_io_func, blk, cb, opaque,
|
|
DMA_DIRECTION_FROM_DEVICE);
|
|
}
|
|
|
|
static
|
|
BlockAIOCB *dma_blk_write_io_func(int64_t offset, QEMUIOVector *iov,
|
|
BlockCompletionFunc *cb, void *cb_opaque,
|
|
void *opaque)
|
|
{
|
|
BlockBackend *blk = opaque;
|
|
return blk_aio_pwritev(blk, offset, iov, 0, cb, cb_opaque);
|
|
}
|
|
|
|
BlockAIOCB *dma_blk_write(BlockBackend *blk,
|
|
QEMUSGList *sg, uint64_t offset, uint32_t align,
|
|
void (*cb)(void *opaque, int ret), void *opaque)
|
|
{
|
|
return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
|
|
dma_blk_write_io_func, blk, cb, opaque,
|
|
DMA_DIRECTION_TO_DEVICE);
|
|
}
|
|
|
|
|
|
static MemTxResult dma_buf_rw(void *buf, dma_addr_t len, dma_addr_t *residual,
|
|
QEMUSGList *sg, DMADirection dir,
|
|
MemTxAttrs attrs)
|
|
{
|
|
uint8_t *ptr = buf;
|
|
dma_addr_t xresidual;
|
|
int sg_cur_index;
|
|
MemTxResult res = MEMTX_OK;
|
|
|
|
xresidual = sg->size;
|
|
sg_cur_index = 0;
|
|
len = MIN(len, xresidual);
|
|
while (len > 0) {
|
|
ScatterGatherEntry entry = sg->sg[sg_cur_index++];
|
|
dma_addr_t xfer = MIN(len, entry.len);
|
|
res |= dma_memory_rw(sg->as, entry.base, ptr, xfer, dir, attrs);
|
|
ptr += xfer;
|
|
len -= xfer;
|
|
xresidual -= xfer;
|
|
}
|
|
|
|
if (residual) {
|
|
*residual = xresidual;
|
|
}
|
|
return res;
|
|
}
|
|
|
|
MemTxResult dma_buf_read(void *ptr, dma_addr_t len, dma_addr_t *residual,
|
|
QEMUSGList *sg, MemTxAttrs attrs)
|
|
{
|
|
return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_FROM_DEVICE, attrs);
|
|
}
|
|
|
|
MemTxResult dma_buf_write(void *ptr, dma_addr_t len, dma_addr_t *residual,
|
|
QEMUSGList *sg, MemTxAttrs attrs)
|
|
{
|
|
return dma_buf_rw(ptr, len, residual, sg, DMA_DIRECTION_TO_DEVICE, attrs);
|
|
}
|
|
|
|
void dma_acct_start(BlockBackend *blk, BlockAcctCookie *cookie,
|
|
QEMUSGList *sg, enum BlockAcctType type)
|
|
{
|
|
block_acct_start(blk_get_stats(blk), cookie, sg->size, type);
|
|
}
|
|
|
|
uint64_t dma_aligned_pow2_mask(uint64_t start, uint64_t end, int max_addr_bits)
|
|
{
|
|
uint64_t max_mask = UINT64_MAX, addr_mask = end - start;
|
|
uint64_t alignment_mask, size_mask;
|
|
|
|
if (max_addr_bits != 64) {
|
|
max_mask = (1ULL << max_addr_bits) - 1;
|
|
}
|
|
|
|
alignment_mask = start ? (start & -start) - 1 : max_mask;
|
|
alignment_mask = MIN(alignment_mask, max_mask);
|
|
size_mask = MIN(addr_mask, max_mask);
|
|
|
|
if (alignment_mask <= size_mask) {
|
|
/* Increase the alignment of start */
|
|
return alignment_mask;
|
|
} else {
|
|
/* Find the largest page mask from size */
|
|
if (addr_mask == UINT64_MAX) {
|
|
return UINT64_MAX;
|
|
}
|
|
return (1ULL << (63 - clz64(addr_mask + 1))) - 1;
|
|
}
|
|
}
|
|
|