diff --git a/include/libvfio-user.h b/include/libvfio-user.h index 9b814208..76181ad2 100644 --- a/include/libvfio-user.h +++ b/include/libvfio-user.h @@ -697,7 +697,9 @@ vfu_addr_to_sgl(vfu_ctx_t *vfu_ctx, vfu_dma_addr_t dma_addr, size_t len, * vfu_sgl_put(). * * This is only supported when a @dma_unregister callback is provided to - * vfu_setup_device_dma(). + * vfu_setup_device_dma(). In addition, the client must have registered the + * relevant DMA regions to allow mmap()-ed access and supplied file + * descriptors, otherwise this function will fail with EFAULT. * * @vfu_ctx: the libvfio-user context * @sgl: array of scatter/gather entries returned by vfu_addr_to_sg. These @@ -738,12 +740,19 @@ vfu_sgl_mark_dirty(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, size_t cnt); void vfu_sgl_put(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, struct iovec *iov, size_t cnt); +/* Flags to vfu_sgl_read() and vfu_sgl_write(). */ + +/* Perform direct access via file descriptor, if possible. */ +#define VFU_SGL_DIRECT_ACCESS (1) + /** * Read from the dma region exposed by the client. This can be used as an * alternative to reading from a vfu_sgl_get() mapping, if the region is not * directly mappable, or DMA notification callbacks have not been provided. * - * The implementation involves a round-trip communication with the client. + * When the VFU_SGL_DIRECT_ACCESS flag is specified, the DMA operation is + * performed against a client-supplied file descriptor, if applicable. + * Otherwise, the operation incurs an IPC round-trip with the client. * * Note that currently, only one @sg entry is supported (@sg_cnt must be 1). * @@ -751,7 +760,7 @@ vfu_sgl_put(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, struct iovec *iov, size_t cnt); * @sg: array of scatter/gather entries * @sg_cnt: number of scatter/gather entries * @data: data buffer to read - * @flags: must be 0 + * @flags: VFU_SGL_* flags * * @returns 0 on success, -1 on failure. Sets errno. */ @@ -764,18 +773,21 @@ vfu_sgl_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, size_t sg_cnt, * alternative to reading from a vfu_sgl_get() mapping, if the region is not * directly mappable, or DMA notification callbacks have not been provided. * - * The implementation involves a round-trip communication with the client. + * When the VFU_SGL_DIRECT_ACCESS flag is specified, the DMA operation is + * performed against a client-supplied file descriptor, if applicable. + * Otherwise, the operation incurs an IPC round-trip with the client. * * Note that currently, only one @sg entry is supported (@sg_cnt must be 1). * - * During live migration, this call does not mark any of the written pages as - * dirty; the client is expected to track this. + * During live migration, this call does only mark the written pages as dirty + * if the access is performed directly against a file descriptor. For access + * via IPC, the client is expected to track which pages have been written. * * @vfu_ctx: the libvfio-user context * @sg: array of scatter/gather entries * @sg_cnt: number of scatter/gather entries * @data: data buffer to write - * @flags: must be 0 + * @flags: VFU_SGL_* flags * * @returns 0 on success, -1 on failure. Sets errno. */ diff --git a/include/vfio-user.h b/include/vfio-user.h index 0b115d32..eb1b1414 100644 --- a/include/vfio-user.h +++ b/include/vfio-user.h @@ -122,6 +122,8 @@ struct vfio_user_dma_map { uint32_t argsz; #define VFIO_USER_F_DMA_REGION_READ (1 << 0) #define VFIO_USER_F_DMA_REGION_WRITE (1 << 1) +#define VFIO_USER_F_DMA_REGION_MMAP (1 << 2) +#define VFIO_USER_F_DMA_REGION_FILE_IO (1 << 3) uint32_t flags; uint64_t offset; uint64_t addr; diff --git a/lib/dma.c b/lib/dma.c index 724c170d..c0bea6db 100644 --- a/lib/dma.c +++ b/lib/dma.c @@ -52,7 +52,7 @@ dma_sg_size(void) bool dma_sg_is_mappable(const dma_sg_t *sg) { - return sg->region->info.vaddr != NULL; + return sg->region->access_mode == REGION_ACCESS_MODE_MMAP; } static inline ssize_t @@ -69,7 +69,7 @@ fd_get_blocksize(int fd) static int dirty_page_logging_start_on_region(dma_memory_region_t *region, size_t pgsize) { - assert(region->fd != -1); + assert(region->access_mode != REGION_ACCESS_MODE_MSG); ssize_t size = get_bitmap_size(region->info.iova.iov_len, pgsize); if (size < 0) { @@ -158,13 +158,42 @@ MOCK_DEFINE(dma_controller_unmap_region)(dma_controller_t *dma, region->fd, region->info.mapping.iov_base, iov_end(®ion->info.mapping)); } +} + +static void +dma_controller_destroy_region(dma_controller_t *dma, dma_memory_region_t *region, + vfu_dma_unregister_cb_t *dma_unregister, + void *data) +{ + int err; - assert(region->fd != -1); + assert(dma != NULL); + assert(region != NULL); - dirty_page_logging_stop_on_region(region); + if (dma_unregister != NULL) { + dma->vfu_ctx->in_cb = CB_DMA_UNREGISTER; + dma_unregister(data, ®ion->info); + dma->vfu_ctx->in_cb = CB_NONE; + } + + switch (region->access_mode) { + case REGION_ACCESS_MODE_MSG: + assert(region->fd == -1); + break; + case REGION_ACCESS_MODE_MMAP: + if (region->info.vaddr != NULL) { + dma_controller_unmap_region(dma, region); + } + /* fall through */ + case REGION_ACCESS_MODE_FILE_IO: + err = fd_cache_put(®ion->fd); + assert(err == 0); + break; + } - err = fd_cache_put(®ion->fd); - assert(err == 0); + dma_controller_increment_regions_generation(dma); + dirty_page_logging_stop_on_region(region); + free(region); } /* FIXME not thread safe */ @@ -190,21 +219,8 @@ MOCK_DEFINE(dma_controller_remove_region)(dma_controller_t *dma, return ERROR_INT(ENOENT); } - if (dma_unregister != NULL) { - dma->vfu_ctx->in_cb = CB_DMA_UNREGISTER; - dma_unregister(data, ®ion->info); - dma->vfu_ctx->in_cb = CB_NONE; - } - - if (region->info.vaddr != NULL) { - dma_controller_unmap_region(dma, region); - } else { - assert(region->fd == -1); - } - btree_iter_remove(&iter); - dma_controller_increment_regions_generation(dma); - free(region); + dma_controller_destroy_region(dma, region, dma_unregister, data); return 0; } @@ -228,19 +244,7 @@ dma_controller_remove_all_regions(dma_controller_t *dma, region->info.vaddr, region->info.mapping.iov_base, iov_end(®ion->info.mapping)); - if (dma_unregister != NULL) { - dma->vfu_ctx->in_cb = CB_DMA_UNREGISTER; - dma_unregister(data, ®ion->info); - dma->vfu_ctx->in_cb = CB_NONE; - } - - if (region->info.vaddr != NULL) { - dma_controller_unmap_region(dma, region); - } else { - assert(region->fd == -1); - } - - free(region); + dma_controller_destroy_region(dma, region, dma_unregister, data); } } @@ -289,7 +293,8 @@ dma_map_region(dma_controller_t *dma, dma_memory_region_t *region) dma_memory_region_t * MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, vfu_dma_addr_t dma_addr, uint64_t size, - int fd, off_t offset, uint32_t prot) + int *fd, off_t offset, uint32_t prot, + enum region_access_mode access_mode) { dma_memory_region_t *existing = NULL; dma_memory_region_t *region = NULL; @@ -297,12 +302,11 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, int page_size = 0; char rstr[1024]; int ret = 0; - int err; assert(dma != NULL); snprintf(rstr, sizeof(rstr), "[%p, %p) fd=%d offset=%#llx prot=%#x", - dma_addr, dma_addr + size, fd, (ull_t)offset, prot); + dma_addr, dma_addr + size, *fd, (ull_t)offset, prot); if (size > dma->max_size) { vfu_log(dma->vfu_ctx, LOG_ERR, "DMA region size %llu > max %zu", @@ -322,7 +326,7 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, (ull_t)existing->offset); return ERROR_PTR(EINVAL); } - if (fd_cache_is_same_file(existing->fd, fd) != 0) { + if (fd_cache_is_same_file(existing->fd, *fd) != 0) { vfu_log(dma->vfu_ctx, LOG_ERR, "bad fd for new DMA region %s; " "existing=%d", rstr, existing->fd); return ERROR_PTR(EINVAL); @@ -332,7 +336,7 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, "%s; existing=%#x", rstr, existing->info.prot); return ERROR_PTR(EINVAL); } - close_safely(&fd); + close_safely(fd); return existing; } @@ -353,82 +357,81 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, return ERROR_PTR(EINVAL); } - if (fd != -1) { - page_size = fd_get_blocksize(fd); - if (page_size < 0) { - vfu_log(dma->vfu_ctx, LOG_ERR, "bad page size %d", page_size); - return ERROR_PTR(EINVAL); - } - - fd = fd_cache_get(fd); - if (fd == -1) { - vfu_log(dma->vfu_ctx, LOG_ERR, - "failed to de-duplicate fd for new DMA region %s: %m", - rstr); - return NULL; - } - } - page_size = MAX(page_size, getpagesize()); - region = calloc(1, sizeof(*region)); if (region == NULL) { - errno = ENOMEM; - goto rollback; + vfu_log(dma->vfu_ctx, LOG_ERR, "failed to allocate region entry"); + return ERROR_PTR(ENOMEM); } region->info.iova.iov_base = (void *)dma_addr; region->info.iova.iov_len = size; - region->info.page_size = page_size; region->info.prot = prot; + region->info.page_size = getpagesize(); + region->access_mode = access_mode; region->offset = offset; - region->fd = fd; + region->fd = -1; - if (fd != -1) { - /* - * TODO introduce a function that tells whether dirty page logging is - * enabled - */ - if (dma->dirty_pgsize != 0) { - if (dirty_page_logging_start_on_region(region, dma->dirty_pgsize) < 0) { - /* - * TODO We don't necessarily have to fail, we can continue - * and fail the get dirty page bitmap request later. - */ - goto rollback; + switch (access_mode) { + case REGION_ACCESS_MODE_MSG: + assert(*fd == -1); + break; + case REGION_ACCESS_MODE_MMAP: + case REGION_ACCESS_MODE_FILE_IO: + assert(*fd != -1); + region->fd = fd_cache_get(*fd); + if (region->fd == -1) { + vfu_log(dma->vfu_ctx, LOG_ERR, + "failed to de-duplicate fd for new DMA region %s: %m", + rstr); + goto destroy_region; } - } + *fd = -1; /* The fd is now owned by the cache */ - ret = dma_map_region(dma, region); + page_size = fd_get_blocksize(region->fd); + if (page_size < 0) { + vfu_log(dma->vfu_ctx, LOG_ERR, "bad page size %d", page_size); + errno = EINVAL; + goto destroy_region; + } + region->info.page_size = MAX(region->info.page_size, (size_t)page_size); - if (ret != 0) { - vfu_log(dma->vfu_ctx, LOG_ERR, - "failed to memory map DMA region %s: %m", rstr); - goto rollback; - } else { - /* Ownership of the fd is now with the region. */ - fd = -1; - } + /* + * TODO introduce a function that tells whether dirty page logging is + * enabled + */ + if (dma->dirty_pgsize != 0) { + if (dirty_page_logging_start_on_region(region, dma->dirty_pgsize) < 0) { + /* + * TODO We don't necessarily have to fail, we can continue + * and fail the get dirty page bitmap request later. + */ + goto destroy_region; + } + } + + if (access_mode == REGION_ACCESS_MODE_MMAP) { + ret = dma_map_region(dma, region); + + if (ret != 0) { + vfu_log(dma->vfu_ctx, LOG_ERR, + "failed to memory map DMA region %s: %m", rstr); + goto destroy_region; + } + } + break; } if (btree_iter_insert(&iter, (uintptr_t)dma_addr + size - 1, region) != 0) { - goto rollback; + goto destroy_region; } dma_controller_increment_regions_generation(dma); return region; -rollback: +destroy_region: ret = errno; - if (region != NULL) { - if (region->info.vaddr != NULL) { - dma_controller_unmap_region(dma, region); - } - dirty_page_logging_stop_on_region(region); - free(region); - } - err = fd_cache_put(&fd); - assert(err == 0); + dma_controller_destroy_region(dma, region, NULL, NULL); return ERROR_PTR(ret); } @@ -513,7 +516,7 @@ dma_controller_dirty_page_logging_start(dma_controller_t *dma, size_t pgsize) for (btree_iter_init(&dma->regions, 0, &iter); (region = btree_iter_get(&iter, NULL)) != NULL; btree_iter_next(&iter)) { - if (region->fd == -1) { + if (region->access_mode == REGION_ACCESS_MODE_MSG) { continue; } @@ -776,8 +779,8 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, region = sg.region; - if (region->fd == -1) { - vfu_log(dma->vfu_ctx, LOG_ERR, "region [%p-%p] is not mapped", + if (region->access_mode == REGION_ACCESS_MODE_MSG) { + vfu_log(dma->vfu_ctx, LOG_ERR, "region [%p-%p] isn't accessed directly", region->info.iova.iov_base, iov_end(®ion->info.iova)); return ERROR_INT(EINVAL); } diff --git a/lib/dma.h b/lib/dma.h index 90200a61..890143f5 100644 --- a/lib/dma.h +++ b/lib/dma.h @@ -85,9 +85,20 @@ struct vfu_ctx; +/* Designates how to access a DMA target region */ +enum region_access_mode { + /* Use VFIO_USER_DMA_{READ,WRITE} messages on socket */ + REGION_ACCESS_MODE_MSG, + /* mmap() provided file descriptor */ + REGION_ACCESS_MODE_MMAP, + /* pread()/pwrite() on provided file descriptor */ + REGION_ACCESS_MODE_FILE_IO, +}; + typedef struct { vfu_dma_info_t info; - int fd; // File descriptor to mmap + enum region_access_mode access_mode; + int fd; // File descriptor for direct access off_t offset; // File offset uint8_t *dirty_bitmap; // Dirty page bitmap } dma_memory_region_t; @@ -127,7 +138,8 @@ dma_controller_destroy(dma_controller_t *dma); */ MOCK_DECLARE(dma_memory_region_t *, dma_controller_add_region, dma_controller_t *dma, vfu_dma_addr_t dma_addr, uint64_t size, - int fd, off_t offset, uint32_t prot); + int *fd, off_t offset, uint32_t prot, + enum region_access_mode access_mode); MOCK_DECLARE(int, dma_controller_remove_region, dma_controller_t *dma, vfu_dma_addr_t dma_addr, size_t size, @@ -294,9 +306,10 @@ dma_sgl_get(dma_controller_t *dma, dma_sg_t *sgl, struct iovec *iov, size_t cnt) return ERROR_INT(EINVAL); } - if (sg->region->info.vaddr == NULL) { + if (sg->region->access_mode != REGION_ACCESS_MODE_MMAP) { return ERROR_INT(EFAULT); } + assert(sg->region->info.vaddr != NULL); #ifdef DEBUG_SGL vfu_log(dma->vfu_ctx, LOG_DEBUG, "map %p-%p", diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 21a4545f..407061a7 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -676,6 +676,7 @@ int handle_dma_map(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, struct vfio_user_dma_map *dma_map) { + enum region_access_mode access_mode; dma_memory_region_t *region; char rstr[1024]; int fd = -1; @@ -699,6 +700,14 @@ handle_dma_map(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, vfu_log(vfu_ctx, LOG_DEBUG, "adding DMA region %s", rstr); + if (msg->in.nr_fds > 0) { + fd = consume_fd(msg->in.fds, msg->in.nr_fds, 0); + if (fd < 0) { + vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: %m", rstr); + return -1; + } + } + if (dma_map->flags & VFIO_USER_F_DMA_REGION_READ) { prot |= PROT_READ; dma_map->flags &= ~VFIO_USER_F_DMA_REGION_READ; @@ -709,22 +718,37 @@ handle_dma_map(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, dma_map->flags &= ~VFIO_USER_F_DMA_REGION_WRITE; } + if (dma_map->flags & VFIO_USER_F_DMA_REGION_MMAP) { + if (fd == -1) { + vfu_log(vfu_ctx, LOG_ERR, + "file descriptor required for MMAP DMA region"); + return ERROR_INT(EINVAL); + } + access_mode = REGION_ACCESS_MODE_MMAP; + dma_map->flags &= ~VFIO_USER_F_DMA_REGION_MMAP; + } else if (dma_map->flags & VFIO_USER_F_DMA_REGION_FILE_IO) { + if (fd == -1) { + vfu_log(vfu_ctx, LOG_ERR, + "file descriptor required for FILE_IO DMA region"); + return ERROR_INT(EINVAL); + } + access_mode = REGION_ACCESS_MODE_FILE_IO; + dma_map->flags &= ~VFIO_USER_F_DMA_REGION_FILE_IO; + } else { + /* Compatibility: default to mmap()-ed access if an fd is provided. */ + access_mode = + fd != -1 ? REGION_ACCESS_MODE_MMAP : REGION_ACCESS_MODE_MSG; + } + if (dma_map->flags != 0) { vfu_log(vfu_ctx, LOG_ERR, "bad flags=%#x", dma_map->flags); + close_safely(&fd); return ERROR_INT(EINVAL); } - if (msg->in.nr_fds > 0) { - fd = consume_fd(msg->in.fds, msg->in.nr_fds, 0); - if (fd < 0) { - vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: %m", rstr); - return -1; - } - } - region = dma_controller_add_region( vfu_ctx->dma, (vfu_dma_addr_t)(uintptr_t)dma_map->addr, dma_map->size, - fd, dma_map->offset, prot); + &fd, dma_map->offset, prot, access_mode); if (region == NULL) { vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: %m", rstr); close_safely(&fd); @@ -2372,7 +2396,7 @@ vfu_sgl_put(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, static int vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd, - dma_sg_t *sg, void *data) + int flags, dma_sg_t *sg, void *data) { struct vfio_user_dma_region_access *dma_reply; struct vfio_user_dma_region_access *dma_req; @@ -2391,6 +2415,48 @@ vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd, return ERROR_INT(EPERM); } + if (flags & VFU_SGL_DIRECT_ACCESS) { + if (sg->region->access_mode == REGION_ACCESS_MODE_MMAP) { + void *target, *src, *dst; + assert(sg->region->info.vaddr != NULL); + target = sg->region->info.vaddr + sg->offset; + src = cmd == VFIO_USER_DMA_READ ? target : data; + dst = cmd == VFIO_USER_DMA_READ ? data : target; + if (cmd == VFIO_USER_DMA_WRITE) { + dma_sgl_mark_dirty(vfu_ctx->dma, sg, 1); + } + memcpy(dst, src, sg->length); + return 0; + } + + if (sg->region->access_mode == REGION_ACCESS_MODE_FILE_IO) { + size_t length, offset; + assert(sg->region->fd != -1); + length = sg->length; + offset = sg->offset + sg->region->offset; + if (cmd == VFIO_USER_DMA_WRITE) { + dma_sgl_mark_dirty(vfu_ctx->dma, sg, 1); + } + while (length > 0) { + ssize_t ret; + if (cmd == VFIO_USER_DMA_READ) { + ret = pread(sg->region->fd, data, length, offset); + } else { + ret = pwrite(sg->region->fd, data, length, offset); + } + if (ret <= 0) { + return ERROR_INT(EIO); + } + data += ret; + offset += ret; + length -= ret; + } + return 0; + } + + assert(sg->region->access_mode == REGION_ACCESS_MODE_MSG); + } + rlen = sizeof(struct vfio_user_dma_region_access) + MIN(sg->length, vfu_ctx->client_max_data_xfer_size); @@ -2473,7 +2539,7 @@ vfu_sgl_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, size_t sg_cnt, { assert(vfu_ctx->pending.state == VFU_CTX_PENDING_NONE); - if (flags != 0) { + if (flags & ~VFU_SGL_DIRECT_ACCESS) { return ERROR_INT(EINVAL); } @@ -2482,7 +2548,7 @@ vfu_sgl_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, size_t sg_cnt, return ERROR_INT(ENOTSUP); } - return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_READ, sgl, data); + return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_READ, flags, sgl, data); } EXPORT int @@ -2491,7 +2557,7 @@ vfu_sgl_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, size_t sg_cnt, { assert(vfu_ctx->pending.state == VFU_CTX_PENDING_NONE); - if (flags != 0) { + if (flags & ~VFU_SGL_DIRECT_ACCESS) { return ERROR_INT(EINVAL); } @@ -2500,7 +2566,7 @@ vfu_sgl_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, size_t sg_cnt, return ERROR_INT(ENOTSUP); } - return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, sgl, data); + return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, flags, sgl, data); } EXPORT bool diff --git a/test/mocks.c b/test/mocks.c index 6b413526..35d47204 100644 --- a/test/mocks.c +++ b/test/mocks.c @@ -112,13 +112,13 @@ unpatch_all(void) } dma_memory_region_t * -dma_controller_add_region(dma_controller_t *dma, void *dma_addr, - uint64_t size, int fd, off_t offset, - uint32_t prot) +dma_controller_add_region(dma_controller_t *dma, void *dma_addr, uint64_t size, + int *fd, off_t offset, uint32_t prot, + enum region_access_mode access_mode) { if (!is_patched("dma_controller_add_region")) { return __real_dma_controller_add_region(dma, dma_addr, size, fd, offset, - prot); + prot, access_mode); } check_expected_ptr(dma); @@ -127,6 +127,7 @@ dma_controller_add_region(dma_controller_t *dma, void *dma_addr, check_expected_int(fd); check_expected_int(offset); check_expected_uint(prot); + check_expected_int(access_mode); errno = mock(); return mock_ptr_type(dma_memory_region_t *); } diff --git a/test/py/libvfio_user.py b/test/py/libvfio_user.py index e756780d..0efbc592 100644 --- a/test/py/libvfio_user.py +++ b/test/py/libvfio_user.py @@ -214,6 +214,8 @@ def is_32bit(): VFIO_USER_F_DMA_REGION_READ = (1 << 0) VFIO_USER_F_DMA_REGION_WRITE = (1 << 1) +VFIO_USER_F_DMA_REGION_MMAP = (1 << 2) +VFIO_USER_F_DMA_REGION_FILE_IO = (1 << 3) VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP = (1 << 0) @@ -279,6 +281,8 @@ def is_32bit(): VFU_MIGR_CALLBACKS_VERS = 2 +VFU_SGL_DIRECT_ACCESS = 1 + SOCK_PATH = b"/tmp/vfio-user.sock.%d" % os.getpid() topdir = os.path.realpath(os.path.dirname(__file__) + "/../..") diff --git a/test/py/test_sgl_read_write.py b/test/py/test_sgl_read_write.py index 4fd31b62..6ec4779a 100644 --- a/test/py/test_sgl_read_write.py +++ b/test/py/test_sgl_read_write.py @@ -29,7 +29,10 @@ # from libvfio_user import * +import mmap +import pytest import select +import tempfile import threading MAP_ADDR = 0x10000000 @@ -39,6 +42,11 @@ client = None +class Counter: + def __init__(self): + self.value = 0 + + class DMARegionHandler: """ A helper to service DMA region accesses arriving over a socket. Accesses @@ -46,7 +54,7 @@ class DMARegionHandler: takes place on a separate thread so as to not block the test code. """ - def __handle_requests(sock, pipe, buf, lock, addr, error_no): + def __handle_requests(sock, pipe, buf, counter, lock, addr, error_no): while True: (ready, _, _) = select.select([sock, pipe], [], []) if pipe in ready: @@ -63,6 +71,7 @@ def __handle_requests(sock, pipe, buf, lock, addr, error_no): offset = access.addr - addr with lock: + counter.value += 1 if cmd == VFIO_USER_DMA_READ: data = buf[offset:offset + access.count] else: @@ -80,16 +89,20 @@ def __handle_requests(sock, pipe, buf, lock, addr, error_no): sock.close() def __init__(self, sock, addr, size, error_no=0): - self.data = bytearray(size) + self.file = tempfile.TemporaryFile() + self.file.truncate(size) + self.data = mmap.mmap(self.file.fileno(), 0) self.data_lock = threading.Lock() self.addr = addr (pipe_r, self.pipe_w) = os.pipe() # Duplicate the socket file descriptor so the thread can own it and # make sure it gets closed only when terminating the thread. sock = socket.socket(fileno=os.dup(sock.fileno())) + self.counter = Counter() thread = threading.Thread( target=DMARegionHandler.__handle_requests, - args=[sock, pipe_r, self.data, self.data_lock, addr, error_no]) + args=[sock, pipe_r, self.data, self.counter, self.data_lock, addr, + error_no]) thread.start() def shutdown(self): @@ -102,7 +115,15 @@ def read(self, addr, size): return self.data[offset:offset + size] -def setup_function(function): +access_mode_flags = [ + 0, + VFIO_USER_F_DMA_REGION_MMAP, + VFIO_USER_F_DMA_REGION_FILE_IO, +] + + +@pytest.fixture(autouse=True, params=access_mode_flags) +def flags(request): global ctx, client, dma_handler ctx = prepare_ctx_for_dma() assert ctx is not None @@ -117,26 +138,35 @@ def setup_function(function): client = connect_client(ctx, caps) assert client.client_cmd_socket is not None + dma_handler = DMARegionHandler(client.client_cmd_socket, MAP_ADDR, + MAP_SIZE) + + flags = (request.param | VFIO_USER_F_DMA_REGION_READ + | VFIO_USER_F_DMA_REGION_WRITE) payload = vfio_user_dma_map(argsz=len(vfio_user_dma_map()), - flags=(VFIO_USER_F_DMA_REGION_READ - | VFIO_USER_F_DMA_REGION_WRITE), + flags=flags, offset=0, addr=MAP_ADDR, size=MAP_SIZE) - msg(ctx, client.sock, VFIO_USER_DMA_MAP, payload) + if flags & (VFIO_USER_F_DMA_REGION_MMAP | VFIO_USER_F_DMA_REGION_FILE_IO): + fds = [dma_handler.file.fileno()] + else: + fds = [] - dma_handler = DMARegionHandler(client.client_cmd_socket, payload.addr, - payload.size) + msg(ctx, client.sock, VFIO_USER_DMA_MAP, payload, fds=fds) + + return flags def teardown_function(function): + global ctx, client, dma_handler dma_handler.shutdown() client.disconnect(ctx) vfu_destroy_ctx(ctx) -def test_dma_read_write(): +def test_dma_read_write(flags): ret, sg = vfu_addr_to_sgl(ctx, dma_addr=MAP_ADDR + 0x1000, length=64, @@ -145,13 +175,19 @@ def test_dma_read_write(): assert ret == 1 data = bytearray([x & 0xff for x in range(0, sg[0].length)]) - assert vfu_sgl_write(ctx, sg, 1, data, 0) == 0 + assert vfu_sgl_write(ctx, sg, 1, data, VFU_SGL_DIRECT_ACCESS) == 0 - assert vfu_sgl_read(ctx, sg, 1, 0) == (0, data) + assert vfu_sgl_read(ctx, sg, 1, VFU_SGL_DIRECT_ACCESS) == (0, data) assert dma_handler.read(sg[0].dma_addr + sg[0].offset, sg[0].length) == data + # Make sure we only see DMA IPC when there is no other option. + if flags & (VFIO_USER_F_DMA_REGION_MMAP | VFIO_USER_F_DMA_REGION_FILE_IO): + assert dma_handler.counter.value == 0 + else: + assert dma_handler.counter.value == 2 + def test_dma_read_write_large(): ret, sg = vfu_addr_to_sgl(ctx, @@ -170,7 +206,10 @@ def test_dma_read_write_large(): sg[0].length) == data -def test_dma_read_write_error(): +def test_dma_read_write_error(flags): + if flags & (VFIO_USER_F_DMA_REGION_MMAP | VFIO_USER_F_DMA_REGION_FILE_IO): + pytest.skip("Can't generate error for direct file access modes") + # Reinitialize the handler to return EIO. global dma_handler dma_handler.shutdown() diff --git a/test/unit-tests.c b/test/unit-tests.c index 361112c5..7377052b 100644 --- a/test/unit-tests.c +++ b/test/unit-tests.c @@ -145,6 +145,13 @@ test_dma_map_mappable_without_fd(void **state UNUSED) assert_int_equal(0, ret); } +static int check_fd_ptr(const uintmax_t value, const uintmax_t fd_val) +{ + int *fd_ptr = (int*)value; + int expected_fd = (int)fd_val; + return fd_ptr != NULL && *fd_ptr == expected_fd; +} + static void test_dma_map_without_fd(void **state UNUSED) { @@ -162,9 +169,11 @@ test_dma_map_without_fd(void **state UNUSED) expect_uint_value(dma_controller_add_region, dma, (uintptr_t)vfu_ctx.dma); expect_uint_value(dma_controller_add_region, dma_addr, (uintptr_t)dma_map.addr); expect_uint_value(dma_controller_add_region, size, dma_map.size); - expect_int_value(dma_controller_add_region, fd, -1); + expect_check(dma_controller_add_region, fd, check_fd_ptr, -1); expect_int_value(dma_controller_add_region, offset, dma_map.offset); expect_uint_value(dma_controller_add_region, prot, PROT_NONE); + expect_uint_value(dma_controller_add_region, access_mode, + REGION_ACCESS_MODE_MSG); ret = handle_dma_map(&vfu_ctx, mkmsg(VFIO_USER_DMA_MAP, &dma_map, sizeof(dma_map)), &dma_map); @@ -252,9 +261,11 @@ test_dma_map_return_value(void **state UNUSED) expect_uint_value(dma_controller_add_region, dma, (uintptr_t)vfu_ctx.dma); expect_uint_value(dma_controller_add_region, dma_addr, (uintptr_t) dma_map.addr); expect_uint_value(dma_controller_add_region, size, dma_map.size); - expect_int_value(dma_controller_add_region, fd, -1); + expect_check(dma_controller_add_region, fd, check_fd_ptr, -1); expect_int_value(dma_controller_add_region, offset, dma_map.offset); expect_uint_value(dma_controller_add_region, prot, PROT_NONE); + expect_uint_value(dma_controller_add_region, access_mode, + REGION_ACCESS_MODE_MSG); will_return(dma_controller_add_region, 0); will_return(dma_controller_add_region, 2); @@ -324,8 +335,8 @@ test_dma_controller_add_region_no_fd(void **state UNUSED) size_t size = 0; int fd = -1; - r = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd, offset, - PROT_NONE); + r = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, &fd, offset, + PROT_NONE, REGION_ACCESS_MODE_MSG); assert_non_null(r); assert_int_equal(1, btree_size(&vfu_ctx.dma->regions)); assert_ptr_equal(NULL, r->info.vaddr); @@ -358,8 +369,8 @@ test_dma_controller_add_region_fd_deduplication(void **state UNUSED) fd2 = dup(fd1); assert_int_not_equal(-1, fd2); - r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd1, offset, - PROT_NONE); + r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, &fd1, offset, + PROT_NONE, REGION_ACCESS_MODE_MMAP); assert_non_null(r1); assert_int_equal(1, btree_size(&vfu_ctx.dma->regions)); assert_ptr_not_equal(NULL, r1->info.vaddr); @@ -375,8 +386,9 @@ test_dma_controller_add_region_fd_deduplication(void **state UNUSED) * Add another mapping for the same file and verify that the file * descriptor gets de-duplicated. */ - r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr + size, size, fd2, - offset + size, PROT_NONE); + r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr + size, size, &fd2, + offset + size, PROT_NONE, + REGION_ACCESS_MODE_MMAP); assert_non_null(r2); assert_int_equal(2, btree_size(&vfu_ctx.dma->regions)); assert_int_equal(r1->fd, r2->fd); @@ -400,14 +412,14 @@ test_dma_controller_add_region_twice(void **state UNUSED) fd2 = dup(fd1); assert_int_not_equal(-1, fd2); - r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd1, offset, - PROT_NONE); + r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, &fd1, offset, + PROT_NONE, REGION_ACCESS_MODE_MMAP); assert_non_null(r1); assert_int_equal(1, btree_size(&vfu_ctx.dma->regions)); /* Once more to confirm that identical regions are accepted. */ - r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd2, offset, - PROT_NONE); + r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, &fd2, offset, + PROT_NONE, REGION_ACCESS_MODE_MMAP); assert_non_null(r2); assert_int_equal(1, btree_size(&vfu_ctx.dma->regions)); assert_ptr_equal(r1, r2); @@ -422,6 +434,9 @@ test_dma_controller_remove_region_mapped(void **state UNUSED) .info.mapping.iov_base = (void *)0xcafebabe, .info.mapping.iov_len = 0x1000, .info.vaddr = (void *)0xcafebabe, + .access_mode = REGION_ACCESS_MODE_MMAP, + /* Cheating a bit with the fd to avoid opening a file */ + .fd = -1, }; stage_dma_region(vfu_ctx.dma, ®ion); @@ -470,12 +485,14 @@ test_dma_addr_to_sgl(void **state UNUSED) .info.iova.iov_base = (void *)0x1000, .info.iova.iov_len = 0x4000, .info.vaddr = (void *)0xdeadbeef, + .access_mode = REGION_ACCESS_MODE_MMAP, }, { .info.iova.iov_base = (void *)0x5000, .info.iova.iov_len = 0x2000, .info.vaddr = (void *)0xcafebabe, .info.prot = PROT_WRITE, + .access_mode = REGION_ACCESS_MODE_MMAP, }, };