From a9695564107940e29605152f2d45f5c6764a242d Mon Sep 17 00:00:00 2001 From: Mattias Nissler Date: Thu, 9 Apr 2026 05:20:10 -0700 Subject: [PATCH 1/8] Introduce explicit DMA region access mode This adds an enum to convey how a DMA region is to be accessed. Previously, this was implicit in the presence of a file descriptor in the DMA region tracking information. With this change, a new file I/O access mode is added that designates accesses should happen via file I/O syscalls, such as `pread()`/`pwrite()`. This is useful for file descriptors that don't support `mmap()`, such as `/proc//mem`. The access mode to be used is determined from new flag bits in the VFIO_USER_DMA_MAP message, and the message handler makes sure the passed flags and file descriptor presence are consistent. Existing code is updated to use the access mode in conditionals rather than testing file descriptor presence. Signed-off-by: Mattias Nissler --- include/vfio-user.h | 2 ++ lib/dma.c | 48 ++++++++++++++++++++++----------------------- lib/dma.h | 18 ++++++++++++++--- lib/libvfio-user.c | 32 +++++++++++++++++++++++------- test/mocks.c | 9 +++++---- test/unit-tests.c | 20 ++++++++++++++----- 6 files changed, 85 insertions(+), 44 deletions(-) diff --git a/include/vfio-user.h b/include/vfio-user.h index 0b115d32..eb1b1414 100644 --- a/include/vfio-user.h +++ b/include/vfio-user.h @@ -122,6 +122,8 @@ struct vfio_user_dma_map { uint32_t argsz; #define VFIO_USER_F_DMA_REGION_READ (1 << 0) #define VFIO_USER_F_DMA_REGION_WRITE (1 << 1) +#define VFIO_USER_F_DMA_REGION_MMAP (1 << 2) +#define VFIO_USER_F_DMA_REGION_FILE_IO (1 << 3) uint32_t flags; uint64_t offset; uint64_t addr; diff --git a/lib/dma.c b/lib/dma.c index 334d258c..1c5906ab 100644 --- a/lib/dma.c +++ b/lib/dma.c @@ -52,7 +52,7 @@ dma_sg_size(void) bool dma_sg_is_mappable(const dma_sg_t *sg) { - return sg->region->info.vaddr != NULL; + return sg->region->access_mode == REGION_ACCESS_MODE_MMAP; } static inline ssize_t @@ -132,11 +132,6 @@ MOCK_DEFINE(dma_controller_unmap_region)(dma_controller_t *dma, region->fd, region->info.mapping.iov_base, iov_end(®ion->info.mapping)); } - - assert(region->fd != -1); - - err = fd_cache_put(®ion->fd); - assert(err == 0); } /* FIXME not thread safe */ @@ -148,6 +143,7 @@ MOCK_DEFINE(dma_controller_remove_region)(dma_controller_t *dma, { dma_memory_region_t *region; btree_iter_t iter; + int err; assert(dma != NULL); @@ -170,12 +166,12 @@ MOCK_DEFINE(dma_controller_remove_region)(dma_controller_t *dma, if (region->info.vaddr != NULL) { dma_controller_unmap_region(dma, region); - } else { - assert(region->fd == -1); } btree_iter_remove(&iter); dma_controller_increment_regions_generation(dma); + err = fd_cache_put(®ion->fd); + assert(err == 0); free(region); return 0; @@ -188,6 +184,7 @@ dma_controller_remove_all_regions(dma_controller_t *dma, { dma_memory_region_t *region = NULL; btree_iter_t iter; + int err; assert(dma != NULL); @@ -208,10 +205,10 @@ dma_controller_remove_all_regions(dma_controller_t *dma, if (region->info.vaddr != NULL) { dma_controller_unmap_region(dma, region); - } else { - assert(region->fd == -1); } + err = fd_cache_put(®ion->fd); + assert(err == 0); free(region); } } @@ -261,7 +258,7 @@ dma_map_region(dma_controller_t *dma, dma_memory_region_t *region) static int dirty_page_logging_start_on_region(dma_memory_region_t *region, size_t pgsize) { - assert(region->fd != -1); + assert(region->access_mode != REGION_ACCESS_MODE_MSG); ssize_t size = get_bitmap_size(region->info.iova.iov_len, pgsize); if (size < 0) { @@ -278,7 +275,8 @@ dirty_page_logging_start_on_region(dma_memory_region_t *region, size_t pgsize) dma_memory_region_t * MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, vfu_dma_addr_t dma_addr, uint64_t size, - int fd, off_t offset, uint32_t prot) + int fd, off_t offset, uint32_t prot, + enum region_access_mode access_mode) { dma_memory_region_t *existing = NULL; dma_memory_region_t *region = NULL; @@ -342,7 +340,7 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, return ERROR_PTR(EINVAL); } - if (fd != -1) { + if (access_mode != REGION_ACCESS_MODE_MSG) { page_size = fd_get_blocksize(fd); if (page_size < 0) { vfu_log(dma->vfu_ctx, LOG_ERR, "bad page size %d", page_size); @@ -369,10 +367,11 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, region->info.iova.iov_len = size; region->info.page_size = page_size; region->info.prot = prot; + region->access_mode = access_mode; region->offset = offset; region->fd = fd; - if (fd != -1) { + if (access_mode != REGION_ACCESS_MODE_MSG) { /* * TODO introduce a function that tells whether dirty page logging is * enabled @@ -387,15 +386,14 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, } } - ret = dma_map_region(dma, region); + if (access_mode == REGION_ACCESS_MODE_MMAP) { + ret = dma_map_region(dma, region); - if (ret != 0) { - vfu_log(dma->vfu_ctx, LOG_ERR, - "failed to memory map DMA region %s: %m", rstr); - goto rollback; - } else { - /* Ownership of the fd is now with the region. */ - fd = -1; + if (ret != 0) { + vfu_log(dma->vfu_ctx, LOG_ERR, + "failed to memory map DMA region %s: %m", rstr); + goto rollback; + } } } @@ -503,7 +501,7 @@ dma_controller_dirty_page_logging_start(dma_controller_t *dma, size_t pgsize) for (btree_iter_init(&dma->regions, 0, &iter); (region = btree_iter_get(&iter, NULL)) != NULL; btree_iter_next(&iter)) { - if (region->fd == -1) { + if (region->access_mode == REGION_ACCESS_MODE_MSG) { continue; } @@ -766,8 +764,8 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, region = sg.region; - if (region->fd == -1) { - vfu_log(dma->vfu_ctx, LOG_ERR, "region [%p-%p] is not mapped", + if (region->access_mode == REGION_ACCESS_MODE_MSG) { + vfu_log(dma->vfu_ctx, LOG_ERR, "region [%p-%p] isn't accessed directly", region->info.iova.iov_base, iov_end(®ion->info.iova)); return ERROR_INT(EINVAL); } diff --git a/lib/dma.h b/lib/dma.h index ea80ae71..1054af63 100644 --- a/lib/dma.h +++ b/lib/dma.h @@ -85,9 +85,20 @@ struct vfu_ctx; +/* Designates how to access a DMA target region */ +enum region_access_mode { + /* Use VFIO_USER_DMA_{READ,WRITE} messages on socket */ + REGION_ACCESS_MODE_MSG, + /* mmap() provided file descriptor */ + REGION_ACCESS_MODE_MMAP, + /* pread()/pwrite() on provided file descriptor */ + REGION_ACCESS_MODE_FILE_IO, +}; + typedef struct { vfu_dma_info_t info; - int fd; // File descriptor to mmap + enum region_access_mode access_mode; + int fd; // File descriptor for direct access off_t offset; // File offset uint8_t *dirty_bitmap; // Dirty page bitmap } dma_memory_region_t; @@ -127,7 +138,8 @@ dma_controller_destroy(dma_controller_t *dma); */ MOCK_DECLARE(dma_memory_region_t *, dma_controller_add_region, dma_controller_t *dma, vfu_dma_addr_t dma_addr, uint64_t size, - int fd, off_t offset, uint32_t prot); + int fd, off_t offset, uint32_t prot, + enum region_access_mode access_mode); MOCK_DECLARE(int, dma_controller_remove_region, dma_controller_t *dma, vfu_dma_addr_t dma_addr, size_t size, @@ -293,7 +305,7 @@ dma_sgl_get(dma_controller_t *dma, dma_sg_t *sgl, struct iovec *iov, size_t cnt) return ERROR_INT(EINVAL); } - if (sg->region->info.vaddr == NULL) { + if (sg->region->access_mode != REGION_ACCESS_MODE_MMAP) { return ERROR_INT(EFAULT); } diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 35b65ea7..14f1152a 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -676,6 +676,7 @@ int handle_dma_map(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, struct vfio_user_dma_map *dma_map) { + enum region_access_mode access_mode; dma_memory_region_t *region; char rstr[1024]; int fd = -1; @@ -699,6 +700,14 @@ handle_dma_map(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, vfu_log(vfu_ctx, LOG_DEBUG, "adding DMA region %s", rstr); + if (msg->in.nr_fds > 0) { + fd = consume_fd(msg->in.fds, msg->in.nr_fds, 0); + if (fd < 0) { + vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: %m", rstr); + return -1; + } + } + if (dma_map->flags & VFIO_USER_F_DMA_REGION_READ) { prot |= PROT_READ; dma_map->flags &= ~VFIO_USER_F_DMA_REGION_READ; @@ -709,22 +718,31 @@ handle_dma_map(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, dma_map->flags &= ~VFIO_USER_F_DMA_REGION_WRITE; } + /* If not specified, default to mmap()-ed access if an fd is provided. */ + access_mode = fd != -1 ? REGION_ACCESS_MODE_MMAP : REGION_ACCESS_MODE_MSG; + if (dma_map->flags & VFIO_USER_F_DMA_REGION_MMAP) { + access_mode = REGION_ACCESS_MODE_MMAP; + dma_map->flags &= ~VFIO_USER_F_DMA_REGION_MMAP; + } else if (dma_map->flags & VFIO_USER_F_DMA_REGION_FILE_IO) { + access_mode = REGION_ACCESS_MODE_FILE_IO; + dma_map->flags &= ~VFIO_USER_F_DMA_REGION_FILE_IO; + } + if (dma_map->flags != 0) { vfu_log(vfu_ctx, LOG_ERR, "bad flags=%#x", dma_map->flags); + close_safely(&fd); return ERROR_INT(EINVAL); } - if (msg->in.nr_fds > 0) { - fd = consume_fd(msg->in.fds, msg->in.nr_fds, 0); - if (fd < 0) { - vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: %m", rstr); - return -1; - } + if (access_mode != REGION_ACCESS_MODE_MSG && fd == -1) { + vfu_log(vfu_ctx, LOG_ERR, "file descriptor required for mode=%u", + access_mode); + return ERROR_INT(EINVAL); } region = dma_controller_add_region( vfu_ctx->dma, (vfu_dma_addr_t)(uintptr_t)dma_map->addr, dma_map->size, - fd, dma_map->offset, prot); + fd, dma_map->offset, prot, access_mode); if (region == NULL) { vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: %m", rstr); close_safely(&fd); diff --git a/test/mocks.c b/test/mocks.c index 3d3f5bed..d7bd456a 100644 --- a/test/mocks.c +++ b/test/mocks.c @@ -112,13 +112,13 @@ unpatch_all(void) } dma_memory_region_t * -dma_controller_add_region(dma_controller_t *dma, void *dma_addr, - uint64_t size, int fd, off_t offset, - uint32_t prot) +dma_controller_add_region(dma_controller_t *dma, void *dma_addr, uint64_t size, + int fd, off_t offset, uint32_t prot, + enum region_access_mode access_mode) { if (!is_patched("dma_controller_add_region")) { return __real_dma_controller_add_region(dma, dma_addr, size, fd, offset, - prot); + prot, access_mode); } check_expected_ptr(dma); @@ -127,6 +127,7 @@ dma_controller_add_region(dma_controller_t *dma, void *dma_addr, check_expected(fd); check_expected(offset); check_expected(prot); + check_expected(access_mode); errno = mock(); return mock_ptr_type(dma_memory_region_t *); } diff --git a/test/unit-tests.c b/test/unit-tests.c index 2c1acb68..2d676ea9 100644 --- a/test/unit-tests.c +++ b/test/unit-tests.c @@ -165,6 +165,8 @@ test_dma_map_without_fd(void **state UNUSED) expect_value(dma_controller_add_region, fd, -1); expect_value(dma_controller_add_region, offset, dma_map.offset); expect_value(dma_controller_add_region, prot, PROT_NONE); + expect_value(dma_controller_add_region, access_mode, + REGION_ACCESS_MODE_MSG); ret = handle_dma_map(&vfu_ctx, mkmsg(VFIO_USER_DMA_MAP, &dma_map, sizeof(dma_map)), &dma_map); @@ -249,6 +251,8 @@ test_dma_map_return_value(void **state UNUSED) expect_value(dma_controller_add_region, fd, -1); expect_value(dma_controller_add_region, offset, dma_map.offset); expect_value(dma_controller_add_region, prot, PROT_NONE); + expect_value(dma_controller_add_region, access_mode, + REGION_ACCESS_MODE_MSG); will_return(dma_controller_add_region, 0); will_return(dma_controller_add_region, 2); @@ -318,7 +322,7 @@ test_dma_controller_add_region_no_fd(void **state UNUSED) int fd = -1; r = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd, offset, - PROT_NONE); + PROT_NONE, REGION_ACCESS_MODE_MSG); assert_non_null(r); assert_int_equal(1, btree_size(&vfu_ctx.dma->regions)); assert_ptr_equal(NULL, r->info.vaddr); @@ -352,7 +356,7 @@ test_dma_controller_add_region_fd_deduplication(void **state UNUSED) assert_int_not_equal(-1, fd2); r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd1, offset, - PROT_NONE); + PROT_NONE, REGION_ACCESS_MODE_MMAP); assert_non_null(r1); assert_int_equal(1, btree_size(&vfu_ctx.dma->regions)); assert_ptr_not_equal(NULL, r1->info.vaddr); @@ -369,7 +373,8 @@ test_dma_controller_add_region_fd_deduplication(void **state UNUSED) * descriptor gets de-duplicated. */ r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr + size, size, fd2, - offset + size, PROT_NONE); + offset + size, PROT_NONE, + REGION_ACCESS_MODE_MMAP); assert_non_null(r2); assert_int_equal(2, btree_size(&vfu_ctx.dma->regions)); assert_int_equal(r1->fd, r2->fd); @@ -394,13 +399,13 @@ test_dma_controller_add_region_twice(void **state UNUSED) assert_int_not_equal(-1, fd2); r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd1, offset, - PROT_NONE); + PROT_NONE, REGION_ACCESS_MODE_MMAP); assert_non_null(r1); assert_int_equal(1, btree_size(&vfu_ctx.dma->regions)); /* Once more to confirm that identical regions are accepted. */ r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd2, offset, - PROT_NONE); + PROT_NONE, REGION_ACCESS_MODE_MMAP); assert_non_null(r2); assert_int_equal(1, btree_size(&vfu_ctx.dma->regions)); assert_ptr_equal(r1, r2); @@ -415,6 +420,9 @@ test_dma_controller_remove_region_mapped(void **state UNUSED) .info.mapping.iov_base = (void *)0xcafebabe, .info.mapping.iov_len = 0x1000, .info.vaddr = (void *)0xcafebabe, + .access_mode = REGION_ACCESS_MODE_MMAP, + /* Cheating a bit with the fd to avoid opening a file */ + .fd = -1, }; stage_dma_region(vfu_ctx.dma, ®ion); @@ -461,12 +469,14 @@ test_dma_addr_to_sgl(void **state UNUSED) .info.iova.iov_base = (void *)0x1000, .info.iova.iov_len = 0x4000, .info.vaddr = (void *)0xdeadbeef, + .access_mode = REGION_ACCESS_MODE_MMAP, }, { .info.iova.iov_base = (void *)0x5000, .info.iova.iov_len = 0x2000, .info.vaddr = (void *)0xcafebabe, .info.prot = PROT_WRITE, + .access_mode = REGION_ACCESS_MODE_MMAP, }, }; From 4e775cab556e259375736bb0df1c391e215f66c2 Mon Sep 17 00:00:00 2001 From: Mattias Nissler Date: Thu, 9 Apr 2026 05:29:09 -0700 Subject: [PATCH 2/8] Honor DMA region access mode in `vfu_sgl_{read,write}` When performing DMA from/to a caller-supplied buffer via `vfu_sgl_write` and `vfu_sgl_read`, respectively, we can use file descriptor based access modes as configured for the region, rather than servicing the DMA operation via IPC unconditionally. Signed-off-by: Mattias Nissler --- lib/libvfio-user.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 14f1152a..c02535a8 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -2413,6 +2413,40 @@ vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd, return ERROR_INT(EPERM); } + if (sg->region->access_mode == REGION_ACCESS_MODE_MMAP) { + void *target, *src, *dst; + assert(sg->region->info.vaddr != NULL); + target = sg->region->info.vaddr + sg->offset; + src = cmd == VFIO_USER_DMA_READ ? target : data; + dst = cmd == VFIO_USER_DMA_READ ? data : target; + memcpy(dst, src, sg->length); + return 0; + } + + if (sg->region->access_mode == REGION_ACCESS_MODE_FILE_IO) { + size_t length, offset; + assert(sg->region->fd != -1); + length = sg->length; + offset = sg->offset + sg->region->offset; + while (length > 0) { + ssize_t ret; + if (cmd == VFIO_USER_DMA_READ) { + ret = pread(sg->region->fd, data, length, offset); + } else { + ret = pwrite(sg->region->fd, data, length, offset); + } + if (ret <= 0) { + return ERROR_INT(EIO); + } + data += ret; + offset += ret; + length -= ret; + } + return 0; + } + + assert(sg->region->access_mode == REGION_ACCESS_MODE_MSG); + rlen = sizeof(struct vfio_user_dma_region_access) + MIN(sg->length, vfu_ctx->client_max_data_xfer_size); From f8b22b20d87a42dc5290d9d0f2b63f4a817b9b8c Mon Sep 17 00:00:00 2001 From: Mattias Nissler Date: Tue, 19 May 2026 05:12:39 -0700 Subject: [PATCH 3/8] Rework region removal code paths Share tear down logic across `remove_region` and `remove_all_regions`, check access mode to select appropriate tear down actions. Signed-off-by: Mattias Nissler --- lib/dma.c | 68 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/lib/dma.c b/lib/dma.c index dd8b3e96..e77caa2a 100644 --- a/lib/dma.c +++ b/lib/dma.c @@ -160,6 +160,42 @@ MOCK_DEFINE(dma_controller_unmap_region)(dma_controller_t *dma, } } +static void +dma_controller_destroy_region(dma_controller_t *dma, dma_memory_region_t *region, + vfu_dma_unregister_cb_t *dma_unregister, + void *data) +{ + int err; + + assert(dma != NULL); + assert(region != NULL); + + if (dma_unregister != NULL) { + dma->vfu_ctx->in_cb = CB_DMA_UNREGISTER; + dma_unregister(data, ®ion->info); + dma->vfu_ctx->in_cb = CB_NONE; + } + + switch (region->access_mode) { + case REGION_ACCESS_MODE_MSG: + assert(region->fd == -1); + break; + case REGION_ACCESS_MODE_MMAP: + if (region->info.vaddr != NULL) { + dma_controller_unmap_region(dma, region); + } + /* fall through */ + case REGION_ACCESS_MODE_FILE_IO: + err = fd_cache_put(®ion->fd); + assert(err == 0); + break; + } + + dma_controller_increment_regions_generation(dma); + dirty_page_logging_stop_on_region(region); + free(region); +} + /* FIXME not thread safe */ int MOCK_DEFINE(dma_controller_remove_region)(dma_controller_t *dma, @@ -169,7 +205,6 @@ MOCK_DEFINE(dma_controller_remove_region)(dma_controller_t *dma, { dma_memory_region_t *region; btree_iter_t iter; - int err; assert(dma != NULL); @@ -184,22 +219,8 @@ MOCK_DEFINE(dma_controller_remove_region)(dma_controller_t *dma, return ERROR_INT(ENOENT); } - if (dma_unregister != NULL) { - dma->vfu_ctx->in_cb = CB_DMA_UNREGISTER; - dma_unregister(data, ®ion->info); - dma->vfu_ctx->in_cb = CB_NONE; - } - - if (region->info.vaddr != NULL) { - dma_controller_unmap_region(dma, region); - } - btree_iter_remove(&iter); - dma_controller_increment_regions_generation(dma); - dirty_page_logging_stop_on_region(region); - err = fd_cache_put(®ion->fd); - assert(err == 0); - free(region); + dma_controller_destroy_region(dma, region, dma_unregister, data); return 0; } @@ -211,7 +232,6 @@ dma_controller_remove_all_regions(dma_controller_t *dma, { dma_memory_region_t *region = NULL; btree_iter_t iter; - int err; assert(dma != NULL); @@ -224,19 +244,7 @@ dma_controller_remove_all_regions(dma_controller_t *dma, region->info.vaddr, region->info.mapping.iov_base, iov_end(®ion->info.mapping)); - if (dma_unregister != NULL) { - dma->vfu_ctx->in_cb = CB_DMA_UNREGISTER; - dma_unregister(data, ®ion->info); - dma->vfu_ctx->in_cb = CB_NONE; - } - - if (region->info.vaddr != NULL) { - dma_controller_unmap_region(dma, region); - } - - err = fd_cache_put(®ion->fd); - assert(err == 0); - free(region); + dma_controller_destroy_region(dma, region, dma_unregister, data); } } From 7293d23c97d17b84a594609341ce51a329abcb6b Mon Sep 17 00:00:00 2001 From: Mattias Nissler Date: Tue, 19 May 2026 06:43:21 -0700 Subject: [PATCH 4/8] Rework dma_controller_add region Use the new `dma_controller_destroy_region` helper and make sure the passed-in fd gets closed only when we fail before deduplicating it. Signed-off-by: Mattias Nissler --- lib/dma.c | 112 ++++++++++++++++++++++----------------------- lib/dma.h | 2 +- lib/libvfio-user.c | 2 +- test/mocks.c | 2 +- test/unit-tests.c | 21 ++++++--- 5 files changed, 72 insertions(+), 67 deletions(-) diff --git a/lib/dma.c b/lib/dma.c index e77caa2a..c0bea6db 100644 --- a/lib/dma.c +++ b/lib/dma.c @@ -293,7 +293,7 @@ dma_map_region(dma_controller_t *dma, dma_memory_region_t *region) dma_memory_region_t * MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, vfu_dma_addr_t dma_addr, uint64_t size, - int fd, off_t offset, uint32_t prot, + int *fd, off_t offset, uint32_t prot, enum region_access_mode access_mode) { dma_memory_region_t *existing = NULL; @@ -302,12 +302,11 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, int page_size = 0; char rstr[1024]; int ret = 0; - int err; assert(dma != NULL); snprintf(rstr, sizeof(rstr), "[%p, %p) fd=%d offset=%#llx prot=%#x", - dma_addr, dma_addr + size, fd, (ull_t)offset, prot); + dma_addr, dma_addr + size, *fd, (ull_t)offset, prot); if (size > dma->max_size) { vfu_log(dma->vfu_ctx, LOG_ERR, "DMA region size %llu > max %zu", @@ -327,7 +326,7 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, (ull_t)existing->offset); return ERROR_PTR(EINVAL); } - if (fd_cache_is_same_file(existing->fd, fd) != 0) { + if (fd_cache_is_same_file(existing->fd, *fd) != 0) { vfu_log(dma->vfu_ctx, LOG_ERR, "bad fd for new DMA region %s; " "existing=%d", rstr, existing->fd); return ERROR_PTR(EINVAL); @@ -337,7 +336,7 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, "%s; existing=%#x", rstr, existing->info.prot); return ERROR_PTR(EINVAL); } - close_safely(&fd); + close_safely(fd); return existing; } @@ -358,82 +357,81 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, return ERROR_PTR(EINVAL); } - if (access_mode != REGION_ACCESS_MODE_MSG) { - page_size = fd_get_blocksize(fd); - if (page_size < 0) { - vfu_log(dma->vfu_ctx, LOG_ERR, "bad page size %d", page_size); - return ERROR_PTR(EINVAL); - } - - fd = fd_cache_get(fd); - if (fd == -1) { - vfu_log(dma->vfu_ctx, LOG_ERR, - "failed to de-duplicate fd for new DMA region %s: %m", - rstr); - return NULL; - } - } - page_size = MAX(page_size, getpagesize()); - region = calloc(1, sizeof(*region)); if (region == NULL) { - errno = ENOMEM; - goto rollback; + vfu_log(dma->vfu_ctx, LOG_ERR, "failed to allocate region entry"); + return ERROR_PTR(ENOMEM); } region->info.iova.iov_base = (void *)dma_addr; region->info.iova.iov_len = size; - region->info.page_size = page_size; region->info.prot = prot; + region->info.page_size = getpagesize(); region->access_mode = access_mode; region->offset = offset; - region->fd = fd; + region->fd = -1; - if (access_mode != REGION_ACCESS_MODE_MSG) { - /* - * TODO introduce a function that tells whether dirty page logging is - * enabled - */ - if (dma->dirty_pgsize != 0) { - if (dirty_page_logging_start_on_region(region, dma->dirty_pgsize) < 0) { - /* - * TODO We don't necessarily have to fail, we can continue - * and fail the get dirty page bitmap request later. - */ - goto rollback; + switch (access_mode) { + case REGION_ACCESS_MODE_MSG: + assert(*fd == -1); + break; + case REGION_ACCESS_MODE_MMAP: + case REGION_ACCESS_MODE_FILE_IO: + assert(*fd != -1); + region->fd = fd_cache_get(*fd); + if (region->fd == -1) { + vfu_log(dma->vfu_ctx, LOG_ERR, + "failed to de-duplicate fd for new DMA region %s: %m", + rstr); + goto destroy_region; } - } + *fd = -1; /* The fd is now owned by the cache */ - if (access_mode == REGION_ACCESS_MODE_MMAP) { - ret = dma_map_region(dma, region); + page_size = fd_get_blocksize(region->fd); + if (page_size < 0) { + vfu_log(dma->vfu_ctx, LOG_ERR, "bad page size %d", page_size); + errno = EINVAL; + goto destroy_region; + } + region->info.page_size = MAX(region->info.page_size, (size_t)page_size); - if (ret != 0) { - vfu_log(dma->vfu_ctx, LOG_ERR, - "failed to memory map DMA region %s: %m", rstr); - goto rollback; + /* + * TODO introduce a function that tells whether dirty page logging is + * enabled + */ + if (dma->dirty_pgsize != 0) { + if (dirty_page_logging_start_on_region(region, dma->dirty_pgsize) < 0) { + /* + * TODO We don't necessarily have to fail, we can continue + * and fail the get dirty page bitmap request later. + */ + goto destroy_region; + } } - } + + if (access_mode == REGION_ACCESS_MODE_MMAP) { + ret = dma_map_region(dma, region); + + if (ret != 0) { + vfu_log(dma->vfu_ctx, LOG_ERR, + "failed to memory map DMA region %s: %m", rstr); + goto destroy_region; + } + } + break; } if (btree_iter_insert(&iter, (uintptr_t)dma_addr + size - 1, region) != 0) { - goto rollback; + goto destroy_region; } dma_controller_increment_regions_generation(dma); return region; -rollback: +destroy_region: ret = errno; - if (region != NULL) { - if (region->info.vaddr != NULL) { - dma_controller_unmap_region(dma, region); - } - dirty_page_logging_stop_on_region(region); - free(region); - } - err = fd_cache_put(&fd); - assert(err == 0); + dma_controller_destroy_region(dma, region, NULL, NULL); return ERROR_PTR(ret); } diff --git a/lib/dma.h b/lib/dma.h index 917a429c..c36e9366 100644 --- a/lib/dma.h +++ b/lib/dma.h @@ -138,7 +138,7 @@ dma_controller_destroy(dma_controller_t *dma); */ MOCK_DECLARE(dma_memory_region_t *, dma_controller_add_region, dma_controller_t *dma, vfu_dma_addr_t dma_addr, uint64_t size, - int fd, off_t offset, uint32_t prot, + int *fd, off_t offset, uint32_t prot, enum region_access_mode access_mode); MOCK_DECLARE(int, dma_controller_remove_region, dma_controller_t *dma, diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 0f9ca8c2..2ef0a66c 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -742,7 +742,7 @@ handle_dma_map(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, region = dma_controller_add_region( vfu_ctx->dma, (vfu_dma_addr_t)(uintptr_t)dma_map->addr, dma_map->size, - fd, dma_map->offset, prot, access_mode); + &fd, dma_map->offset, prot, access_mode); if (region == NULL) { vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: %m", rstr); close_safely(&fd); diff --git a/test/mocks.c b/test/mocks.c index d7bd456a..1364c56a 100644 --- a/test/mocks.c +++ b/test/mocks.c @@ -113,7 +113,7 @@ unpatch_all(void) dma_memory_region_t * dma_controller_add_region(dma_controller_t *dma, void *dma_addr, uint64_t size, - int fd, off_t offset, uint32_t prot, + int *fd, off_t offset, uint32_t prot, enum region_access_mode access_mode) { if (!is_patched("dma_controller_add_region")) { diff --git a/test/unit-tests.c b/test/unit-tests.c index 2d676ea9..e004e795 100644 --- a/test/unit-tests.c +++ b/test/unit-tests.c @@ -145,6 +145,13 @@ test_dma_map_mappable_without_fd(void **state UNUSED) assert_int_equal(0, ret); } +static int check_fd_ptr(const uintmax_t value, const uintmax_t fd_val) +{ + int *fd_ptr = (int*)value; + int expected_fd = (int)fd_val; + return fd_ptr != NULL && *fd_ptr == expected_fd; +} + static void test_dma_map_without_fd(void **state UNUSED) { @@ -162,7 +169,7 @@ test_dma_map_without_fd(void **state UNUSED) expect_value(dma_controller_add_region, dma, vfu_ctx.dma); expect_value(dma_controller_add_region, dma_addr, dma_map.addr); expect_value(dma_controller_add_region, size, dma_map.size); - expect_value(dma_controller_add_region, fd, -1); + expect_check(dma_controller_add_region, fd, check_fd_ptr, -1); expect_value(dma_controller_add_region, offset, dma_map.offset); expect_value(dma_controller_add_region, prot, PROT_NONE); expect_value(dma_controller_add_region, access_mode, @@ -248,7 +255,7 @@ test_dma_map_return_value(void **state UNUSED) expect_value(dma_controller_add_region, dma, (uintptr_t)vfu_ctx.dma); expect_value(dma_controller_add_region, dma_addr, dma_map.addr); expect_value(dma_controller_add_region, size, dma_map.size); - expect_value(dma_controller_add_region, fd, -1); + expect_check(dma_controller_add_region, fd, check_fd_ptr, -1); expect_value(dma_controller_add_region, offset, dma_map.offset); expect_value(dma_controller_add_region, prot, PROT_NONE); expect_value(dma_controller_add_region, access_mode, @@ -321,7 +328,7 @@ test_dma_controller_add_region_no_fd(void **state UNUSED) size_t size = 0; int fd = -1; - r = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd, offset, + r = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, &fd, offset, PROT_NONE, REGION_ACCESS_MODE_MSG); assert_non_null(r); assert_int_equal(1, btree_size(&vfu_ctx.dma->regions)); @@ -355,7 +362,7 @@ test_dma_controller_add_region_fd_deduplication(void **state UNUSED) fd2 = dup(fd1); assert_int_not_equal(-1, fd2); - r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd1, offset, + r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, &fd1, offset, PROT_NONE, REGION_ACCESS_MODE_MMAP); assert_non_null(r1); assert_int_equal(1, btree_size(&vfu_ctx.dma->regions)); @@ -372,7 +379,7 @@ test_dma_controller_add_region_fd_deduplication(void **state UNUSED) * Add another mapping for the same file and verify that the file * descriptor gets de-duplicated. */ - r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr + size, size, fd2, + r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr + size, size, &fd2, offset + size, PROT_NONE, REGION_ACCESS_MODE_MMAP); assert_non_null(r2); @@ -398,13 +405,13 @@ test_dma_controller_add_region_twice(void **state UNUSED) fd2 = dup(fd1); assert_int_not_equal(-1, fd2); - r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd1, offset, + r1 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, &fd1, offset, PROT_NONE, REGION_ACCESS_MODE_MMAP); assert_non_null(r1); assert_int_equal(1, btree_size(&vfu_ctx.dma->regions)); /* Once more to confirm that identical regions are accepted. */ - r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, fd2, offset, + r2 = dma_controller_add_region(vfu_ctx.dma, dma_addr, size, &fd2, offset, PROT_NONE, REGION_ACCESS_MODE_MMAP); assert_non_null(r2); assert_int_equal(1, btree_size(&vfu_ctx.dma->regions)); From db093c434f92a683593684067e88813adbd5c73b Mon Sep 17 00:00:00 2001 From: Mattias Nissler Date: Tue, 19 May 2026 06:53:17 -0700 Subject: [PATCH 5/8] Clarify access mode selection for DMA_MAP Signed-off-by: Mattias Nissler --- lib/libvfio-user.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 2ef0a66c..755c474a 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -718,14 +718,26 @@ handle_dma_map(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, dma_map->flags &= ~VFIO_USER_F_DMA_REGION_WRITE; } - /* If not specified, default to mmap()-ed access if an fd is provided. */ - access_mode = fd != -1 ? REGION_ACCESS_MODE_MMAP : REGION_ACCESS_MODE_MSG; if (dma_map->flags & VFIO_USER_F_DMA_REGION_MMAP) { + if (fd == -1) { + vfu_log(vfu_ctx, LOG_ERR, + "file descriptor required for MMAP DMA region"); + return ERROR_INT(EINVAL); + } access_mode = REGION_ACCESS_MODE_MMAP; dma_map->flags &= ~VFIO_USER_F_DMA_REGION_MMAP; } else if (dma_map->flags & VFIO_USER_F_DMA_REGION_FILE_IO) { + if (fd == -1) { + vfu_log(vfu_ctx, LOG_ERR, + "file descriptor required for FILE_IO DMA region"); + return ERROR_INT(EINVAL); + } access_mode = REGION_ACCESS_MODE_FILE_IO; dma_map->flags &= ~VFIO_USER_F_DMA_REGION_FILE_IO; + } else { + /* Compatibility: default to mmap()-ed access if an fd is provided. */ + access_mode = + fd != -1 ? REGION_ACCESS_MODE_MMAP : REGION_ACCESS_MODE_MSG; } if (dma_map->flags != 0) { @@ -734,12 +746,6 @@ handle_dma_map(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, return ERROR_INT(EINVAL); } - if (access_mode != REGION_ACCESS_MODE_MSG && fd == -1) { - vfu_log(vfu_ctx, LOG_ERR, "file descriptor required for mode=%u", - access_mode); - return ERROR_INT(EINVAL); - } - region = dma_controller_add_region( vfu_ctx->dma, (vfu_dma_addr_t)(uintptr_t)dma_map->addr, dma_map->size, &fd, dma_map->offset, prot, access_mode); From 9564b2ac9131a9782be16138d505a8d9a98af17a Mon Sep 17 00:00:00 2001 From: Mattias Nissler Date: Tue, 19 May 2026 09:09:59 -0700 Subject: [PATCH 6/8] API level adjustments Put file descriptor based direct access behind the new VFU_SGL_DIRECT_ACCESS flag. Update documentation to clarify behavior. Signed-off-by: Mattias Nissler --- include/libvfio-user.h | 24 ++++++++++---- lib/libvfio-user.c | 74 +++++++++++++++++++++++------------------- 2 files changed, 59 insertions(+), 39 deletions(-) diff --git a/include/libvfio-user.h b/include/libvfio-user.h index 9b814208..e771e1ac 100644 --- a/include/libvfio-user.h +++ b/include/libvfio-user.h @@ -697,7 +697,9 @@ vfu_addr_to_sgl(vfu_ctx_t *vfu_ctx, vfu_dma_addr_t dma_addr, size_t len, * vfu_sgl_put(). * * This is only supported when a @dma_unregister callback is provided to - * vfu_setup_device_dma(). + * vfu_setup_device_dma(). In addition, the client must have registered the + * relevant DMA regions to allow mmap()-ed access and supplied file + * descriptors, otherwise this function will fail with EFAULT. * * @vfu_ctx: the libvfio-user context * @sgl: array of scatter/gather entries returned by vfu_addr_to_sg. These @@ -738,12 +740,19 @@ vfu_sgl_mark_dirty(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, size_t cnt); void vfu_sgl_put(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, struct iovec *iov, size_t cnt); +/* Flags to vfu_sgl_read() and vfu_sgl_write(). */ + +/* Perform direct access via file descriptor, if possible. */ +#define VFU_SGL_DIRECT_ACCESS (1) + /** * Read from the dma region exposed by the client. This can be used as an * alternative to reading from a vfu_sgl_get() mapping, if the region is not * directly mappable, or DMA notification callbacks have not been provided. * - * The implementation involves a round-trip communication with the client. + * When the VFU_SGL_DIRECT_ACCESS flag is specified, the DMA operation is + * performed against a client-supplied file descriptor, if applicable. + * Otherwise, the operation incurs an IPC round-trip with the client. * * Note that currently, only one @sg entry is supported (@sg_cnt must be 1). * @@ -751,7 +760,7 @@ vfu_sgl_put(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, struct iovec *iov, size_t cnt); * @sg: array of scatter/gather entries * @sg_cnt: number of scatter/gather entries * @data: data buffer to read - * @flags: must be 0 + * @flags: VFU_SGL_* flags * * @returns 0 on success, -1 on failure. Sets errno. */ @@ -764,12 +773,15 @@ vfu_sgl_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, size_t sg_cnt, * alternative to reading from a vfu_sgl_get() mapping, if the region is not * directly mappable, or DMA notification callbacks have not been provided. * - * The implementation involves a round-trip communication with the client. + * When the VFU_SGL_DIRECT_ACCESS flag is specified, the DMA operation is + * performed against a client-supplied file descriptor, if applicable. + * Otherwise, the operation incurs an IPC round-trip with the client. * * Note that currently, only one @sg entry is supported (@sg_cnt must be 1). * - * During live migration, this call does not mark any of the written pages as - * dirty; the client is expected to track this. + * During live migration, this call does only mark the written pages as dirty + * if the access is performed directly against a file descriptor. For access + * via IPC, the client is expected to track which pages have been written. * * @vfu_ctx: the libvfio-user context * @sg: array of scatter/gather entries diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 755c474a..407061a7 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -2396,7 +2396,7 @@ vfu_sgl_put(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, static int vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd, - dma_sg_t *sg, void *data) + int flags, dma_sg_t *sg, void *data) { struct vfio_user_dma_region_access *dma_reply; struct vfio_user_dma_region_access *dma_req; @@ -2415,39 +2415,47 @@ vfu_dma_transfer(vfu_ctx_t *vfu_ctx, enum vfio_user_command cmd, return ERROR_INT(EPERM); } - if (sg->region->access_mode == REGION_ACCESS_MODE_MMAP) { - void *target, *src, *dst; - assert(sg->region->info.vaddr != NULL); - target = sg->region->info.vaddr + sg->offset; - src = cmd == VFIO_USER_DMA_READ ? target : data; - dst = cmd == VFIO_USER_DMA_READ ? data : target; - memcpy(dst, src, sg->length); - return 0; - } + if (flags & VFU_SGL_DIRECT_ACCESS) { + if (sg->region->access_mode == REGION_ACCESS_MODE_MMAP) { + void *target, *src, *dst; + assert(sg->region->info.vaddr != NULL); + target = sg->region->info.vaddr + sg->offset; + src = cmd == VFIO_USER_DMA_READ ? target : data; + dst = cmd == VFIO_USER_DMA_READ ? data : target; + if (cmd == VFIO_USER_DMA_WRITE) { + dma_sgl_mark_dirty(vfu_ctx->dma, sg, 1); + } + memcpy(dst, src, sg->length); + return 0; + } - if (sg->region->access_mode == REGION_ACCESS_MODE_FILE_IO) { - size_t length, offset; - assert(sg->region->fd != -1); - length = sg->length; - offset = sg->offset + sg->region->offset; - while (length > 0) { - ssize_t ret; - if (cmd == VFIO_USER_DMA_READ) { - ret = pread(sg->region->fd, data, length, offset); - } else { - ret = pwrite(sg->region->fd, data, length, offset); + if (sg->region->access_mode == REGION_ACCESS_MODE_FILE_IO) { + size_t length, offset; + assert(sg->region->fd != -1); + length = sg->length; + offset = sg->offset + sg->region->offset; + if (cmd == VFIO_USER_DMA_WRITE) { + dma_sgl_mark_dirty(vfu_ctx->dma, sg, 1); } - if (ret <= 0) { - return ERROR_INT(EIO); + while (length > 0) { + ssize_t ret; + if (cmd == VFIO_USER_DMA_READ) { + ret = pread(sg->region->fd, data, length, offset); + } else { + ret = pwrite(sg->region->fd, data, length, offset); + } + if (ret <= 0) { + return ERROR_INT(EIO); + } + data += ret; + offset += ret; + length -= ret; } - data += ret; - offset += ret; - length -= ret; + return 0; } - return 0; - } - assert(sg->region->access_mode == REGION_ACCESS_MODE_MSG); + assert(sg->region->access_mode == REGION_ACCESS_MODE_MSG); + } rlen = sizeof(struct vfio_user_dma_region_access) + MIN(sg->length, vfu_ctx->client_max_data_xfer_size); @@ -2531,7 +2539,7 @@ vfu_sgl_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, size_t sg_cnt, { assert(vfu_ctx->pending.state == VFU_CTX_PENDING_NONE); - if (flags != 0) { + if (flags & ~VFU_SGL_DIRECT_ACCESS) { return ERROR_INT(EINVAL); } @@ -2540,7 +2548,7 @@ vfu_sgl_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, size_t sg_cnt, return ERROR_INT(ENOTSUP); } - return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_READ, sgl, data); + return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_READ, flags, sgl, data); } EXPORT int @@ -2549,7 +2557,7 @@ vfu_sgl_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, size_t sg_cnt, { assert(vfu_ctx->pending.state == VFU_CTX_PENDING_NONE); - if (flags != 0) { + if (flags & ~VFU_SGL_DIRECT_ACCESS) { return ERROR_INT(EINVAL); } @@ -2558,7 +2566,7 @@ vfu_sgl_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sgl, size_t sg_cnt, return ERROR_INT(ENOTSUP); } - return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, sgl, data); + return vfu_dma_transfer(vfu_ctx, VFIO_USER_DMA_WRITE, flags, sgl, data); } EXPORT bool From c35569ad696d662e401d5b9fdb7baa7caebf7d88 Mon Sep 17 00:00:00 2001 From: Mattias Nissler Date: Tue, 19 May 2026 08:03:43 -0700 Subject: [PATCH 7/8] Extend test_sgl_read_write.py to exercise all access modes Signed-off-by: Mattias Nissler --- test/py/libvfio_user.py | 4 +++ test/py/test_sgl_read_write.py | 65 +++++++++++++++++++++++++++------- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/test/py/libvfio_user.py b/test/py/libvfio_user.py index e756780d..0efbc592 100644 --- a/test/py/libvfio_user.py +++ b/test/py/libvfio_user.py @@ -214,6 +214,8 @@ def is_32bit(): VFIO_USER_F_DMA_REGION_READ = (1 << 0) VFIO_USER_F_DMA_REGION_WRITE = (1 << 1) +VFIO_USER_F_DMA_REGION_MMAP = (1 << 2) +VFIO_USER_F_DMA_REGION_FILE_IO = (1 << 3) VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP = (1 << 0) @@ -279,6 +281,8 @@ def is_32bit(): VFU_MIGR_CALLBACKS_VERS = 2 +VFU_SGL_DIRECT_ACCESS = 1 + SOCK_PATH = b"/tmp/vfio-user.sock.%d" % os.getpid() topdir = os.path.realpath(os.path.dirname(__file__) + "/../..") diff --git a/test/py/test_sgl_read_write.py b/test/py/test_sgl_read_write.py index 4fd31b62..6ec4779a 100644 --- a/test/py/test_sgl_read_write.py +++ b/test/py/test_sgl_read_write.py @@ -29,7 +29,10 @@ # from libvfio_user import * +import mmap +import pytest import select +import tempfile import threading MAP_ADDR = 0x10000000 @@ -39,6 +42,11 @@ client = None +class Counter: + def __init__(self): + self.value = 0 + + class DMARegionHandler: """ A helper to service DMA region accesses arriving over a socket. Accesses @@ -46,7 +54,7 @@ class DMARegionHandler: takes place on a separate thread so as to not block the test code. """ - def __handle_requests(sock, pipe, buf, lock, addr, error_no): + def __handle_requests(sock, pipe, buf, counter, lock, addr, error_no): while True: (ready, _, _) = select.select([sock, pipe], [], []) if pipe in ready: @@ -63,6 +71,7 @@ def __handle_requests(sock, pipe, buf, lock, addr, error_no): offset = access.addr - addr with lock: + counter.value += 1 if cmd == VFIO_USER_DMA_READ: data = buf[offset:offset + access.count] else: @@ -80,16 +89,20 @@ def __handle_requests(sock, pipe, buf, lock, addr, error_no): sock.close() def __init__(self, sock, addr, size, error_no=0): - self.data = bytearray(size) + self.file = tempfile.TemporaryFile() + self.file.truncate(size) + self.data = mmap.mmap(self.file.fileno(), 0) self.data_lock = threading.Lock() self.addr = addr (pipe_r, self.pipe_w) = os.pipe() # Duplicate the socket file descriptor so the thread can own it and # make sure it gets closed only when terminating the thread. sock = socket.socket(fileno=os.dup(sock.fileno())) + self.counter = Counter() thread = threading.Thread( target=DMARegionHandler.__handle_requests, - args=[sock, pipe_r, self.data, self.data_lock, addr, error_no]) + args=[sock, pipe_r, self.data, self.counter, self.data_lock, addr, + error_no]) thread.start() def shutdown(self): @@ -102,7 +115,15 @@ def read(self, addr, size): return self.data[offset:offset + size] -def setup_function(function): +access_mode_flags = [ + 0, + VFIO_USER_F_DMA_REGION_MMAP, + VFIO_USER_F_DMA_REGION_FILE_IO, +] + + +@pytest.fixture(autouse=True, params=access_mode_flags) +def flags(request): global ctx, client, dma_handler ctx = prepare_ctx_for_dma() assert ctx is not None @@ -117,26 +138,35 @@ def setup_function(function): client = connect_client(ctx, caps) assert client.client_cmd_socket is not None + dma_handler = DMARegionHandler(client.client_cmd_socket, MAP_ADDR, + MAP_SIZE) + + flags = (request.param | VFIO_USER_F_DMA_REGION_READ + | VFIO_USER_F_DMA_REGION_WRITE) payload = vfio_user_dma_map(argsz=len(vfio_user_dma_map()), - flags=(VFIO_USER_F_DMA_REGION_READ - | VFIO_USER_F_DMA_REGION_WRITE), + flags=flags, offset=0, addr=MAP_ADDR, size=MAP_SIZE) - msg(ctx, client.sock, VFIO_USER_DMA_MAP, payload) + if flags & (VFIO_USER_F_DMA_REGION_MMAP | VFIO_USER_F_DMA_REGION_FILE_IO): + fds = [dma_handler.file.fileno()] + else: + fds = [] - dma_handler = DMARegionHandler(client.client_cmd_socket, payload.addr, - payload.size) + msg(ctx, client.sock, VFIO_USER_DMA_MAP, payload, fds=fds) + + return flags def teardown_function(function): + global ctx, client, dma_handler dma_handler.shutdown() client.disconnect(ctx) vfu_destroy_ctx(ctx) -def test_dma_read_write(): +def test_dma_read_write(flags): ret, sg = vfu_addr_to_sgl(ctx, dma_addr=MAP_ADDR + 0x1000, length=64, @@ -145,13 +175,19 @@ def test_dma_read_write(): assert ret == 1 data = bytearray([x & 0xff for x in range(0, sg[0].length)]) - assert vfu_sgl_write(ctx, sg, 1, data, 0) == 0 + assert vfu_sgl_write(ctx, sg, 1, data, VFU_SGL_DIRECT_ACCESS) == 0 - assert vfu_sgl_read(ctx, sg, 1, 0) == (0, data) + assert vfu_sgl_read(ctx, sg, 1, VFU_SGL_DIRECT_ACCESS) == (0, data) assert dma_handler.read(sg[0].dma_addr + sg[0].offset, sg[0].length) == data + # Make sure we only see DMA IPC when there is no other option. + if flags & (VFIO_USER_F_DMA_REGION_MMAP | VFIO_USER_F_DMA_REGION_FILE_IO): + assert dma_handler.counter.value == 0 + else: + assert dma_handler.counter.value == 2 + def test_dma_read_write_large(): ret, sg = vfu_addr_to_sgl(ctx, @@ -170,7 +206,10 @@ def test_dma_read_write_large(): sg[0].length) == data -def test_dma_read_write_error(): +def test_dma_read_write_error(flags): + if flags & (VFIO_USER_F_DMA_REGION_MMAP | VFIO_USER_F_DMA_REGION_FILE_IO): + pytest.skip("Can't generate error for direct file access modes") + # Reinitialize the handler to return EIO. global dma_handler dma_handler.shutdown() From d6729f97d56e3bb426d45bd68626b67d7a060388 Mon Sep 17 00:00:00 2001 From: Mattias Nissler Date: Tue, 2 Jun 2026 05:29:30 -0700 Subject: [PATCH 8/8] Address review feedback Signed-off-by: Mattias Nissler --- include/libvfio-user.h | 2 +- lib/dma.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/libvfio-user.h b/include/libvfio-user.h index e771e1ac..76181ad2 100644 --- a/include/libvfio-user.h +++ b/include/libvfio-user.h @@ -787,7 +787,7 @@ vfu_sgl_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, size_t sg_cnt, * @sg: array of scatter/gather entries * @sg_cnt: number of scatter/gather entries * @data: data buffer to write - * @flags: must be 0 + * @flags: VFU_SGL_* flags * * @returns 0 on success, -1 on failure. Sets errno. */ diff --git a/lib/dma.h b/lib/dma.h index c36e9366..890143f5 100644 --- a/lib/dma.h +++ b/lib/dma.h @@ -309,6 +309,7 @@ dma_sgl_get(dma_controller_t *dma, dma_sg_t *sgl, struct iovec *iov, size_t cnt) if (sg->region->access_mode != REGION_ACCESS_MODE_MMAP) { return ERROR_INT(EFAULT); } + assert(sg->region->info.vaddr != NULL); #ifdef DEBUG_SGL vfu_log(dma->vfu_ctx, LOG_DEBUG, "map %p-%p",