608 lines
15 KiB
C
608 lines
15 KiB
C
|
// SPDX-License-Identifier: GPL-2.0-only
|
||
|
/*
|
||
|
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
|
||
|
*
|
||
|
* VFIO container (/dev/vfio/vfio)
|
||
|
*/
|
||
|
#include <linux/file.h>
|
||
|
#include <linux/slab.h>
|
||
|
#include <linux/fs.h>
|
||
|
#include <linux/capability.h>
|
||
|
#include <linux/iommu.h>
|
||
|
#include <linux/miscdevice.h>
|
||
|
#include <linux/vfio.h>
|
||
|
#include <uapi/linux/vfio.h>
|
||
|
|
||
|
#include "vfio.h"
|
||
|
|
||
|
struct vfio_container {
|
||
|
struct kref kref;
|
||
|
struct list_head group_list;
|
||
|
struct rw_semaphore group_lock;
|
||
|
struct vfio_iommu_driver *iommu_driver;
|
||
|
void *iommu_data;
|
||
|
bool noiommu;
|
||
|
};
|
||
|
|
||
|
static struct vfio {
|
||
|
struct list_head iommu_drivers_list;
|
||
|
struct mutex iommu_drivers_lock;
|
||
|
} vfio;
|
||
|
|
||
|
static void *vfio_noiommu_open(unsigned long arg)
|
||
|
{
|
||
|
if (arg != VFIO_NOIOMMU_IOMMU)
|
||
|
return ERR_PTR(-EINVAL);
|
||
|
if (!capable(CAP_SYS_RAWIO))
|
||
|
return ERR_PTR(-EPERM);
|
||
|
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
/* ->release() for the no-IOMMU backend: ->open() hands out no state to free. */
static void vfio_noiommu_release(void *iommu_data)
{
	/* Intentionally empty. */
}
|
||
|
|
||
|
static long vfio_noiommu_ioctl(void *iommu_data,
|
||
|
unsigned int cmd, unsigned long arg)
|
||
|
{
|
||
|
if (cmd == VFIO_CHECK_EXTENSION)
|
||
|
return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
|
||
|
|
||
|
return -ENOTTY;
|
||
|
}
|
||
|
|
||
|
static int vfio_noiommu_attach_group(void *iommu_data,
|
||
|
struct iommu_group *iommu_group, enum vfio_group_type type)
|
||
|
{
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/* ->detach_group() for the no-IOMMU backend: attach did nothing to undo. */
static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
	/* Intentionally empty. */
}
|
||
|
|
||
|
static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
|
||
|
.name = "vfio-noiommu",
|
||
|
.owner = THIS_MODULE,
|
||
|
.open = vfio_noiommu_open,
|
||
|
.release = vfio_noiommu_release,
|
||
|
.ioctl = vfio_noiommu_ioctl,
|
||
|
.attach_group = vfio_noiommu_attach_group,
|
||
|
.detach_group = vfio_noiommu_detach_group,
|
||
|
};
|
||
|
|
||
|
/*
|
||
|
* Only noiommu containers can use vfio-noiommu and noiommu containers can only
|
||
|
* use vfio-noiommu.
|
||
|
*/
|
||
|
static bool vfio_iommu_driver_allowed(struct vfio_container *container,
|
||
|
const struct vfio_iommu_driver *driver)
|
||
|
{
|
||
|
if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
|
||
|
return true;
|
||
|
return container->noiommu == (driver->ops == &vfio_noiommu_ops);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* IOMMU driver registration
|
||
|
*/
|
||
|
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
|
||
|
{
|
||
|
struct vfio_iommu_driver *driver, *tmp;
|
||
|
|
||
|
if (WARN_ON(!ops->register_device != !ops->unregister_device))
|
||
|
return -EINVAL;
|
||
|
|
||
|
driver = kzalloc(sizeof(*driver), GFP_KERNEL);
|
||
|
if (!driver)
|
||
|
return -ENOMEM;
|
||
|
|
||
|
driver->ops = ops;
|
||
|
|
||
|
mutex_lock(&vfio.iommu_drivers_lock);
|
||
|
|
||
|
/* Check for duplicates */
|
||
|
list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
|
||
|
if (tmp->ops == ops) {
|
||
|
mutex_unlock(&vfio.iommu_drivers_lock);
|
||
|
kfree(driver);
|
||
|
return -EINVAL;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
|
||
|
|
||
|
mutex_unlock(&vfio.iommu_drivers_lock);
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
|
||
|
|
||
|
void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
|
||
|
{
|
||
|
struct vfio_iommu_driver *driver;
|
||
|
|
||
|
mutex_lock(&vfio.iommu_drivers_lock);
|
||
|
list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
|
||
|
if (driver->ops == ops) {
|
||
|
list_del(&driver->vfio_next);
|
||
|
mutex_unlock(&vfio.iommu_drivers_lock);
|
||
|
kfree(driver);
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
mutex_unlock(&vfio.iommu_drivers_lock);
|
||
|
}
|
||
|
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
|
||
|
|
||
|
/*
|
||
|
* Container objects - containers are created when /dev/vfio/vfio is
|
||
|
* opened, but their lifecycle extends until the last user is done, so
|
||
|
* it's freed via kref. Must support container/group/device being
|
||
|
* closed in any order.
|
||
|
*/
|
||
|
static void vfio_container_release(struct kref *kref)
|
||
|
{
|
||
|
struct vfio_container *container;
|
||
|
container = container_of(kref, struct vfio_container, kref);
|
||
|
|
||
|
kfree(container);
|
||
|
}
|
||
|
|
||
|
static void vfio_container_get(struct vfio_container *container)
|
||
|
{
|
||
|
kref_get(&container->kref);
|
||
|
}
|
||
|
|
||
|
static void vfio_container_put(struct vfio_container *container)
|
||
|
{
|
||
|
kref_put(&container->kref, vfio_container_release);
|
||
|
}
|
||
|
|
||
|
void vfio_device_container_register(struct vfio_device *device)
|
||
|
{
|
||
|
struct vfio_iommu_driver *iommu_driver =
|
||
|
device->group->container->iommu_driver;
|
||
|
|
||
|
if (iommu_driver && iommu_driver->ops->register_device)
|
||
|
iommu_driver->ops->register_device(
|
||
|
device->group->container->iommu_data, device);
|
||
|
}
|
||
|
|
||
|
void vfio_device_container_unregister(struct vfio_device *device)
|
||
|
{
|
||
|
struct vfio_iommu_driver *iommu_driver =
|
||
|
device->group->container->iommu_driver;
|
||
|
|
||
|
if (iommu_driver && iommu_driver->ops->unregister_device)
|
||
|
iommu_driver->ops->unregister_device(
|
||
|
device->group->container->iommu_data, device);
|
||
|
}
|
||
|
|
||
|
static long
|
||
|
vfio_container_ioctl_check_extension(struct vfio_container *container,
|
||
|
unsigned long arg)
|
||
|
{
|
||
|
struct vfio_iommu_driver *driver;
|
||
|
long ret = 0;
|
||
|
|
||
|
down_read(&container->group_lock);
|
||
|
|
||
|
driver = container->iommu_driver;
|
||
|
|
||
|
switch (arg) {
|
||
|
/* No base extensions yet */
|
||
|
default:
|
||
|
/*
|
||
|
* If no driver is set, poll all registered drivers for
|
||
|
* extensions and return the first positive result. If
|
||
|
* a driver is already set, further queries will be passed
|
||
|
* only to that driver.
|
||
|
*/
|
||
|
if (!driver) {
|
||
|
mutex_lock(&vfio.iommu_drivers_lock);
|
||
|
list_for_each_entry(driver, &vfio.iommu_drivers_list,
|
||
|
vfio_next) {
|
||
|
|
||
|
if (!list_empty(&container->group_list) &&
|
||
|
!vfio_iommu_driver_allowed(container,
|
||
|
driver))
|
||
|
continue;
|
||
|
if (!try_module_get(driver->ops->owner))
|
||
|
continue;
|
||
|
|
||
|
ret = driver->ops->ioctl(NULL,
|
||
|
VFIO_CHECK_EXTENSION,
|
||
|
arg);
|
||
|
module_put(driver->ops->owner);
|
||
|
if (ret > 0)
|
||
|
break;
|
||
|
}
|
||
|
mutex_unlock(&vfio.iommu_drivers_lock);
|
||
|
} else
|
||
|
ret = driver->ops->ioctl(container->iommu_data,
|
||
|
VFIO_CHECK_EXTENSION, arg);
|
||
|
}
|
||
|
|
||
|
up_read(&container->group_lock);
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
/* hold write lock on container->group_lock */
|
||
|
static int __vfio_container_attach_groups(struct vfio_container *container,
|
||
|
struct vfio_iommu_driver *driver,
|
||
|
void *data)
|
||
|
{
|
||
|
struct vfio_group *group;
|
||
|
int ret = -ENODEV;
|
||
|
|
||
|
list_for_each_entry(group, &container->group_list, container_next) {
|
||
|
ret = driver->ops->attach_group(data, group->iommu_group,
|
||
|
group->type);
|
||
|
if (ret)
|
||
|
goto unwind;
|
||
|
}
|
||
|
|
||
|
return ret;
|
||
|
|
||
|
unwind:
|
||
|
list_for_each_entry_continue_reverse(group, &container->group_list,
|
||
|
container_next) {
|
||
|
driver->ops->detach_group(data, group->iommu_group);
|
||
|
}
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
static long vfio_ioctl_set_iommu(struct vfio_container *container,
|
||
|
unsigned long arg)
|
||
|
{
|
||
|
struct vfio_iommu_driver *driver;
|
||
|
long ret = -ENODEV;
|
||
|
|
||
|
down_write(&container->group_lock);
|
||
|
|
||
|
/*
|
||
|
* The container is designed to be an unprivileged interface while
|
||
|
* the group can be assigned to specific users. Therefore, only by
|
||
|
* adding a group to a container does the user get the privilege of
|
||
|
* enabling the iommu, which may allocate finite resources. There
|
||
|
* is no unset_iommu, but by removing all the groups from a container,
|
||
|
* the container is deprivileged and returns to an unset state.
|
||
|
*/
|
||
|
if (list_empty(&container->group_list) || container->iommu_driver) {
|
||
|
up_write(&container->group_lock);
|
||
|
return -EINVAL;
|
||
|
}
|
||
|
|
||
|
mutex_lock(&vfio.iommu_drivers_lock);
|
||
|
list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
|
||
|
void *data;
|
||
|
|
||
|
if (!vfio_iommu_driver_allowed(container, driver))
|
||
|
continue;
|
||
|
if (!try_module_get(driver->ops->owner))
|
||
|
continue;
|
||
|
|
||
|
/*
|
||
|
* The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
|
||
|
* so test which iommu driver reported support for this
|
||
|
* extension and call open on them. We also pass them the
|
||
|
* magic, allowing a single driver to support multiple
|
||
|
* interfaces if they'd like.
|
||
|
*/
|
||
|
if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
|
||
|
module_put(driver->ops->owner);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
data = driver->ops->open(arg);
|
||
|
if (IS_ERR(data)) {
|
||
|
ret = PTR_ERR(data);
|
||
|
module_put(driver->ops->owner);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
ret = __vfio_container_attach_groups(container, driver, data);
|
||
|
if (ret) {
|
||
|
driver->ops->release(data);
|
||
|
module_put(driver->ops->owner);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
container->iommu_driver = driver;
|
||
|
container->iommu_data = data;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
mutex_unlock(&vfio.iommu_drivers_lock);
|
||
|
up_write(&container->group_lock);
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
static long vfio_fops_unl_ioctl(struct file *filep,
|
||
|
unsigned int cmd, unsigned long arg)
|
||
|
{
|
||
|
struct vfio_container *container = filep->private_data;
|
||
|
struct vfio_iommu_driver *driver;
|
||
|
void *data;
|
||
|
long ret = -EINVAL;
|
||
|
|
||
|
if (!container)
|
||
|
return ret;
|
||
|
|
||
|
switch (cmd) {
|
||
|
case VFIO_GET_API_VERSION:
|
||
|
ret = VFIO_API_VERSION;
|
||
|
break;
|
||
|
case VFIO_CHECK_EXTENSION:
|
||
|
ret = vfio_container_ioctl_check_extension(container, arg);
|
||
|
break;
|
||
|
case VFIO_SET_IOMMU:
|
||
|
ret = vfio_ioctl_set_iommu(container, arg);
|
||
|
break;
|
||
|
default:
|
||
|
driver = container->iommu_driver;
|
||
|
data = container->iommu_data;
|
||
|
|
||
|
if (driver) /* passthrough all unrecognized ioctls */
|
||
|
ret = driver->ops->ioctl(data, cmd, arg);
|
||
|
}
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
static int vfio_fops_open(struct inode *inode, struct file *filep)
|
||
|
{
|
||
|
struct vfio_container *container;
|
||
|
|
||
|
container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT);
|
||
|
if (!container)
|
||
|
return -ENOMEM;
|
||
|
|
||
|
INIT_LIST_HEAD(&container->group_list);
|
||
|
init_rwsem(&container->group_lock);
|
||
|
kref_init(&container->kref);
|
||
|
|
||
|
filep->private_data = container;
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static int vfio_fops_release(struct inode *inode, struct file *filep)
|
||
|
{
|
||
|
struct vfio_container *container = filep->private_data;
|
||
|
|
||
|
filep->private_data = NULL;
|
||
|
|
||
|
vfio_container_put(container);
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static const struct file_operations vfio_fops = {
|
||
|
.owner = THIS_MODULE,
|
||
|
.open = vfio_fops_open,
|
||
|
.release = vfio_fops_release,
|
||
|
.unlocked_ioctl = vfio_fops_unl_ioctl,
|
||
|
.compat_ioctl = compat_ptr_ioctl,
|
||
|
};
|
||
|
|
||
|
struct vfio_container *vfio_container_from_file(struct file *file)
|
||
|
{
|
||
|
struct vfio_container *container;
|
||
|
|
||
|
/* Sanity check, is this really our fd? */
|
||
|
if (file->f_op != &vfio_fops)
|
||
|
return NULL;
|
||
|
|
||
|
container = file->private_data;
|
||
|
WARN_ON(!container); /* fget ensures we don't race vfio_release */
|
||
|
return container;
|
||
|
}
|
||
|
|
||
|
static struct miscdevice vfio_dev = {
|
||
|
.minor = VFIO_MINOR,
|
||
|
.name = "vfio",
|
||
|
.fops = &vfio_fops,
|
||
|
.nodename = "vfio/vfio",
|
||
|
.mode = S_IRUGO | S_IWUGO,
|
||
|
};
|
||
|
|
||
|
int vfio_container_attach_group(struct vfio_container *container,
|
||
|
struct vfio_group *group)
|
||
|
{
|
||
|
struct vfio_iommu_driver *driver;
|
||
|
int ret = 0;
|
||
|
|
||
|
lockdep_assert_held(&group->group_lock);
|
||
|
|
||
|
if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
|
||
|
return -EPERM;
|
||
|
|
||
|
down_write(&container->group_lock);
|
||
|
|
||
|
/* Real groups and fake groups cannot mix */
|
||
|
if (!list_empty(&container->group_list) &&
|
||
|
container->noiommu != (group->type == VFIO_NO_IOMMU)) {
|
||
|
ret = -EPERM;
|
||
|
goto out_unlock_container;
|
||
|
}
|
||
|
|
||
|
if (group->type == VFIO_IOMMU) {
|
||
|
ret = iommu_group_claim_dma_owner(group->iommu_group, group);
|
||
|
if (ret)
|
||
|
goto out_unlock_container;
|
||
|
}
|
||
|
|
||
|
driver = container->iommu_driver;
|
||
|
if (driver) {
|
||
|
ret = driver->ops->attach_group(container->iommu_data,
|
||
|
group->iommu_group,
|
||
|
group->type);
|
||
|
if (ret) {
|
||
|
if (group->type == VFIO_IOMMU)
|
||
|
iommu_group_release_dma_owner(
|
||
|
group->iommu_group);
|
||
|
goto out_unlock_container;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
group->container = container;
|
||
|
group->container_users = 1;
|
||
|
container->noiommu = (group->type == VFIO_NO_IOMMU);
|
||
|
list_add(&group->container_next, &container->group_list);
|
||
|
|
||
|
/* Get a reference on the container and mark a user within the group */
|
||
|
vfio_container_get(container);
|
||
|
|
||
|
out_unlock_container:
|
||
|
up_write(&container->group_lock);
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
void vfio_group_detach_container(struct vfio_group *group)
|
||
|
{
|
||
|
struct vfio_container *container = group->container;
|
||
|
struct vfio_iommu_driver *driver;
|
||
|
|
||
|
lockdep_assert_held(&group->group_lock);
|
||
|
WARN_ON(group->container_users != 1);
|
||
|
|
||
|
down_write(&container->group_lock);
|
||
|
|
||
|
driver = container->iommu_driver;
|
||
|
if (driver)
|
||
|
driver->ops->detach_group(container->iommu_data,
|
||
|
group->iommu_group);
|
||
|
|
||
|
if (group->type == VFIO_IOMMU)
|
||
|
iommu_group_release_dma_owner(group->iommu_group);
|
||
|
|
||
|
group->container = NULL;
|
||
|
group->container_users = 0;
|
||
|
list_del(&group->container_next);
|
||
|
|
||
|
/* Detaching the last group deprivileges a container, remove iommu */
|
||
|
if (driver && list_empty(&container->group_list)) {
|
||
|
driver->ops->release(container->iommu_data);
|
||
|
module_put(driver->ops->owner);
|
||
|
container->iommu_driver = NULL;
|
||
|
container->iommu_data = NULL;
|
||
|
}
|
||
|
|
||
|
up_write(&container->group_lock);
|
||
|
|
||
|
vfio_container_put(container);
|
||
|
}
|
||
|
|
||
|
int vfio_group_use_container(struct vfio_group *group)
|
||
|
{
|
||
|
lockdep_assert_held(&group->group_lock);
|
||
|
|
||
|
/*
|
||
|
* The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
|
||
|
* VFIO_SET_IOMMU hasn't been done yet.
|
||
|
*/
|
||
|
if (!group->container->iommu_driver)
|
||
|
return -EINVAL;
|
||
|
|
||
|
if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
|
||
|
return -EPERM;
|
||
|
|
||
|
get_file(group->opened_file);
|
||
|
group->container_users++;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
void vfio_group_unuse_container(struct vfio_group *group)
|
||
|
{
|
||
|
lockdep_assert_held(&group->group_lock);
|
||
|
|
||
|
WARN_ON(group->container_users <= 1);
|
||
|
group->container_users--;
|
||
|
fput(group->opened_file);
|
||
|
}
|
||
|
|
||
|
int vfio_device_container_pin_pages(struct vfio_device *device,
|
||
|
dma_addr_t iova, int npage,
|
||
|
int prot, struct page **pages)
|
||
|
{
|
||
|
struct vfio_container *container = device->group->container;
|
||
|
struct iommu_group *iommu_group = device->group->iommu_group;
|
||
|
struct vfio_iommu_driver *driver = container->iommu_driver;
|
||
|
|
||
|
if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
|
||
|
return -E2BIG;
|
||
|
|
||
|
if (unlikely(!driver || !driver->ops->pin_pages))
|
||
|
return -ENOTTY;
|
||
|
return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
|
||
|
npage, prot, pages);
|
||
|
}
|
||
|
|
||
|
void vfio_device_container_unpin_pages(struct vfio_device *device,
|
||
|
dma_addr_t iova, int npage)
|
||
|
{
|
||
|
struct vfio_container *container = device->group->container;
|
||
|
|
||
|
if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
|
||
|
return;
|
||
|
|
||
|
container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
|
||
|
npage);
|
||
|
}
|
||
|
|
||
|
int vfio_device_container_dma_rw(struct vfio_device *device,
|
||
|
dma_addr_t iova, void *data,
|
||
|
size_t len, bool write)
|
||
|
{
|
||
|
struct vfio_container *container = device->group->container;
|
||
|
struct vfio_iommu_driver *driver = container->iommu_driver;
|
||
|
|
||
|
if (unlikely(!driver || !driver->ops->dma_rw))
|
||
|
return -ENOTTY;
|
||
|
return driver->ops->dma_rw(container->iommu_data, iova, data, len,
|
||
|
write);
|
||
|
}
|
||
|
|
||
|
/*
 * Set up the container layer: initialise the backend registry, register the
 * misc device and, when CONFIG_VFIO_NOIOMMU is enabled, register the
 * built-in no-IOMMU backend.
 *
 * Returns 0 on success or the error of the step that failed.  If the
 * backend registration fails, the misc device is deregistered again.
 */
int __init vfio_container_init(void)
{
	int ret;

	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
	mutex_init(&vfio.iommu_drivers_lock);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
		return 0;

	ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
	if (ret)
		misc_deregister(&vfio_dev);

	return ret;
}
|
||
|
|
||
|
void vfio_container_cleanup(void)
|
||
|
{
|
||
|
if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
|
||
|
vfio_unregister_iommu_driver(&vfio_noiommu_ops);
|
||
|
misc_deregister(&vfio_dev);
|
||
|
mutex_destroy(&vfio.iommu_drivers_lock);
|
||
|
}
|
||
|
|
||
|
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
|
||
|
MODULE_ALIAS("devname:vfio/vfio");
|