From 1460432cb513f0c16136ed132c20ecfbf8ccf942 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Oct 2011 15:56:05 -0400 Subject: iommu: Add iommu_device_group callback and iommu_group sysfs entry An IOMMU group is a set of devices for which the IOMMU cannot distinguish transactions. For PCI devices, a group often occurs when a PCI bridge is involved. Transactions from any device behind the bridge appear to be sourced from the bridge itself. We leave it to the IOMMU driver to define the grouping restraints for their platform. Using this new interface, the group for a device can be retrieved using the iommu_device_group() callback. Users will compare the value returned against the value returned for other devices to determine whether they are part of the same group. Devices with no group are not translated by the IOMMU. There should be no expectations about the group numbers as they may be arbitrarily assigned by the IOMMU driver and may not be persistent across boots. We also provide a sysfs interface to the group numbers here so that userspace can understand IOMMU dependencies between devices for managing safe, userspace drivers. [Some code changes by Joerg Roedel ] Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 2fb2963df553..9c35be4b333f 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -25,8 +25,59 @@ #include #include +static ssize_t show_iommu_group(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned int groupid; + + if (iommu_device_group(dev, &groupid)) + return 0; + + return sprintf(buf, "%u", groupid); +} +static DEVICE_ATTR(iommu_group, S_IRUGO, show_iommu_group, NULL); + +static int add_iommu_group(struct device *dev, void *data) +{ + unsigned int groupid; + + if (iommu_device_group(dev, &groupid) == 0) + return device_create_file(dev, &dev_attr_iommu_group); + + return 0; +} + +static int remove_iommu_group(struct device *dev) +{ + unsigned int groupid; + + if (iommu_device_group(dev, &groupid) == 0) + device_remove_file(dev, &dev_attr_iommu_group); + + return 0; +} + +static int iommu_device_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + + if (action == BUS_NOTIFY_ADD_DEVICE) + return add_iommu_group(dev, NULL); + else if (action == BUS_NOTIFY_DEL_DEVICE) + return remove_iommu_group(dev); + + return 0; +} + +static struct notifier_block iommu_device_nb = { + .notifier_call = iommu_device_notifier, +}; + static void iommu_bus_init(struct bus_type *bus, struct iommu_ops *ops) { + bus_register_notifier(bus, &iommu_device_nb); + bus_for_each_dev(bus, NULL, NULL, add_iommu_group); } /** @@ -186,3 +237,12 @@ int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order) return domain->ops->unmap(domain, iova, gfp_order); } EXPORT_SYMBOL_GPL(iommu_unmap); + +int iommu_device_group(struct device *dev, unsigned int *groupid) +{ + if (iommu_present(dev->bus) && dev->bus->iommu_ops->device_group) + return dev->bus->iommu_ops->device_group(dev, groupid); + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(iommu_device_group); -- cgit v1.2.3-55-g7522 From 70ae6f0d55bd216b2f773fa5fa5018c0490a9e50 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Oct 2011 15:56:11 -0400 Subject: iommu/intel: Implement iommu_device_group We generally have BDF granularity for devices, so we just need to make sure devices aren't hidden behind PCIe-to-PCI bridges. We can then make up a group number that's simply the concatenated seg|bus|dev|fn so we don't have to track them (not that users should depend on that). Signed-off-by: Alex Williamson Acked-By: David Woodhouse Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 46 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c0c7820d4c46..39ca6bb17e13 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4060,6 +4060,51 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } +/* + * Group numbers are arbitrary. Device with the same group number + * indicate the iommu cannot differentiate between them. To avoid + * tracking used groups we just use the seg|bus|devfn of the lowest + * level we're able to differentiate devices + */ +static int intel_iommu_device_group(struct device *dev, unsigned int *groupid) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev *bridge; + union { + struct { + u8 devfn; + u8 bus; + u16 segment; + } pci; + u32 group; + } id; + + if (iommu_no_mapping(dev)) + return -ENODEV; + + id.pci.segment = pci_domain_nr(pdev->bus); + id.pci.bus = pdev->bus->number; + id.pci.devfn = pdev->devfn; + + if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn)) + return -ENODEV; + + bridge = pci_find_upstream_pcie_bridge(pdev); + if (bridge) { + if (pci_is_pcie(bridge)) { + id.pci.bus = bridge->subordinate->number; + id.pci.devfn = 0; + } else { + id.pci.bus = bridge->bus->number; + id.pci.devfn = bridge->devfn; + } + } + + *groupid = id.group; + + return 0; +} + static struct iommu_ops intel_iommu_ops = { .domain_init = intel_iommu_domain_init, .domain_destroy = intel_iommu_domain_destroy, @@ -4069,6 +4114,7 @@ static struct iommu_ops intel_iommu_ops = { .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, .domain_has_cap = intel_iommu_domain_has_cap, + .device_group = intel_iommu_device_group, }; static void __devinit quirk_iommu_rwbf(struct pci_dev *dev) -- cgit v1.2.3-55-g7522 From 8fbdce659549d93dfb257ec4eabacf63a188e506 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Oct 2011 15:56:18 -0400 Subject: iommu/amd: Implement iommu_device_group Just use the amd_iommu_alias_table directly. Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'drivers') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 4ee277a8521a..1d82b631d09c 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2773,6 +2773,18 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } +static int amd_iommu_device_group(struct device *dev, unsigned int *groupid) +{ + struct iommu_dev_data *dev_data = dev->archdata.iommu; + + if (!dev_data) + return -ENODEV; + + *groupid = amd_iommu_alias_table[dev_data->devid]; + + return 0; +} + static struct iommu_ops amd_iommu_ops = { .domain_init = amd_iommu_domain_init, .domain_destroy = amd_iommu_domain_destroy, @@ -2782,6 +2794,7 @@ static struct iommu_ops amd_iommu_ops = { .unmap = amd_iommu_unmap, .iova_to_phys = amd_iommu_iova_to_phys, .domain_has_cap = amd_iommu_domain_has_cap, + .device_group = amd_iommu_device_group, }; /***************************************************************************** -- cgit v1.2.3-55-g7522 From bcb71abe7d4c5a0d0368c67da0a7def4fc73497a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 21 Oct 2011 15:56:24 -0400 Subject: iommu: Add option to group multi-function devices The option iommu=group_mf indicates the that the iommu driver should expose all functions of a multi-function PCI device as the same iommu_device_group. This is useful for disallowing individual functions being exposed as independent devices to userspace as there are often hidden dependencies. Virtual functions are not affected by this option. Signed-off-by: Alex Williamson Signed-off-by: Joerg Roedel --- Documentation/kernel-parameters.txt | 4 +++- arch/ia64/include/asm/iommu.h | 2 ++ arch/ia64/kernel/pci-dma.c | 1 + arch/x86/include/asm/iommu.h | 1 + arch/x86/kernel/pci-dma.c | 11 +++++++++++ drivers/iommu/amd_iommu.c | 10 +++++++++- drivers/iommu/intel-iommu.c | 3 +++ 7 files changed, 30 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a0c5c5f4fce6..e1b6e4469845 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1059,7 +1059,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nomerge forcesac soft - pt [x86, IA-64] + pt [x86, IA-64] + group_mf [x86, IA-64] + io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h index 105c93b00b1b..b6a809fa2995 100644 --- a/arch/ia64/include/asm/iommu.h +++ b/arch/ia64/include/asm/iommu.h @@ -11,10 +11,12 @@ extern void no_iommu_init(void); extern int force_iommu, no_iommu; extern int iommu_pass_through; extern int iommu_detected; +extern int iommu_group_mf; #else #define iommu_pass_through (0) #define no_iommu (1) #define iommu_detected (0) +#define iommu_group_mf (0) #endif extern void iommu_dma_init(void); extern void machvec_init(const char *name); diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index c16162c70860..eb1175720050 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -33,6 +33,7 @@ int force_iommu __read_mostly; #endif int iommu_pass_through; +int iommu_group_mf; /* Dummy device used for NULL arguments (normally ISA). Better would be probably a smaller DMA mask, but this is bug-to-bug compatible diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 345c99cef152..dffc38ee6255 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -5,6 +5,7 @@ extern struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int iommu_pass_through; +extern int iommu_group_mf; /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 80dc793b3f63..1c4d769e21ea 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -45,6 +45,15 @@ int iommu_detected __read_mostly = 0; */ int iommu_pass_through __read_mostly; +/* + * Group multi-function PCI devices into a single device-group for the + * iommu_device_group interface. This tells the iommu driver to pretend + * it cannot distinguish between functions of a device, exposing only one + * group for the device. Useful for disallowing use of individual PCI + * functions from userspace drivers. + */ +int iommu_group_mf __read_mostly; + extern struct iommu_table_entry __iommu_table[], __iommu_table_end[]; /* Dummy device used for NULL arguments (normally ISA). */ @@ -169,6 +178,8 @@ static __init int iommu_setup(char *p) #endif if (!strncmp(p, "pt", 2)) iommu_pass_through = 1; + if (!strncmp(p, "group_mf", 8)) + iommu_group_mf = 1; gart_parse_options(p); diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1d82b631d09c..6f7553684c1e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2776,11 +2776,19 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain, static int amd_iommu_device_group(struct device *dev, unsigned int *groupid) { struct iommu_dev_data *dev_data = dev->archdata.iommu; + struct pci_dev *pdev = to_pci_dev(dev); + u16 devid; if (!dev_data) return -ENODEV; - *groupid = amd_iommu_alias_table[dev_data->devid]; + if (pdev->is_virtfn || !iommu_group_mf) + devid = dev_data->devid; + else + devid = calc_devid(pdev->bus->number, + PCI_DEVFN(PCI_SLOT(pdev->devfn), 0)); + + *groupid = amd_iommu_alias_table[devid]; return 0; } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 39ca6bb17e13..9ef16d664a99 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4100,6 +4100,9 @@ static int intel_iommu_device_group(struct device *dev, unsigned int *groupid) } } + if (!pdev->is_virtfn && iommu_group_mf) + id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0); + *groupid = id.group; return 0; -- cgit v1.2.3-55-g7522