summary | refs | log | tree | commit | diff | stats
path: root/drivers/iommu/intel-iommu.c
diff options
context:
space:
mode:
author Linus Torvalds 2017-09-10 00:03:24 +0200
committer Linus Torvalds 2017-09-10 00:03:24 +0200
commit 4dfc2788033d30dfccfd4268e06dd73ce2c654ed (patch)
tree f6675b959f4aa12e64e68383874683c4cea46334 /drivers/iommu/intel-iommu.c
parent Merge tag 'for-linus-20170904' of git://git.infradead.org/linux-mtd (diff)
parent Merge branches 'arm/exynos', 'arm/renesas', 'arm/rockchip', 'arm/omap', 'arm/... (diff)
download kernel-qcow2-linux-4dfc2788033d30dfccfd4268e06dd73ce2c654ed.tar.gz
download kernel-qcow2-linux-4dfc2788033d30dfccfd4268e06dd73ce2c654ed.tar.xz
download kernel-qcow2-linux-4dfc2788033d30dfccfd4268e06dd73ce2c654ed.zip
Merge tag 'iommu-updates-v4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull IOMMU updates from Joerg Roedel:
 "Slightly more changes than usual this time:

   - KDump Kernel IOMMU take-over code for AMD IOMMU. The code now
     tries to preserve the mappings of the kernel so that master aborts
     for devices are avoided. Master aborts cause some devices to fail
     in the kdump kernel, so this code makes the dump more likely to
     succeed when AMD IOMMU is enabled.

   - common flush queue implementation for IOVA code users. The code is
     still optional, but AMD and Intel IOMMU drivers had their own
     implementation which is now unified.

   - finish support for iommu-groups. All drivers implement this
     feature now so that IOMMU core code can rely on it.

   - finish support for 'struct iommu_device' in iommu drivers. All
     drivers now use the interface.

   - new functions in the IOMMU-API for explicit IO/TLB flushing. This
     will help to reduce the number of IO/TLB flushes when IOMMU drivers
     support this interface.

   - support for mt2712 in the Mediatek IOMMU driver

   - new IOMMU driver for QCOM hardware

   - system PM support for ARM-SMMU

   - shutdown method for ARM-SMMU-v3

   - some constification patches

   - various other small improvements and fixes"

* tag 'iommu-updates-v4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (87 commits)
  iommu/vt-d: Don't be too aggressive when clearing one context entry
  iommu: Introduce Interface for IOMMU TLB Flushing
  iommu/s390: Constify iommu_ops
  iommu/vt-d: Avoid calling virt_to_phys() on null pointer
  iommu/vt-d: IOMMU Page Request needs to check if address is canonical.
  arm/tegra: Call bus_set_iommu() after iommu_device_register()
  iommu/exynos: Constify iommu_ops
  iommu/ipmmu-vmsa: Make ipmmu_gather_ops const
  iommu/ipmmu-vmsa: Rereserving a free context before setting up a pagetable
  iommu/amd: Rename a few flush functions
  iommu/amd: Check if domain is NULL in get_domain() and return -EBUSY
  iommu/mediatek: Fix a build warning of BIT(32) in ARM
  iommu/mediatek: Fix a build fail of m4u_type
  iommu: qcom: annotate PM functions as __maybe_unused
  iommu/pamu: Fix PAMU boot crash
  memory: mtk-smi: Degrade SMI init to module_init
  iommu/mediatek: Enlarge the validate PA range for 4GB mode
  iommu/mediatek: Disable iommu clock when system suspend
  iommu/mediatek: Move pgtable allocation into domain_alloc
  iommu/mediatek: Merge 2 M4U HWs into one iommu domain
  ...
Diffstat (limited to 'drivers/iommu/intel-iommu.c')
-rw-r--r-- drivers/iommu/intel-iommu.c 280
1 files changed, 90 insertions, 190 deletions
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index bb05fc50ee2e..6784a05dd6b2 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -458,31 +458,6 @@ static LIST_HEAD(dmar_rmrr_units);
#define for_each_rmrr_units(rmrr) \
list_for_each_entry(rmrr, &dmar_rmrr_units, list)
-static void flush_unmaps_timeout(unsigned long data);
-
-struct deferred_flush_entry {
- unsigned long iova_pfn;
- unsigned long nrpages;
- struct dmar_domain *domain;
- struct page *freelist;
-};
-
-#define HIGH_WATER_MARK 250
-struct deferred_flush_table {
- int next;
- struct deferred_flush_entry entries[HIGH_WATER_MARK];
-};
-
-struct deferred_flush_data {
- spinlock_t lock;
- int timer_on;
- struct timer_list timer;
- long size;
- struct deferred_flush_table *tables;
-};
-
-static DEFINE_PER_CPU(struct deferred_flush_data, deferred_flush);
-
/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;
@@ -981,20 +956,6 @@ static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
return ret;
}
-static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
-{
- struct context_entry *context;
- unsigned long flags;
-
- spin_lock_irqsave(&iommu->lock, flags);
- context = iommu_context_addr(iommu, bus, devfn, 0);
- if (context) {
- context_clear_entry(context);
- __iommu_flush_cache(iommu, context, sizeof(*context));
- }
- spin_unlock_irqrestore(&iommu->lock, flags);
-}
-
static void free_context_table(struct intel_iommu *iommu)
{
int i;
@@ -1144,8 +1105,9 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
}
static void dma_pte_free_level(struct dmar_domain *domain, int level,
- struct dma_pte *pte, unsigned long pfn,
- unsigned long start_pfn, unsigned long last_pfn)
+ int retain_level, struct dma_pte *pte,
+ unsigned long pfn, unsigned long start_pfn,
+ unsigned long last_pfn)
{
pfn = max(start_pfn, pfn);
pte = &pte[pfn_level_offset(pfn, level)];
@@ -1160,12 +1122,17 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level,
level_pfn = pfn & level_mask(level);
level_pte = phys_to_virt(dma_pte_addr(pte));
- if (level > 2)
- dma_pte_free_level(domain, level - 1, level_pte,
- level_pfn, start_pfn, last_pfn);
+ if (level > 2) {
+ dma_pte_free_level(domain, level - 1, retain_level,
+ level_pte, level_pfn, start_pfn,
+ last_pfn);
+ }
- /* If range covers entire pagetable, free it */
- if (!(start_pfn > level_pfn ||
+ /*
+ * Free the page table if we're below the level we want to
+ * retain and the range covers the entire table.
+ */
+ if (level < retain_level && !(start_pfn > level_pfn ||
last_pfn < level_pfn + level_size(level) - 1)) {
dma_clear_pte(pte);
domain_flush_cache(domain, pte, sizeof(*pte));
@@ -1176,10 +1143,14 @@ next:
} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}
-/* clear last level (leaf) ptes and free page table pages. */
+/*
+ * clear last level (leaf) ptes and free page table pages below the
+ * level we wish to keep intact.
+ */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
unsigned long start_pfn,
- unsigned long last_pfn)
+ unsigned long last_pfn,
+ int retain_level)
{
BUG_ON(!domain_pfn_supported(domain, start_pfn));
BUG_ON(!domain_pfn_supported(domain, last_pfn));
@@ -1188,7 +1159,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
dma_pte_clear_range(domain, start_pfn, last_pfn);
/* We don't need lock here; nobody else touches the iova range */
- dma_pte_free_level(domain, agaw_to_level(domain->agaw),
+ dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
domain->pgd, 0, start_pfn, last_pfn);
/* free pgd */
@@ -1316,6 +1287,13 @@ static void dma_free_pagelist(struct page *freelist)
}
}
+static void iova_entry_free(unsigned long data)
+{
+ struct page *freelist = (struct page *)data;
+
+ dma_free_pagelist(freelist);
+}
+
/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
@@ -1629,6 +1607,25 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
addr, mask);
}
+static void iommu_flush_iova(struct iova_domain *iovad)
+{
+ struct dmar_domain *domain;
+ int idx;
+
+ domain = container_of(iovad, struct dmar_domain, iovad);
+
+ for_each_domain_iommu(idx, domain) {
+ struct intel_iommu *iommu = g_iommus[idx];
+ u16 did = domain->iommu_did[iommu->seq_id];
+
+ iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+
+ if (!cap_caching_mode(iommu->cap))
+ iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
+ 0, MAX_AGAW_PFN_WIDTH);
+ }
+}
+
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
u32 pmen;
@@ -1939,9 +1936,16 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
{
int adjust_width, agaw;
unsigned long sagaw;
+ int err;
init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
DMA_32BIT_PFN);
+
+ err = init_iova_flush_queue(&domain->iovad,
+ iommu_flush_iova, iova_entry_free);
+ if (err)
+ return err;
+
domain_reserve_special_ranges(domain);
/* calculate AGAW */
@@ -1993,14 +1997,6 @@ static void domain_exit(struct dmar_domain *domain)
if (!domain)
return;
- /* Flush any lazy unmaps that may reference this domain */
- if (!intel_iommu_strict) {
- int cpu;
-
- for_each_possible_cpu(cpu)
- flush_unmaps_timeout(cpu);
- }
-
/* Remove associated devices and clear attached or cached domains */
rcu_read_lock();
domain_remove_dev_info(domain);
@@ -2284,8 +2280,11 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
/*
* Ensure that old small page tables are
* removed to make room for superpage(s).
+ * We're adding new large pages, so make sure
+ * we don't remove their parent tables.
*/
- dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
+ dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
+ largepage_lvl + 1);
} else {
pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
}
@@ -2358,13 +2357,33 @@ static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long i
static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
+ unsigned long flags;
+ struct context_entry *context;
+ u16 did_old;
+
if (!iommu)
return;
- clear_context_table(iommu, bus, devfn);
- iommu->flush.flush_context(iommu, 0, 0, 0,
- DMA_CCMD_GLOBAL_INVL);
- iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+ spin_lock_irqsave(&iommu->lock, flags);
+ context = iommu_context_addr(iommu, bus, devfn, 0);
+ if (!context) {
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ return;
+ }
+ did_old = context_domain_id(context);
+ context_clear_entry(context);
+ __iommu_flush_cache(iommu, context, sizeof(*context));
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ iommu->flush.flush_context(iommu,
+ did_old,
+ (((u16)bus) << 8) | devfn,
+ DMA_CCMD_MASK_NOBIT,
+ DMA_CCMD_DEVICE_INVL);
+ iommu->flush.flush_iotlb(iommu,
+ did_old,
+ 0,
+ 0,
+ DMA_TLB_DSI_FLUSH);
}
static inline void unlink_domain_info(struct device_domain_info *info)
@@ -3213,7 +3232,7 @@ static int __init init_dmars(void)
bool copied_tables = false;
struct device *dev;
struct intel_iommu *iommu;
- int i, ret, cpu;
+ int i, ret;
/*
* for each drhd
@@ -3246,22 +3265,6 @@ static int __init init_dmars(void)
goto error;
}
- for_each_possible_cpu(cpu) {
- struct deferred_flush_data *dfd = per_cpu_ptr(&deferred_flush,
- cpu);
-
- dfd->tables = kzalloc(g_num_of_iommus *
- sizeof(struct deferred_flush_table),
- GFP_KERNEL);
- if (!dfd->tables) {
- ret = -ENOMEM;
- goto free_g_iommus;
- }
-
- spin_lock_init(&dfd->lock);
- setup_timer(&dfd->timer, flush_unmaps_timeout, cpu);
- }
-
for_each_active_iommu(iommu, drhd) {
g_iommus[iommu->seq_id] = iommu;
@@ -3444,10 +3447,9 @@ free_iommu:
disable_dmar_iommu(iommu);
free_dmar_iommu(iommu);
}
-free_g_iommus:
- for_each_possible_cpu(cpu)
- kfree(per_cpu_ptr(&deferred_flush, cpu)->tables);
+
kfree(g_iommus);
+
error:
return ret;
}
@@ -3652,110 +3654,6 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page,
dir, *dev->dma_mask);
}
-static void flush_unmaps(struct deferred_flush_data *flush_data)
-{
- int i, j;
-
- flush_data->timer_on = 0;
-
- /* just flush them all */
- for (i = 0; i < g_num_of_iommus; i++) {
- struct intel_iommu *iommu = g_iommus[i];
- struct deferred_flush_table *flush_table =
- &flush_data->tables[i];
- if (!iommu)
- continue;
-
- if (!flush_table->next)
- continue;
-
- /* In caching mode, global flushes turn emulation expensive */
- if (!cap_caching_mode(iommu->cap))
- iommu->flush.flush_iotlb(iommu, 0, 0, 0,
- DMA_TLB_GLOBAL_FLUSH);
- for (j = 0; j < flush_table->next; j++) {
- unsigned long mask;
- struct deferred_flush_entry *entry =
- &flush_table->entries[j];
- unsigned long iova_pfn = entry->iova_pfn;
- unsigned long nrpages = entry->nrpages;
- struct dmar_domain *domain = entry->domain;
- struct page *freelist = entry->freelist;
-
- /* On real hardware multiple invalidations are expensive */
- if (cap_caching_mode(iommu->cap))
- iommu_flush_iotlb_psi(iommu, domain,
- mm_to_dma_pfn(iova_pfn),
- nrpages, !freelist, 0);
- else {
- mask = ilog2(nrpages);
- iommu_flush_dev_iotlb(domain,
- (uint64_t)iova_pfn << PAGE_SHIFT, mask);
- }
- free_iova_fast(&domain->iovad, iova_pfn, nrpages);
- if (freelist)
- dma_free_pagelist(freelist);
- }
- flush_table->next = 0;
- }
-
- flush_data->size = 0;
-}
-
-static void flush_unmaps_timeout(unsigned long cpuid)
-{
- struct deferred_flush_data *flush_data = per_cpu_ptr(&deferred_flush, cpuid);
- unsigned long flags;
-
- spin_lock_irqsave(&flush_data->lock, flags);
- flush_unmaps(flush_data);
- spin_unlock_irqrestore(&flush_data->lock, flags);
-}
-
-static void add_unmap(struct dmar_domain *dom, unsigned long iova_pfn,
- unsigned long nrpages, struct page *freelist)
-{
- unsigned long flags;
- int entry_id, iommu_id;
- struct intel_iommu *iommu;
- struct deferred_flush_entry *entry;
- struct deferred_flush_data *flush_data;
-
- flush_data = raw_cpu_ptr(&deferred_flush);
-
- /* Flush all CPUs' entries to avoid deferring too much. If
- * this becomes a bottleneck, can just flush us, and rely on
- * flush timer for the rest.
- */
- if (flush_data->size == HIGH_WATER_MARK) {
- int cpu;
-
- for_each_online_cpu(cpu)
- flush_unmaps_timeout(cpu);
- }
-
- spin_lock_irqsave(&flush_data->lock, flags);
-
- iommu = domain_get_iommu(dom);
- iommu_id = iommu->seq_id;
-
- entry_id = flush_data->tables[iommu_id].next;
- ++(flush_data->tables[iommu_id].next);
-
- entry = &flush_data->tables[iommu_id].entries[entry_id];
- entry->domain = dom;
- entry->iova_pfn = iova_pfn;
- entry->nrpages = nrpages;
- entry->freelist = freelist;
-
- if (!flush_data->timer_on) {
- mod_timer(&flush_data->timer, jiffies + msecs_to_jiffies(10));
- flush_data->timer_on = 1;
- }
- flush_data->size++;
- spin_unlock_irqrestore(&flush_data->lock, flags);
-}
-
static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
{
struct dmar_domain *domain;
@@ -3791,7 +3689,8 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(nrpages));
dma_free_pagelist(freelist);
} else {
- add_unmap(domain, iova_pfn, nrpages, freelist);
+ queue_iova(&domain->iovad, iova_pfn, nrpages,
+ (unsigned long)freelist);
/*
* queue up the release of the unmap to save the 1/6th of the
* cpu used up by the iotlb flush operation...
@@ -3945,7 +3844,8 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
if (unlikely(ret)) {
dma_pte_free_pagetable(domain, start_vpfn,
- start_vpfn + size - 1);
+ start_vpfn + size - 1,
+ agaw_to_level(domain->agaw) + 1);
free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size));
return 0;
}
@@ -4728,7 +4628,6 @@ static void free_all_cpu_cached_iovas(unsigned int cpu)
static int intel_iommu_cpu_dead(unsigned int cpu)
{
free_all_cpu_cached_iovas(cpu);
- flush_unmaps_timeout(cpu);
return 0;
}
@@ -5350,7 +5249,8 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd
sdev->sid = PCI_DEVID(info->bus, info->devfn);
if (!(ctx_lo & CONTEXT_PASIDE)) {
- context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
+ if (iommu->pasid_state_table)
+ context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
context[1].lo = (u64)virt_to_phys(iommu->pasid_table) |
intel_iommu_get_pts(iommu);