From 9494ea90a56d013f4257686c8daf49203cd900c0 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:35 +0800 Subject: Revert "iommu/amd: Suppress IO_PAGE_FAULTs in kdump kernel" This reverts commit 54bd63570484167cb13edf81e31fff107b879981. We still need the IO_PAGE_FAULT message to report errors once the issue of in-flight DMA in the kdump kernel is fixed. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 354cbd6392cd..6d2fc40a086d 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2086,8 +2086,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) flags |= tmp; } - - flags &= ~(DTE_FLAG_SA | 0xffffULL); + flags &= ~(0xffffUL); flags |= domain->id; amd_iommu_dev_table[devid].data[1] = flags; -- cgit v1.2.3 From 07a80a6b5920873a8b161ac49c5c12db7af30c0f Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:36 +0800 Subject: iommu/amd: Define bit fields for DTE particularly In the AMD-Vi spec, several bits of the IO PTE fields and the DTE fields are similar, so both can share the same macro definitions. However, defining them separately makes the code more readable. Do it now. 
Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 8 ++++---- drivers/iommu/amd_iommu_types.h | 18 ++++++++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 6d2fc40a086d..097db07354b4 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1537,9 +1537,9 @@ static int iommu_map_page(struct protection_domain *dom, if (count > 1) { __pte = PAGE_SIZE_PTE(phys_addr, page_size); - __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC; + __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; } else - __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC; + __pte = phys_addr | IOMMU_PTE_PR | IOMMU_PTE_FC; if (prot & IOMMU_PROT_IR) __pte |= IOMMU_PTE_IR; @@ -2053,7 +2053,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; - pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; + pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; flags = amd_iommu_dev_table[devid].data[1]; @@ -2096,7 +2096,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) static void clear_dte_entry(u16 devid) { /* remove entry from the device table seen by the hardware */ - amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; + amd_iommu_dev_table[devid].data[0] = DTE_FLAG_V | DTE_FLAG_TV; amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK; amd_iommu_apply_erratum_63(devid); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index db7ceb4d0957..f88e802481a3 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -265,7 +265,7 @@ #define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL) #define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL) #define PM_LEVEL_PDE(x, a) ((a) | 
PM_LEVEL_ENC((x)) | \ - IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) + IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW) #define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL) #define PM_MAP_4k 0 @@ -314,13 +314,23 @@ #define PTE_LEVEL_PAGE_SIZE(level) \ (1ULL << (12 + (9 * (level)))) -#define IOMMU_PTE_P (1ULL << 0) -#define IOMMU_PTE_TV (1ULL << 1) +/* + * Bit value definition for I/O PTE fields + */ +#define IOMMU_PTE_PR (1ULL << 0) #define IOMMU_PTE_U (1ULL << 59) #define IOMMU_PTE_FC (1ULL << 60) #define IOMMU_PTE_IR (1ULL << 61) #define IOMMU_PTE_IW (1ULL << 62) +/* + * Bit value definition for DTE fields + */ +#define DTE_FLAG_V (1ULL << 0) +#define DTE_FLAG_TV (1ULL << 1) +#define DTE_FLAG_IR (1ULL << 61) +#define DTE_FLAG_IW (1ULL << 62) + #define DTE_FLAG_IOTLB (1ULL << 32) #define DTE_FLAG_GV (1ULL << 55) #define DTE_FLAG_MASK (0x3ffULL << 32) @@ -342,7 +352,7 @@ #define GCR3_VALID 0x01ULL #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) -#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) +#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR) #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07) -- cgit v1.2.3 From 45a01c42933b93e59811099f97aa4179d499a42c Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:37 +0800 Subject: iommu/amd: Add function copy_dev_tables() Add function copy_dev_tables to copy the old DEV table entries of the panicked kernel to the new allocated device table. Since all iommus share the same device table the copy only need be done one time. Here add a new global old_dev_tbl_cpy to point to the newly allocated device table which the content of old device table will be copied to. Besides, we also need to: - Check whether all IOMMUs actually use the same device table with the same size - Verify that the size of the old device table is the expected size. - Reserve the old domain id occupied in 1st kernel to avoid touching the old io-page tables. 
Then in-flight DMA can continue looking them up. Also define the macro DEV_DOMID_MASK to replace the magic number 0xffffULL; it can be reused in copy_device_table(). Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- drivers/iommu/amd_iommu_init.c | 62 +++++++++++++++++++++++++++++++++++++++++ drivers/iommu/amd_iommu_types.h | 1 + 3 files changed, 64 insertions(+), 1 deletion(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 097db07354b4..b22b58b33400 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2086,7 +2086,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats) flags |= tmp; } - flags &= ~(0xffffUL); + flags &= ~DEV_DOMID_MASK; flags |= domain->id; amd_iommu_dev_table[devid].data[1] = flags; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 7044510654fe..e2857204d32a 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -195,6 +195,11 @@ spinlock_t amd_iommu_pd_lock; * page table root pointer. */ struct dev_table_entry *amd_iommu_dev_table; +/* + * Pointer to a device table which the content of old device table + * will be copied to. It's only be used in kdump kernel. 
+ */ +static struct dev_table_entry *old_dev_tbl_cpy; /* * The alias table is a driver specific data structure which contains the @@ -842,6 +847,63 @@ static int get_dev_entry_bit(u16 devid, u8 bit) } +static bool copy_device_table(void) +{ + struct dev_table_entry *old_devtb = NULL; + u32 lo, hi, devid, old_devtb_size; + phys_addr_t old_devtb_phys; + u64 entry, last_entry = 0; + struct amd_iommu *iommu; + u16 dom_id, dte_v; + gfp_t gfp_flag; + + + pr_warn("Translation is already enabled - trying to copy translation structures\n"); + for_each_iommu(iommu) { + /* All IOMMUs should use the same device table with the same size */ + lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET); + hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); + entry = (((u64) hi) << 32) + lo; + if (last_entry && last_entry != entry) { + pr_err("IOMMU:%d should use the same dev table as others!/n", + iommu->index); + return false; + } + last_entry = entry; + + old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; + if (old_devtb_size != dev_table_size) { + pr_err("The device table size of IOMMU:%d is not expected!/n", + iommu->index); + return false; + } + } + + old_devtb_phys = entry & PAGE_MASK; + old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB); + if (!old_devtb) + return false; + + gfp_flag = GFP_KERNEL | __GFP_ZERO; + old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, + get_order(dev_table_size)); + if (old_dev_tbl_cpy == NULL) { + pr_err("Failed to allocate memory for copying old device table!/n"); + return false; + } + + for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { + old_dev_tbl_cpy[devid] = old_devtb[devid]; + dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; + dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; + if (dte_v && dom_id) + __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); + } + memunmap(old_devtb); + + return true; +} + void amd_iommu_apply_erratum_63(u16 devid) { int sysmgt; diff --git a/drivers/iommu/amd_iommu_types.h 
b/drivers/iommu/amd_iommu_types.h index f88e802481a3..a7f6cf8c841e 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -336,6 +336,7 @@ #define DTE_FLAG_MASK (0x3ffULL << 32) #define DTE_GLX_SHIFT (56) #define DTE_GLX_MASK (3) +#define DEV_DOMID_MASK 0xffffULL #define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL) #define DTE_GCR3_VAL_B(x) (((x) >> 15) & 0x0ffffULL) -- cgit v1.2.3 From 53019a9e88cc14bae2780ba807faba87a5829891 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:39 +0800 Subject: iommu/amd: Do sanity check for address translation and irq remap of old dev table entry Firstly split the dev table entry copy into address translation part and irq remapping part. Because these two parts could be enabled independently. Secondly do sanity check for address translation and irq remap of old dev table entry separately. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 5 ----- drivers/iommu/amd_iommu_init.c | 23 ++++++++++++++++++++--- drivers/iommu/amd_iommu_types.h | 8 ++++++++ 3 files changed, 28 insertions(+), 8 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b22b58b33400..dab901b4f0f9 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3776,11 +3776,6 @@ EXPORT_SYMBOL(amd_iommu_device_info); static struct irq_chip amd_ir_chip; -#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) -#define DTE_IRQ_REMAP_INTCTL (2ULL << 60) -#define DTE_IRQ_TABLE_LEN (8ULL << 1) -#define DTE_IRQ_REMAP_ENABLE 1ULL - static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) { u64 dte; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 959c25d997e1..d08ad74b0928 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -852,12 +852,12 @@ static int get_dev_entry_bit(u16 devid, u8 bit) static bool copy_device_table(void) { 
+ u64 int_ctl, int_tab_len, entry, last_entry = 0; struct dev_table_entry *old_devtb = NULL; u32 lo, hi, devid, old_devtb_size; phys_addr_t old_devtb_phys; - u64 entry, last_entry = 0; struct amd_iommu *iommu; - u16 dom_id, dte_v; + u16 dom_id, dte_v, irq_v; gfp_t gfp_flag; if (!amd_iommu_pre_enabled) @@ -901,8 +901,25 @@ static bool copy_device_table(void) old_dev_tbl_cpy[devid] = old_devtb[devid]; dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK; dte_v = old_devtb[devid].data[0] & DTE_FLAG_V; - if (dte_v && dom_id) + + if (dte_v && dom_id) { + old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; + old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); + } + + irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE; + int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK; + int_tab_len = old_devtb[devid].data[2] & DTE_IRQ_TABLE_LEN_MASK; + if (irq_v && (int_ctl || int_tab_len)) { + if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || + (int_tab_len != DTE_IRQ_TABLE_LEN)) { + pr_err("Wrong old irq remapping flag: %#x\n", devid); + return false; + } + + old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2]; + } } memunmap(old_devtb); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index a7f6cf8c841e..f0979183ec9b 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -250,6 +250,14 @@ #define GA_GUEST_NR 0x1 +/* Bit value definition for dte irq remapping fields*/ +#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) +#define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) +#define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1) +#define DTE_IRQ_REMAP_INTCTL (2ULL << 60) +#define DTE_IRQ_TABLE_LEN (8ULL << 1) +#define DTE_IRQ_REMAP_ENABLE 1ULL + #define PAGE_MODE_NONE 0x00 #define PAGE_MODE_1_LEVEL 0x01 #define PAGE_MODE_2_LEVEL 0x02 -- cgit v1.2.3 From df3f7a6e8e855e4ff533508807cd7c3723faa51f Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 
16:33:41 +0800 Subject: iommu/amd: Use is_attach_deferred call-back Implement call-back is_attach_deferred and use it to defer the domain attach from iommu driver init to device driver init when iommu is pre-enabled in kdump kernel. Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index dab901b4f0f9..eebf4590cef9 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -121,6 +121,7 @@ struct iommu_dev_data { PPR completions */ u32 errata; /* Bitmap for errata to apply */ bool use_vapic; /* Enable device to use vapic mode */ + bool defer_attach; struct ratelimit_state rs; /* Ratelimit IOPF messages */ }; @@ -371,12 +372,17 @@ static u16 get_alias(struct device *dev) static struct iommu_dev_data *find_dev_data(u16 devid) { struct iommu_dev_data *dev_data; + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; dev_data = search_dev_data(devid); - if (dev_data == NULL) + if (dev_data == NULL) { dev_data = alloc_dev_data(devid); + if (translation_pre_enabled(iommu)) + dev_data->defer_attach = true; + } + return dev_data; } @@ -2477,11 +2483,18 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) static struct protection_domain *get_domain(struct device *dev) { struct protection_domain *domain; + struct iommu_domain *io_domain; if (!check_device(dev)) return ERR_PTR(-EINVAL); domain = get_dev_data(dev)->domain; + if (domain == NULL && get_dev_data(dev)->defer_attach) { + get_dev_data(dev)->defer_attach = false; + io_domain = iommu_get_domain_for_dev(dev); + domain = to_pdomain(io_domain); + attach_device(dev, domain); + } if (!dma_ops_domain(domain)) return ERR_PTR(-EBUSY); @@ -3372,6 +3385,13 @@ static void amd_iommu_apply_resv_region(struct device *dev, WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == 
NULL); } +static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain, + struct device *dev) +{ + struct iommu_dev_data *dev_data = dev->archdata.iommu; + return dev_data->defer_attach; +} + const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, @@ -3388,6 +3408,7 @@ const struct iommu_ops amd_iommu_ops = { .get_resv_regions = amd_iommu_get_resv_regions, .put_resv_regions = amd_iommu_put_resv_regions, .apply_resv_region = amd_iommu_apply_resv_region, + .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, }; -- cgit v1.2.3 From daae2d25a4779b272a66ddd01f5810bcee822b9e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 9 Aug 2017 16:33:43 +0800 Subject: iommu/amd: Don't copy GCR3 table root pointer When the IOMMU is pre-enabled in the kdump kernel and a device is set up with guest translations (DTE.GV=1), don't copy the GCR3 table root pointer; instead, move the device over to an empty guest-CR3 table and handle the faults in the PPR log (answering them with INVALID). After all, these PPR faults are recoverable for the device, and we should not allow the device to change the old kernel's data when we don't have to. 
Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 28 +++------------------------- drivers/iommu/amd_iommu_init.c | 12 ++++++++++++ drivers/iommu/amd_iommu_proto.h | 1 + drivers/iommu/amd_iommu_types.h | 24 ++++++++++++++++++++++++ drivers/iommu/amd_iommu_v2.c | 18 +++++++++++++++++- 5 files changed, 57 insertions(+), 26 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index eebf4590cef9..9e8ea1907796 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -102,30 +102,6 @@ int amd_iommu_max_glx_val = -1; static const struct dma_map_ops amd_iommu_dma_ops; -/* - * This struct contains device specific data for the IOMMU - */ -struct iommu_dev_data { - struct list_head list; /* For domain->dev_list */ - struct list_head dev_data_list; /* For global dev_data_list */ - struct protection_domain *domain; /* Domain the device is bound to */ - u16 devid; /* PCI Device ID */ - u16 alias; /* Alias Device ID */ - bool iommu_v2; /* Device can make use of IOMMUv2 */ - bool passthrough; /* Device is identity mapped */ - struct { - bool enabled; - int qdep; - } ats; /* ATS state */ - bool pri_tlp; /* PASID TLB required for - PPR completions */ - u32 errata; /* Bitmap for errata to apply */ - bool use_vapic; /* Enable device to use vapic mode */ - bool defer_attach; - - struct ratelimit_state rs; /* Ratelimit IOPF messages */ -}; - /* * general struct to manage commands send to an IOMMU */ @@ -386,10 +362,11 @@ static struct iommu_dev_data *find_dev_data(u16 devid) return dev_data; } -static struct iommu_dev_data *get_dev_data(struct device *dev) +struct iommu_dev_data *get_dev_data(struct device *dev) { return dev->archdata.iommu; } +EXPORT_SYMBOL(get_dev_data); /* * Find or create an IOMMU group for a acpihid device. 
@@ -2540,6 +2517,7 @@ static int dir2prot(enum dma_data_direction direction) else return 0; } + /* * This function contains common code for mapping of a physically * contiguous memory region into DMA address space. It is used by all diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index c348732f27d7..88e7a6e950ae 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -214,6 +214,7 @@ u16 *amd_iommu_alias_table; * for a specific device. It is also indexed by the PCI device id. */ struct amd_iommu **amd_iommu_rlookup_table; +EXPORT_SYMBOL(amd_iommu_rlookup_table); /* * This table is used to find the irq remapping table for a given device id @@ -269,6 +270,7 @@ bool translation_pre_enabled(struct amd_iommu *iommu) { return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); } +EXPORT_SYMBOL(translation_pre_enabled); static void clear_translation_pre_enabled(struct amd_iommu *iommu) { @@ -859,6 +861,7 @@ static bool copy_device_table(void) struct amd_iommu *iommu; u16 dom_id, dte_v, irq_v; gfp_t gfp_flag; + u64 tmp; if (!amd_iommu_pre_enabled) return false; @@ -910,6 +913,15 @@ static bool copy_device_table(void) old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0]; old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1]; __set_bit(dom_id, amd_iommu_pd_alloc_bitmap); + /* If gcr3 table existed, mask it out */ + if (old_devtb[devid].data[0] & DTE_FLAG_GV) { + tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B; + tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C; + old_dev_tbl_cpy[devid].data[1] &= ~tmp; + tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A; + tmp |= DTE_FLAG_GV; + old_dev_tbl_cpy[devid].data[0] &= ~tmp; + } } irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE; diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index a9666d2005bb..90e62e9b01c5 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -88,4 +88,5 @@ static inline bool 
iommu_feature(struct amd_iommu *iommu, u64 f) } extern bool translation_pre_enabled(struct amd_iommu *iommu); +extern struct iommu_dev_data *get_dev_data(struct device *dev); #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index f0979183ec9b..9e5af13be7c5 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -618,6 +618,30 @@ struct devid_map { bool cmd_line; }; +/* + * This struct contains device specific data for the IOMMU + */ +struct iommu_dev_data { + struct list_head list; /* For domain->dev_list */ + struct list_head dev_data_list; /* For global dev_data_list */ + struct protection_domain *domain; /* Domain the device is bound to */ + u16 devid; /* PCI Device ID */ + u16 alias; /* Alias Device ID */ + bool iommu_v2; /* Device can make use of IOMMUv2 */ + bool passthrough; /* Device is identity mapped */ + struct { + bool enabled; + int qdep; + } ats; /* ATS state */ + bool pri_tlp; /* PASID TLB required for + PPR completions */ + u32 errata; /* Bitmap for errata to apply */ + bool use_vapic; /* Enable device to use vapic mode */ + bool defer_attach; + + struct ratelimit_state rs; /* Ratelimit IOPF messages */ +}; + /* Map HPET and IOAPIC ids to the devid used by the IOMMU */ extern struct list_head ioapic_map; extern struct list_head hpet_map; diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 6629c472eafd..e705fac89cb4 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -562,14 +562,30 @@ static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) unsigned long flags; struct fault *fault; bool finish; - u16 tag; + u16 tag, devid; int ret; + struct iommu_dev_data *dev_data; + struct pci_dev *pdev = NULL; iommu_fault = data; tag = iommu_fault->tag & 0x1ff; finish = (iommu_fault->tag >> 9) & 1; + devid = iommu_fault->device_id; + pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 
0xff); + if (!pdev) + return -ENODEV; + dev_data = get_dev_data(&pdev->dev); + + /* In kdump kernel pci dev is not initialized yet -> send INVALID */ ret = NOTIFY_DONE; + if (translation_pre_enabled(amd_iommu_rlookup_table[devid]) + && dev_data->defer_attach) { + amd_iommu_complete_ppr(pdev, iommu_fault->pasid, + PPR_INVALID, tag); + goto out; + } + dev_state = get_device_state(iommu_fault->device_id); if (dev_state == NULL) goto out; -- cgit v1.2.3 From 9003d6186321e22b19125721b6fb2aa390ff8be6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 10 Aug 2017 17:19:13 +0200 Subject: iommu/amd: Make use of iova queue flushing Rip out the implementation in the AMD IOMMU driver and use the one in the common iova code instead. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 229 ++-------------------------------------------- 1 file changed, 9 insertions(+), 220 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 688e77576e5a..cabcaa506ed6 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -137,20 +137,7 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); - -#define FLUSH_QUEUE_SIZE 256 - -struct flush_queue_entry { - unsigned long iova_pfn; - unsigned long pages; - u64 counter; /* Flush counter when this entry was added to the queue */ -}; - -struct flush_queue { - struct flush_queue_entry *entries; - unsigned head, tail; - spinlock_t lock; -}; +static void iova_domain_flush_tlb(struct iova_domain *iovad); /* * Data container for a dma_ops specific protection domain @@ -161,36 +148,6 @@ struct dma_ops_domain { /* IOVA RB-Tree */ struct iova_domain iovad; - - struct flush_queue __percpu *flush_queue; - - /* - * We need two counter here to be race-free wrt. 
IOTLB flushing and - * adding entries to the flush queue. - * - * The flush_start_cnt is incremented _before_ the IOTLB flush starts. - * New entries added to the flush ring-buffer get their 'counter' value - * from here. This way we can make sure that entries added to the queue - * (or other per-cpu queues of the same domain) while the TLB is about - * to be flushed are not considered to be flushed already. - */ - atomic64_t flush_start_cnt; - - /* - * The flush_finish_cnt is incremented when an IOTLB flush is complete. - * This value is always smaller than flush_start_cnt. The queue_add - * function frees all IOVAs that have a counter value smaller than - * flush_finish_cnt. This makes sure that we only free IOVAs that are - * flushed out of the IOTLB of the domain. - */ - atomic64_t flush_finish_cnt; - - /* - * Timer to make sure we don't keep IOVAs around unflushed - * for too long - */ - struct timer_list flush_timer; - atomic_t flush_timer_on; }; static struct iova_domain reserved_iova_ranges; @@ -1788,178 +1745,19 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } -static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - - queue = per_cpu_ptr(dom->flush_queue, cpu); - kfree(queue->entries); - } - - free_percpu(dom->flush_queue); - - dom->flush_queue = NULL; -} - -static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom) -{ - int cpu; - - atomic64_set(&dom->flush_start_cnt, 0); - atomic64_set(&dom->flush_finish_cnt, 0); - - dom->flush_queue = alloc_percpu(struct flush_queue); - if (!dom->flush_queue) - return -ENOMEM; - - /* First make sure everything is cleared */ - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - - queue = per_cpu_ptr(dom->flush_queue, cpu); - queue->head = 0; - queue->tail = 0; - queue->entries = NULL; - } - - /* Now start doing the allocation */ - 
for_each_possible_cpu(cpu) { - struct flush_queue *queue; - - queue = per_cpu_ptr(dom->flush_queue, cpu); - queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries), - GFP_KERNEL); - if (!queue->entries) { - dma_ops_domain_free_flush_queue(dom); - return -ENOMEM; - } - - spin_lock_init(&queue->lock); - } - - return 0; -} - static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom) { - atomic64_inc(&dom->flush_start_cnt); domain_flush_tlb(&dom->domain); domain_flush_complete(&dom->domain); - atomic64_inc(&dom->flush_finish_cnt); } -static inline bool queue_ring_full(struct flush_queue *queue) +static void iova_domain_flush_tlb(struct iova_domain *iovad) { - assert_spin_locked(&queue->lock); - - return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head); -} + struct dma_ops_domain *dom; -#define queue_ring_for_each(i, q) \ - for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE) - -static inline unsigned queue_ring_add(struct flush_queue *queue) -{ - unsigned idx = queue->tail; - - assert_spin_locked(&queue->lock); - queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE; - - return idx; -} - -static inline void queue_ring_remove_head(struct flush_queue *queue) -{ - assert_spin_locked(&queue->lock); - queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE; -} - -static void queue_ring_free_flushed(struct dma_ops_domain *dom, - struct flush_queue *queue) -{ - u64 counter = atomic64_read(&dom->flush_finish_cnt); - int idx; - - queue_ring_for_each(idx, queue) { - /* - * This assumes that counter values in the ring-buffer are - * monotonously rising. 
- */ - if (queue->entries[idx].counter >= counter) - break; - - free_iova_fast(&dom->iovad, - queue->entries[idx].iova_pfn, - queue->entries[idx].pages); - - queue_ring_remove_head(queue); - } -} - -static void queue_add(struct dma_ops_domain *dom, - unsigned long address, unsigned long pages) -{ - struct flush_queue *queue; - unsigned long flags; - int idx; - - pages = __roundup_pow_of_two(pages); - address >>= PAGE_SHIFT; - - queue = get_cpu_ptr(dom->flush_queue); - spin_lock_irqsave(&queue->lock, flags); - - /* - * First remove the enries from the ring-buffer that are already - * flushed to make the below queue_ring_full() check less likely - */ - queue_ring_free_flushed(dom, queue); - - /* - * When ring-queue is full, flush the entries from the IOTLB so - * that we can free all entries with queue_ring_free_flushed() - * below. - */ - if (queue_ring_full(queue)) { - dma_ops_domain_flush_tlb(dom); - queue_ring_free_flushed(dom, queue); - } - - idx = queue_ring_add(queue); - - queue->entries[idx].iova_pfn = address; - queue->entries[idx].pages = pages; - queue->entries[idx].counter = atomic64_read(&dom->flush_start_cnt); - - spin_unlock_irqrestore(&queue->lock, flags); - - if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0) - mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10)); - - put_cpu_ptr(dom->flush_queue); -} - -static void queue_flush_timeout(unsigned long data) -{ - struct dma_ops_domain *dom = (struct dma_ops_domain *)data; - int cpu; - - atomic_set(&dom->flush_timer_on, 0); + dom = container_of(iovad, struct dma_ops_domain, iovad); dma_ops_domain_flush_tlb(dom); - - for_each_possible_cpu(cpu) { - struct flush_queue *queue; - unsigned long flags; - - queue = per_cpu_ptr(dom->flush_queue, cpu); - spin_lock_irqsave(&queue->lock, flags); - queue_ring_free_flushed(dom, queue); - spin_unlock_irqrestore(&queue->lock, flags); - } } /* @@ -1973,11 +1771,6 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) del_domain_from_list(&dom->domain); 
- if (timer_pending(&dom->flush_timer)) - del_timer(&dom->flush_timer); - - dma_ops_domain_free_flush_queue(dom); - put_iova_domain(&dom->iovad); free_pagetable(&dom->domain); @@ -2013,16 +1806,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN, DMA_32BIT_PFN); - /* Initialize reserved ranges */ - copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); - - if (dma_ops_domain_alloc_flush_queue(dma_dom)) + if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL)) goto free_dma_dom; - setup_timer(&dma_dom->flush_timer, queue_flush_timeout, - (unsigned long)dma_dom); - - atomic_set(&dma_dom->flush_timer_on, 0); + /* Initialize reserved ranges */ + copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad); add_domain_to_list(&dma_dom->domain); @@ -2619,7 +2407,8 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, domain_flush_tlb(&dma_dom->domain); domain_flush_complete(&dma_dom->domain); } else { - queue_add(dma_dom, dma_addr, pages); + pages = __roundup_pow_of_two(pages); + queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0); } } -- cgit v1.2.3 From ec62b1ab0f4ccbc48aa8b9852cc25b38a1f12d1e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Thu, 24 Aug 2017 21:13:57 +0800 Subject: iommu/amd: Check if domain is NULL in get_domain() and return -EBUSY In get_domain(), 'domain' could be NULL before it's passed to dma_ops_domain(), which dereferences it. The current callers of get_domain() also cannot handle a NULL return value. So check whether 'domain' is NULL before calling dma_ops_domain(), and if so return ERR_PTR(-EBUSY) directly. 
Reported-by: Dan Carpenter Fixes: df3f7a6e8e85 ('iommu/amd: Use is_attach_deferred call-back') Signed-off-by: Baoquan He Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 9e8ea1907796..b531307a9360 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2472,6 +2472,9 @@ static struct protection_domain *get_domain(struct device *dev) domain = to_pdomain(io_domain); attach_device(dev, domain); } + if (domain == NULL) + return ERR_PTR(-EBUSY); + if (!dma_ops_domain(domain)) return ERR_PTR(-EBUSY); -- cgit v1.2.3 From 0688a09990986cd8c2fda26afb04ce0a599ced3f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 23 Aug 2017 15:50:03 +0200 Subject: iommu/amd: Rename a few flush functions Rename a few iommu cache-flush functions that start with iommu_ to start with amd_iommu now. This is to prevent name collisions with generic iommu code later on. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/iommu/amd_iommu.c') diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index cabcaa506ed6..7798fcf41b9b 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1122,7 +1122,7 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid) return iommu_queue_command(iommu, &cmd); } -static void iommu_flush_dte_all(struct amd_iommu *iommu) +static void amd_iommu_flush_dte_all(struct amd_iommu *iommu) { u32 devid; @@ -1136,7 +1136,7 @@ static void iommu_flush_dte_all(struct amd_iommu *iommu) * This function uses heavy locking and may disable irqs for some time. But * this is no issue because it is only called during resume. 
*/ -static void iommu_flush_tlb_all(struct amd_iommu *iommu) +static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu) { u32 dom_id; @@ -1150,7 +1150,7 @@ static void iommu_flush_tlb_all(struct amd_iommu *iommu) iommu_completion_wait(iommu); } -static void iommu_flush_all(struct amd_iommu *iommu) +static void amd_iommu_flush_all(struct amd_iommu *iommu) { struct iommu_cmd cmd; @@ -1169,7 +1169,7 @@ static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid) iommu_queue_command(iommu, &cmd); } -static void iommu_flush_irt_all(struct amd_iommu *iommu) +static void amd_iommu_flush_irt_all(struct amd_iommu *iommu) { u32 devid; @@ -1182,11 +1182,11 @@ static void iommu_flush_irt_all(struct amd_iommu *iommu) void iommu_flush_all_caches(struct amd_iommu *iommu) { if (iommu_feature(iommu, FEATURE_IA)) { - iommu_flush_all(iommu); + amd_iommu_flush_all(iommu); } else { - iommu_flush_dte_all(iommu); - iommu_flush_irt_all(iommu); - iommu_flush_tlb_all(iommu); + amd_iommu_flush_dte_all(iommu); + amd_iommu_flush_irt_all(iommu); + amd_iommu_flush_tlb_all(iommu); } } -- cgit v1.2.3