From 5815d9303c67cef5f47cd01e73b671e6b9c40ef3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 24 Jan 2026 17:00:21 -0400 Subject: [PATCH 1/2] iommupt: Only cache flush memory changed by unmap The cache flush was happening on every level across the whole range of iteration, even if no leafs or tables were cleared. Instead flush only the sub range that was actually written. Overflushing isn't a correctness problem but it does impact the performance of unmap. After this series the performance compared to the original VT-d implementation with cache flushing turned on is: map_pages pgsz ,avg new,old ns, min new,old ns , min % (+ve is better) 2^12, 253,266 , 213,227 , 6.06 2^21, 246,244 , 221,219 , 0.00 2^30, 231,240 , 209,217 , 3.03 256*2^12, 2604,2668 , 2415,2540 , 4.04 256*2^21, 2495,2824 , 2390,2734 , 12.12 256*2^30, 2542,2845 , 2380,2718 , 12.12 unmap_pages pgsz ,avg new,old ns, min new,old ns , min % (+ve is better) 2^12, 259,292 , 222,251 , 11.11 2^21, 255,259 , 227,236 , 3.03 2^30, 238,254 , 217,230 , 5.05 256*2^12, 2751,2620 , 2417,2437 , 0.00 256*2^21, 2461,2526 , 2377,2423 , 1.01 256*2^30, 2498,2543 , 2370,2404 , 1.01 Fixes: efa03dab7ce4 ("iommupt: Flush the CPU cache after any writes to the page table") Reported-by: Francois Dugast Closes: https://lore.kernel.org/all/20260121130233.257428-1-francois.dugast@intel.com/ Signed-off-by: Jason Gunthorpe Reviewed-by: Lu Baolu Tested-by: Francois Dugast Reviewed-by: Kevin Tian Signed-off-by: Joerg Roedel --- drivers/iommu/generic_pt/iommu_pt.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/generic_pt/iommu_pt.h b/drivers/iommu/generic_pt/iommu_pt.h index 52ef028ed2db..d575f3ba9d34 100644 --- a/drivers/iommu/generic_pt/iommu_pt.h +++ b/drivers/iommu/generic_pt/iommu_pt.h @@ -931,6 +931,8 @@ static __maybe_unused int __unmap_range(struct pt_range *range, void *arg, struct pt_table_p *table) { struct pt_state pts = pt_init(range, level, table); + unsigned int 
flush_start_index = UINT_MAX; + unsigned int flush_end_index = UINT_MAX; struct pt_unmap_args *unmap = arg; unsigned int num_oas = 0; unsigned int start_index; @@ -986,6 +988,9 @@ static __maybe_unused int __unmap_range(struct pt_range *range, void *arg, iommu_pages_list_add(&unmap->free_list, pts.table_lower); pt_clear_entries(&pts, ilog2(1)); + if (pts.index < flush_start_index) + flush_start_index = pts.index; + flush_end_index = pts.index + 1; } pts.index++; } else { @@ -999,7 +1004,10 @@ start_oa: num_contig_lg2 = pt_entry_num_contig_lg2(&pts); pt_clear_entries(&pts, num_contig_lg2); num_oas += log2_to_int(num_contig_lg2); + if (pts.index < flush_start_index) + flush_start_index = pts.index; pts.index += log2_to_int(num_contig_lg2); + flush_end_index = pts.index; } if (pts.index >= pts.end_index) break; @@ -1007,7 +1015,8 @@ start_oa: } while (true); unmap->unmapped += log2_mul(num_oas, pt_table_item_lg2sz(&pts)); - flush_writes_range(&pts, start_index, pts.index); + if (flush_start_index != flush_end_index) + flush_writes_range(&pts, flush_start_index, flush_end_index); return ret; } From 80f1a2c2332fee0edccd006fe87fc8a6db94bab3 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Jan 2026 14:43:41 -0800 Subject: [PATCH 2/2] iommu/tegra241-cmdqv: Reset VCMDQ in tegra241_vcmdq_hw_init_user() The Enable bits in CMDQV/VINTF/VCMDQ_CONFIG registers do not actually reset the HW registers. So, the driver explicitly clears all the registers when a VINTF or VCMDQ is being initialized calling its hw_deinit() function. However, a userspace VCMDQ is not properly reset, unlike an in-kernel VCMDQ getting reset in tegra241_vcmdq_hw_init(). Meanwhile, tegra241_vintf_hw_init() calling tegra241_vintf_hw_deinit() will not deinit any VCMDQ, since there is no userspace VCMDQ mapped to the VINTF at that stage. Then, this may result in dirty VCMDQ registers, which can fail the VM. Like tegra241_vcmdq_hw_init(), reset a VCMDQ in tegra241_vcmdq_hw_init_user() to fix this bug. 
This is required by a host kernel. Fixes: 6717f26ab1e7 ("iommu/tegra241-cmdqv: Add user-space use support") Cc: stable@vger.kernel.org Reported-by: Bao Nguyen Signed-off-by: Nicolin Chen Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 378104cd395e..04cc7a9036e4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -1078,6 +1078,9 @@ static int tegra241_vcmdq_hw_init_user(struct tegra241_vcmdq *vcmdq) { char header[64]; + /* Reset VCMDQ */ + tegra241_vcmdq_hw_deinit(vcmdq); + /* Configure the vcmdq only; User space does the enabling */ writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE));