diff mbox

[PART2,RFC,v1,5/9] iommu/amd: Introduce amd_iommu_update_ga()

Message ID 1460119770-2896-6-git-send-email-Suravee.Suthikulpanit@amd.com (mailing list archive)
State New, archived
Headers show

Commit Message

Suthikulpanit, Suravee April 8, 2016, 12:49 p.m. UTC
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

This patch introduces a new IOMMU interface, amd_iommu_update_ga(),
which allows KVM (SVM) to update existing posted interrupt IOMMU IRTE when
load/unload vcpu.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 drivers/iommu/amd_iommu.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/amd-iommu.h |  8 ++++++
 2 files changed, 78 insertions(+)

Comments

Radim Krčmář April 13, 2016, 5:06 p.m. UTC | #1
2016-04-08 07:49-0500, Suravee Suthikulpanit:
> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> 
> This patch introduces a new IOMMU interface, amd_iommu_update_ga(),
> which allows KVM (SVM) to update existing posted interrupt IOMMU IRTE when
> load/unload vcpu.
> 
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> ---
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> @@ -4330,4 +4330,74 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
> +int amd_iommu_update_ga(u32 vcpu_id, u32 cpu, u32 ga_tag,

It'd be nicer to generate the tag on SVM side and pass it whole -- IOMMU
doesn't have to care how hypervisors use the tag.

> +			u64 base, bool is_run)
> +{
> +	unsigned long flags;
> +	struct amd_iommu *iommu;
> +
> +	if (amd_iommu_guest_ir < AMD_IOMMU_GUEST_IR_GA)
> +		return 0;
> +
> +	for_each_iommu(iommu) {
> +		struct amd_ir_data *ir_data;
> +
> +		spin_lock_irqsave(&iommu->ga_hash_lock, flags);
> +
> +		hash_for_each_possible(iommu->ga_hash, ir_data, hnode,
> +				       AMD_IOMMU_GATAG(ga_tag, vcpu_id)) {

All tags can map into the same bucket.  Code below doesn't check that
the ir_data belongs to the tag and will modify unrelated IRTEs.

Have you considered a per-VCPU list of IRTEs on the SVM side?

> +			struct iommu_dev_data *dev_data;
> +			if (!ir_data)

(ir_data can't be NULL.)

> +				break;
> +
> +			dev_data = search_dev_data(ir_data->irq_2_irte.devid);
> +
> +			if (!dev_data || !dev_data->guest_mode)
> +				continue;

(guest_mode can be also read from the irte.)

> +			set_irte_ga(iommu, ir_data->irq_2_irte.devid,
> +				    base, cpu, is_run);

set_irte_ga() is pretty expensive -- do we need to invalidate the irt
when changing cpu and is_run?

2.2.5.2 Interrupt Virtualization Tables with Guest Virtual APIC Enabled,
point 9, bullet 5 says that IRTE is read from memory before considering
IsRun, GATag and Destination, which makes me think that avoiding races
can be faster in the common case.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Suthikulpanit, Suravee June 9, 2016, 11:59 p.m. UTC | #2
Hi Radim,

On 4/13/16 12:06, Radim Krčmář wrote:
> 2016-04-08 07:49-0500, Suravee Suthikulpanit:
>> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>>
>> This patch introduces a new IOMMU interface, amd_iommu_update_ga(),
>> which allows KVM (SVM) to update existing posted interrupt IOMMU IRTE when
>> load/unload vcpu.
>>
>> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>> ---
>> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
>> @@ -4330,4 +4330,74 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
>> +int amd_iommu_update_ga(u32 vcpu_id, u32 cpu, u32 ga_tag,
>
> It'd be nicer to generate the tag on SVM side and pass it whole -- IOMMU
> doesn't have to care how hypervisors use the tag.

Actually, we are generating the tag from the SVM side currently (please 
see avic_get_next_tag() in patch 8). The amd_iommu_update_ga() is meant 
to be called from SVM side and we are passing in the tag here.

>
>> +			u64 base, bool is_run)
>> +{
>> +	unsigned long flags;
>> +	struct amd_iommu *iommu;
>> +
>> +	if (amd_iommu_guest_ir < AMD_IOMMU_GUEST_IR_GA)
>> +		return 0;
>> +
>> +	for_each_iommu(iommu) {
>> +		struct amd_ir_data *ir_data;
>> +
>> +		spin_lock_irqsave(&iommu->ga_hash_lock, flags);
>> +
>> +		hash_for_each_possible(iommu->ga_hash, ir_data, hnode,
>> +				       AMD_IOMMU_GATAG(ga_tag, vcpu_id)) {
>
> All tags can map into the same bucket.  Code below doesn't check that
> the ir_data belongs to the tag and will modify unrelated IRTEs.
>
> Have you considered a per-VCPU list of IRTEs on the SVM side?

Actually, the hash key is basically vm-id and vcpu-id. So, this should 
get us all the ir_data for a specific vcpu in a particular VM.

>> +			set_irte_ga(iommu, ir_data->irq_2_irte.devid,
>> +				    base, cpu, is_run);
>
> set_irte_ga() is pretty expensive -- do we need to invalidate the irt
> when changing cpu and is_run?

You are right -- I think we can actually keep a pointer to IRTE in the 
amd_ir_data, and use that to directly get to the IRTE when we need to 
update the GA mode related stuff. That way, we don't need to go through 
the whole interrupt remapping table.

> 2.2.5.2 Interrupt Virtualization Tables with Guest Virtual APIC Enabled,
> point 9, bullet 5 says that IRTE is read from memory before considering
> IsRun, GATag and Destination, which makes me think that avoiding races
> can be faster in the common case.

Right.

I'm working on sending out V2 soon.

Thanks,
Suravee



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 8cdde339..1d17597 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4330,4 +4330,74 @@  int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
 
 	return 0;
 }
+
+static int
+set_irte_ga(struct amd_iommu *iommu, unsigned int devid,
+	    u64 base, int cpu, bool is_run)
+{
+	struct irq_remap_table *irt = get_irq_table(devid, false);
+	unsigned long flags;
+	int index;
+
+	if (!irt)
+		return -ENODEV;
+
+	spin_lock_irqsave(&irt->lock, flags);
+
+	for (index = irt->min_index; index < MAX_IRQS_PER_TABLE; ++index) {
+		struct irte_ga *irte = amd_iommu_get_irte(irt, index);
+
+		if (!irte->lo.fields_vapic.guest_mode)
+			continue;
+
+		irte->hi.fields.ga_root_ptr = (base >> 12);
+		irte->lo.fields_vapic.destination = cpu;
+		irte->lo.fields_vapic.is_run = is_run;
+		barrier();
+	}
+
+	spin_unlock_irqrestore(&irt->lock, flags);
+
+	iommu_flush_irt(iommu, devid);
+	iommu_completion_wait(iommu);
+
+	return 0;
+}
+
+int amd_iommu_update_ga(u32 vcpu_id, u32 cpu, u32 ga_tag,
+			u64 base, bool is_run)
+{
+	unsigned long flags;
+	struct amd_iommu *iommu;
+
+	if (amd_iommu_guest_ir < AMD_IOMMU_GUEST_IR_GA)
+		return 0;
+
+	for_each_iommu(iommu) {
+		struct amd_ir_data *ir_data;
+
+		spin_lock_irqsave(&iommu->ga_hash_lock, flags);
+
+		hash_for_each_possible(iommu->ga_hash, ir_data, hnode,
+				       AMD_IOMMU_GATAG(ga_tag, vcpu_id)) {
+			struct iommu_dev_data *dev_data;
+
+			if (!ir_data)
+				break;
+
+			dev_data = search_dev_data(ir_data->irq_2_irte.devid);
+
+			if (!dev_data || !dev_data->guest_mode)
+				continue;
+
+			set_irte_ga(iommu, ir_data->irq_2_irte.devid,
+				    base, cpu, is_run);
+		}
+
+		spin_unlock_irqrestore(&iommu->ga_hash_lock, flags);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(amd_iommu_update_ga);
 #endif
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 36648fe..e52cee5 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -173,6 +173,9 @@  extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
 extern int
 amd_iommu_register_ga_log_notifier(int (*notifier)(int, int, int));
 
+extern int
+amd_iommu_update_ga(u32 vcpu_id, u32 cpu, u32 ga_tag, u64 base, bool is_run);
+
 #else
 
 static inline int amd_iommu_detect(void) { return -ENODEV; }
@@ -183,6 +186,11 @@  amd_iommu_register_ga_log_notifier(int (*notifier)(int, int, int))
 	return 0;
 }
 
+static inline int
+amd_iommu_update_ga(u32 vcpu_id, u32 cpu, u32 ga_tag, u64 base, bool is_run)
+{
+	return 0;
+}
 #endif
 
 #endif /* _ASM_X86_AMD_IOMMU_H */