diff mbox

[v10,1/6] VT-d: Introduce new fields in msi_desc to track binding with guest interrupt

Message ID 1489554682-6126-2-git-send-email-chao.gao@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Chao Gao March 15, 2017, 5:11 a.m. UTC
Currently, msi_msg_to_remap_entry is buggy when the live IRTE is in posted
format. Straightforwardly, we can let caller specify which format of IRTE they
want update to. But the problem is current callers are not aware of the
binding with guest interrupt. Making all callers be aware of the binding with
guest interrupt will cause a far more complicated change. Also some callings
happen in interrupt context where they can't acquire d->event_lock to read
struct hvm_pirq_dpci.

This patch introduces two new fields in msi_desc to track binding with a guest
interrupt such that msi_msg_to_remap_entry() can get the binding and update
IRTE accordingly. After that change, pi_update_irte() can utilize
msi_msg_to_remap_entry() to update IRTE to posted format.

Signed-off-by: Feng Wu <feng.wu@intel.com>
Signed-off-by: Chao Gao <chao.gao@intel.com>
---
v10:
- Newly added.

 xen/arch/x86/msi.c                     |   1 +
 xen/drivers/passthrough/vtd/intremap.c | 148 +++++++--------------------------
 xen/include/asm-x86/msi.h              |   2 +
 3 files changed, 34 insertions(+), 117 deletions(-)

Comments

Jan Beulich March 15, 2017, 4:41 p.m. UTC | #1
>>> On 15.03.17 at 06:11, <chao.gao@intel.com> wrote:
> --- a/xen/drivers/passthrough/vtd/intremap.c
> +++ b/xen/drivers/passthrough/vtd/intremap.c
> @@ -552,11 +552,12 @@ static int msi_msg_to_remap_entry(
>      struct msi_desc *msi_desc, struct msi_msg *msg)
>  {
>      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
> -    struct iremap_entry new_ire;
> +    struct iremap_entry new_ire = {{0}};

Any reason this isn't simple "{ }"?

> @@ -595,33 +596,35 @@ static int msi_msg_to_remap_entry(
>      GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
>                       iremap_entries, iremap_entry);
>  
> -    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
> -
> -    /* Set interrupt remapping table entry */
> -    new_ire.remap.fpd = 0;
> -    new_ire.remap.dm = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
> -    new_ire.remap.tm = (msg->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
> -    new_ire.remap.dlm = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
> -    /* Hardware require RH = 1 for LPR delivery mode */
> -    new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
> -    new_ire.remap.avail = 0;
> -    new_ire.remap.res_1 = 0;
> -    new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
> -                            MSI_DATA_VECTOR_MASK;
> -    new_ire.remap.res_2 = 0;
> -    if ( x2apic_enabled )
> -        new_ire.remap.dst = msg->dest32;
> +    if ( !pi_desc )
> +    {
> +        new_ire.remap.dm = msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT;
> +        new_ire.remap.tm = msg->data >> MSI_DATA_TRIGGER_SHIFT;
> +        new_ire.remap.dlm = msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT;
> +        /* Hardware require RH = 1 for LPR delivery mode */

As you're touching this anyway, please make it "requires" and
"lowest priority" respectively.

> @@ -968,59 +927,14 @@ int pi_update_irte(const struct vcpu *v, const struct pirq *pirq,
>          rc = -ENODEV;
>          goto unlock_out;
>      }
> -
> -    pci_dev = msi_desc->dev;
> -    if ( !pci_dev )
> -    {
> -        rc = -ENODEV;
> -        goto unlock_out;
> -    }
> -
> -    remap_index = msi_desc->remap_index;
> +    msi_desc->pi_desc = pi_desc;
> +    msi_desc->gvec = gvec;

Am I overlooking something - I can't seem to find any place where these
two fields (or at least the former) get cleared again? This may be correct,
but if it is the reason wants recording in the commit message.

> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -118,6 +118,8 @@ struct msi_desc {
>  	struct msi_msg msg;		/* Last set MSI message */
>  
>  	int remap_index;		/* index in interrupt remapping table */
> +	const void *pi_desc;		/* PDA, indicates msi is delivered via VT-d PI */

Why "void"? Please let's play type safe wherever we can.

Jan
Chao Gao March 15, 2017, 9:21 p.m. UTC | #2
On Wed, Mar 15, 2017 at 10:41:13AM -0600, Jan Beulich wrote:
>>>> On 15.03.17 at 06:11, <chao.gao@intel.com> wrote:
>> --- a/xen/drivers/passthrough/vtd/intremap.c
>> +++ b/xen/drivers/passthrough/vtd/intremap.c
>> @@ -552,11 +552,12 @@ static int msi_msg_to_remap_entry(
>>      struct msi_desc *msi_desc, struct msi_msg *msg)
>>  {
>>      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
>> -    struct iremap_entry new_ire;
>> +    struct iremap_entry new_ire = {{0}};
>
>Any reason this isn't simple "{ }"?
>

I also think '{}' can work. But here is the compiler output:
intremap.c: In function ‘msi_msg_to_remap_entry’:
intremap.c:587:12: error: missing braces around initializer [-Werror=missing-braces]
     struct iremap_entry new_ire = {0};
            ^
intremap.c:587:12: error: (near initialization for ‘new_ire.<anonymous>’) [-Werror=missing-braces]

>> @@ -595,33 +596,35 @@ static int msi_msg_to_remap_entry(
>>      GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
>>                       iremap_entries, iremap_entry);
>>  
>> -    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
>> -
>> -    /* Set interrupt remapping table entry */
>> -    new_ire.remap.fpd = 0;
>> -    new_ire.remap.dm = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
>> -    new_ire.remap.tm = (msg->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
>> -    new_ire.remap.dlm = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
>> -    /* Hardware require RH = 1 for LPR delivery mode */
>> -    new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
>> -    new_ire.remap.avail = 0;
>> -    new_ire.remap.res_1 = 0;
>> -    new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
>> -                            MSI_DATA_VECTOR_MASK;
>> -    new_ire.remap.res_2 = 0;
>> -    if ( x2apic_enabled )
>> -        new_ire.remap.dst = msg->dest32;
>> +    if ( !pi_desc )
>> +    {
>> +        new_ire.remap.dm = msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT;
>> +        new_ire.remap.tm = msg->data >> MSI_DATA_TRIGGER_SHIFT;
>> +        new_ire.remap.dlm = msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT;
>> +        /* Hardware require RH = 1 for LPR delivery mode */
>
>As you're touching this anyway, please make it "requires" and
>"lowest priority" respectively.

OK.

>
>> @@ -968,59 +927,14 @@ int pi_update_irte(const struct vcpu *v, const struct pirq *pirq,
>>          rc = -ENODEV;
>>          goto unlock_out;
>>      }
>> -
>> -    pci_dev = msi_desc->dev;
>> -    if ( !pci_dev )
>> -    {
>> -        rc = -ENODEV;
>> -        goto unlock_out;
>> -    }
>> -
>> -    remap_index = msi_desc->remap_index;
>> +    msi_desc->pi_desc = pi_desc;
>> +    msi_desc->gvec = gvec;
>
>Am I overlooking something - I can't seem to find any place where these
>two fields (or at least the former) get cleared again? This may be correct,
>but if it is the reason wants recording in the commit message.

IIUC, the current logic is free the whole msi_desc when device deassignment.
But it is better to clear this two fields explicitly. I will call pi_update_irte()
with @vcpu=NULL, just like Patch [6/6] when device deassignment.
Do you think the new added code should be squashed into this one?
Shall I also squash Patch [6/6] to this one? I think it is to fix another flaw.

>
>> --- a/xen/include/asm-x86/msi.h
>> +++ b/xen/include/asm-x86/msi.h
>> @@ -118,6 +118,8 @@ struct msi_desc {
>>  	struct msi_msg msg;		/* Last set MSI message */
>>  
>>  	int remap_index;		/* index in interrupt remapping table */
>> +	const void *pi_desc;		/* PDA, indicates msi is delivered via VT-d PI */
>
>Why "void"? Please let's play type safe wherever we can.

Ok.

Thank
Chao
>
>Jan
>
Jan Beulich March 16, 2017, 10:24 a.m. UTC | #3
>>> On 15.03.17 at 22:21, <chao.gao@intel.com> wrote:
> On Wed, Mar 15, 2017 at 10:41:13AM -0600, Jan Beulich wrote:
>>>>> On 15.03.17 at 06:11, <chao.gao@intel.com> wrote:
>>> --- a/xen/drivers/passthrough/vtd/intremap.c
>>> +++ b/xen/drivers/passthrough/vtd/intremap.c
>>> @@ -552,11 +552,12 @@ static int msi_msg_to_remap_entry(
>>>      struct msi_desc *msi_desc, struct msi_msg *msg)
>>>  {
>>>      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
>>> -    struct iremap_entry new_ire;
>>> +    struct iremap_entry new_ire = {{0}};
>>
>>Any reason this isn't simple "{ }"?
>>
> 
> I also think '{}' can work. But here is the compiler output:
> intremap.c: In function ‘msi_msg_to_remap_entry’:
> intremap.c:587:12: error: missing braces around initializer 
> [-Werror=missing-braces]
>      struct iremap_entry new_ire = {0};
>             ^
> intremap.c:587:12: error: (near initialization for ‘new_ire.<anonymous>’) 
> [-Werror=missing-braces]

Sure - you've left the 0 there, other than suggested.

>>> @@ -968,59 +927,14 @@ int pi_update_irte(const struct vcpu *v, const struct pirq *pirq,
>>>          rc = -ENODEV;
>>>          goto unlock_out;
>>>      }
>>> -
>>> -    pci_dev = msi_desc->dev;
>>> -    if ( !pci_dev )
>>> -    {
>>> -        rc = -ENODEV;
>>> -        goto unlock_out;
>>> -    }
>>> -
>>> -    remap_index = msi_desc->remap_index;
>>> +    msi_desc->pi_desc = pi_desc;
>>> +    msi_desc->gvec = gvec;
>>
>>Am I overlooking something - I can't seem to find any place where these
>>two fields (or at least the former) get cleared again? This may be correct,
>>but if it is the reason wants recording in the commit message.
> 
> IIUC, the current logic is free the whole msi_desc when device deassignment.
> But it is better to clear this two fields explicitly. I will call pi_update_irte()
> with @vcpu=NULL, just like Patch [6/6] when device deassignment.
> Do you think the new added code should be squashed into this one?

Not sure which code specifically you think about.

> Shall I also squash Patch [6/6] to this one? I think it is to fix another 
> flaw.

I haven't got around to look at patch 6 yet.

Jan
Chao Gao March 22, 2017, 12:18 a.m. UTC | #4
On Wed, Mar 22, 2017 at 01:59:19PM +0800, Tian, Kevin wrote:
>> From: Gao, Chao
>> Sent: Wednesday, March 15, 2017 1:11 PM
>> 
>> Currently, msi_msg_to_remap_entry is buggy when the live IRTE is in posted
>> format. Straightforwardly, we can let caller specify which format of IRTE they
>> want update to. But the problem is current callers are not aware of the
>> binding with guest interrupt. Making all callers be aware of the binding with
>> guest interrupt will cause a far more complicated change. Also some callings
>> happen in interrupt context where they can't acquire d->event_lock to read
>> struct hvm_pirq_dpci.
>
>Above text is unclear to me. Are you trying to explain why current 
>code is buggy (which I don't get the point) or simply for mitigation
>options which were once considered but dropped for some reasons?
>

the latter. I will divide them into two sections and add more description about
why current code is buggy.

>> 
>> diff --git a/xen/drivers/passthrough/vtd/intremap.c
>> b/xen/drivers/passthrough/vtd/intremap.c
>> index bfd468b..6202ece 100644
>> --- a/xen/drivers/passthrough/vtd/intremap.c
>> +++ b/xen/drivers/passthrough/vtd/intremap.c
>> @@ -552,11 +552,12 @@ static int msi_msg_to_remap_entry(
>>      struct msi_desc *msi_desc, struct msi_msg *msg)  {
>>      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
>> -    struct iremap_entry new_ire;
>> +    struct iremap_entry new_ire = {{0}};
>>      struct msi_msg_remap_entry *remap_rte;
>>      unsigned int index, i, nr = 1;
>>      unsigned long flags;
>>      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
>> +    const struct pi_desc *pi_desc = msi_desc->pi_desc;
>> 
>>      if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
>>          nr = msi_desc->msi.nvec;
>> @@ -595,33 +596,35 @@ static int msi_msg_to_remap_entry(
>>      GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
>>                       iremap_entries, iremap_entry);
>> 
>> -    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
>> -
>> -    /* Set interrupt remapping table entry */
>> -    new_ire.remap.fpd = 0;
>> -    new_ire.remap.dm = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT)
>> & 0x1;
>> -    new_ire.remap.tm = (msg->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
>> -    new_ire.remap.dlm = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT)
>> & 0x1;
>> -    /* Hardware require RH = 1 for LPR delivery mode */
>> -    new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
>> -    new_ire.remap.avail = 0;
>> -    new_ire.remap.res_1 = 0;
>> -    new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
>> -                            MSI_DATA_VECTOR_MASK;
>> -    new_ire.remap.res_2 = 0;
>> -    if ( x2apic_enabled )
>> -        new_ire.remap.dst = msg->dest32;
>> +    if ( !pi_desc )
>> +    {
>> +        new_ire.remap.dm = msg->address_lo >>
>> MSI_ADDR_DESTMODE_SHIFT;
>> +        new_ire.remap.tm = msg->data >> MSI_DATA_TRIGGER_SHIFT;
>> +        new_ire.remap.dlm = msg->data >>
>> MSI_DATA_DELIVERY_MODE_SHIFT;
>> +        /* Hardware require RH = 1 for LPR delivery mode */
>> +        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
>> +        new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
>> +                                MSI_DATA_VECTOR_MASK;
>> +        if ( x2apic_enabled )
>> +            new_ire.remap.dst = msg->dest32;
>> +        else
>> +            new_ire.remap.dst =
>> +                MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK) << 8;
>> +        new_ire.remap.p = 1;
>
>Old code also touches fpd, res_1/2/3/4, which are abandoned
>above. Can you elaborate?
>

We have initialized new_ire to zero so I remove all the lines that assign 0 to fields.

>> diff --git a/xen/include/asm-x86/msi.h b/xen/include/asm-x86/msi.h index
>> 9c02945..3286692 100644
>> --- a/xen/include/asm-x86/msi.h
>> +++ b/xen/include/asm-x86/msi.h
>> @@ -118,6 +118,8 @@ struct msi_desc {
>>  	struct msi_msg msg;		/* Last set MSI message */
>> 
>>  	int remap_index;		/* index in interrupt remapping table
>> */
>> +	const void *pi_desc;		/* PDA, indicates msi is delivered via
>> VT-d PI */
>
>what's PDA?

Posted Descriptor Address which is recorded in posted format IRTE.

How about just use "Indicates msi is delivered via VT-d PI" because of the line width
limitation?

>
>> +	uint8_t gvec;			/* guest vector. valid when pi_desc
>> isn't NULL */
>>  };
>> 
>>  /*
>> --
>> 1.8.3.1
>
Tian, Kevin March 22, 2017, 5:59 a.m. UTC | #5
> From: Gao, Chao
> Sent: Wednesday, March 15, 2017 1:11 PM
> 
> Currently, msi_msg_to_remap_entry is buggy when the live IRTE is in posted
> format. Straightforwardly, we can let caller specify which format of IRTE they
> want update to. But the problem is current callers are not aware of the
> binding with guest interrupt. Making all callers be aware of the binding with
> guest interrupt will cause a far more complicated change. Also some callings
> happen in interrupt context where they can't acquire d->event_lock to read
> struct hvm_pirq_dpci.

Above text is unclear to me. Are you trying to explain why current 
code is buggy (which I don't get the point) or simply for mitigation
options which were once considered but dropped for some reasons?

> 
> This patch introduces two new fields in msi_desc to track binding with a
> guest interrupt such that msi_msg_to_remap_entry() can get the binding and
> update IRTE accordingly. After that change, pi_update_irte() can utilize
> msi_msg_to_remap_entry() to update IRTE to posted format.
> 
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> ---
> v10:
> - Newly added.
> 
>  xen/arch/x86/msi.c                     |   1 +
>  xen/drivers/passthrough/vtd/intremap.c | 148 +++++++--------------------------
>  xen/include/asm-x86/msi.h              |   2 +
>  3 files changed, 34 insertions(+), 117 deletions(-)
> 
> diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c index
> a868007..3374cd4 100644
> --- a/xen/arch/x86/msi.c
> +++ b/xen/arch/x86/msi.c
> @@ -578,6 +578,7 @@ static struct msi_desc *alloc_msi_entry(unsigned int
> nr)
>          entry[nr].dev = NULL;
>          entry[nr].irq = -1;
>          entry[nr].remap_index = -1;
> +        entry[nr].pi_desc = NULL;
>      }
> 
>      return entry;
> diff --git a/xen/drivers/passthrough/vtd/intremap.c
> b/xen/drivers/passthrough/vtd/intremap.c
> index bfd468b..6202ece 100644
> --- a/xen/drivers/passthrough/vtd/intremap.c
> +++ b/xen/drivers/passthrough/vtd/intremap.c
> @@ -552,11 +552,12 @@ static int msi_msg_to_remap_entry(
>      struct msi_desc *msi_desc, struct msi_msg *msg)  {
>      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
> -    struct iremap_entry new_ire;
> +    struct iremap_entry new_ire = {{0}};
>      struct msi_msg_remap_entry *remap_rte;
>      unsigned int index, i, nr = 1;
>      unsigned long flags;
>      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
> +    const struct pi_desc *pi_desc = msi_desc->pi_desc;
> 
>      if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
>          nr = msi_desc->msi.nvec;
> @@ -595,33 +596,35 @@ static int msi_msg_to_remap_entry(
>      GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
>                       iremap_entries, iremap_entry);
> 
> -    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
> -
> -    /* Set interrupt remapping table entry */
> -    new_ire.remap.fpd = 0;
> -    new_ire.remap.dm = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT)
> & 0x1;
> -    new_ire.remap.tm = (msg->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
> -    new_ire.remap.dlm = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT)
> & 0x1;
> -    /* Hardware require RH = 1 for LPR delivery mode */
> -    new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
> -    new_ire.remap.avail = 0;
> -    new_ire.remap.res_1 = 0;
> -    new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
> -                            MSI_DATA_VECTOR_MASK;
> -    new_ire.remap.res_2 = 0;
> -    if ( x2apic_enabled )
> -        new_ire.remap.dst = msg->dest32;
> +    if ( !pi_desc )
> +    {
> +        new_ire.remap.dm = msg->address_lo >>
> MSI_ADDR_DESTMODE_SHIFT;
> +        new_ire.remap.tm = msg->data >> MSI_DATA_TRIGGER_SHIFT;
> +        new_ire.remap.dlm = msg->data >>
> MSI_DATA_DELIVERY_MODE_SHIFT;
> +        /* Hardware require RH = 1 for LPR delivery mode */
> +        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
> +        new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
> +                                MSI_DATA_VECTOR_MASK;
> +        if ( x2apic_enabled )
> +            new_ire.remap.dst = msg->dest32;
> +        else
> +            new_ire.remap.dst =
> +                MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK) << 8;
> +        new_ire.remap.p = 1;

Old code also touches fpd, res_1/2/3/4, which are abandoned
above. Can you elaborate?

> +    }
>      else
> -        new_ire.remap.dst = ((msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT)
> -                             & 0xff) << 8;
> +    {
> +        new_ire.post.im = 1;
> +        new_ire.post.vector = msi_desc->gvec;
> +        new_ire.post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
> +        new_ire.post.pda_h = virt_to_maddr(pi_desc) >> 32;
> +        new_ire.post.p = 1;
> +    }
> 
>      if ( pdev )
>          set_msi_source_id(pdev, &new_ire);
>      else
>          set_hpet_source_id(msi_desc->hpet_id, &new_ire);
> -    new_ire.remap.res_3 = 0;
> -    new_ire.remap.res_4 = 0;
> -    new_ire.remap.p = 1;    /* finally, set present bit */
> 
>      /* now construct new MSI/MSI-X rte entry */
>      remap_rte = (struct msi_msg_remap_entry *)msg; @@ -902,42 +905,6
> @@ void iommu_disable_x2apic_IR(void)
>          disable_qinval(drhd->iommu);
>  }
> 
> -static void setup_posted_irte(
> -    struct iremap_entry *new_ire, const struct iremap_entry *old_ire,
> -    const struct pi_desc *pi_desc, const uint8_t gvec)
> -{
> -    memset(new_ire, 0, sizeof(*new_ire));
> -
> -    /*
> -     * 'im' filed decides whether the irte is in posted format (with value 1)
> -     * or remapped format (with value 0), if the old irte is in remapped format,
> -     * we copy things from remapped part in 'struct iremap_entry', otherwise,
> -     * we copy from posted part.
> -     */
> -    if ( !old_ire->remap.im )
> -    {
> -        new_ire->post.p = old_ire->remap.p;
> -        new_ire->post.fpd = old_ire->remap.fpd;
> -        new_ire->post.sid = old_ire->remap.sid;
> -        new_ire->post.sq = old_ire->remap.sq;
> -        new_ire->post.svt = old_ire->remap.svt;
> -    }
> -    else
> -    {
> -        new_ire->post.p = old_ire->post.p;
> -        new_ire->post.fpd = old_ire->post.fpd;
> -        new_ire->post.sid = old_ire->post.sid;
> -        new_ire->post.sq = old_ire->post.sq;
> -        new_ire->post.svt = old_ire->post.svt;
> -        new_ire->post.urg = old_ire->post.urg;
> -    }
> -
> -    new_ire->post.im = 1;
> -    new_ire->post.vector = gvec;
> -    new_ire->post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
> -    new_ire->post.pda_h = virt_to_maddr(pi_desc) >> 32;
> -}
> -
>  /*
>   * This function is used to update the IRTE for posted-interrupt
>   * when guest changes MSI/MSI-X information.
> @@ -946,17 +913,9 @@ int pi_update_irte(const struct vcpu *v, const struct
> pirq *pirq,
>      const uint8_t gvec)
>  {
>      struct irq_desc *desc;
> -    const struct msi_desc *msi_desc;
> -    int remap_index;
> -    int rc = 0;
> -    const struct pci_dev *pci_dev;
> -    const struct acpi_drhd_unit *drhd;
> -    struct iommu *iommu;
> -    struct ir_ctrl *ir_ctrl;
> -    struct iremap_entry *iremap_entries = NULL, *p = NULL;
> -    struct iremap_entry new_ire, old_ire;
> +    struct msi_desc *msi_desc;
>      const struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
> -    __uint128_t ret;
> +    int rc;
> 
>      desc = pirq_spin_lock_irq_desc(pirq, NULL);
>      if ( !desc )
> @@ -968,59 +927,14 @@ int pi_update_irte(const struct vcpu *v, const
> struct pirq *pirq,
>          rc = -ENODEV;
>          goto unlock_out;
>      }
> -
> -    pci_dev = msi_desc->dev;
> -    if ( !pci_dev )
> -    {
> -        rc = -ENODEV;
> -        goto unlock_out;
> -    }
> -
> -    remap_index = msi_desc->remap_index;
> +    msi_desc->pi_desc = pi_desc;
> +    msi_desc->gvec = gvec;
> 
>      spin_unlock_irq(&desc->lock);
> 
>      ASSERT(pcidevs_locked());
> -
> -    /*
> -     * FIXME: For performance reasons we should store the 'iommu' pointer
> in
> -     * 'struct msi_desc' in some other place, so we don't need to waste
> -     * time searching it here.
> -     */
> -    drhd = acpi_find_matched_drhd_unit(pci_dev);
> -    if ( !drhd )
> -        return -ENODEV;
> -
> -    iommu = drhd->iommu;
> -    ir_ctrl = iommu_ir_ctrl(iommu);
> -    if ( !ir_ctrl )
> -        return -ENODEV;
> -
> -    spin_lock_irq(&ir_ctrl->iremap_lock);
> -
> -    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, remap_index,
> iremap_entries, p);
> -
> -    old_ire = *p;
> -
> -    /* Setup/Update interrupt remapping table entry. */
> -    setup_posted_irte(&new_ire, &old_ire, pi_desc, gvec);
> -    ret = cmpxchg16b(p, &old_ire, &new_ire);
> -
> -    /*
> -     * In the above, we use cmpxchg16 to atomically update the 128-bit IRTE,
> -     * and the hardware cannot update the IRTE behind us, so the return
> value
> -     * of cmpxchg16 should be the same as old_ire. This ASSERT validate it.
> -     */
> -    ASSERT(ret == old_ire.val);
> -
> -    iommu_flush_cache_entry(p, sizeof(*p));
> -    iommu_flush_iec_index(iommu, 0, remap_index);
> -
> -    unmap_vtd_domain_page(iremap_entries);
> -
> -    spin_unlock_irq(&ir_ctrl->iremap_lock);
> -
> -    return 0;
> +    rc = iommu_update_ire_from_msi(msi_desc, &msi_desc->msg);
> +    return rc;

just "return iommu_update_ire..."

> 
>   unlock_out:
>      spin_unlock_irq(&desc->lock);
> diff --git a/xen/include/asm-x86/msi.h b/xen/include/asm-x86/msi.h index
> 9c02945..3286692 100644
> --- a/xen/include/asm-x86/msi.h
> +++ b/xen/include/asm-x86/msi.h
> @@ -118,6 +118,8 @@ struct msi_desc {
>  	struct msi_msg msg;		/* Last set MSI message */
> 
>  	int remap_index;		/* index in interrupt remapping table
> */
> +	const void *pi_desc;		/* PDA, indicates msi is delivered via
> VT-d PI */

what's PDA?

> +	uint8_t gvec;			/* guest vector. valid when pi_desc
> isn't NULL */
>  };
> 
>  /*
> --
> 1.8.3.1
Tian, Kevin March 22, 2017, 8:32 a.m. UTC | #6
> From: Gao, Chao
> Sent: Wednesday, March 22, 2017 8:19 AM
> 
> On Wed, Mar 22, 2017 at 01:59:19PM +0800, Tian, Kevin wrote:
> >> From: Gao, Chao
> >> Sent: Wednesday, March 15, 2017 1:11 PM
> >>
> >> Currently, msi_msg_to_remap_entry is buggy when the live IRTE is in
> >> posted format. Straightforwardly, we can let caller specify which
> >> format of IRTE they want update to. But the problem is current
> >> callers are not aware of the binding with guest interrupt. Making all
> >> callers be aware of the binding with guest interrupt will cause a far
> >> more complicated change. Also some callings happen in interrupt
> >> context where they can't acquire d->event_lock to read struct
> hvm_pirq_dpci.
> >
> >Above text is unclear to me. Are you trying to explain why current code
> >is buggy (which I don't get the point) or simply for mitigation options
> >which were once considered but dropped for some reasons?
> >
> 
> the latter. I will divide them into two sections and add more description
> about why current code is buggy.
> 
> >>
> >> diff --git a/xen/drivers/passthrough/vtd/intremap.c
> >> b/xen/drivers/passthrough/vtd/intremap.c
> >> index bfd468b..6202ece 100644
> >> --- a/xen/drivers/passthrough/vtd/intremap.c
> >> +++ b/xen/drivers/passthrough/vtd/intremap.c
> >> @@ -552,11 +552,12 @@ static int msi_msg_to_remap_entry(
> >>      struct msi_desc *msi_desc, struct msi_msg *msg)  {
> >>      struct iremap_entry *iremap_entry = NULL, *iremap_entries;
> >> -    struct iremap_entry new_ire;
> >> +    struct iremap_entry new_ire = {{0}};
> >>      struct msi_msg_remap_entry *remap_rte;
> >>      unsigned int index, i, nr = 1;
> >>      unsigned long flags;
> >>      struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
> >> +    const struct pi_desc *pi_desc = msi_desc->pi_desc;
> >>
> >>      if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
> >>          nr = msi_desc->msi.nvec;
> >> @@ -595,33 +596,35 @@ static int msi_msg_to_remap_entry(
> >>      GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
> >>                       iremap_entries, iremap_entry);
> >>
> >> -    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
> >> -
> >> -    /* Set interrupt remapping table entry */
> >> -    new_ire.remap.fpd = 0;
> >> -    new_ire.remap.dm = (msg->address_lo >>
> MSI_ADDR_DESTMODE_SHIFT)
> >> & 0x1;
> >> -    new_ire.remap.tm = (msg->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
> >> -    new_ire.remap.dlm = (msg->data >>
> MSI_DATA_DELIVERY_MODE_SHIFT)
> >> & 0x1;
> >> -    /* Hardware require RH = 1 for LPR delivery mode */
> >> -    new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
> >> -    new_ire.remap.avail = 0;
> >> -    new_ire.remap.res_1 = 0;
> >> -    new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
> >> -                            MSI_DATA_VECTOR_MASK;
> >> -    new_ire.remap.res_2 = 0;
> >> -    if ( x2apic_enabled )
> >> -        new_ire.remap.dst = msg->dest32;
> >> +    if ( !pi_desc )
> >> +    {
> >> +        new_ire.remap.dm = msg->address_lo >>
> >> MSI_ADDR_DESTMODE_SHIFT;
> >> +        new_ire.remap.tm = msg->data >> MSI_DATA_TRIGGER_SHIFT;
> >> +        new_ire.remap.dlm = msg->data >>
> >> MSI_DATA_DELIVERY_MODE_SHIFT;
> >> +        /* Hardware require RH = 1 for LPR delivery mode */
> >> +        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
> >> +        new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT)
> &
> >> +                                MSI_DATA_VECTOR_MASK;
> >> +        if ( x2apic_enabled )
> >> +            new_ire.remap.dst = msg->dest32;
> >> +        else
> >> +            new_ire.remap.dst =
> >> +                MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK) << 8;
> >> +        new_ire.remap.p = 1;
> >
> >Old code also touches fpd, res_1/2/3/4, which are abandoned above. Can
> >you elaborate?
> >
> 
> We have initialized new_ire to zero so I remove all the lines that assign 0 to
> fields.

I overlooked this part

> 
> >> diff --git a/xen/include/asm-x86/msi.h b/xen/include/asm-x86/msi.h
> >> index
> >> 9c02945..3286692 100644
> >> --- a/xen/include/asm-x86/msi.h
> >> +++ b/xen/include/asm-x86/msi.h
> >> @@ -118,6 +118,8 @@ struct msi_desc {
> >>  	struct msi_msg msg;		/* Last set MSI message */
> >>
> >>  	int remap_index;		/* index in interrupt remapping table
> >> */
> >> +	const void *pi_desc;		/* PDA, indicates msi is delivered via
> >> VT-d PI */
> >
> >what's PDA?
> 
> Posted Descriptor Address which is recorded in posted format IRTE.
> 
> How about just use "Indicates msi is delivered via VT-d PI" because of the line
> width limitation?
> 

Just "Pointer to posted descriptor".
diff mbox

Patch

diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index a868007..3374cd4 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -578,6 +578,7 @@  static struct msi_desc *alloc_msi_entry(unsigned int nr)
         entry[nr].dev = NULL;
         entry[nr].irq = -1;
         entry[nr].remap_index = -1;
+        entry[nr].pi_desc = NULL;
     }
 
     return entry;
diff --git a/xen/drivers/passthrough/vtd/intremap.c b/xen/drivers/passthrough/vtd/intremap.c
index bfd468b..6202ece 100644
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -552,11 +552,12 @@  static int msi_msg_to_remap_entry(
     struct msi_desc *msi_desc, struct msi_msg *msg)
 {
     struct iremap_entry *iremap_entry = NULL, *iremap_entries;
-    struct iremap_entry new_ire;
+    struct iremap_entry new_ire = {{0}};
     struct msi_msg_remap_entry *remap_rte;
     unsigned int index, i, nr = 1;
     unsigned long flags;
     struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
+    const struct pi_desc *pi_desc = msi_desc->pi_desc;
 
     if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
         nr = msi_desc->msi.nvec;
@@ -595,33 +596,35 @@  static int msi_msg_to_remap_entry(
     GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, index,
                      iremap_entries, iremap_entry);
 
-    memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
-
-    /* Set interrupt remapping table entry */
-    new_ire.remap.fpd = 0;
-    new_ire.remap.dm = (msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT) & 0x1;
-    new_ire.remap.tm = (msg->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
-    new_ire.remap.dlm = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
-    /* Hardware require RH = 1 for LPR delivery mode */
-    new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
-    new_ire.remap.avail = 0;
-    new_ire.remap.res_1 = 0;
-    new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
-                            MSI_DATA_VECTOR_MASK;
-    new_ire.remap.res_2 = 0;
-    if ( x2apic_enabled )
-        new_ire.remap.dst = msg->dest32;
+    if ( !pi_desc )
+    {
+        new_ire.remap.dm = msg->address_lo >> MSI_ADDR_DESTMODE_SHIFT;
+        new_ire.remap.tm = msg->data >> MSI_DATA_TRIGGER_SHIFT;
+        new_ire.remap.dlm = msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT;
+        /* Hardware require RH = 1 for LPR delivery mode */
+        new_ire.remap.rh = (new_ire.remap.dlm == dest_LowestPrio);
+        new_ire.remap.vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) &
+                                MSI_DATA_VECTOR_MASK;
+        if ( x2apic_enabled )
+            new_ire.remap.dst = msg->dest32;
+        else
+            new_ire.remap.dst =
+                MASK_EXTR(msg->address_lo, MSI_ADDR_DEST_ID_MASK) << 8;
+        new_ire.remap.p = 1;
+    }
     else
-        new_ire.remap.dst = ((msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT)
-                             & 0xff) << 8;
+    {
+        new_ire.post.im = 1;
+        new_ire.post.vector = msi_desc->gvec;
+        new_ire.post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
+        new_ire.post.pda_h = virt_to_maddr(pi_desc) >> 32;
+        new_ire.post.p = 1;
+    }
 
     if ( pdev )
         set_msi_source_id(pdev, &new_ire);
     else
         set_hpet_source_id(msi_desc->hpet_id, &new_ire);
-    new_ire.remap.res_3 = 0;
-    new_ire.remap.res_4 = 0;
-    new_ire.remap.p = 1;    /* finally, set present bit */
 
     /* now construct new MSI/MSI-X rte entry */
     remap_rte = (struct msi_msg_remap_entry *)msg;
@@ -902,42 +905,6 @@  void iommu_disable_x2apic_IR(void)
         disable_qinval(drhd->iommu);
 }
 
-static void setup_posted_irte(
-    struct iremap_entry *new_ire, const struct iremap_entry *old_ire,
-    const struct pi_desc *pi_desc, const uint8_t gvec)
-{
-    memset(new_ire, 0, sizeof(*new_ire));
-
-    /*
-     * 'im' filed decides whether the irte is in posted format (with value 1)
-     * or remapped format (with value 0), if the old irte is in remapped format,
-     * we copy things from remapped part in 'struct iremap_entry', otherwise,
-     * we copy from posted part.
-     */
-    if ( !old_ire->remap.im )
-    {
-        new_ire->post.p = old_ire->remap.p;
-        new_ire->post.fpd = old_ire->remap.fpd;
-        new_ire->post.sid = old_ire->remap.sid;
-        new_ire->post.sq = old_ire->remap.sq;
-        new_ire->post.svt = old_ire->remap.svt;
-    }
-    else
-    {
-        new_ire->post.p = old_ire->post.p;
-        new_ire->post.fpd = old_ire->post.fpd;
-        new_ire->post.sid = old_ire->post.sid;
-        new_ire->post.sq = old_ire->post.sq;
-        new_ire->post.svt = old_ire->post.svt;
-        new_ire->post.urg = old_ire->post.urg;
-    }
-
-    new_ire->post.im = 1;
-    new_ire->post.vector = gvec;
-    new_ire->post.pda_l = virt_to_maddr(pi_desc) >> (32 - PDA_LOW_BIT);
-    new_ire->post.pda_h = virt_to_maddr(pi_desc) >> 32;
-}
-
 /*
  * This function is used to update the IRTE for posted-interrupt
  * when guest changes MSI/MSI-X information.
@@ -946,17 +913,9 @@  int pi_update_irte(const struct vcpu *v, const struct pirq *pirq,
     const uint8_t gvec)
 {
     struct irq_desc *desc;
-    const struct msi_desc *msi_desc;
-    int remap_index;
-    int rc = 0;
-    const struct pci_dev *pci_dev;
-    const struct acpi_drhd_unit *drhd;
-    struct iommu *iommu;
-    struct ir_ctrl *ir_ctrl;
-    struct iremap_entry *iremap_entries = NULL, *p = NULL;
-    struct iremap_entry new_ire, old_ire;
+    struct msi_desc *msi_desc;
     const struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc;
-    __uint128_t ret;
+    int rc;
 
     desc = pirq_spin_lock_irq_desc(pirq, NULL);
     if ( !desc )
@@ -968,59 +927,14 @@  int pi_update_irte(const struct vcpu *v, const struct pirq *pirq,
         rc = -ENODEV;
         goto unlock_out;
     }
-
-    pci_dev = msi_desc->dev;
-    if ( !pci_dev )
-    {
-        rc = -ENODEV;
-        goto unlock_out;
-    }
-
-    remap_index = msi_desc->remap_index;
+    msi_desc->pi_desc = pi_desc;
+    msi_desc->gvec = gvec;
 
     spin_unlock_irq(&desc->lock);
 
     ASSERT(pcidevs_locked());
-
-    /*
-     * FIXME: For performance reasons we should store the 'iommu' pointer in
-     * 'struct msi_desc' in some other place, so we don't need to waste
-     * time searching it here.
-     */
-    drhd = acpi_find_matched_drhd_unit(pci_dev);
-    if ( !drhd )
-        return -ENODEV;
-
-    iommu = drhd->iommu;
-    ir_ctrl = iommu_ir_ctrl(iommu);
-    if ( !ir_ctrl )
-        return -ENODEV;
-
-    spin_lock_irq(&ir_ctrl->iremap_lock);
-
-    GET_IREMAP_ENTRY(ir_ctrl->iremap_maddr, remap_index, iremap_entries, p);
-
-    old_ire = *p;
-
-    /* Setup/Update interrupt remapping table entry. */
-    setup_posted_irte(&new_ire, &old_ire, pi_desc, gvec);
-    ret = cmpxchg16b(p, &old_ire, &new_ire);
-
-    /*
-     * In the above, we use cmpxchg16 to atomically update the 128-bit IRTE,
-     * and the hardware cannot update the IRTE behind us, so the return value
-     * of cmpxchg16 should be the same as old_ire. This ASSERT validate it.
-     */
-    ASSERT(ret == old_ire.val);
-
-    iommu_flush_cache_entry(p, sizeof(*p));
-    iommu_flush_iec_index(iommu, 0, remap_index);
-
-    unmap_vtd_domain_page(iremap_entries);
-
-    spin_unlock_irq(&ir_ctrl->iremap_lock);
-
-    return 0;
+    rc = iommu_update_ire_from_msi(msi_desc, &msi_desc->msg);
+    return rc;
 
  unlock_out:
     spin_unlock_irq(&desc->lock);
diff --git a/xen/include/asm-x86/msi.h b/xen/include/asm-x86/msi.h
index 9c02945..3286692 100644
--- a/xen/include/asm-x86/msi.h
+++ b/xen/include/asm-x86/msi.h
@@ -118,6 +118,8 @@  struct msi_desc {
 	struct msi_msg msg;		/* Last set MSI message */
 
 	int remap_index;		/* index in interrupt remapping table */
+	const void *pi_desc;		/* PDA, indicates msi is delivered via VT-d PI */
+	uint8_t gvec;			/* guest vector. valid when pi_desc isn't NULL */
 };
 
 /*