diff mbox series

[v2] hw/nvme: be more careful when deasserting IRQs

Message ID 20210614135429.56475-1-jakub.jermar@kernkonzept.com (mailing list archive)
State New, archived
Headers show
Series [v2] hw/nvme: be more careful when deasserting IRQs | expand

Commit Message

Jakub Jermář June 14, 2021, 1:54 p.m. UTC
An IRQ vector used by a completion queue cannot be deasserted without
first checking if the same vector does not need to stay asserted for
some other completion queue. To this end the controller structure is
extended by a counter of asserted completion queues.

To prevent incrementing the counter for completion queues that are
asserted repeatedly, each completion queue is extended by a flag which
tells whether the queue is currently asserted.

Signed-off-by: Jakub Jermar <jakub.jermar@kernkonzept.com>
---
 hw/nvme/ctrl.c | 22 ++++++++++++++++------
 hw/nvme/nvme.h |  2 ++
 2 files changed, 18 insertions(+), 6 deletions(-)

Comments

Klaus Jensen June 14, 2021, 6:19 p.m. UTC | #1
On Jun 14 15:54, Jakub Jermář wrote:
>An IRQ vector used by a completion queue cannot be deasserted without
>first checking if the same vector does not need to stay asserted for
>some other completion queue. To this end the controller structure is
>extended by a counter of asserted completion queues.
>
>To prevent incrementing the counter for completion queues that are
>asserted repeatedly, each completion queue is extended by a flag which
>tells whether the queue is currently asserted.
>

I wasn't clear on this on my last review, but the misunderstanding here 
is that the completion queue vector somehow matters for pin-based 
interrupts - it does not. There is only *one* interrupt vector and if 
the controller is not using MSI-X, then the Interrupt Vector (IV) field 
of the Create I/O Completion Queue command *must* be zero.

In other words, all that matters is if there are one or more CQEs posted 
(in any queue).

Would this (untested) patch do the trick?


diff --git i/hw/nvme/nvme.h w/hw/nvme/nvme.h
index 93a7e0e5380e..60250b579464 100644
--- i/hw/nvme/nvme.h
+++ w/hw/nvme/nvme.h
@@ -405,6 +405,7 @@ typedef struct NvmeCtrl {
      uint32_t    max_q_ents;
      uint8_t     outstanding_aers;
      uint32_t    irq_status;
+    int         cq_pending;
      uint64_t    host_timestamp;                 /* Timestamp sent by the host */
      uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
      uint64_t    starttime_ms;
diff --git i/hw/nvme/ctrl.c w/hw/nvme/ctrl.c
index 7dea64b72e6a..4de4b3177463 100644
--- i/hw/nvme/ctrl.c
+++ w/hw/nvme/ctrl.c
@@ -473,6 +473,10 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
              return;
          } else {
              assert(cq->vector < 32);
+            if (--(n->cq_pending)) {
+                return;
+            }
+
              n->irq_status &= ~(1 << cq->vector);
              nvme_irq_check(n);
          }
@@ -1258,6 +1262,7 @@ static void nvme_post_cqes(void *opaque)
      NvmeCQueue *cq = opaque;
      NvmeCtrl *n = cq->ctrl;
      NvmeRequest *req, *next;
+    bool empty = cq->head == cq->tail;
      int ret;

      QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
@@ -1287,6 +1292,10 @@ static void nvme_post_cqes(void *opaque)
          QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
      }
      if (cq->tail != cq->head) {
+        if (empty) {
+            n->cq_pending++;
+        }
+
          nvme_irq_assert(n, cq);
      }
  }


>Signed-off-by: Jakub Jermar <jakub.jermar@kernkonzept.com>
>---
> hw/nvme/ctrl.c | 22 ++++++++++++++++------
> hw/nvme/nvme.h |  2 ++
> 2 files changed, 18 insertions(+), 6 deletions(-)
>
>diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
>index 0bcaf7192f..97a5d768ee 100644
>--- a/hw/nvme/ctrl.c
>+++ b/hw/nvme/ctrl.c
>@@ -451,9 +451,13 @@ static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
>             msix_notify(&(n->parent_obj), cq->vector);
>         } else {
>             trace_pci_nvme_irq_pin();
>-            assert(cq->vector < 32);
>-            n->irq_status |= 1 << cq->vector;
>-            nvme_irq_check(n);
>+            if (!cq->irq_asserted) {
>+                cq->irq_asserted = true;
>+                assert(cq->vector < 32);
>+                n->irq_asserted_cnt[cq->vector]++;
>+                n->irq_status |= 1 << cq->vector;
>+                nvme_irq_check(n);
>+            }
>         }
>     } else {
>         trace_pci_nvme_irq_masked();
>@@ -466,9 +470,15 @@ static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
>         if (msix_enabled(&(n->parent_obj))) {
>             return;
>         } else {
>-            assert(cq->vector < 32);
>-            n->irq_status &= ~(1 << cq->vector);
>-            nvme_irq_check(n);
>+            if (cq->irq_asserted) {
>+                cq->irq_asserted = false;
>+                assert(cq->vector < 32);
>+                assert(n->irq_asserted_cnt[cq->vector]);
>+                if (n->irq_asserted_cnt[cq->vector]-- == 1) {
>+                    n->irq_status &= ~(1 << cq->vector);
>+                }
>+                nvme_irq_check(n);
>+            }
>         }
>     }
> }
>diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
>index 81a35cda14..753bf7a923 100644
>--- a/hw/nvme/nvme.h
>+++ b/hw/nvme/nvme.h
>@@ -352,6 +352,7 @@ typedef struct NvmeCQueue {
>     uint32_t    head;
>     uint32_t    tail;
>     uint32_t    vector;
>+    bool        irq_asserted;
>     uint32_t    size;
>     uint64_t    dma_addr;
>     QEMUTimer   *timer;
>@@ -404,6 +405,7 @@ typedef struct NvmeCtrl {
>     uint32_t    max_q_ents;
>     uint8_t     outstanding_aers;
>     uint32_t    irq_status;
>+    uint16_t    irq_asserted_cnt[32];
>     uint64_t    host_timestamp;                 /* Timestamp sent by the host */
>     uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
>     uint64_t    starttime_ms;
>-- 
>2.31.1
>
>
Jakub Jermář June 15, 2021, 7:42 a.m. UTC | #2
On 6/14/21 8:19 PM, Klaus Jensen wrote:
> On Jun 14 15:54, Jakub Jermář wrote:
>> An IRQ vector used by a completion queue cannot be deasserted without
>> first checking if the same vector does not need to stay asserted for
>> some other completion queue. To this end the controller structure is
>> extended by a counter of asserted completion queues.
>>
>> To prevent incrementing the counter for completion queues that are
>> asserted repeatedly, each completion queue is extended by a flag which
>> tells whether the queue is currently asserted.
>>
> 
> I wasn't clear on this on my last review, but the misunderstanding here 
> is that the completion queue vector somehow matters for pin-based 
> interrupts - it does not. There is only *one* interrupt vector and if 
> the controller is not using MSI-X, then the Interrupt Vector (IV) field 
> of the Create I/O Completion Queue command *must* be zero.
> 
> In other words, all that matters is if there are one or more CQEs posted 
> (in any queue).

Ah, my bad. I got confused by the assert(vector < 32).

> Would this (untested) patch do the trick?

It works for my testcase, but I am not sure this will work when deassert 
is called when the IRQ is not necessarily asserted, such as in nvme_del_cq.

Jakub

> diff --git i/hw/nvme/nvme.h w/hw/nvme/nvme.h
> index 93a7e0e5380e..60250b579464 100644
> --- i/hw/nvme/nvme.h
> +++ w/hw/nvme/nvme.h
> @@ -405,6 +405,7 @@ typedef struct NvmeCtrl {
>       uint32_t    max_q_ents;
>       uint8_t     outstanding_aers;
>       uint32_t    irq_status;
> +    int         cq_pending;
>       uint64_t    host_timestamp;                 /* Timestamp sent by 
> the host */
>       uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
>       uint64_t    starttime_ms;
> diff --git i/hw/nvme/ctrl.c w/hw/nvme/ctrl.c
> index 7dea64b72e6a..4de4b3177463 100644
> --- i/hw/nvme/ctrl.c
> +++ w/hw/nvme/ctrl.c
> @@ -473,6 +473,10 @@ static void nvme_irq_deassert(NvmeCtrl *n, 
> NvmeCQueue *cq)
>               return;
>           } else {
>               assert(cq->vector < 32);
> +            if (--(n->cq_pending)) {
> +                return;
> +            }
> +
>               n->irq_status &= ~(1 << cq->vector);
>               nvme_irq_check(n);
>           }
> @@ -1258,6 +1262,7 @@ static void nvme_post_cqes(void *opaque)
>       NvmeCQueue *cq = opaque;
>       NvmeCtrl *n = cq->ctrl;
>       NvmeRequest *req, *next;
> +    bool empty = cq->head == cq->tail;
>       int ret;
> 
>       QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
> @@ -1287,6 +1292,10 @@ static void nvme_post_cqes(void *opaque)
>           QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
>       }
>       if (cq->tail != cq->head) {
> +        if (empty) {
> +            n->cq_pending++;
> +        }
> +
>           nvme_irq_assert(n, cq);
>       }
>   }
> 
> 
>> Signed-off-by: Jakub Jermar <jakub.jermar@kernkonzept.com>
>> ---
>> hw/nvme/ctrl.c | 22 ++++++++++++++++------
>> hw/nvme/nvme.h |  2 ++
>> 2 files changed, 18 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
>> index 0bcaf7192f..97a5d768ee 100644
>> --- a/hw/nvme/ctrl.c
>> +++ b/hw/nvme/ctrl.c
>> @@ -451,9 +451,13 @@ static void nvme_irq_assert(NvmeCtrl *n, 
>> NvmeCQueue *cq)
>>             msix_notify(&(n->parent_obj), cq->vector);
>>         } else {
>>             trace_pci_nvme_irq_pin();
>> -            assert(cq->vector < 32);
>> -            n->irq_status |= 1 << cq->vector;
>> -            nvme_irq_check(n);
>> +            if (!cq->irq_asserted) {
>> +                cq->irq_asserted = true;
>> +                assert(cq->vector < 32);
>> +                n->irq_asserted_cnt[cq->vector]++;
>> +                n->irq_status |= 1 << cq->vector;
>> +                nvme_irq_check(n);
>> +            }
>>         }
>>     } else {
>>         trace_pci_nvme_irq_masked();
>> @@ -466,9 +470,15 @@ static void nvme_irq_deassert(NvmeCtrl *n, 
>> NvmeCQueue *cq)
>>         if (msix_enabled(&(n->parent_obj))) {
>>             return;
>>         } else {
>> -            assert(cq->vector < 32);
>> -            n->irq_status &= ~(1 << cq->vector);
>> -            nvme_irq_check(n);
>> +            if (cq->irq_asserted) {
>> +                cq->irq_asserted = false;
>> +                assert(cq->vector < 32);
>> +                assert(n->irq_asserted_cnt[cq->vector]);
>> +                if (n->irq_asserted_cnt[cq->vector]-- == 1) {
>> +                    n->irq_status &= ~(1 << cq->vector);
>> +                }
>> +                nvme_irq_check(n);
>> +            }
>>         }
>>     }
>> }
>> diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
>> index 81a35cda14..753bf7a923 100644
>> --- a/hw/nvme/nvme.h
>> +++ b/hw/nvme/nvme.h
>> @@ -352,6 +352,7 @@ typedef struct NvmeCQueue {
>>     uint32_t    head;
>>     uint32_t    tail;
>>     uint32_t    vector;
>> +    bool        irq_asserted;
>>     uint32_t    size;
>>     uint64_t    dma_addr;
>>     QEMUTimer   *timer;
>> @@ -404,6 +405,7 @@ typedef struct NvmeCtrl {
>>     uint32_t    max_q_ents;
>>     uint8_t     outstanding_aers;
>>     uint32_t    irq_status;
>> +    uint16_t    irq_asserted_cnt[32];
>>     uint64_t    host_timestamp;                 /* Timestamp sent by 
>> the host */
>>     uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
>>     uint64_t    starttime_ms;
>> -- 
>> 2.31.1
>>
>>
>
Klaus Jensen June 15, 2021, 8:12 a.m. UTC | #3
On Jun 15 09:42, Jakub Jermář wrote:
>On 6/14/21 8:19 PM, Klaus Jensen wrote:
>>On Jun 14 15:54, Jakub Jermář wrote:
>>>An IRQ vector used by a completion queue cannot be deasserted without
>>>first checking if the same vector does not need to stay asserted for
>>>some other completion queue. To this end the controller structure is
>>>extended by a counter of asserted completion queues.
>>>
>>>To prevent incrementing the counter for completion queues that are
>>>asserted repeatedly, each completion queue is extended by a flag which
>>>tells whether the queue is currently asserted.
>>>
>>
>>I wasn't clear on this on my last review, but the misunderstanding 
>>here is that the completion queue vector somehow matters for 
>>pin-based interrupts - it does not. There is only *one* interrupt 
>>vector and if the controller is not using MSI-X, then the Interrupt 
>>Vector (IV) field of the Create I/O Completion Queue command *must* 
>>be zero.
>>
>>In other words, all that matters is if there are one or more CQEs 
>>posted (in any queue).
>
>Ah, my bad. I got confused by the assert(vector < 32).
>
>>Would this (untested) patch do the trick?
>
>It works for my testcase, but I am not sure this will work when 
>deassert is called when the IRQ is not necessarily asserted, such as 
>in nvme_del_cq.
>

You are right, I think it needs a check on cq->tail != cq->head before 
decrementing cq_pending.

>Jakub
>
>>diff --git i/hw/nvme/nvme.h w/hw/nvme/nvme.h
>>index 93a7e0e5380e..60250b579464 100644
>>--- i/hw/nvme/nvme.h
>>+++ w/hw/nvme/nvme.h
>>@@ -405,6 +405,7 @@ typedef struct NvmeCtrl {
>>      uint32_t    max_q_ents;
>>      uint8_t     outstanding_aers;
>>      uint32_t    irq_status;
>>+    int         cq_pending;
>>      uint64_t    host_timestamp;                 /* Timestamp sent 
>>by the host */
>>      uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
>>      uint64_t    starttime_ms;
>>diff --git i/hw/nvme/ctrl.c w/hw/nvme/ctrl.c
>>index 7dea64b72e6a..4de4b3177463 100644
>>--- i/hw/nvme/ctrl.c
>>+++ w/hw/nvme/ctrl.c
>>@@ -473,6 +473,10 @@ static void nvme_irq_deassert(NvmeCtrl *n, 
>>NvmeCQueue *cq)
>>              return;
>>          } else {
>>              assert(cq->vector < 32);
>>+            if (--(n->cq_pending)) {
>>+                return;
>>+            }
>>+
>>              n->irq_status &= ~(1 << cq->vector);
>>              nvme_irq_check(n);
>>          }
>>@@ -1258,6 +1262,7 @@ static void nvme_post_cqes(void *opaque)
>>      NvmeCQueue *cq = opaque;
>>      NvmeCtrl *n = cq->ctrl;
>>      NvmeRequest *req, *next;
>>+    bool empty = cq->head == cq->tail;
>>      int ret;
>>
>>      QTAILQ_FOREACH_SAFE(req, &cq->req_list, entry, next) {
>>@@ -1287,6 +1292,10 @@ static void nvme_post_cqes(void *opaque)
>>          QTAILQ_INSERT_TAIL(&sq->req_list, req, entry);
>>      }
>>      if (cq->tail != cq->head) {
>>+        if (empty) {
>>+            n->cq_pending++;
>>+        }
>>+
>>          nvme_irq_assert(n, cq);
>>      }
>>  }
>>
>>
>>>Signed-off-by: Jakub Jermar <jakub.jermar@kernkonzept.com>
>>>---
>>>hw/nvme/ctrl.c | 22 ++++++++++++++++------
>>>hw/nvme/nvme.h |  2 ++
>>>2 files changed, 18 insertions(+), 6 deletions(-)
>>>
>>>diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
>>>index 0bcaf7192f..97a5d768ee 100644
>>>--- a/hw/nvme/ctrl.c
>>>+++ b/hw/nvme/ctrl.c
>>>@@ -451,9 +451,13 @@ static void nvme_irq_assert(NvmeCtrl *n, 
>>>NvmeCQueue *cq)
>>>            msix_notify(&(n->parent_obj), cq->vector);
>>>        } else {
>>>            trace_pci_nvme_irq_pin();
>>>-            assert(cq->vector < 32);
>>>-            n->irq_status |= 1 << cq->vector;
>>>-            nvme_irq_check(n);
>>>+            if (!cq->irq_asserted) {
>>>+                cq->irq_asserted = true;
>>>+                assert(cq->vector < 32);
>>>+                n->irq_asserted_cnt[cq->vector]++;
>>>+                n->irq_status |= 1 << cq->vector;
>>>+                nvme_irq_check(n);
>>>+            }
>>>        }
>>>    } else {
>>>        trace_pci_nvme_irq_masked();
>>>@@ -466,9 +470,15 @@ static void nvme_irq_deassert(NvmeCtrl *n, 
>>>NvmeCQueue *cq)
>>>        if (msix_enabled(&(n->parent_obj))) {
>>>            return;
>>>        } else {
>>>-            assert(cq->vector < 32);
>>>-            n->irq_status &= ~(1 << cq->vector);
>>>-            nvme_irq_check(n);
>>>+            if (cq->irq_asserted) {
>>>+                cq->irq_asserted = false;
>>>+                assert(cq->vector < 32);
>>>+                assert(n->irq_asserted_cnt[cq->vector]);
>>>+                if (n->irq_asserted_cnt[cq->vector]-- == 1) {
>>>+                    n->irq_status &= ~(1 << cq->vector);
>>>+                }
>>>+                nvme_irq_check(n);
>>>+            }
>>>        }
>>>    }
>>>}
>>>diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
>>>index 81a35cda14..753bf7a923 100644
>>>--- a/hw/nvme/nvme.h
>>>+++ b/hw/nvme/nvme.h
>>>@@ -352,6 +352,7 @@ typedef struct NvmeCQueue {
>>>    uint32_t    head;
>>>    uint32_t    tail;
>>>    uint32_t    vector;
>>>+    bool        irq_asserted;
>>>    uint32_t    size;
>>>    uint64_t    dma_addr;
>>>    QEMUTimer   *timer;
>>>@@ -404,6 +405,7 @@ typedef struct NvmeCtrl {
>>>    uint32_t    max_q_ents;
>>>    uint8_t     outstanding_aers;
>>>    uint32_t    irq_status;
>>>+    uint16_t    irq_asserted_cnt[32];
>>>    uint64_t    host_timestamp;                 /* Timestamp sent 
>>>by the host */
>>>    uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
>>>    uint64_t    starttime_ms;
>>>-- 
>>>2.31.1
>>>
>>>
>>
>
>
diff mbox series

Patch

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 0bcaf7192f..97a5d768ee 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -451,9 +451,13 @@  static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
             msix_notify(&(n->parent_obj), cq->vector);
         } else {
             trace_pci_nvme_irq_pin();
-            assert(cq->vector < 32);
-            n->irq_status |= 1 << cq->vector;
-            nvme_irq_check(n);
+            if (!cq->irq_asserted) {
+                cq->irq_asserted = true;
+                assert(cq->vector < 32);
+                n->irq_asserted_cnt[cq->vector]++;
+                n->irq_status |= 1 << cq->vector;
+                nvme_irq_check(n);
+            }
         }
     } else {
         trace_pci_nvme_irq_masked();
@@ -466,9 +470,15 @@  static void nvme_irq_deassert(NvmeCtrl *n, NvmeCQueue *cq)
         if (msix_enabled(&(n->parent_obj))) {
             return;
         } else {
-            assert(cq->vector < 32);
-            n->irq_status &= ~(1 << cq->vector);
-            nvme_irq_check(n);
+            if (cq->irq_asserted) {
+                cq->irq_asserted = false;
+                assert(cq->vector < 32);
+                assert(n->irq_asserted_cnt[cq->vector]);
+                if (n->irq_asserted_cnt[cq->vector]-- == 1) {
+                    n->irq_status &= ~(1 << cq->vector);
+                }
+                nvme_irq_check(n);
+            }
         }
     }
 }
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 81a35cda14..753bf7a923 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -352,6 +352,7 @@  typedef struct NvmeCQueue {
     uint32_t    head;
     uint32_t    tail;
     uint32_t    vector;
+    bool        irq_asserted;
     uint32_t    size;
     uint64_t    dma_addr;
     QEMUTimer   *timer;
@@ -404,6 +405,7 @@  typedef struct NvmeCtrl {
     uint32_t    max_q_ents;
     uint8_t     outstanding_aers;
     uint32_t    irq_status;
+    uint16_t    irq_asserted_cnt[32];
     uint64_t    host_timestamp;                 /* Timestamp sent by the host */
     uint64_t    timestamp_set_qemu_clock_ms;    /* QEMU clock time */
     uint64_t    starttime_ms;