Message ID | 20230404145557.2356894-2-sean.anderson@seco.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3,1/2] soc: fsl: qbman: Always disable interrupts when taking cgr_lock | expand |
On Tue, 2023-04-04 at 10:55 -0400, Sean Anderson wrote: > @@ -1456,11 +1456,11 @@ static void qm_congestion_task(struct work_struct > *work) > union qm_mc_result *mcr; > struct qman_cgr *cgr; > > - spin_lock_irq(&p->cgr_lock); > + raw_spin_lock_irq(&p->cgr_lock); > qm_mc_start(&p->p); > qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION); > if (!qm_mc_result_timeout(&p->p, &mcr)) { > - spin_unlock_irq(&p->cgr_lock); > + raw_spin_unlock_irq(&p->cgr_lock); qm_mc_result_timeout() spins with a timeout of 10 ms which is very inappropriate for a raw lock. What is the actual expected upper bound? > dev_crit(p->config->dev, "QUERYCONGESTION timeout\n"); > qman_p_irqsource_add(p, QM_PIRQ_CSCI); > return; > @@ -1476,7 +1476,7 @@ static void qm_congestion_task(struct work_struct > *work) > list_for_each_entry(cgr, &p->cgr_cbs, node) > if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid)) > cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid)); > - spin_unlock_irq(&p->cgr_lock); > + raw_spin_unlock_irq(&p->cgr_lock); > qman_p_irqsource_add(p, QM_PIRQ_CSCI); > } The callback loop is also a bit concerning... -Crystal
On 4/4/23 11:33, Crystal Wood wrote: > On Tue, 2023-04-04 at 10:55 -0400, Sean Anderson wrote: > >> @@ -1456,11 +1456,11 @@ static void tqm_congestion_task(struct work_struct >> *work) >> union qm_mc_result *mcr; >> struct qman_cgr *cgr; >> >> - spin_lock_irq(&p->cgr_lock); >> + raw_spin_lock_irq(&p->cgr_lock); >> qm_mc_start(&p->p); >> qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION); >> if (!qm_mc_result_timeout(&p->p, &mcr)) { >> - spin_unlock_irq(&p->cgr_lock); >> + raw_spin_unlock_irq(&p->cgr_lock); > > qm_mc_result_timeout() spins with a timeout of 10 ms which is very > inappropriate for a raw lock. What is the actual expected upper bound? Hm, maybe we can move this qm_mc stuff outside cgr_lock? In most other places they're called without cgr_lock, which implies that its usage here is meant to synchronize against some other function. >> dev_crit(p->config->dev, "QUERYCONGESTION timeout\n"); >> qman_p_irqsource_add(p, QM_PIRQ_CSCI); >> return; >> @@ -1476,7 +1476,7 @@ static void qm_congestion_task(struct work_struct >> *work) >> list_for_each_entry(cgr, &p->cgr_cbs, node) >> if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid)) >> cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid)); >> - spin_unlock_irq(&p->cgr_lock); >> + raw_spin_unlock_irq(&p->cgr_lock); >> qman_p_irqsource_add(p, QM_PIRQ_CSCI); >> } > > The callback loop is also a bit concerning... The callbacks (in .../dpaa/dpaa_eth.c and .../caam/qi.c) look OK. The only thing which might take a bit is dpaa_eth_refill_bpools, which allocates memory (from the atomic pool). --Sean
Hi Crystal, On 4/4/23 12:04, Sean Anderson wrote: > On 4/4/23 11:33, Crystal Wood wrote: >> On Tue, 2023-04-04 at 10:55 -0400, Sean Anderson wrote: >> >>> @@ -1456,11 +1456,11 @@ static void tqm_congestion_task(struct work_struct >>> *work) >>> union qm_mc_result *mcr; >>> struct qman_cgr *cgr; >>> >>> - spin_lock_irq(&p->cgr_lock); >>> + raw_spin_lock_irq(&p->cgr_lock); >>> qm_mc_start(&p->p); >>> qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION); >>> if (!qm_mc_result_timeout(&p->p, &mcr)) { >>> - spin_unlock_irq(&p->cgr_lock); >>> + raw_spin_unlock_irq(&p->cgr_lock); >> >> qm_mc_result_timeout() spins with a timeout of 10 ms which is very >> inappropriate for a raw lock. What is the actual expected upper bound? > > Hm, maybe we can move this qm_mc stuff outside cgr_lock? In most other > places they're called without cgr_lock, which implies that its usage > here is meant to synchronize against some other function. Do you have any suggestions here? I think this should really be handled in a follow-up patch. If you think this code is waiting too long in a raw spinlock, the existing code can wait just as long with IRQs disabled. This patch doesn't change existing system responsiveness. --Sean >>> dev_crit(p->config->dev, "QUERYCONGESTION timeout\n"); >>> qman_p_irqsource_add(p, QM_PIRQ_CSCI); >>> return; >>> @@ -1476,7 +1476,7 @@ static void qm_congestion_task(struct work_struct >>> *work) >>> list_for_each_entry(cgr, &p->cgr_cbs, node) >>> if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid)) >>> cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid)); >>> - spin_unlock_irq(&p->cgr_lock); >>> + raw_spin_unlock_irq(&p->cgr_lock); >>> qman_p_irqsource_add(p, QM_PIRQ_CSCI); >>> } >> >> The callback loop is also a bit concerning... > > The callbacks (in .../dpaa/dpaa_eth.c and .../caam/qi.c) look OK. The > only thing which might take a bit is dpaa_eth_refill_bpools, which > allocates memory (from the atomic pool). > > --Sean
On Tue, 2023-04-11 at 11:09 -0400, Sean Anderson wrote: > Hi Crystal, > > On 4/4/23 12:04, Sean Anderson wrote: > > On 4/4/23 11:33, Crystal Wood wrote: > > > On Tue, 2023-04-04 at 10:55 -0400, Sean Anderson wrote: > > > > > > > @@ -1456,11 +1456,11 @@ static void tqm_congestion_task(struct > > > > work_struct > > > > *work) > > > > union qm_mc_result *mcr; > > > > struct qman_cgr *cgr; > > > > > > > > - spin_lock_irq(&p->cgr_lock); > > > > + raw_spin_lock_irq(&p->cgr_lock); > > > > qm_mc_start(&p->p); > > > > qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION); > > > > if (!qm_mc_result_timeout(&p->p, &mcr)) { > > > > - spin_unlock_irq(&p->cgr_lock); > > > > + raw_spin_unlock_irq(&p->cgr_lock); > > > > > > qm_mc_result_timeout() spins with a timeout of 10 ms which is very > > > inappropriate for a raw lock. What is the actual expected upper bound? > > > > Hm, maybe we can move this qm_mc stuff outside cgr_lock? In most other > > places they're called without cgr_lock, which implies that its usage > > here is meant to synchronize against some other function. > > Do you have any suggestions here? I think this should really be handled > in a follow-up patch. If you think this code is waiting too long in a raw > spinlock, the existing code can wait just as long with IRQs disabled. > This patch doesn't change existing system responsiveness. Well, AFAICT it expands the situations in which it happens from configuration codepaths to stuff like congestion handling. The proper fix would probably be to use some mechanism other than smp_call_function_single() to run code on the target cpu so that it can run with irqs enabled (or get confirmation that the actual worst case is short enough), but barring that I guess at least acknowledge the situation in a comment? -Crystal
On 4/18/23 02:29, Crystal Wood wrote: > On Tue, 2023-04-11 at 11:09 -0400, Sean Anderson wrote: >> Hi Crystal, >> >> On 4/4/23 12:04, Sean Anderson wrote: >> > On 4/4/23 11:33, Crystal Wood wrote: >> > > On Tue, 2023-04-04 at 10:55 -0400, Sean Anderson wrote: >> > > >> > > > @@ -1456,11 +1456,11 @@ static void tqm_congestion_task(struct >> > > > work_struct >> > > > *work) >> > > > union qm_mc_result *mcr; >> > > > struct qman_cgr *cgr; >> > > > >> > > > - spin_lock_irq(&p->cgr_lock); >> > > > + raw_spin_lock_irq(&p->cgr_lock); >> > > > qm_mc_start(&p->p); >> > > > qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION); >> > > > if (!qm_mc_result_timeout(&p->p, &mcr)) { >> > > > - spin_unlock_irq(&p->cgr_lock); >> > > > + raw_spin_unlock_irq(&p->cgr_lock); >> > > >> > > qm_mc_result_timeout() spins with a timeout of 10 ms which is very >> > > inappropriate for a raw lock. What is the actual expected upper bound? >> > >> > Hm, maybe we can move this qm_mc stuff outside cgr_lock? In most other >> > places they're called without cgr_lock, which implies that its usage >> > here is meant to synchronize against some other function. >> >> Do you have any suggestions here? I think this should really be handled >> in a follow-up patch. If you think this code is waiting too long in a raw >> spinlock, the existing code can wait just as long with IRQs disabled. >> This patch doesn't change existing system responsiveness. > > Well, AFAICT it expands the situations in which it happens from configuration > codepaths to stuff like congestion handling. The proper fix would probably be > to use some mechanism other than smp_call_function_single() to run code on the > target cpu so that it can run with irqs enabled (or get confirmation that the > actual worst case is short enough), Well, we used to use a kthread/wait_for_completion before 96f413f47677 ("soc/fsl/qbman: fix issue in qman_delete_cgr_safe()"). 
The commit description there isn't very enlightening as to the actual bug, which is why I CC'd Madalin earlier. To be honest, I'm not really familiar with the other options in the kernel. > but barring that I guess at least acknowledge the situation in a > comment? Fine by me. --Sean
diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 1bf1f1ea67f0..7a1558aba523 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -991,7 +991,7 @@ struct qman_portal { /* linked-list of CSCN handlers. */ struct list_head cgr_cbs; /* list lock */ - spinlock_t cgr_lock; + raw_spinlock_t cgr_lock; struct work_struct congestion_work; struct work_struct mr_work; char irqname[MAX_IRQNAME]; @@ -1281,7 +1281,7 @@ static int qman_create_portal(struct qman_portal *portal, /* if the given mask is NULL, assume all CGRs can be seen */ qman_cgrs_fill(&portal->cgrs[0]); INIT_LIST_HEAD(&portal->cgr_cbs); - spin_lock_init(&portal->cgr_lock); + raw_spin_lock_init(&portal->cgr_lock); INIT_WORK(&portal->congestion_work, qm_congestion_task); INIT_WORK(&portal->mr_work, qm_mr_process_task); portal->bits = 0; @@ -1456,11 +1456,11 @@ static void qm_congestion_task(struct work_struct *work) union qm_mc_result *mcr; struct qman_cgr *cgr; - spin_lock_irq(&p->cgr_lock); + raw_spin_lock_irq(&p->cgr_lock); qm_mc_start(&p->p); qm_mc_commit(&p->p, QM_MCC_VERB_QUERYCONGESTION); if (!qm_mc_result_timeout(&p->p, &mcr)) { - spin_unlock_irq(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); dev_crit(p->config->dev, "QUERYCONGESTION timeout\n"); qman_p_irqsource_add(p, QM_PIRQ_CSCI); return; @@ -1476,7 +1476,7 @@ static void qm_congestion_task(struct work_struct *work) list_for_each_entry(cgr, &p->cgr_cbs, node) if (cgr->cb && qman_cgrs_get(&c, cgr->cgrid)) cgr->cb(p, cgr, qman_cgrs_get(&rr, cgr->cgrid)); - spin_unlock_irq(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); qman_p_irqsource_add(p, QM_PIRQ_CSCI); } @@ -2440,7 +2440,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, preempt_enable(); cgr->chan = p->config->channel; - spin_lock_irq(&p->cgr_lock); + raw_spin_lock_irq(&p->cgr_lock); if (opts) { struct qm_mcc_initcgr local_opts = *opts; @@ -2477,7 +2477,7 @@ int qman_create_cgr(struct qman_cgr *cgr, u32 flags, 
qman_cgrs_get(&p->cgrs[1], cgr->cgrid)) cgr->cb(p, cgr, 1); out: - spin_unlock_irq(&p->cgr_lock); + raw_spin_unlock_irq(&p->cgr_lock); put_affine_portal(); return ret; } @@ -2512,7 +2512,7 @@ int qman_delete_cgr(struct qman_cgr *cgr) return -EINVAL; memset(&local_opts, 0, sizeof(struct qm_mcc_initcgr)); - spin_lock_irqsave(&p->cgr_lock, irqflags); + raw_spin_lock_irqsave(&p->cgr_lock, irqflags); list_del(&cgr->node); /* * If there are no other CGR objects for this CGRID in the list, @@ -2537,7 +2537,7 @@ int qman_delete_cgr(struct qman_cgr *cgr) /* add back to the list */ list_add(&cgr->node, &p->cgr_cbs); release_lock: - spin_unlock_irqrestore(&p->cgr_lock, irqflags); + raw_spin_unlock_irqrestore(&p->cgr_lock, irqflags); put_affine_portal(); return ret; } @@ -2577,9 +2577,9 @@ static int qman_update_cgr(struct qman_cgr *cgr, struct qm_mcc_initcgr *opts) if (!p) return -EINVAL; - spin_lock_irqsave(&p->cgr_lock, irqflags); + raw_spin_lock_irqsave(&p->cgr_lock, irqflags); ret = qm_modify_cgr(cgr, 0, opts); - spin_unlock_irqrestore(&p->cgr_lock, irqflags); + raw_spin_unlock_irqrestore(&p->cgr_lock, irqflags); put_affine_portal(); return ret; }