Message ID | 20231117174337.20174-3-quic_jhugo@quicinc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Convert to persistent DRM devices | expand |
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com> On 17.11.2023 18:43, Jeffrey Hugo wrote: > From: Carl Vanderlip <quic_carlv@quicinc.com> > > Currently the QAIC DRM device registers itself when the MHI QAIC_CONTROL > channel becomes available. This is when the device is able to process > workloads. However, the DRM driver also provides the debugfs interface > bootlog for the device. If the device fails to boot to the QSM (which > brings up the MHI QAIC_CONTROL channel), the bootlog won't be available for > debugging why it failed to boot. > > Change when the DRM device registers itself from when QAIC_CONTROL is > available to when the card is first probed on the PCI bus. Additionally, > make the DRM driver persist through reset/error cases so the driver > doesn't have to be reloaded to access the card again. Send > KOBJ_ONLINE/OFFLINE uevents so userspace can know when DRM device is > ready to handle requests. > > Signed-off-by: Carl Vanderlip <quic_carlv@quicinc.com> > Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com> > Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com> > Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com> > --- > Documentation/accel/qaic/qaic.rst | 9 +++++- > drivers/accel/qaic/mhi_controller.c | 2 +- > drivers/accel/qaic/qaic.h | 2 +- > drivers/accel/qaic/qaic_drv.c | 44 +++++++++++------------------ > 4 files changed, 27 insertions(+), 30 deletions(-) > > diff --git a/Documentation/accel/qaic/qaic.rst b/Documentation/accel/qaic/qaic.rst > index f81020736ebf..efb7771273bb 100644 > --- a/Documentation/accel/qaic/qaic.rst > +++ b/Documentation/accel/qaic/qaic.rst > @@ -93,8 +93,15 @@ commands (does not impact QAIC). > uAPI > ==== > > +QAIC creates an accel device per phsyical PCIe device. This accel device exists > +for as long as the PCIe device is known to Linux. > + > +The PCIe device may not be in the state to accept requests from userspace at > +all times. QAIC will trigger KOBJ_ONLINE/OFFLINE uevents to advertise when the > +device can accept requests (ONLINE) and when the device is no longer accepting > +requests (OFFLINE) because of a reset or other state transition. > + > QAIC defines a number of driver specific IOCTLs as part of the userspace API. > -This section describes those APIs. > > DRM_IOCTL_QAIC_MANAGE > This IOCTL allows userspace to send a NNC request to the QSM. The call will > diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c > index 5d3cc30009cc..832464f2833a 100644 > --- a/drivers/accel/qaic/mhi_controller.c > +++ b/drivers/accel/qaic/mhi_controller.c > @@ -469,7 +469,7 @@ static void mhi_status_cb(struct mhi_controller *mhi_cntrl, enum mhi_callback re > pci_err(qdev->pdev, "Fatal error received from device. Attempting to recover\n"); > /* this event occurs in non-atomic context */ > if (reason == MHI_CB_SYS_ERROR) > - qaic_dev_reset_clean_local_state(qdev, true); > + qaic_dev_reset_clean_local_state(qdev); > } > > static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl) > diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h > index bd0c884e6bf7..66f4abf6c4c4 100644 > --- a/drivers/accel/qaic/qaic.h > +++ b/drivers/accel/qaic/qaic.h > @@ -283,7 +283,7 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id); > void release_dbc(struct qaic_device *qdev, u32 dbc_id); > > void wake_all_cntl(struct qaic_device *qdev); > -void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset); > +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev); > > struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); > > diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c > index 02fe23248da4..c19bc83b249c 100644 > --- a/drivers/accel/qaic/qaic_drv.c > +++ b/drivers/accel/qaic/qaic_drv.c > @@ -8,6 +8,7 @@ > #include <linux/idr.h> > #include <linux/interrupt.h> > #include <linux/list.h> > +#include <linux/kobject.h> > #include <linux/kref.h> > #include <linux/mhi.h> > #include <linux/module.h> > @@ -43,9 +44,6 @@ MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode"); > static bool link_up; > static DEFINE_IDA(qaic_usrs); > > -static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id); > -static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id); > - > static void free_usr(struct kref *kref) > { > struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count); > @@ -183,13 +181,6 @@ static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id) > > qddev->partition_id = partition_id; > > - /* > - * drm_dev_unregister() sets the driver data to NULL and > - * drm_dev_register() does not update the driver data. During a SOC > - * reset drm dev is unregistered and registered again leaving the > - * driver data to NULL. > - */ > - dev_set_drvdata(to_accel_kdev(qddev), drm->accel); > ret = drm_dev_register(drm, 0); > if (ret) > pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret); > @@ -203,7 +194,6 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id) > struct drm_device *drm = to_drm(qddev); > struct qaic_user *usr; > > - drm_dev_get(drm); > drm_dev_unregister(drm); > qddev->partition_id = 0; > /* > @@ -232,7 +222,6 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id) > mutex_lock(&qddev->users_mutex); > } > mutex_unlock(&qddev->users_mutex); > - drm_dev_put(drm); > } > > static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) > @@ -254,8 +243,6 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id > > qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); > > - qdev->reset_state = QAIC_ONLINE; > - > dev_set_drvdata(&mhi_dev->dev, qdev); > qdev->cntl_ch = mhi_dev; > > @@ -265,6 +252,7 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id > return ret; > } > > + qdev->reset_state = QAIC_BOOT; > ret = get_cntl_version(qdev, NULL, &major, &minor); > if (ret || major != CNTL_MAJOR || minor > CNTL_MINOR) { > pci_err(qdev->pdev, "%s: Control protocol version (%d.%d) not supported. Supported version is (%d.%d). Ret: %d\n", > @@ -272,8 +260,8 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id > ret = -EINVAL; > goto close_control; > } > - > - ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION); > + qdev->reset_state = QAIC_ONLINE; > + kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_ONLINE); > > return ret; > > @@ -291,6 +279,7 @@ static void qaic_notify_reset(struct qaic_device *qdev) > { > int i; > > + kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_OFFLINE); > qdev->reset_state = QAIC_OFFLINE; > /* wake up any waiters to avoid waiting for timeouts at sync */ > wake_all_cntl(qdev); > @@ -299,21 +288,15 @@ static void qaic_notify_reset(struct qaic_device *qdev) > synchronize_srcu(&qdev->dev_lock); > } > > -void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset) > +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev) > { > int i; > > qaic_notify_reset(qdev); > > - /* remove drmdevs to prevent new users from coming in */ > - qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); > - > /* start tearing things down */ > for (i = 0; i < qdev->num_dbc; ++i) > release_dbc(qdev, i); > - > - if (exit_reset) > - qdev->reset_state = QAIC_ONLINE; > } > > static void cleanup_qdev(struct qaic_device *qdev) > @@ -338,6 +321,7 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de > if (!qdev) > return NULL; > > + qdev->reset_state = QAIC_OFFLINE; > if (id->device == PCI_DEV_AIC100) { > qdev->num_dbc = 16; > qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL); > @@ -499,15 +483,21 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) > goto cleanup_qdev; > } > > + ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION); > + if (ret) > + goto cleanup_qdev; > + > qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq, > qdev->single_msi); > if (IS_ERR(qdev->mhi_cntrl)) { > ret = PTR_ERR(qdev->mhi_cntrl); > - goto cleanup_qdev; > + goto cleanup_drm_dev; > } > > return 0; > > +cleanup_drm_dev: > + qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); > cleanup_qdev: > cleanup_qdev(qdev); > return ret; > @@ -520,7 +510,8 @@ static void qaic_pci_remove(struct pci_dev *pdev) > if (!qdev) > return; > > - qaic_dev_reset_clean_local_state(qdev, false); > + qaic_dev_reset_clean_local_state(qdev); > + qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); > qaic_mhi_free_controller(qdev->mhi_cntrl, link_up); > cleanup_qdev(qdev); > } > @@ -543,14 +534,13 @@ static void qaic_pci_reset_prepare(struct pci_dev *pdev) > > qaic_notify_reset(qdev); > qaic_mhi_start_reset(qdev->mhi_cntrl); > - qaic_dev_reset_clean_local_state(qdev, false); > + qaic_dev_reset_clean_local_state(qdev); > } > > static void qaic_pci_reset_done(struct pci_dev *pdev) > { > struct qaic_device *qdev = pci_get_drvdata(pdev); > > - qdev->reset_state = QAIC_ONLINE; > qaic_mhi_reset_done(qdev->mhi_cntrl); > } >
diff --git a/Documentation/accel/qaic/qaic.rst b/Documentation/accel/qaic/qaic.rst index f81020736ebf..efb7771273bb 100644 --- a/Documentation/accel/qaic/qaic.rst +++ b/Documentation/accel/qaic/qaic.rst @@ -93,8 +93,15 @@ commands (does not impact QAIC). uAPI ==== +QAIC creates an accel device per phsyical PCIe device. This accel device exists +for as long as the PCIe device is known to Linux. + +The PCIe device may not be in the state to accept requests from userspace at +all times. QAIC will trigger KOBJ_ONLINE/OFFLINE uevents to advertise when the +device can accept requests (ONLINE) and when the device is no longer accepting +requests (OFFLINE) because of a reset or other state transition. + QAIC defines a number of driver specific IOCTLs as part of the userspace API. -This section describes those APIs. DRM_IOCTL_QAIC_MANAGE This IOCTL allows userspace to send a NNC request to the QSM. The call will diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c index 5d3cc30009cc..832464f2833a 100644 --- a/drivers/accel/qaic/mhi_controller.c +++ b/drivers/accel/qaic/mhi_controller.c @@ -469,7 +469,7 @@ static void mhi_status_cb(struct mhi_controller *mhi_cntrl, enum mhi_callback re pci_err(qdev->pdev, "Fatal error received from device. Attempting to recover\n"); /* this event occurs in non-atomic context */ if (reason == MHI_CB_SYS_ERROR) - qaic_dev_reset_clean_local_state(qdev, true); + qaic_dev_reset_clean_local_state(qdev); } static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl) diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h index bd0c884e6bf7..66f4abf6c4c4 100644 --- a/drivers/accel/qaic/qaic.h +++ b/drivers/accel/qaic/qaic.h @@ -283,7 +283,7 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id); void release_dbc(struct qaic_device *qdev, u32 dbc_id); void wake_all_cntl(struct qaic_device *qdev); -void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset); +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev); struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c index 02fe23248da4..c19bc83b249c 100644 --- a/drivers/accel/qaic/qaic_drv.c +++ b/drivers/accel/qaic/qaic_drv.c @@ -8,6 +8,7 @@ #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/list.h> +#include <linux/kobject.h> #include <linux/kref.h> #include <linux/mhi.h> #include <linux/module.h> @@ -43,9 +44,6 @@ MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode"); static bool link_up; static DEFINE_IDA(qaic_usrs); -static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id); -static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id); - static void free_usr(struct kref *kref) { struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count); @@ -183,13 +181,6 @@ static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id) qddev->partition_id = partition_id; - /* - * drm_dev_unregister() sets the driver data to NULL and - * drm_dev_register() does not update the driver data. During a SOC - * reset drm dev is unregistered and registered again leaving the - * driver data to NULL. - */ - dev_set_drvdata(to_accel_kdev(qddev), drm->accel); ret = drm_dev_register(drm, 0); if (ret) pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret); @@ -203,7 +194,6 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id) struct drm_device *drm = to_drm(qddev); struct qaic_user *usr; - drm_dev_get(drm); drm_dev_unregister(drm); qddev->partition_id = 0; /* @@ -232,7 +222,6 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id) mutex_lock(&qddev->users_mutex); } mutex_unlock(&qddev->users_mutex); - drm_dev_put(drm); } static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) @@ -254,8 +243,6 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); - qdev->reset_state = QAIC_ONLINE; - dev_set_drvdata(&mhi_dev->dev, qdev); qdev->cntl_ch = mhi_dev; @@ -265,6 +252,7 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id return ret; } + qdev->reset_state = QAIC_BOOT; ret = get_cntl_version(qdev, NULL, &major, &minor); if (ret || major != CNTL_MAJOR || minor > CNTL_MINOR) { pci_err(qdev->pdev, "%s: Control protocol version (%d.%d) not supported. Supported version is (%d.%d). Ret: %d\n", @@ -272,8 +260,8 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id ret = -EINVAL; goto close_control; } - - ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION); + qdev->reset_state = QAIC_ONLINE; + kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_ONLINE); return ret; @@ -291,6 +279,7 @@ static void qaic_notify_reset(struct qaic_device *qdev) { int i; + kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_OFFLINE); qdev->reset_state = QAIC_OFFLINE; /* wake up any waiters to avoid waiting for timeouts at sync */ wake_all_cntl(qdev); @@ -299,21 +288,15 @@ static void qaic_notify_reset(struct qaic_device *qdev) synchronize_srcu(&qdev->dev_lock); } -void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset) +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev) { int i; qaic_notify_reset(qdev); - /* remove drmdevs to prevent new users from coming in */ - qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); - /* start tearing things down */ for (i = 0; i < qdev->num_dbc; ++i) release_dbc(qdev, i); - - if (exit_reset) - qdev->reset_state = QAIC_ONLINE; } static void cleanup_qdev(struct qaic_device *qdev) @@ -338,6 +321,7 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de if (!qdev) return NULL; + qdev->reset_state = QAIC_OFFLINE; if (id->device == PCI_DEV_AIC100) { qdev->num_dbc = 16; qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL); @@ -499,15 +483,21 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto cleanup_qdev; } + ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION); + if (ret) + goto cleanup_qdev; + qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq, qdev->single_msi); if (IS_ERR(qdev->mhi_cntrl)) { ret = PTR_ERR(qdev->mhi_cntrl); - goto cleanup_qdev; + goto cleanup_drm_dev; } return 0; +cleanup_drm_dev: + qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); cleanup_qdev: cleanup_qdev(qdev); return ret; @@ -520,7 +510,8 @@ static void qaic_pci_remove(struct pci_dev *pdev) if (!qdev) return; - qaic_dev_reset_clean_local_state(qdev, false); + qaic_dev_reset_clean_local_state(qdev); + qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION); qaic_mhi_free_controller(qdev->mhi_cntrl, link_up); cleanup_qdev(qdev); } @@ -543,14 +534,13 @@ static void qaic_pci_reset_prepare(struct pci_dev *pdev) qaic_notify_reset(qdev); qaic_mhi_start_reset(qdev->mhi_cntrl); - qaic_dev_reset_clean_local_state(qdev, false); + qaic_dev_reset_clean_local_state(qdev); } static void qaic_pci_reset_done(struct pci_dev *pdev) { struct qaic_device *qdev = pci_get_drvdata(pdev); - qdev->reset_state = QAIC_ONLINE; qaic_mhi_reset_done(qdev->mhi_cntrl); }