diff mbox

[v3,3/6] drm/i915/guc: Fix lockdep due to log relay channel handling under struct_mutex

Message ID 1516766952-8231-3-git-send-email-sagar.a.kamble@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

sagar.a.kamble@intel.com Jan. 24, 2018, 4:09 a.m. UTC
This patch fixes a lockdep issue caused by a circular locking dependency among
struct_mutex, i_mutex_key, mmap_sem and relay_channels_mutex.
For the GuC log relay channel we create a debugfs file, which requires the
i_mutex_key lock, and we were doing that under struct_mutex. That introduced a
new dependency:
    &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem
However, there is already a dependency from mmap_sem to struct_mutex. Hence we
move the relay create/destroy operations out from under struct_mutex.

Comments

Chris Wilson Jan. 24, 2018, 10:18 a.m. UTC | #1
Quoting Sagar Arun Kamble (2018-01-24 04:09:09)
> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
> index 8f2da30..35de889 100644
> --- a/drivers/gpu/drm/i915/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
> @@ -153,6 +153,8 @@ static int guc_log_relay_file_create(struct intel_guc *guc)
>         if (!i915_modparams.guc_log_level)
>                 return 0;
>  
> +       mutex_lock(&guc->log.runtime.relay_lock);
> +
>         /* For now create the log file in /sys/kernel/debug/dri/0 dir */
>         log_dir = dev_priv->drm.primary->debugfs_root;
>  
> @@ -169,16 +171,26 @@ static int guc_log_relay_file_create(struct intel_guc *guc)
>          */
>         if (!log_dir) {
>                 DRM_ERROR("Debugfs dir not available yet for GuC log file\n");
> -               return -ENODEV;
> +               ret = -ENODEV;
> +               goto out_unlock;
>         }
>  
>         ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir);
>         if (ret < 0 && ret != -EEXIST) {
>                 DRM_ERROR("Couldn't associate relay chan with file %d\n", ret);
> -               return ret;
> +               goto out_unlock;
>         }
>  
> -       return 0;
> +out_unlock:
> +       mutex_unlock(&guc->log.runtime.relay_lock);
> +       return ret;
> +}
> +
> +static bool guc_log_has_relay(struct intel_guc *guc)
> +{
> +       lockdep_assert_held(&guc->log.runtime.relay_lock);
> +
> +       return guc->log.runtime.relay_chan != NULL;
>  }
>  
>  static void guc_move_to_next_buf(struct intel_guc *guc)
> @@ -188,6 +200,9 @@ static void guc_move_to_next_buf(struct intel_guc *guc)
>          */
>         smp_wmb();
>  
> +       if (!guc_log_has_relay(guc))
> +               return;
> +
>         /* All data has been written, so now move the offset of sub buffer. */
>         relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size);
>  
> @@ -197,7 +212,7 @@ static void guc_move_to_next_buf(struct intel_guc *guc)
>  
>  static void *guc_get_write_buffer(struct intel_guc *guc)
>  {
> -       if (!guc->log.runtime.relay_chan)
> +       if (!guc_log_has_relay(guc))
>                 return NULL;
>  
>         /* Just get the base address of a new sub buffer and copy data into it
> @@ -265,6 +280,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
>         /* Get the pointer to shared GuC log buffer */
>         log_buf_state = src_data = guc->log.runtime.buf_addr;
>  
> +       mutex_lock(&guc->log.runtime.relay_lock);
> +
>         /* Get the pointer to local buffer to store the logs */
>         log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc);

Hmm. The locking here tells me that we are being careful in case the
relay_chan disappears, but we don't handle the NULL pointer here.
 
> @@ -344,6 +361,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
>                 DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n");
>                 guc->log.capture_miss_count++;
>         }
> +
> +       mutex_unlock(&guc->log.runtime.relay_lock);
>  }
>  
>  static void capture_logs_work(struct work_struct *work)
> @@ -363,8 +382,6 @@ static int guc_log_runtime_create(struct intel_guc *guc)
>  {
>         struct drm_i915_private *dev_priv = guc_to_i915(guc);
>         void *vaddr;
> -       struct rchan *guc_log_relay_chan;
> -       size_t n_subbufs, subbuf_size;
>         int ret;
>  
>         lockdep_assert_held(&dev_priv->drm.struct_mutex);
> @@ -387,8 +404,44 @@ static int guc_log_runtime_create(struct intel_guc *guc)
>  
>         guc->log.runtime.buf_addr = vaddr;
>  
> +       return 0;
> +}
> +
> +static void guc_log_runtime_destroy(struct intel_guc *guc)
> +{
> +       /*
> +        * It's possible that the runtime stuff was never allocated because
> +        * GuC log was disabled at the boot time.
> +        **/
> +       if (!guc_log_has_runtime(guc))
> +               return;
> +
> +       i915_gem_object_unpin_map(guc->log.vma->obj);
> +       guc->log.runtime.buf_addr = NULL;
> +}
> +
> +void intel_guc_log_init_early(struct intel_guc *guc)
> +{
> +       mutex_init(&guc->log.runtime.relay_lock);
> +       INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
> +}
> +
> +int intel_guc_log_relay_create(struct intel_guc *guc)
> +{
> +       struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +       struct rchan *guc_log_relay_chan;
> +       size_t n_subbufs, subbuf_size;
> +       int ret;
> +
> +       if (!i915_modparams.guc_log_level)
> +               return 0;
> +
> +       mutex_lock(&guc->log.runtime.relay_lock);
> +
> +       GEM_BUG_ON(guc_log_has_relay(guc));
> +
>          /* Keep the size of sub buffers same as shared log buffer */
> -       subbuf_size = guc->log.vma->obj->base.size;
> +       subbuf_size = GUC_LOG_SIZE;
>  
>         /* Store up to 8 snapshots, which is large enough to buffer sufficient
>          * boot time logs and provides enough leeway to User, in terms of
> @@ -407,33 +460,39 @@ static int guc_log_runtime_create(struct intel_guc *guc)
>                 DRM_ERROR("Couldn't create relay chan for GuC logging\n");
>  
>                 ret = -ENOMEM;
> -               goto err_vaddr;
> +               goto err;
>         }
>  
>         GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size);
>         guc->log.runtime.relay_chan = guc_log_relay_chan;
>  
> -       INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
> +       mutex_unlock(&guc->log.runtime.relay_lock);
> +
>         return 0;
>  
> -err_vaddr:
> -       i915_gem_object_unpin_map(guc->log.vma->obj);
> -       guc->log.runtime.buf_addr = NULL;
> +err:
> +       mutex_unlock(&guc->log.runtime.relay_lock);
> +       /* logging will be off */
> +       i915_modparams.guc_log_level = 0;
>         return ret;
>  }
>  
> -static void guc_log_runtime_destroy(struct intel_guc *guc)
> +void intel_guc_log_relay_destroy(struct intel_guc *guc)
>  {
> +       mutex_lock(&guc->log.runtime.relay_lock);
> +
>         /*
> -        * It's possible that the runtime stuff was never allocated because
> +        * It's possible that the relay was never allocated because
>          * GuC log was disabled at the boot time.
>          */
> -       if (!guc_log_has_runtime(guc))
> -               return;
> +       if (!guc_log_has_relay(guc))
> +               goto out_unlock;
>  
>         relay_close(guc->log.runtime.relay_chan);
> -       i915_gem_object_unpin_map(guc->log.vma->obj);
> -       guc->log.runtime.buf_addr = NULL;
> +       guc->log.runtime.relay_chan = NULL;
> +
> +out_unlock:
> +       mutex_unlock(&guc->log.runtime.relay_lock);
>  }
>  
>  static int guc_log_late_setup(struct intel_guc *guc)
> @@ -441,17 +500,24 @@ static int guc_log_late_setup(struct intel_guc *guc)
>         struct drm_i915_private *dev_priv = guc_to_i915(guc);
>         int ret;
>  
> -       lockdep_assert_held(&dev_priv->drm.struct_mutex);
> -
>         if (!guc_log_has_runtime(guc)) {
>                 /*
>                  * If log was disabled at boot time, then setup needed to handle
>                  * log buffer flush interrupts would not have been done yet, so
>                  * do that now.
>                  */
> -               ret = guc_log_runtime_create(guc);
> +               ret = intel_guc_log_relay_create(guc);
>                 if (ret)
>                         goto err;
> +
> +               mutex_lock(&dev_priv->drm.struct_mutex);
> +               intel_runtime_pm_get(dev_priv);
> +               ret = guc_log_runtime_create(guc);
> +               intel_runtime_pm_put(dev_priv);
> +               mutex_unlock(&dev_priv->drm.struct_mutex);
> +
> +               if (ret)
> +                       goto err_relay;
>         }
>  
>         ret = guc_log_relay_file_create(guc);
> @@ -461,7 +527,11 @@ static int guc_log_late_setup(struct intel_guc *guc)
>         return 0;
>  
>  err_runtime:
> +       mutex_lock(&dev_priv->drm.struct_mutex);
>         guc_log_runtime_destroy(guc);
> +       mutex_unlock(&dev_priv->drm.struct_mutex);
> +err_relay:
> +       intel_guc_log_relay_destroy(guc);
>  err:
>         /* logging will remain off */
>         i915_modparams.guc_log_level = 0;
> @@ -490,7 +560,11 @@ static void guc_flush_logs(struct intel_guc *guc)
>                 return;
>  
>         /* First disable the interrupts, will be renabled afterwards */
> +       mutex_lock(&dev_priv->drm.struct_mutex);
> +       intel_runtime_pm_get(dev_priv);
>         gen9_disable_guc_interrupts(dev_priv);
> +       intel_runtime_pm_put(dev_priv);
> +       mutex_unlock(&dev_priv->drm.struct_mutex);
>  
>         /* Before initiating the forceful flush, wait for any pending/ongoing
>          * flush to complete otherwise forceful flush may not actually happen.
> @@ -498,7 +572,9 @@ static void guc_flush_logs(struct intel_guc *guc)
>         flush_work(&guc->log.runtime.flush_work);
>  
>         /* Ask GuC to update the log buffer state */
> +       intel_runtime_pm_get(dev_priv);
>         guc_log_flush(guc);
> +       intel_runtime_pm_put(dev_priv);
>  
>         /* GuC would have updated log buffer by now, so capture it */
>         guc_log_capture_logs(guc);
> @@ -509,17 +585,10 @@ int intel_guc_log_create(struct intel_guc *guc)
>         struct i915_vma *vma;
>         unsigned long offset;
>         u32 flags;
> -       u32 size;
>         int ret;
>  
>         GEM_BUG_ON(guc->log.vma);
>  
> -       /* The first page is to save log buffer state. Allocate one
> -        * extra page for others in case for overlap */
> -       size = (1 + GUC_LOG_DPC_PAGES + 1 +
> -               GUC_LOG_ISR_PAGES + 1 +
> -               GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
> -
>         /* We require SSE 4.1 for fast reads from the GuC log buffer and
>          * it should be present on the chipsets supporting GuC based
>          * submisssions.
> @@ -529,7 +598,7 @@ int intel_guc_log_create(struct intel_guc *guc)
>                 goto err;
>         }
>  
> -       vma = intel_guc_allocate_vma(guc, size);
> +       vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE);
>         if (IS_ERR(vma)) {
>                 ret = PTR_ERR(vma);
>                 goto err;
> @@ -584,7 +653,15 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
>                 return 0;
>  
>         verbosity = enable_logging ? control_val - 1 : 0;
> +
> +       ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
> +       if (ret)
> +               return ret;
> +       intel_runtime_pm_get(dev_priv);
>         ret = guc_log_control(guc, enable_logging, verbosity);
> +       intel_runtime_pm_put(dev_priv);
> +       mutex_unlock(&dev_priv->drm.struct_mutex);
> +
>         if (ret < 0) {
>                 DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret);
>                 return ret;
> @@ -605,7 +682,11 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
>                 }
>  
>                 /* GuC logging is currently the only user of Guc2Host interrupts */
> +               mutex_lock(&dev_priv->drm.struct_mutex);
> +               intel_runtime_pm_get(dev_priv);
>                 gen9_enable_guc_interrupts(dev_priv);
> +               intel_runtime_pm_put(dev_priv);
> +               mutex_unlock(&dev_priv->drm.struct_mutex);
>         } else {
>                 /*
>                  * Once logging is disabled, GuC won't generate logs & send an
> @@ -627,13 +708,13 @@ void i915_guc_log_register(struct drm_i915_private *dev_priv)
>         if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level)
>                 return;
>  
> -       mutex_lock(&dev_priv->drm.struct_mutex);
>         guc_log_late_setup(&dev_priv->guc);
> -       mutex_unlock(&dev_priv->drm.struct_mutex);
>  }
>  
>  void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
>  {
> +       struct intel_guc *guc = &dev_priv->guc;
> +
>         if (!USES_GUC_SUBMISSION(dev_priv))
>                 return;
>  
> @@ -643,6 +724,8 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
>         gen9_disable_guc_interrupts(dev_priv);
>         intel_runtime_pm_put(dev_priv);
>  
> -       guc_log_runtime_destroy(&dev_priv->guc);
> +       guc_log_runtime_destroy(guc);
>         mutex_unlock(&dev_priv->drm.struct_mutex);
> +
> +       intel_guc_log_relay_destroy(guc);
>  }

This looks all reasonably well described by the addition of the
relay_lock and the interactions look fine. The only mistake I could see,
in the story told by this patch, was the runtime checking.
-Chris
sagar.a.kamble@intel.com Jan. 24, 2018, 10:52 a.m. UTC | #2
On 1/24/2018 3:48 PM, Chris Wilson wrote:
> Quoting Sagar Arun Kamble (2018-01-24 04:09:09)
>> diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
>> index 8f2da30..35de889 100644
>> --- a/drivers/gpu/drm/i915/intel_guc_log.c
>> +++ b/drivers/gpu/drm/i915/intel_guc_log.c
>> @@ -153,6 +153,8 @@ static int guc_log_relay_file_create(struct intel_guc *guc)
>>          if (!i915_modparams.guc_log_level)
>>                  return 0;
>>   
>> +       mutex_lock(&guc->log.runtime.relay_lock);
>> +
>>          /* For now create the log file in /sys/kernel/debug/dri/0 dir */
>>          log_dir = dev_priv->drm.primary->debugfs_root;
>>   
>> @@ -169,16 +171,26 @@ static int guc_log_relay_file_create(struct intel_guc *guc)
>>           */
>>          if (!log_dir) {
>>                  DRM_ERROR("Debugfs dir not available yet for GuC log file\n");
>> -               return -ENODEV;
>> +               ret = -ENODEV;
>> +               goto out_unlock;
>>          }
>>   
>>          ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir);
>>          if (ret < 0 && ret != -EEXIST) {
>>                  DRM_ERROR("Couldn't associate relay chan with file %d\n", ret);
>> -               return ret;
>> +               goto out_unlock;
>>          }
>>   
>> -       return 0;
>> +out_unlock:
>> +       mutex_unlock(&guc->log.runtime.relay_lock);
>> +       return ret;
>> +}
>> +
>> +static bool guc_log_has_relay(struct intel_guc *guc)
>> +{
>> +       lockdep_assert_held(&guc->log.runtime.relay_lock);
>> +
>> +       return guc->log.runtime.relay_chan != NULL;
>>   }
>>   
>>   static void guc_move_to_next_buf(struct intel_guc *guc)
>> @@ -188,6 +200,9 @@ static void guc_move_to_next_buf(struct intel_guc *guc)
>>           */
>>          smp_wmb();
>>   
>> +       if (!guc_log_has_relay(guc))
>> +               return;
>> +
>>          /* All data has been written, so now move the offset of sub buffer. */
>>          relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size);
>>   
>> @@ -197,7 +212,7 @@ static void guc_move_to_next_buf(struct intel_guc *guc)
>>   
>>   static void *guc_get_write_buffer(struct intel_guc *guc)
>>   {
>> -       if (!guc->log.runtime.relay_chan)
>> +       if (!guc_log_has_relay(guc))
>>                  return NULL;
>>   
>>          /* Just get the base address of a new sub buffer and copy data into it
>> @@ -265,6 +280,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
>>          /* Get the pointer to shared GuC log buffer */
>>          log_buf_state = src_data = guc->log.runtime.buf_addr;
>>   
>> +       mutex_lock(&guc->log.runtime.relay_lock);
>> +
>>          /* Get the pointer to local buffer to store the logs */
>>          log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc);
> Hmm. The locking here tells me that we are being careful in case the
> relay_chan disappears, but we don't handle the NULL pointer here.
>   
There is a check for log_buf_snapshot_state below in the for loop. But yes, we 
should return from here.
Will update.
>> @@ -344,6 +361,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
>>                  DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n");
>>                  guc->log.capture_miss_count++;
>>          }
>> +
>> +       mutex_unlock(&guc->log.runtime.relay_lock);
>>   }
>>   
>>   static void capture_logs_work(struct work_struct *work)
>> @@ -363,8 +382,6 @@ static int guc_log_runtime_create(struct intel_guc *guc)
>>   {
>>          struct drm_i915_private *dev_priv = guc_to_i915(guc);
>>          void *vaddr;
>> -       struct rchan *guc_log_relay_chan;
>> -       size_t n_subbufs, subbuf_size;
>>          int ret;
>>   
>>          lockdep_assert_held(&dev_priv->drm.struct_mutex);
>> @@ -387,8 +404,44 @@ static int guc_log_runtime_create(struct intel_guc *guc)
>>   
>>          guc->log.runtime.buf_addr = vaddr;
>>   
>> +       return 0;
>> +}
>> +
>> +static void guc_log_runtime_destroy(struct intel_guc *guc)
>> +{
>> +       /*
>> +        * It's possible that the runtime stuff was never allocated because
>> +        * GuC log was disabled at the boot time.
>> +        **/
>> +       if (!guc_log_has_runtime(guc))
>> +               return;
>> +
>> +       i915_gem_object_unpin_map(guc->log.vma->obj);
>> +       guc->log.runtime.buf_addr = NULL;
>> +}
>> +
>> +void intel_guc_log_init_early(struct intel_guc *guc)
>> +{
>> +       mutex_init(&guc->log.runtime.relay_lock);
>> +       INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
>> +}
>> +
>> +int intel_guc_log_relay_create(struct intel_guc *guc)
>> +{
>> +       struct drm_i915_private *dev_priv = guc_to_i915(guc);
>> +       struct rchan *guc_log_relay_chan;
>> +       size_t n_subbufs, subbuf_size;
>> +       int ret;
>> +
>> +       if (!i915_modparams.guc_log_level)
>> +               return 0;
>> +
>> +       mutex_lock(&guc->log.runtime.relay_lock);
>> +
>> +       GEM_BUG_ON(guc_log_has_relay(guc));
>> +
>>           /* Keep the size of sub buffers same as shared log buffer */
>> -       subbuf_size = guc->log.vma->obj->base.size;
>> +       subbuf_size = GUC_LOG_SIZE;
>>   
>>          /* Store up to 8 snapshots, which is large enough to buffer sufficient
>>           * boot time logs and provides enough leeway to User, in terms of
>> @@ -407,33 +460,39 @@ static int guc_log_runtime_create(struct intel_guc *guc)
>>                  DRM_ERROR("Couldn't create relay chan for GuC logging\n");
>>   
>>                  ret = -ENOMEM;
>> -               goto err_vaddr;
>> +               goto err;
>>          }
>>   
>>          GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size);
>>          guc->log.runtime.relay_chan = guc_log_relay_chan;
>>   
>> -       INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
>> +       mutex_unlock(&guc->log.runtime.relay_lock);
>> +
>>          return 0;
>>   
>> -err_vaddr:
>> -       i915_gem_object_unpin_map(guc->log.vma->obj);
>> -       guc->log.runtime.buf_addr = NULL;
>> +err:
>> +       mutex_unlock(&guc->log.runtime.relay_lock);
>> +       /* logging will be off */
>> +       i915_modparams.guc_log_level = 0;
>>          return ret;
>>   }
>>   
>> -static void guc_log_runtime_destroy(struct intel_guc *guc)
>> +void intel_guc_log_relay_destroy(struct intel_guc *guc)
>>   {
>> +       mutex_lock(&guc->log.runtime.relay_lock);
>> +
>>          /*
>> -        * It's possible that the runtime stuff was never allocated because
>> +        * It's possible that the relay was never allocated because
>>           * GuC log was disabled at the boot time.
>>           */
>> -       if (!guc_log_has_runtime(guc))
>> -               return;
>> +       if (!guc_log_has_relay(guc))
>> +               goto out_unlock;
>>   
>>          relay_close(guc->log.runtime.relay_chan);
>> -       i915_gem_object_unpin_map(guc->log.vma->obj);
>> -       guc->log.runtime.buf_addr = NULL;
>> +       guc->log.runtime.relay_chan = NULL;
>> +
>> +out_unlock:
>> +       mutex_unlock(&guc->log.runtime.relay_lock);
>>   }
>>   
>>   static int guc_log_late_setup(struct intel_guc *guc)
>> @@ -441,17 +500,24 @@ static int guc_log_late_setup(struct intel_guc *guc)
>>          struct drm_i915_private *dev_priv = guc_to_i915(guc);
>>          int ret;
>>   
>> -       lockdep_assert_held(&dev_priv->drm.struct_mutex);
>> -
>>          if (!guc_log_has_runtime(guc)) {
>>                  /*
>>                   * If log was disabled at boot time, then setup needed to handle
>>                   * log buffer flush interrupts would not have been done yet, so
>>                   * do that now.
>>                   */
>> -               ret = guc_log_runtime_create(guc);
>> +               ret = intel_guc_log_relay_create(guc);
>>                  if (ret)
>>                          goto err;
>> +
>> +               mutex_lock(&dev_priv->drm.struct_mutex);
>> +               intel_runtime_pm_get(dev_priv);
>> +               ret = guc_log_runtime_create(guc);
>> +               intel_runtime_pm_put(dev_priv);
>> +               mutex_unlock(&dev_priv->drm.struct_mutex);
>> +
>> +               if (ret)
>> +                       goto err_relay;
>>          }
>>   
>>          ret = guc_log_relay_file_create(guc);
>> @@ -461,7 +527,11 @@ static int guc_log_late_setup(struct intel_guc *guc)
>>          return 0;
>>   
>>   err_runtime:
>> +       mutex_lock(&dev_priv->drm.struct_mutex);
>>          guc_log_runtime_destroy(guc);
>> +       mutex_unlock(&dev_priv->drm.struct_mutex);
>> +err_relay:
>> +       intel_guc_log_relay_destroy(guc);
>>   err:
>>          /* logging will remain off */
>>          i915_modparams.guc_log_level = 0;
>> @@ -490,7 +560,11 @@ static void guc_flush_logs(struct intel_guc *guc)
>>                  return;
>>   
>>          /* First disable the interrupts, will be renabled afterwards */
>> +       mutex_lock(&dev_priv->drm.struct_mutex);
>> +       intel_runtime_pm_get(dev_priv);
>>          gen9_disable_guc_interrupts(dev_priv);
>> +       intel_runtime_pm_put(dev_priv);
>> +       mutex_unlock(&dev_priv->drm.struct_mutex);
>>   
>>          /* Before initiating the forceful flush, wait for any pending/ongoing
>>           * flush to complete otherwise forceful flush may not actually happen.
>> @@ -498,7 +572,9 @@ static void guc_flush_logs(struct intel_guc *guc)
>>          flush_work(&guc->log.runtime.flush_work);
>>   
>>          /* Ask GuC to update the log buffer state */
>> +       intel_runtime_pm_get(dev_priv);
>>          guc_log_flush(guc);
>> +       intel_runtime_pm_put(dev_priv);
>>   
>>          /* GuC would have updated log buffer by now, so capture it */
>>          guc_log_capture_logs(guc);
>> @@ -509,17 +585,10 @@ int intel_guc_log_create(struct intel_guc *guc)
>>          struct i915_vma *vma;
>>          unsigned long offset;
>>          u32 flags;
>> -       u32 size;
>>          int ret;
>>   
>>          GEM_BUG_ON(guc->log.vma);
>>   
>> -       /* The first page is to save log buffer state. Allocate one
>> -        * extra page for others in case for overlap */
>> -       size = (1 + GUC_LOG_DPC_PAGES + 1 +
>> -               GUC_LOG_ISR_PAGES + 1 +
>> -               GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
>> -
>>          /* We require SSE 4.1 for fast reads from the GuC log buffer and
>>           * it should be present on the chipsets supporting GuC based
>>           * submisssions.
>> @@ -529,7 +598,7 @@ int intel_guc_log_create(struct intel_guc *guc)
>>                  goto err;
>>          }
>>   
>> -       vma = intel_guc_allocate_vma(guc, size);
>> +       vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE);
>>          if (IS_ERR(vma)) {
>>                  ret = PTR_ERR(vma);
>>                  goto err;
>> @@ -584,7 +653,15 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
>>                  return 0;
>>   
>>          verbosity = enable_logging ? control_val - 1 : 0;
>> +
>> +       ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
>> +       if (ret)
>> +               return ret;
>> +       intel_runtime_pm_get(dev_priv);
>>          ret = guc_log_control(guc, enable_logging, verbosity);
>> +       intel_runtime_pm_put(dev_priv);
>> +       mutex_unlock(&dev_priv->drm.struct_mutex);
>> +
>>          if (ret < 0) {
>>                  DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret);
>>                  return ret;
>> @@ -605,7 +682,11 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
>>                  }
>>   
>>                  /* GuC logging is currently the only user of Guc2Host interrupts */
>> +               mutex_lock(&dev_priv->drm.struct_mutex);
>> +               intel_runtime_pm_get(dev_priv);
>>                  gen9_enable_guc_interrupts(dev_priv);
>> +               intel_runtime_pm_put(dev_priv);
>> +               mutex_unlock(&dev_priv->drm.struct_mutex);
>>          } else {
>>                  /*
>>                   * Once logging is disabled, GuC won't generate logs & send an
>> @@ -627,13 +708,13 @@ void i915_guc_log_register(struct drm_i915_private *dev_priv)
>>          if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level)
>>                  return;
>>   
>> -       mutex_lock(&dev_priv->drm.struct_mutex);
>>          guc_log_late_setup(&dev_priv->guc);
>> -       mutex_unlock(&dev_priv->drm.struct_mutex);
>>   }
>>   
>>   void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
>>   {
>> +       struct intel_guc *guc = &dev_priv->guc;
>> +
>>          if (!USES_GUC_SUBMISSION(dev_priv))
>>                  return;
>>   
>> @@ -643,6 +724,8 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
>>          gen9_disable_guc_interrupts(dev_priv);
>>          intel_runtime_pm_put(dev_priv);
>>   
>> -       guc_log_runtime_destroy(&dev_priv->guc);
>> +       guc_log_runtime_destroy(guc);
>>          mutex_unlock(&dev_priv->drm.struct_mutex);
>> +
>> +       intel_guc_log_relay_destroy(guc);
>>   }
> This looks all reasonably well described by the addition of the
> relay_lock and the interactions look fine. The only mistake I could see,
> in the story told by this patch, was the runtime checking.
Could you please elaborate more on this?
> -Chris
Chris Wilson Jan. 24, 2018, 11:05 a.m. UTC | #3
Quoting Sagar Arun Kamble (2018-01-24 10:52:28)
> 
> 
> On 1/24/2018 3:48 PM, Chris Wilson wrote:
> > Quoting Sagar Arun Kamble (2018-01-24 04:09:09)
> >> @@ -197,7 +212,7 @@ static void guc_move_to_next_buf(struct intel_guc *guc)
> >>   
> >>   static void *guc_get_write_buffer(struct intel_guc *guc)
> >>   {
> >> -       if (!guc->log.runtime.relay_chan)
> >> +       if (!guc_log_has_relay(guc))
> >>                  return NULL;
> >>   
> >>          /* Just get the base address of a new sub buffer and copy data into it
> >> @@ -265,6 +280,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
> >>          /* Get the pointer to shared GuC log buffer */
> >>          log_buf_state = src_data = guc->log.runtime.buf_addr;
> >>   
> >> +       mutex_lock(&guc->log.runtime.relay_lock);
> >> +
> >>          /* Get the pointer to local buffer to store the logs */
> >>          log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc);
> > Hmm. The locking here tells me that we are being careful in case the
> > relay_chan disappears, but we don't handle the NULL pointer here.
> >   
> There is a check for log_buf_snapshot_state below in the for loop. But yes, we 
> should return from here.
> Will update.
> >> @@ -643,6 +724,8 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
> >>          gen9_disable_guc_interrupts(dev_priv);
> >>          intel_runtime_pm_put(dev_priv);
> >>   
> >> -       guc_log_runtime_destroy(&dev_priv->guc);
> >> +       guc_log_runtime_destroy(guc);
> >>          mutex_unlock(&dev_priv->drm.struct_mutex);
> >> +
> >> +       intel_guc_log_relay_destroy(guc);
> >>   }
> > This looks all reasonably well described by the addition of the
> > relay_lock and the interactions look fine. The only mistake I could see,
> > in the story told by this patch, was the runtime checking.
> > Could you please elaborate more on this?

The previous comment :)
-Chris
diff mbox

Patch

======================================================
WARNING: possible circular locking dependency detected
4.15.0-rc6-CI-Patchwork_7614+ #1 Not tainted
------------------------------------------------------
debugfs_test/1388 is trying to acquire lock:
 (&dev->struct_mutex){+.+.}, at: [<00000000d5e1d915>] i915_mutex_lock_interruptible+0x47/0x130 [i915]

but task is already holding lock:
 (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #3 (&mm->mmap_sem){++++}:
       _copy_to_user+0x1e/0x70
       filldir+0x8c/0xf0
       dcache_readdir+0xeb/0x160
       iterate_dir+0xdc/0x140
       SyS_getdents+0xa0/0x130
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #2 (&sb->s_type->i_mutex_key#3){++++}:
       start_creating+0x59/0x110
       __debugfs_create_file+0x2e/0xe0
       relay_create_buf_file+0x62/0x80
       relay_late_setup_files+0x84/0x250
       guc_log_late_setup+0x4f/0x110 [i915]
       i915_guc_log_register+0x32/0x40 [i915]
       i915_driver_load+0x7b6/0x1720 [i915]
       i915_pci_probe+0x2e/0x90 [i915]
       pci_device_probe+0x9c/0x120
       driver_probe_device+0x2a3/0x480
       __driver_attach+0xd9/0xe0
       bus_for_each_dev+0x57/0x90
       bus_add_driver+0x168/0x260
       driver_register+0x52/0xc0
       do_one_initcall+0x39/0x150
       do_init_module+0x56/0x1ef
       load_module+0x231c/0x2d70
       SyS_finit_module+0xa5/0xe0
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #1 (relay_channels_mutex){+.+.}:
       relay_open+0x12c/0x2b0
       intel_guc_log_runtime_create+0xab/0x230 [i915]
       intel_guc_init+0x81/0x120 [i915]
       intel_uc_init+0x29/0xa0 [i915]
       i915_gem_init+0x182/0x530 [i915]
       i915_driver_load+0xaa9/0x1720 [i915]
       i915_pci_probe+0x2e/0x90 [i915]
       pci_device_probe+0x9c/0x120
       driver_probe_device+0x2a3/0x480
       __driver_attach+0xd9/0xe0
       bus_for_each_dev+0x57/0x90
       bus_add_driver+0x168/0x260
       driver_register+0x52/0xc0
       do_one_initcall+0x39/0x150
       do_init_module+0x56/0x1ef
       load_module+0x231c/0x2d70
       SyS_finit_module+0xa5/0xe0
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #0 (&dev->struct_mutex){+.+.}:
       __mutex_lock+0x81/0x9b0
       i915_mutex_lock_interruptible+0x47/0x130 [i915]
       i915_gem_fault+0x201/0x790 [i915]
       __do_fault+0x15/0x70
       __handle_mm_fault+0x677/0xdc0
       handle_mm_fault+0x14f/0x2f0
       __do_page_fault+0x2d1/0x560
       page_fault+0x4c/0x60

other info that might help us debug this:

Chain exists of:
  &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&mm->mmap_sem);
                               lock(&sb->s_type->i_mutex_key#3);
                               lock(&mm->mmap_sem);
  lock(&dev->struct_mutex);

 *** DEADLOCK ***

1 lock held by debugfs_test/1388:
 #0:  (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560

stack backtrace:
CPU: 2 PID: 1388 Comm: debugfs_test Not tainted 4.15.0-rc6-CI-Patchwork_7614+ #1
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.10 09/29/2016
Call Trace:
 dump_stack+0x5f/0x86
 print_circular_bug.isra.18+0x1d0/0x2c0
 __lock_acquire+0x14ae/0x1b60
 ? lock_acquire+0xaf/0x200
 lock_acquire+0xaf/0x200
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 __mutex_lock+0x81/0x9b0
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? __pm_runtime_resume+0x4f/0x80
 i915_gem_fault+0x201/0x790 [i915]
 __do_fault+0x15/0x70
 ? _raw_spin_unlock+0x29/0x40
 __handle_mm_fault+0x677/0xdc0
 handle_mm_fault+0x14f/0x2f0
 __do_page_fault+0x2d1/0x560
 ? page_fault+0x36/0x60
 page_fault+0x4c/0x60

v2: Added lock protection to guc->log.runtime.relay_chan (Chris)
    Fixed locking inside guc_flush_logs uncovered by new lockdep.

v3: Locking guc_read_update_log_buffer entirely with relay_lock. (Chris)
    Prepared intel_guc_init_early. Moved relay_lock inside relay_create,
    relay_destroy, relay_file_create, guc_read_update_log_buffer. (Michal)
    Removed struct_mutex lock around guc_log_flush and removed usage
    of guc_log_has_relay() from runtime_create path as it needs
    struct_mutex lock.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104693
Testcase: igt/debugfs_test/read_all_entries # with enable_guc=1 and guc_log_level=1
Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Marta Lofstedt <marta.lofstedt@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c  |  12 +--
 drivers/gpu/drm/i915/i915_drv.c      |   2 +-
 drivers/gpu/drm/i915/i915_gem.c      |   4 +-
 drivers/gpu/drm/i915/intel_guc.c     |   7 +-
 drivers/gpu/drm/i915/intel_guc_log.c | 147 +++++++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_guc_log.h |  12 +++
 drivers/gpu/drm/i915/intel_uc.c      |  26 +++++--
 drivers/gpu/drm/i915/intel_uc.h      |   4 +-
 8 files changed, 160 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 80dc679..b45be0d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2467,7 +2467,6 @@  static int i915_guc_log_control_get(void *data, u64 *val)
 static int i915_guc_log_control_set(void *data, u64 val)
 {
 	struct drm_i915_private *dev_priv = data;
-	int ret;
 
 	if (!HAS_GUC(dev_priv))
 		return -ENODEV;
@@ -2475,16 +2474,7 @@  static int i915_guc_log_control_set(void *data, u64 val)
 	if (!dev_priv->guc.log.vma)
 		return -EINVAL;
 
-	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
-	if (ret)
-		return ret;
-
-	intel_runtime_pm_get(dev_priv);
-	ret = i915_guc_log_control(dev_priv, val);
-	intel_runtime_pm_put(dev_priv);
-
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-	return ret;
+	return i915_guc_log_control(dev_priv, val);
 }
 
 DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_control_fops,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 95e1c16..1ec12ad 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -626,7 +626,7 @@  static void i915_gem_fini(struct drm_i915_private *dev_priv)
 	i915_gem_contexts_fini(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	intel_uc_fini_wq(dev_priv);
+	intel_uc_fini_misc(dev_priv);
 	i915_gem_cleanup_userptr(dev_priv);
 
 	i915_gem_drain_freed_objects(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7f0684c..34d0d51 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5193,7 +5193,7 @@  int i915_gem_init(struct drm_i915_private *dev_priv)
 	if (ret)
 		return ret;
 
-	ret = intel_uc_init_wq(dev_priv);
+	ret = intel_uc_init_misc(dev_priv);
 	if (ret)
 		return ret;
 
@@ -5289,7 +5289,7 @@  int i915_gem_init(struct drm_i915_private *dev_priv)
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	intel_uc_fini_wq(dev_priv);
+	intel_uc_fini_misc(dev_priv);
 
 	if (ret != -EIO)
 		i915_gem_cleanup_userptr(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c
index ea30e7c..21140cc 100644
--- a/drivers/gpu/drm/i915/intel_guc.c
+++ b/drivers/gpu/drm/i915/intel_guc.c
@@ -64,6 +64,7 @@  void intel_guc_init_early(struct intel_guc *guc)
 {
 	intel_guc_fw_init_early(guc);
 	intel_guc_ct_init_early(&guc->ct);
+	intel_guc_log_init_early(guc);
 
 	mutex_init(&guc->send_mutex);
 	guc->send = intel_guc_send_nop;
@@ -87,8 +88,10 @@  int intel_guc_init_wq(struct intel_guc *guc)
 	 */
 	guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
 						WQ_HIGHPRI | WQ_FREEZABLE);
-	if (!guc->log.runtime.flush_wq)
+	if (!guc->log.runtime.flush_wq) {
+		DRM_ERROR("Couldn't allocate workqueue for GuC log\n");
 		return -ENOMEM;
+	}
 
 	/*
 	 * Even though both sending GuC action, and adding a new workitem to
@@ -109,6 +112,8 @@  int intel_guc_init_wq(struct intel_guc *guc)
 							  WQ_HIGHPRI);
 		if (!guc->preempt_wq) {
 			destroy_workqueue(guc->log.runtime.flush_wq);
+			DRM_ERROR("Couldn't allocate workqueue for GuC "
+				  "preemption\n");
 			return -ENOMEM;
 		}
 	}
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 8f2da30..35de889 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -153,6 +153,8 @@  static int guc_log_relay_file_create(struct intel_guc *guc)
 	if (!i915_modparams.guc_log_level)
 		return 0;
 
+	mutex_lock(&guc->log.runtime.relay_lock);
+
 	/* For now create the log file in /sys/kernel/debug/dri/0 dir */
 	log_dir = dev_priv->drm.primary->debugfs_root;
 
@@ -169,16 +171,26 @@  static int guc_log_relay_file_create(struct intel_guc *guc)
 	 */
 	if (!log_dir) {
 		DRM_ERROR("Debugfs dir not available yet for GuC log file\n");
-		return -ENODEV;
+		ret = -ENODEV;
+		goto out_unlock;
 	}
 
 	ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir);
 	if (ret < 0 && ret != -EEXIST) {
 		DRM_ERROR("Couldn't associate relay chan with file %d\n", ret);
-		return ret;
+		goto out_unlock;
 	}
 
-	return 0;
+out_unlock:
+	mutex_unlock(&guc->log.runtime.relay_lock);
+	return ret;
+}
+
+static bool guc_log_has_relay(struct intel_guc *guc)
+{
+	lockdep_assert_held(&guc->log.runtime.relay_lock);
+
+	return guc->log.runtime.relay_chan != NULL;
 }
 
 static void guc_move_to_next_buf(struct intel_guc *guc)
@@ -188,6 +200,9 @@  static void guc_move_to_next_buf(struct intel_guc *guc)
 	 */
 	smp_wmb();
 
+	if (!guc_log_has_relay(guc))
+		return;
+
 	/* All data has been written, so now move the offset of sub buffer. */
 	relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size);
 
@@ -197,7 +212,7 @@  static void guc_move_to_next_buf(struct intel_guc *guc)
 
 static void *guc_get_write_buffer(struct intel_guc *guc)
 {
-	if (!guc->log.runtime.relay_chan)
+	if (!guc_log_has_relay(guc))
 		return NULL;
 
 	/* Just get the base address of a new sub buffer and copy data into it
@@ -265,6 +280,8 @@  static void guc_read_update_log_buffer(struct intel_guc *guc)
 	/* Get the pointer to shared GuC log buffer */
 	log_buf_state = src_data = guc->log.runtime.buf_addr;
 
+	mutex_lock(&guc->log.runtime.relay_lock);
+
 	/* Get the pointer to local buffer to store the logs */
 	log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc);
 
@@ -344,6 +361,8 @@  static void guc_read_update_log_buffer(struct intel_guc *guc)
 		DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n");
 		guc->log.capture_miss_count++;
 	}
+
+	mutex_unlock(&guc->log.runtime.relay_lock);
 }
 
 static void capture_logs_work(struct work_struct *work)
@@ -363,8 +382,6 @@  static int guc_log_runtime_create(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	void *vaddr;
-	struct rchan *guc_log_relay_chan;
-	size_t n_subbufs, subbuf_size;
 	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -387,8 +404,44 @@  static int guc_log_runtime_create(struct intel_guc *guc)
 
 	guc->log.runtime.buf_addr = vaddr;
 
+	return 0;
+}
+
+static void guc_log_runtime_destroy(struct intel_guc *guc)
+{
+	/*
+	 * It's possible that the runtime stuff was never allocated because
+	 * GuC log was disabled at the boot time.
+	 **/
+	if (!guc_log_has_runtime(guc))
+		return;
+
+	i915_gem_object_unpin_map(guc->log.vma->obj);
+	guc->log.runtime.buf_addr = NULL;
+}
+
+void intel_guc_log_init_early(struct intel_guc *guc)
+{
+	mutex_init(&guc->log.runtime.relay_lock);
+	INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
+}
+
+int intel_guc_log_relay_create(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct rchan *guc_log_relay_chan;
+	size_t n_subbufs, subbuf_size;
+	int ret;
+
+	if (!i915_modparams.guc_log_level)
+		return 0;
+
+	mutex_lock(&guc->log.runtime.relay_lock);
+
+	GEM_BUG_ON(guc_log_has_relay(guc));
+
 	 /* Keep the size of sub buffers same as shared log buffer */
-	subbuf_size = guc->log.vma->obj->base.size;
+	subbuf_size = GUC_LOG_SIZE;
 
 	/* Store up to 8 snapshots, which is large enough to buffer sufficient
 	 * boot time logs and provides enough leeway to User, in terms of
@@ -407,33 +460,39 @@  static int guc_log_runtime_create(struct intel_guc *guc)
 		DRM_ERROR("Couldn't create relay chan for GuC logging\n");
 
 		ret = -ENOMEM;
-		goto err_vaddr;
+		goto err;
 	}
 
 	GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size);
 	guc->log.runtime.relay_chan = guc_log_relay_chan;
 
-	INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
+	mutex_unlock(&guc->log.runtime.relay_lock);
+
 	return 0;
 
-err_vaddr:
-	i915_gem_object_unpin_map(guc->log.vma->obj);
-	guc->log.runtime.buf_addr = NULL;
+err:
+	mutex_unlock(&guc->log.runtime.relay_lock);
+	/* logging will be off */
+	i915_modparams.guc_log_level = 0;
 	return ret;
 }
 
-static void guc_log_runtime_destroy(struct intel_guc *guc)
+void intel_guc_log_relay_destroy(struct intel_guc *guc)
 {
+	mutex_lock(&guc->log.runtime.relay_lock);
+
 	/*
-	 * It's possible that the runtime stuff was never allocated because
+	 * It's possible that the relay was never allocated because
 	 * GuC log was disabled at the boot time.
 	 */
-	if (!guc_log_has_runtime(guc))
-		return;
+	if (!guc_log_has_relay(guc))
+		goto out_unlock;
 
 	relay_close(guc->log.runtime.relay_chan);
-	i915_gem_object_unpin_map(guc->log.vma->obj);
-	guc->log.runtime.buf_addr = NULL;
+	guc->log.runtime.relay_chan = NULL;
+
+out_unlock:
+	mutex_unlock(&guc->log.runtime.relay_lock);
 }
 
 static int guc_log_late_setup(struct intel_guc *guc)
@@ -441,17 +500,24 @@  static int guc_log_late_setup(struct intel_guc *guc)
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	int ret;
 
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
 	if (!guc_log_has_runtime(guc)) {
 		/*
 		 * If log was disabled at boot time, then setup needed to handle
 		 * log buffer flush interrupts would not have been done yet, so
 		 * do that now.
 		 */
-		ret = guc_log_runtime_create(guc);
+		ret = intel_guc_log_relay_create(guc);
 		if (ret)
 			goto err;
+
+		mutex_lock(&dev_priv->drm.struct_mutex);
+		intel_runtime_pm_get(dev_priv);
+		ret = guc_log_runtime_create(guc);
+		intel_runtime_pm_put(dev_priv);
+		mutex_unlock(&dev_priv->drm.struct_mutex);
+
+		if (ret)
+			goto err_relay;
 	}
 
 	ret = guc_log_relay_file_create(guc);
@@ -461,7 +527,11 @@  static int guc_log_late_setup(struct intel_guc *guc)
 	return 0;
 
 err_runtime:
+	mutex_lock(&dev_priv->drm.struct_mutex);
 	guc_log_runtime_destroy(guc);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+err_relay:
+	intel_guc_log_relay_destroy(guc);
 err:
 	/* logging will remain off */
 	i915_modparams.guc_log_level = 0;
@@ -490,7 +560,11 @@  static void guc_flush_logs(struct intel_guc *guc)
 		return;
 
 	/* First disable the interrupts, will be renabled afterwards */
+	mutex_lock(&dev_priv->drm.struct_mutex);
+	intel_runtime_pm_get(dev_priv);
 	gen9_disable_guc_interrupts(dev_priv);
+	intel_runtime_pm_put(dev_priv);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 
 	/* Before initiating the forceful flush, wait for any pending/ongoing
 	 * flush to complete otherwise forceful flush may not actually happen.
@@ -498,7 +572,9 @@  static void guc_flush_logs(struct intel_guc *guc)
 	flush_work(&guc->log.runtime.flush_work);
 
 	/* Ask GuC to update the log buffer state */
+	intel_runtime_pm_get(dev_priv);
 	guc_log_flush(guc);
+	intel_runtime_pm_put(dev_priv);
 
 	/* GuC would have updated log buffer by now, so capture it */
 	guc_log_capture_logs(guc);
@@ -509,17 +585,10 @@  int intel_guc_log_create(struct intel_guc *guc)
 	struct i915_vma *vma;
 	unsigned long offset;
 	u32 flags;
-	u32 size;
 	int ret;
 
 	GEM_BUG_ON(guc->log.vma);
 
-	/* The first page is to save log buffer state. Allocate one
-	 * extra page for others in case for overlap */
-	size = (1 + GUC_LOG_DPC_PAGES + 1 +
-		GUC_LOG_ISR_PAGES + 1 +
-		GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
-
 	/* We require SSE 4.1 for fast reads from the GuC log buffer and
 	 * it should be present on the chipsets supporting GuC based
 	 * submisssions.
@@ -529,7 +598,7 @@  int intel_guc_log_create(struct intel_guc *guc)
 		goto err;
 	}
 
-	vma = intel_guc_allocate_vma(guc, size);
+	vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err;
@@ -584,7 +653,15 @@  int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
 		return 0;
 
 	verbosity = enable_logging ? control_val - 1 : 0;
+
+	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
+	if (ret)
+		return ret;
+	intel_runtime_pm_get(dev_priv);
 	ret = guc_log_control(guc, enable_logging, verbosity);
+	intel_runtime_pm_put(dev_priv);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
+
 	if (ret < 0) {
 		DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret);
 		return ret;
@@ -605,7 +682,11 @@  int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
 		}
 
 		/* GuC logging is currently the only user of Guc2Host interrupts */
+		mutex_lock(&dev_priv->drm.struct_mutex);
+		intel_runtime_pm_get(dev_priv);
 		gen9_enable_guc_interrupts(dev_priv);
+		intel_runtime_pm_put(dev_priv);
+		mutex_unlock(&dev_priv->drm.struct_mutex);
 	} else {
 		/*
 		 * Once logging is disabled, GuC won't generate logs & send an
@@ -627,13 +708,13 @@  void i915_guc_log_register(struct drm_i915_private *dev_priv)
 	if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level)
 		return;
 
-	mutex_lock(&dev_priv->drm.struct_mutex);
 	guc_log_late_setup(&dev_priv->guc);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
 }
 
 void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
 {
+	struct intel_guc *guc = &dev_priv->guc;
+
 	if (!USES_GUC_SUBMISSION(dev_priv))
 		return;
 
@@ -643,6 +724,8 @@  void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
 	gen9_disable_guc_interrupts(dev_priv);
 	intel_runtime_pm_put(dev_priv);
 
-	guc_log_runtime_destroy(&dev_priv->guc);
+	guc_log_runtime_destroy(guc);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
+
+	intel_guc_log_relay_destroy(guc);
 }
diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/intel_guc_log.h
index f512cf7..c638b9d 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/intel_guc_log.h
@@ -32,6 +32,13 @@ 
 struct drm_i915_private;
 struct intel_guc;
 
+/*
+ * The first page is to save log buffer state. Allocate one
+ * extra page for others in case for overlap
+ */
+#define GUC_LOG_SIZE	((1 + GUC_LOG_DPC_PAGES + 1 + GUC_LOG_ISR_PAGES + \
+			  1 + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT)
+
 struct intel_guc_log {
 	u32 flags;
 	struct i915_vma *vma;
@@ -41,6 +48,8 @@  struct intel_guc_log {
 		struct workqueue_struct *flush_wq;
 		struct work_struct flush_work;
 		struct rchan *relay_chan;
+		/* To serialize the access to relay_chan */
+		struct mutex relay_lock;
 	} runtime;
 	/* logging related stats */
 	u32 capture_miss_count;
@@ -52,6 +61,9 @@  struct intel_guc_log {
 
 int intel_guc_log_create(struct intel_guc *guc);
 void intel_guc_log_destroy(struct intel_guc *guc);
+void intel_guc_log_init_early(struct intel_guc *guc);
+int intel_guc_log_relay_create(struct intel_guc *guc);
+void intel_guc_log_relay_destroy(struct intel_guc *guc);
 int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
 void i915_guc_log_register(struct drm_i915_private *dev_priv);
 void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index f78a17b..e3f3509 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -236,28 +236,44 @@  static void guc_disable_communication(struct intel_guc *guc)
 	guc->send = intel_guc_send_nop;
 }
 
-int intel_uc_init_wq(struct drm_i915_private *dev_priv)
+int intel_uc_init_misc(struct drm_i915_private *dev_priv)
 {
+	struct intel_guc *guc = &dev_priv->guc;
 	int ret;
 
 	if (!USES_GUC(dev_priv))
 		return 0;
 
-	ret = intel_guc_init_wq(&dev_priv->guc);
+	ret = intel_guc_init_wq(guc);
 	if (ret) {
 		DRM_ERROR("Couldn't allocate workqueues for GuC\n");
-		return ret;
+		goto err;
+	}
+
+	ret = intel_guc_log_relay_create(guc);
+	if (ret) {
+		DRM_ERROR("Couldn't allocate relay for GuC log\n");
+		goto err_relay;
 	}
 
 	return 0;
+
+err_relay:
+	intel_guc_fini_wq(guc);
+err:
+	return ret;
 }
 
-void intel_uc_fini_wq(struct drm_i915_private *dev_priv)
+void intel_uc_fini_misc(struct drm_i915_private *dev_priv)
 {
+	struct intel_guc *guc = &dev_priv->guc;
+
 	if (!USES_GUC(dev_priv))
 		return;
 
-	intel_guc_fini_wq(&dev_priv->guc);
+	intel_guc_fini_wq(guc);
+
+	intel_guc_log_relay_destroy(guc);
 }
 
 int intel_uc_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 8a72497..f2984e0 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -33,8 +33,8 @@ 
 void intel_uc_init_mmio(struct drm_i915_private *dev_priv);
 void intel_uc_init_fw(struct drm_i915_private *dev_priv);
 void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
-int intel_uc_init_wq(struct drm_i915_private *dev_priv);
-void intel_uc_fini_wq(struct drm_i915_private *dev_priv);
+int intel_uc_init_misc(struct drm_i915_private *dev_priv);
+void intel_uc_fini_misc(struct drm_i915_private *dev_priv);
 int intel_uc_init_hw(struct drm_i915_private *dev_priv);
 void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
 int intel_uc_init(struct drm_i915_private *dev_priv);