@@ -133,3 +133,9 @@ depends on DRM_I915
depends on EXPERT
source "drivers/gpu/drm/i915/Kconfig.debug"
endmenu
+
+menu "drm/i915 Profile Guided Optimisation"
+ visible if EXPERT
+ depends on DRM_I915
+ source "drivers/gpu/drm/i915/Kconfig.profile"
+endmenu
new file mode 100644
@@ -0,0 +1,13 @@
+config DRM_I915_SPIN_REQUEST
+ int
+ default 5 # microseconds
+ help
+ Before sleeping waiting for a request (GPU operation) to complete,
+ we may spend some time polling for its completion. As the IRQ may
+ take a non-negligible time to setup, we do a short spin first to
+ check if the request will complete in the time it would have taken
+ us to enable the interrupt.
+
+ May be 0 to disable the initial spin. In practice, we estimate
+ the cost of enabling the interrupt (if currently disabled) to be
+ a few microseconds.
@@ -1340,8 +1340,31 @@ long i915_request_wait(struct i915_request *rq,
trace_i915_request_wait_begin(rq, flags);
- /* Optimistic short spin before touching IRQs */
- if (__i915_spin_request(rq, state, 5))
+ /*
+ * Optimistic spin before touching IRQs.
+ *
+ * We may use a rather large value here to offset the penalty of
+ * switching away from the active task. Frequently, the client will
+ * wait upon an old swapbuffer to throttle itself to remain within a
+ * frame of the gpu. If the client is running in lockstep with the gpu,
+ * then it should not be waiting long at all, and a sleep now will incur
+ * extra scheduler latency in producing the next frame. To try to
+ * avoid adding the cost of enabling/disabling the interrupt to the
+ * short wait, we first spin to see if the request would have completed
+ * in the time taken to setup the interrupt.
+ *
+ * We need upto 5us to enable the irq, and upto 20us to hide the
+ * scheduler latency of a context switch, ignoring the secondary
+ * impacts from a context switch such as cache eviction.
+ *
+ * The scheme used for low-latency IO is called "hybrid interrupt
+ * polling". The suggestion there is to sleep until just before you
+ * expect to be woken by the device interrupt and then poll for its
+ * completion. That requires having a good predictor for the request
+ * duration, which we currently lack.
+ */
+ if (CONFIG_DRM_I915_SPIN_REQUEST &&
+ __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST))
goto out;
/*