summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/i915_perf.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_perf.c')
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c175
1 files changed, 107 insertions, 68 deletions
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index dc4ce694c06a..5140017f9a39 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -195,6 +195,10 @@
#include <linux/sizes.h>
#include <linux/uuid.h>
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_lrc_reg.h"
+
#include "i915_drv.h"
#include "i915_oa_hsw.h"
#include "i915_oa_bdw.h"
@@ -210,7 +214,6 @@
#include "i915_oa_cflgt3.h"
#include "i915_oa_cnl.h"
#include "i915_oa_icl.h"
-#include "intel_lrc_reg.h"
/* HW requires this to be a power of two, between 128k and 16M, though driver
* is currently generally designed assuming the largest 16M size is used such
@@ -271,8 +274,6 @@
#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
-static int zero;
-static int one = 1;
static u32 i915_perf_stream_paranoid = true;
/* The maximum exponent the hardware accepts is 63 (essentially it selects one
@@ -1202,28 +1203,35 @@ static int i915_oa_read(struct i915_perf_stream *stream,
static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
struct i915_gem_context *ctx)
{
- struct intel_engine_cs *engine = i915->engine[RCS0];
+ struct i915_gem_engines_iter it;
struct intel_context *ce;
- int ret;
+ int err;
- ret = i915_mutex_lock_interruptible(&i915->drm);
- if (ret)
- return ERR_PTR(ret);
+ err = i915_mutex_lock_interruptible(&i915->drm);
+ if (err)
+ return ERR_PTR(err);
- /*
- * As the ID is the gtt offset of the context's vma we
- * pin the vma to ensure the ID remains fixed.
- *
- * NB: implied RCS engine...
- */
- ce = intel_context_pin(ctx, engine);
- mutex_unlock(&i915->drm.struct_mutex);
- if (IS_ERR(ce))
- return ce;
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ if (ce->engine->class != RENDER_CLASS)
+ continue;
- i915->perf.oa.pinned_ctx = ce;
+ /*
+ * As the ID is the gtt offset of the context's vma we
+ * pin the vma to ensure the ID remains fixed.
+ */
+ err = intel_context_pin(ce);
+ if (err == 0) {
+ i915->perf.oa.pinned_ctx = ce;
+ break;
+ }
+ }
+ i915_gem_context_unlock_engines(ctx);
+
+ mutex_unlock(&i915->drm.struct_mutex);
+ if (err)
+ return ERR_PTR(err);
- return ce;
+ return i915->perf.oa.pinned_ctx;
}
/**
@@ -1365,7 +1373,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
free_oa_buffer(dev_priv);
intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
- intel_runtime_pm_put(dev_priv, stream->wakeref);
+ intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
if (stream->ctx)
oa_put_render_ctx_id(stream);
@@ -1502,7 +1510,7 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
- bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE);
+ bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
if (IS_ERR(bo)) {
DRM_ERROR("Failed to allocate OA buffer\n");
ret = PTR_ERR(bo);
@@ -1559,28 +1567,10 @@ static void config_oa_regs(struct drm_i915_private *dev_priv,
}
}
-static int hsw_enable_metric_set(struct i915_perf_stream *stream)
+static void delay_after_mux(void)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
- const struct i915_oa_config *oa_config = stream->oa_config;
-
- /* PRM:
- *
- * OA unit is using “crclk” for its functionality. When trunk
- * level clock gating takes place, OA clock would be gated,
- * unable to count the events from non-render clock domain.
- * Render clock gating must be disabled when OA is enabled to
- * count the events from non-render domain. Unit level clock
- * gating for RCS should also be disabled.
- */
- I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
- ~GEN7_DOP_CLOCK_GATE_ENABLE));
- I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
- GEN6_CSUNIT_CLOCK_GATE_DISABLE));
-
- config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
-
- /* It apparently takes a fairly long time for a new MUX
+ /*
+ * It apparently takes a fairly long time for a new MUX
* configuration to be be applied after these register writes.
* This delay duration was derived empirically based on the
* render_basic config but hopefully it covers the maximum
@@ -1602,6 +1592,30 @@ static int hsw_enable_metric_set(struct i915_perf_stream *stream)
* a delay at this location would mitigate any invalid reports.
*/
usleep_range(15000, 20000);
+}
+
+static int hsw_enable_metric_set(struct i915_perf_stream *stream)
+{
+ struct drm_i915_private *dev_priv = stream->dev_priv;
+ const struct i915_oa_config *oa_config = stream->oa_config;
+
+ /*
+ * PRM:
+ *
+ * OA unit is using “crclk” for its functionality. When trunk
+ * level clock gating takes place, OA clock would be gated,
+ * unable to count the events from non-render clock domain.
+ * Render clock gating must be disabled when OA is enabled to
+ * count the events from non-render domain. Unit level clock
+ * gating for RCS should also be disabled.
+ */
+ I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
+ ~GEN7_DOP_CLOCK_GATE_ENABLE));
+ I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
+ GEN6_CSUNIT_CLOCK_GATE_DISABLE));
+
+ config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
+ delay_after_mux();
config_oa_regs(dev_priv, oa_config->b_counter_regs,
oa_config->b_counter_regs_len);
@@ -1679,7 +1693,7 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
CTX_REG(reg_state,
CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
- gen8_make_rpcs(i915, &ce->sseu));
+ intel_sseu_make_rpcs(i915, &ce->sseu));
}
/*
@@ -1709,7 +1723,6 @@ gen8_update_reg_state_unlocked(struct intel_context *ce,
static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
const struct i915_oa_config *oa_config)
{
- struct intel_engine_cs *engine = dev_priv->engine[RCS0];
unsigned int map_type = i915_coherent_map_type(dev_priv);
struct i915_gem_context *ctx;
struct i915_request *rq;
@@ -1738,30 +1751,43 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
/* Update all contexts now that we've stalled the submission. */
list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
- struct intel_context *ce = intel_context_lookup(ctx, engine);
- u32 *regs;
-
- /* OA settings will be set upon first use */
- if (!ce || !ce->state)
- continue;
-
- regs = i915_gem_object_pin_map(ce->state->obj, map_type);
- if (IS_ERR(regs))
- return PTR_ERR(regs);
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+
+ for_each_gem_engine(ce,
+ i915_gem_context_lock_engines(ctx),
+ it) {
+ u32 *regs;
+
+ if (ce->engine->class != RENDER_CLASS)
+ continue;
+
+ /* OA settings will be set upon first use */
+ if (!ce->state)
+ continue;
+
+ regs = i915_gem_object_pin_map(ce->state->obj,
+ map_type);
+ if (IS_ERR(regs)) {
+ i915_gem_context_unlock_engines(ctx);
+ return PTR_ERR(regs);
+ }
- ce->state->obj->mm.dirty = true;
- regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
+ ce->state->obj->mm.dirty = true;
+ regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
- gen8_update_reg_state_unlocked(ce, regs, oa_config);
+ gen8_update_reg_state_unlocked(ce, regs, oa_config);
- i915_gem_object_unpin_map(ce->state->obj);
+ i915_gem_object_unpin_map(ce->state->obj);
+ }
+ i915_gem_context_unlock_engines(ctx);
}
/*
* Apply the configuration by doing one context restore of the edited
* context image.
*/
- rq = i915_request_alloc(engine, dev_priv->kernel_context);
+ rq = i915_request_create(dev_priv->engine[RCS0]->kernel_context);
if (IS_ERR(rq))
return PTR_ERR(rq);
@@ -1815,6 +1841,7 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream)
return ret;
config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
+ delay_after_mux();
config_oa_regs(dev_priv, oa_config->b_counter_regs,
oa_config->b_counter_regs_len);
@@ -2090,7 +2117,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
* In our case we are expecting that taking pm + FORCEWAKE
* references will effectively disable RC6.
*/
- stream->wakeref = intel_runtime_pm_get(dev_priv);
+ stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm);
intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
ret = alloc_oa_buffer(dev_priv);
@@ -2126,7 +2153,7 @@ err_oa_buf_alloc:
put_oa_config(dev_priv, stream->oa_config);
intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
- intel_runtime_pm_put(dev_priv, stream->wakeref);
+ intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
err_config:
if (stream->ctx)
@@ -2495,6 +2522,9 @@ static int i915_perf_release(struct inode *inode, struct file *file)
i915_perf_destroy_locked(stream);
mutex_unlock(&dev_priv->perf.lock);
+ /* Release the reference the perf stream kept on the driver. */
+ drm_dev_put(&dev_priv->drm);
+
return 0;
}
@@ -2630,6 +2660,11 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
if (!(param->flags & I915_PERF_FLAG_DISABLED))
i915_perf_enable_locked(stream);
+ /* Take a reference on the driver that will be kept with stream_fd
+ * until its release.
+ */
+ drm_dev_get(&dev_priv->drm);
+
return stream_fd;
err_open:
@@ -3344,8 +3379,8 @@ static struct ctl_table oa_table[] = {
.maxlen = sizeof(i915_perf_stream_paranoid),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "oa_max_sample_rate",
@@ -3353,7 +3388,7 @@ static struct ctl_table oa_table[] = {
.maxlen = sizeof(i915_oa_max_sample_rate),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
+ .extra1 = SYSCTL_ZERO,
.extra2 = &oa_sample_rate_hard_limit,
},
{}
@@ -3457,9 +3492,13 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
dev_priv->perf.oa.ops.disable_metric_set = gen10_disable_metric_set;
- dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
- dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
-
+ if (IS_GEN(dev_priv, 10)) {
+ dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
+ dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
+ } else {
+ dev_priv->perf.oa.ctx_oactxctrl_offset = 0x124;
+ dev_priv->perf.oa.ctx_flexeu0_offset = 0x78e;
+ }
dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
}
}