From 3d071d8da1f586c24863a57349586a1611b9aa67 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Thu, 15 Dec 2016 12:42:33 +0100 Subject: [PATCH 0001/1299] random: remove stale maybe_reseed_primary_crng The function maybe_reseed_primary_crng is not used anywhere and thus can be removed. Signed-off-by: Stephan Mueller Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 1ef26403bcc8..8e5ab20848c0 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -855,13 +855,6 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) spin_unlock_irqrestore(&primary_crng.lock, flags); } -static inline void maybe_reseed_primary_crng(void) -{ - if (crng_init > 2 && - time_after(jiffies, primary_crng.init_time + CRNG_RESEED_INTERVAL)) - crng_reseed(&primary_crng, &input_pool); -} - static inline void crng_wait_ready(void) { wait_event_interruptible(crng_init_wait, crng_ready()); From 2e03c36f25ebb52d3358b8baebcdf96895c33a87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BCller?= Date: Tue, 27 Dec 2016 23:39:31 +0100 Subject: [PATCH 0002/1299] random: remove stale urandom_init_wait The urandom_init_wait wait queue is a left over from the pre-ChaCha20 times and can therefore be savely removed. Signed-off-by: Stephan Mueller Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 8e5ab20848c0..482531d87fb8 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -409,7 +409,6 @@ static struct poolinfo { */ static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); -static DECLARE_WAIT_QUEUE_HEAD(urandom_init_wait); static struct fasync_struct *fasync; static DEFINE_SPINLOCK(random_ready_list_lock); From 43d8a72cd985ca5279a9eb84d61fcbb3ee3d3774 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BCller?= Date: Tue, 27 Dec 2016 23:40:59 +0100 Subject: [PATCH 0003/1299] random: remove variable limit The variable limit was used to identify the nonblocking pool's unlimited random number generation. As the nonblocking pool is a thing of the past, remove the limit variable and any conditions around it (i.e. preserve the branches for limit == 1). Signed-off-by: Stephan Mueller Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 482531d87fb8..92d6dd24c86e 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -466,7 +466,6 @@ struct entropy_store { int entropy_count; int entropy_total; unsigned int initialized:1; - unsigned int limit:1; unsigned int last_data_init:1; __u8 last_data[EXTRACT_SIZE]; }; @@ -484,7 +483,6 @@ static __u32 blocking_pool_data[OUTPUT_POOL_WORDS] __latent_entropy; static struct entropy_store input_pool = { .poolinfo = &poolinfo_table[0], .name = "input", - .limit = 1, .lock = __SPIN_LOCK_UNLOCKED(input_pool.lock), .pool = input_pool_data }; @@ -492,7 +490,6 @@ static struct entropy_store input_pool = { static struct entropy_store blocking_pool = { .poolinfo = &poolinfo_table[1], .name = "blocking", - .limit = 1, .pull = &input_pool, .lock = __SPIN_LOCK_UNLOCKED(blocking_pool.lock), .pool = blocking_pool_data, @@ -1212,15 +1209,6 @@ static void xfer_secondary_pool(struct entropy_store *r, size_t nbytes) r->entropy_count > r->poolinfo->poolfracbits) return; - if (r->limit == 0 && random_min_urandom_seed) { - unsigned long now = jiffies; - - if (time_before(now, - r->last_pulled + random_min_urandom_seed * HZ)) - return; - r->last_pulled = now; - } - _xfer_secondary_pool(r, nbytes); } @@ -1228,8 +1216,6 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes) { __u32 tmp[OUTPUT_POOL_WORDS]; - /* For /dev/random's pool, always leave two wakeups' worth */ - int rsvd_bytes = r->limit ? 0 : random_read_wakeup_bits / 4; int bytes = nbytes; /* pull at least as much as a wakeup */ @@ -1240,7 +1226,7 @@ static void _xfer_secondary_pool(struct entropy_store *r, size_t nbytes) trace_xfer_secondary_pool(r->name, bytes * 8, nbytes * 8, ENTROPY_BITS(r), ENTROPY_BITS(r->pull)); bytes = extract_entropy(r->pull, tmp, bytes, - random_read_wakeup_bits / 8, rsvd_bytes); + random_read_wakeup_bits / 8, 0); mix_pool_bytes(r, tmp, bytes); credit_entropy_bits(r, bytes*8); } @@ -1268,7 +1254,7 @@ static void push_to_pool(struct work_struct *work) static size_t account(struct entropy_store *r, size_t nbytes, int min, int reserved) { - int entropy_count, orig; + int entropy_count, orig, have_bytes; size_t ibytes, nfrac; BUG_ON(r->entropy_count > r->poolinfo->poolfracbits); @@ -1277,14 +1263,12 @@ static size_t account(struct entropy_store *r, size_t nbytes, int min, retry: entropy_count = orig = ACCESS_ONCE(r->entropy_count); ibytes = nbytes; - /* If limited, never pull more than available */ - if (r->limit) { - int have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); + /* never pull more than available */ + have_bytes = entropy_count >> (ENTROPY_SHIFT + 3); - if ((have_bytes -= reserved) < 0) - have_bytes = 0; - ibytes = min_t(size_t, ibytes, have_bytes); - } + if ((have_bytes -= reserved) < 0) + have_bytes = 0; + ibytes = min_t(size_t, ibytes, have_bytes); if (ibytes < min) ibytes = 0; From 5d0e5ea343a0f70351428476bcf8715e0731f26a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20M=C3=BCller?= Date: Tue, 27 Dec 2016 23:41:22 +0100 Subject: [PATCH 0004/1299] random: fix comment for unused random_min_urandom_seed The variable random_min_urandom_seed is not needed any more as it defined the reseeding behavior of the nonblocking pool. Though it is not needed any more, it is left in the code for user space interface compatibility. Signed-off-by: Stephan Mueller Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 92d6dd24c86e..9d147d456598 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -313,9 +313,7 @@ static int random_read_wakeup_bits = 64; static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; /* - * The minimum number of seconds between urandom pool reseeding. We - * do this to limit the amount of entropy that can be drained from the - * input pool even if there are heavy demands on /dev/urandom. + * Variable is currently unused by left for user space compatibility. */ static int random_min_urandom_seed = 60; From c96b63a6a7ac4bd670ec2e663793a9a31418b790 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:42 +0200 Subject: [PATCH 0005/1299] drm/i915: Don't leak edid in intel_crt_detect_ddc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the path where intel_crt_detect_ddc() detects a CRT, if would return true without freeing the edid. Fixes: a2bd1f541f19 ("drm/i915: check whether we actually received an edid in detect_ddc") Cc: Chris Wilson Cc: Daniel Vetter Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v3.6+ Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-1-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_crt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 385e29af8baa..2bf5aca6e37c 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -499,6 +499,7 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(crt->base.base.dev); struct edid *edid; struct i2c_adapter *i2c; + bool ret = false; BUG_ON(crt->base.type != INTEL_OUTPUT_ANALOG); @@ -515,17 +516,17 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) */ if (!is_digital) { DRM_DEBUG_KMS("CRT detected via DDC:0x50 [EDID]\n"); - return true; + ret = true; + } else { + DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n"); } - - DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n"); } else { DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [no valid EDID found]\n"); } kfree(edid); - return false; + return ret; } static enum drm_connector_status From 04313b00b79405f86d815100f85c47a2ee5b8ca0 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:43 +0200 Subject: [PATCH 0006/1299] drm/i915: Don't init hpd polling for vlv and chv from runtime_suspend() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An error in the condition for avoiding the call to intel_hpd_poll_init() for valleyview and cherryview from intel_runtime_suspend() caused it to be called unconditionally. Fix it. Fixes: 19625e85c6ec ("drm/i915: Enable polling when we don't have hpd") Cc: stable@vger.kernel.org Cc: Ville Syrjälä Cc: Daniel Vetter Cc: Lyude Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.9+ Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-2-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 4ae69ebe166e..ca168b22ee26 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2377,7 +2377,7 @@ static int intel_runtime_suspend(struct device *kdev) assert_forcewakes_inactive(dev_priv); - if (!IS_VALLEYVIEW(dev_priv) || !IS_CHERRYVIEW(dev_priv)) + if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) intel_hpd_poll_init(dev_priv); DRM_DEBUG_KMS("Device suspended\n"); From 46a1bd289507dfcc428fb9daf65421ed6be6af8b Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:44 +0200 Subject: [PATCH 0007/1299] drm/i915: Fix calculation of rotated x and y offsets for planar formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parameters tile_size, tile_width and tile_height were passed in the wrong order to _intel_adjust_tile_offset() when calculating the rotated offsets. This doesn't fix any user visible bug, since for packed formats new and old offset are the same and the rotated offsets are within a tile before they are fed to _intel_adjust_tile_offset(). In that case, the offsets are unchanged. That is not true for planar formats, but those are currently not supported. Fixes: 66a2d927cb0e ("drm/i915: Make intel_adjust_tile_offset() work for linear buffers") Cc: Ville Syrjälä Cc: Sivakumar Thulasimani Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.9+ Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-3-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0f4272f98648..0d2dad1bb86b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2578,8 +2578,9 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, * We only keep the x/y offsets, so push all of the * gtt offset into the x/y offsets. */ - _intel_adjust_tile_offset(&x, &y, tile_size, - tile_width, tile_height, pitch_tiles, + _intel_adjust_tile_offset(&x, &y, + tile_width, tile_height, + tile_size, pitch_tiles, gtt_offset_rotated * tile_size, 0); gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height; From 31bb2ef97ea9db343348f9b5ccaa9bb6f48fc655 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:45 +0200 Subject: [PATCH 0008/1299] drm/i915: Check for NULL atomic state in intel_crtc_disable_noatomic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In intel_crtc_disable_noatomic(), bail on a failure to allocate an atomic state to avoid a NULL pointer dereference. Fixes: 4a80655827af ("drm/i915: Pass atomic state to crtc enable/disable functions") Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.9+ Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-4-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0d2dad1bb86b..09edff4633f6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6882,6 +6882,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) } state = drm_atomic_state_alloc(crtc->dev); + if (!state) { + DRM_DEBUG_KMS("failed to disable [CRTC:%d:%s], out of memory", + crtc->base.id, crtc->name); + return; + } + state->acquire_ctx = crtc->dev->mode_config.acquire_ctx; /* Everything's already locked, -EDEADLK can't happen. */ From 62fa0ce2bd590ef95b4557fca780441dc33ba427 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 21 Jan 2017 18:22:33 +0000 Subject: [PATCH 0009/1299] agp/intel: Move intel_fake_agp_sizes into #ifdef block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the intel_fake_agp_sizes array into the same #ifdef block as it is used to avoid instantiation when not used, and so triggering a compiler warning drivers/char/agp/intel-gtt.c:335:42: warning: ‘intel_fake_agp_sizes’ defined but not used [-Wunused-const-variable=] Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170121182233.30852-1-chris@chris-wilson.co.uk Reviewed-by: Jani Nikula Reviewed-by: Joonas Lahtinen --- drivers/char/agp/intel-gtt.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 9702c78f458d..7fcc2a9d1d5a 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -332,14 +332,6 @@ static void i810_write_entry(dma_addr_t addr, unsigned int entry, writel_relaxed(addr | pte_flags, intel_private.gtt + entry); } -static const struct aper_size_info_fixed intel_fake_agp_sizes[] = { - {32, 8192, 3}, - {64, 16384, 4}, - {128, 32768, 5}, - {256, 65536, 6}, - {512, 131072, 7}, -}; - static unsigned int intel_gtt_stolen_size(void) { u16 gmch_ctrl; @@ -670,6 +662,14 @@ static int intel_gtt_init(void) } #if IS_ENABLED(CONFIG_AGP_INTEL) +static const struct aper_size_info_fixed intel_fake_agp_sizes[] = { + {32, 8192, 3}, + {64, 16384, 4}, + {128, 32768, 5}, + {256, 65536, 6}, + {512, 131072, 7}, +}; + static int intel_fake_agp_fetch_size(void) { int num_sizes = ARRAY_SIZE(intel_fake_agp_sizes); From ec78828ed60a9833582cbb582248bd1b0200f50e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 18 Jan 2017 12:18:08 +0000 Subject: [PATCH 0010/1299] drm/i915: Fix W=1 warning for csr_load_work_fn() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/i915/intel_csr.c: In function ‘csr_load_work_fn’: drivers/gpu/drm/i915/intel_csr.c:399:6: error: variable ‘ret’ set but not used [-Werror=unused-but-set-variable] Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170118121808.27869-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_csr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 0085bc745f6a..9dcc434d3b74 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -396,13 +396,11 @@ static void csr_load_work_fn(struct work_struct *work) struct drm_i915_private *dev_priv; struct intel_csr *csr; const struct firmware *fw = NULL; - int ret; dev_priv = container_of(work, typeof(*dev_priv), csr.work); csr = &dev_priv->csr; - ret = request_firmware(&fw, dev_priv->csr.fw_path, - &dev_priv->drm.pdev->dev); + request_firmware(&fw, dev_priv->csr.fw_path, &dev_priv->drm.pdev->dev); if (fw) dev_priv->csr.dmc_payload = parse_csr_fw(dev_priv, fw); From e27414a04f1a571eeb0b214ebfc77e6dc9bb8a08 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 20 Jan 2017 22:36:55 +0800 Subject: [PATCH 0011/1299] drm/i915/error: use rb_entry() To make the code clearer, use rb_entry() instead of container_of() to deal with rbtree. Signed-off-by: Geliang Tang Link: http://patchwork.freedesktop.org/patch/msgid/b08fd4be9d4c45d88c158a17b854c3fd628840ed.1484816339.git.geliangtang@gmail.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9cd22cda17af..e5375323eb06 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1080,7 +1080,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, ee->waiters = waiter; for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = container_of(rb, typeof(*w), node); + struct intel_wait *w = rb_entry(rb, typeof(*w), node); strcpy(waiter->comm, w->tsk->comm); waiter->pid = w->tsk->pid; From 2f1ac9cc68e0ca569405bd854979aa7eb787f774 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 09:37:24 +0000 Subject: [PATCH 0012/1299] drm/i915: Queue hangcheck when irqs are disabled Ensure that the hangcheck is queued even in the absence of interrupts. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170123093724.18592-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index fcfa423d08bd..c6fa77177615 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -127,6 +127,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) if (!b->irq_enabled || test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { mod_timer(&b->fake_irq, jiffies + 1); + i915_queue_hangcheck(i915); } else { /* Ensure we never sleep indefinitely */ GEM_BUG_ON(!time_after(b->timeout, jiffies)); From 5d12fcef4ef15d00ae3228cc1ed80d2d81bc2c06 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 11:31:31 +0000 Subject: [PATCH 0013/1299] drm/i915: Assert that the kernel_context is hw-id 0 For easy recognisability, we want the kernel context to have id 0 and all user contexts to have non-zero ids. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170123113132.18665-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 17f90c618208..77458da9627d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -451,6 +451,11 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) return PTR_ERR(ctx); } + /* For easy recognisablity, we want the kernel context to be 0 and then + * all user contexts will have non-zero hw_id. + */ + GEM_BUG_ON(ctx->hw_id); + i915_gem_context_clear_bannable(ctx); ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */ dev_priv->kernel_context = ctx; From 86aa7e760a6709789a21bdf186bd0c3635da6b9a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 11:31:32 +0000 Subject: [PATCH 0014/1299] drm/i915: Assert that the context-switch completion matches our context When execlists signals the context completion, it also provides the context id for the status event. Assert that id matches the one we expect. v2: The upper dword of the context status is a duplicate of the upper dword from elsp submission (i.e. includes the group id as well as the context id). Include this check as well. v3: Only check against lrc_desc (as this contains the hw_id check) Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170123113132.18665-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 432ee495dec2..eceffe25c022 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -595,6 +595,11 @@ static void intel_lrc_irq_handler(unsigned long data) if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; + /* Check the context/desc id for this event matches */ + GEM_BUG_ON(readl(buf + 2 * idx + 1) != + upper_32_bits(intel_lr_context_descriptor(port[0].request->ctx, + engine))); + GEM_BUG_ON(port[0].count == 0); if (--port[0].count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); From 4c01ded5732d6533a2858fae30c197f734745062 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 22 Dec 2016 11:33:23 +0100 Subject: [PATCH 0015/1299] drm/i915: Use atomic page flip for intel again. This reverts commit 527b6abe5fd2d2 (Revert "drm/i915: Use atomic commits for legacy page_flips") and reapplies commit ee042aa40b66d1. ("drm/i915: Use atomic commits for legacy page_flips") The reason for the revert was because legacy cursor updates were forced to wait for pending page flips and rendering after they were converted to atomic. Commit f79f26921ee12c6f (drm/i915: Add a cursor hack to allow converting legacy page flip to atomic, v3) adds a fastpath to cursor updates, which fixes the stuttering issues. With these changes I feel confident enough to re-enable cursor updates. Legacy cursor update won't block in the following cases: - Moving cursor - Changing cursor fb The legacy cursor update will still block in the following cases: - Showing/hiding cursor. - Cursor size or scaling changes. - cursor update while cursor is invisible (could be fixed, if it turns out to be important). - Cursor tiling changes (Not sure we support tiled cursors.) - Last update was a modeset. Cc: Steven Newbury Cc: Rafael Ristovski Cc: Chris Wilson Cc: Daniel Vetter Tested-by: Rafael Ristovski Testcase: igt/kms_cursor_legacy Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 09edff4633f6..0bf8e1bfbe7e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12146,6 +12146,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) spin_unlock(&dev->event_lock); } +__maybe_unused static int intel_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, @@ -14732,7 +14733,7 @@ static const struct drm_crtc_funcs intel_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .set_property = drm_atomic_helper_crtc_set_property, .destroy = intel_crtc_destroy, - .page_flip = intel_crtc_page_flip, + .page_flip = drm_atomic_helper_page_flip, .atomic_duplicate_state = intel_crtc_duplicate_state, .atomic_destroy_state = intel_crtc_destroy_state, .set_crc_source = intel_crtc_set_crc_source, From 7c518460303353084ebcfca99bc4b67ce33745a1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 14:52:45 +0000 Subject: [PATCH 0016/1299] drm/i915: Pevent copying uninitialised garbage into vma->ggtt_view Since tweaking i915_vma_compare() we allowed constructors to skip clearing the ggtt_view believing that we didn't access the unused members. That, as it turns out, was not entirely true. In particular, i915_gem_fault() uses ret = remap_io_mapping(area, area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), &ggtt->mappable); i.e. the ggtt_view.partial for both normal and partial views. If we allowed garbage into the normal vma->ggtt_view and then try userspace tried to mmap it, we could explode in an unobvious fashion. Fixes: 7b92c047bae2 ("drm/i915: Eliminate superfluous i915_ggtt_view_rotated") Fixes: 3bf4d5751943 ("drm/i915: Stop clearing i915_ggtt_view") Reported-by: Matthew Auld Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170123145245.3972-1-chris@chris-wilson.co.uk Tested-by: Matthew Auld Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_vma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 307b22ae7791..155906e84812 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -91,7 +91,7 @@ vma_create(struct drm_i915_gem_object *obj, vma->size = obj->base.size; vma->display_alignment = I915_GTT_MIN_ALIGNMENT; - if (view) { + if (view && view->type != I915_GGTT_VIEW_NORMAL) { vma->ggtt_view = *view; if (view->type == I915_GGTT_VIEW_PARTIAL) { GEM_BUG_ON(range_overflows_t(u64, From 70962fbe5c75e785d250c04db4d01c18b7316c13 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 13:05:56 +0000 Subject: [PATCH 0017/1299] drm/i915: Remove disable_lite_restore_wa This w/a (WaEnableForceRestoreInCtxtDescForVCS) was only used for preproduction hw, which is no longer in use. Remove the workaround to simplify the code. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170123130601.2281-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lrc.c | 11 +---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index eceffe25c022..8ffa4961aa40 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -272,10 +272,6 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - engine->disable_lite_restore_wa = - IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) && - (engine->id == VCS || engine->id == VCS2); - engine->ctx_desc_template = GEN8_CTX_VALID; if (IS_GEN8(dev_priv)) engine->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT; @@ -284,11 +280,6 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) /* TODO: WaDisableLiteRestore when we start using semaphore * signalling between Command Streamers */ /* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */ - - /* WaEnableForceRestoreInCtxtDescForVCS:skl */ - /* WaEnableForceRestoreInCtxtDescForVCS:bxt */ - if (engine->disable_lite_restore_wa) - engine->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; } /** @@ -558,7 +549,7 @@ static bool execlists_elsp_ready(struct intel_engine_cs *engine) int port; port = 1; /* wait for a free slot */ - if (engine->disable_lite_restore_wa || engine->preempt_wa) + if (engine->preempt_wa) port = 0; /* wait for GPU to be idle before continuing */ return !engine->execlist_port[port].request; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 79c2b8d72322..9183d148b36a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -380,7 +380,6 @@ struct intel_engine_cs { struct rb_root execlist_queue; struct rb_node *execlist_first; unsigned int fw_domains; - bool disable_lite_restore_wa; bool preempt_wa; u32 ctx_desc_template; From f8dd2934c4ec03a56cfc1b7d23c0248aa6e4adf0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 13:05:57 +0000 Subject: [PATCH 0018/1299] drm/i915: Remove BXT incoherent seqno write workaround This w/a was only used for preproduction hw, which is no longer in use. Remove the workaround to simplify the code. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170123130601.2281-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lrc.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8ffa4961aa40..202ce1e6e499 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1638,21 +1638,6 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, return 0; } -static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) -{ - /* - * On BXT A steppings there is a HW coherency issue whereby the - * MI_STORE_DATA_IMM storing the completed request's seqno - * occasionally doesn't invalidate the CPU cache. Work around this by - * clflushing the corresponding cacheline whenever the caller wants - * the coherency to be guaranteed. Note that this cacheline is known - * to be clean at this point, since we only write it in - * bxt_a_set_seqno(), where we also do a clflush after the write. So - * this clflush in practice becomes an invalidate operation. - */ - intel_flush_status_page(engine, I915_GEM_HWS_INDEX); -} - /* * Reserve space for 2 NOOPs at the end of each request to be * used as a workaround for not being allowed to do lite @@ -1800,8 +1785,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->irq_enable = gen8_logical_ring_enable_irq; engine->irq_disable = gen8_logical_ring_disable_irq; engine->emit_bb_start = gen8_emit_bb_start; - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) - engine->irq_seqno_barrier = bxt_a_seqno_barrier; } static inline void From 32ebc292119a92e3f753a287923d2d31846247f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 13:05:58 +0000 Subject: [PATCH 0019/1299] drm/i915: Remove BXT restore arbitration around ctx switch This w/a (WaDisableCtxRestoreArbitration) was only used for preproduction hw, which is no longer in use. Remove the workaround to simplify the code. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170123130601.2281-3-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lrc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 202ce1e6e499..b3d1a9520616 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1102,10 +1102,6 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, struct drm_i915_private *dev_priv = engine->i915; uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaDisableCtxRestoreArbitration:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE); - /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */ ret = gen8_emit_flush_coherentl3_wa(engine, batch, index); if (ret < 0) @@ -1202,10 +1198,6 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine, wa_ctx_emit(batch, index, MI_NOOP); } - /* WaDisableCtxRestoreArbitration:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE); - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); return wa_ctx_end(wa_ctx, *offset = index, 1); From 68bee6157384017521e881c9436b3b6336cfac96 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 13:05:59 +0000 Subject: [PATCH 0020/1299] drm/i915: Remove BXT disable pixel mask clamping w/a This w/a (WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken) was only used for preproduction hw, which is no longer in use. Remove the workaround to simplify the code. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170123130601.2281-4-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lrc.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b3d1a9520616..029a60e8683d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1170,15 +1170,6 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine, { uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) { - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0); - wa_ctx_emit(batch, index, - _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING)); - wa_ctx_emit(batch, index, MI_NOOP); - } - /* WaClearTdlStateAckDirtyBits:bxt */ if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_B0)) { wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(4)); From b403c8feaf4838ad54cb9c15d0ad8453aa90af50 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 13:06:00 +0000 Subject: [PATCH 0021/1299] drm/i915: Remove BXT TDL state w/a This w/a (WaClearTdlStateAckDirtyBits) was only used for preproduction hw, which is no longer in use. Remove the workaround to simplify the code. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170123130601.2281-5-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lrc.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 029a60e8683d..32096d141d57 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1170,25 +1170,6 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine, { uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaClearTdlStateAckDirtyBits:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_B0)) { - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(4)); - - wa_ctx_emit_reg(batch, index, GEN8_STATE_ACK); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE1); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE2); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN7_ROW_CHICKEN2); - /* dummy write to CS, mask bits are 0 to ensure the register is not modified */ - wa_ctx_emit(batch, index, 0x0); - wa_ctx_emit(batch, index, MI_NOOP); - } - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); return wa_ctx_end(wa_ctx, *offset = index, 1); From bb8f0f5abdd7845175962a3fb99a5681290f9566 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:01:34 +0000 Subject: [PATCH 0022/1299] drm/i915: Split intel_engine allocation and initialisation In order to reset the GPU early on in the module load sequence, we need to allocate the basic engine structs (to populate the mmio offsets etc). Currently, the engine initialisation allocates both the base struct and also allocate auxiliary objects, which depend upon state setup quite late in the load sequence. We split off the allocation callback for later and allow ourselves to allocate the engine structs themselves early. v2: Different paint for the unwind following error. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170124110135.6418-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 19 +++++- drivers/gpu/drm/i915/i915_drv.h | 3 + drivers/gpu/drm/i915/intel_engine_cs.c | 82 +++++++++++++++++++------- drivers/gpu/drm/i915/intel_lrc.h | 2 - 4 files changed, 82 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ca168b22ee26..f8d1ffeccc69 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -755,6 +755,15 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv) return -ENOMEM; } +static void i915_engines_cleanup(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + kfree(engine); +} + static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) { destroy_workqueue(dev_priv->hotplug.dp_wq); @@ -817,12 +826,15 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, mutex_init(&dev_priv->pps_mutex); intel_uc_init_early(dev_priv); - i915_memcpy_init_early(dev_priv); + ret = intel_engines_init_early(dev_priv); + if (ret) + return ret; + ret = i915_workqueues_init(dev_priv); if (ret < 0) - return ret; + goto err_engines; ret = intel_gvt_init(dev_priv); if (ret < 0) @@ -857,6 +869,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_gvt_cleanup(dev_priv); err_workqueues: i915_workqueues_cleanup(dev_priv); +err_engines: + i915_engines_cleanup(dev_priv); return ret; } @@ -869,6 +883,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) i915_perf_fini(dev_priv); i915_gem_load_cleanup(dev_priv); i915_workqueues_cleanup(dev_priv); + i915_engines_cleanup(dev_priv); } static int i915_mmio_setup(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ead3b6417c54..628f0fd53827 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2940,6 +2940,9 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); +int intel_engines_init_early(struct drm_i915_private *dev_priv); +int intel_engines_init(struct drm_i915_private *dev_priv); + /* intel_hotplug.c */ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 pin_mask, u32 long_mask); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 371acf109e34..69a6416d1223 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -110,21 +110,20 @@ intel_engine_setup(struct drm_i915_private *dev_priv, } /** - * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * intel_engines_init_early() - allocate the Engine Command Streamers * @dev_priv: i915 device private * * Return: non-zero if the initialization failed. */ -int intel_engines_init(struct drm_i915_private *dev_priv) +int intel_engines_init_early(struct drm_i915_private *dev_priv) { struct intel_device_info *device_info = mkwrite_device_info(dev_priv); unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; unsigned int mask = 0; - int (*init)(struct intel_engine_cs *engine); struct intel_engine_cs *engine; enum intel_engine_id id; unsigned int i; - int ret; + int err; WARN_ON(ring_mask == 0); WARN_ON(ring_mask & @@ -134,20 +133,8 @@ int intel_engines_init(struct drm_i915_private *dev_priv) if (!HAS_ENGINE(dev_priv, i)) continue; - if (i915.enable_execlists) - init = intel_engines[i].init_execlists; - else - init = intel_engines[i].init_legacy; - - if (!init) - continue; - - ret = intel_engine_setup(dev_priv, i); - if (ret) - goto cleanup; - - ret = init(dev_priv->engine[i]); - if (ret) + err = intel_engine_setup(dev_priv, i); + if (err) goto cleanup; mask |= ENGINE_MASK(i); @@ -166,14 +153,69 @@ int intel_engines_init(struct drm_i915_private *dev_priv) return 0; cleanup: + for_each_engine(engine, dev_priv, id) + kfree(engine); + return err; +} + +/** + * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * @dev_priv: i915 device private + * + * Return: non-zero if the initialization failed. + */ +int intel_engines_init(struct drm_i915_private *dev_priv) +{ + struct intel_device_info *device_info = mkwrite_device_info(dev_priv); + struct intel_engine_cs *engine; + enum intel_engine_id id, err_id; + unsigned int mask = 0; + int err = 0; + for_each_engine(engine, dev_priv, id) { + int (*init)(struct intel_engine_cs *engine); + if (i915.enable_execlists) + init = intel_engines[id].init_execlists; + else + init = intel_engines[id].init_legacy; + if (!init) { + kfree(engine); + dev_priv->engine[id] = NULL; + continue; + } + + err = init(engine); + if (err) { + err_id = id; + goto cleanup; + } + + mask |= ENGINE_MASK(id); + } + + /* + * Catch failures to update intel_engines table when the new engines + * are added to the driver by a warning and disabling the forgotten + * engines. + */ + if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) + device_info->ring_mask = mask; + + device_info->num_rings = hweight32(mask); + + return 0; + +cleanup: + for_each_engine(engine, dev_priv, id) { + if (id >= err_id) + kfree(engine); + else if (i915.enable_execlists) intel_logical_ring_cleanup(engine); else intel_engine_cleanup(engine); } - - return ret; + return err; } void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 0c852c024227..c8009c7bfbdd 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -68,8 +68,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine); int logical_render_ring_init(struct intel_engine_cs *engine); int logical_xcs_ring_init(struct intel_engine_cs *engine); -int intel_engines_init(struct drm_i915_private *dev_priv); - /* Logical Ring Contexts */ /* One extra page is added before LRC for GuC as shared data */ From 241455172b8683f65a325490baf2d8dd683b3832 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:01:35 +0000 Subject: [PATCH 0023/1299] drm/i915: Reset the gpu on takeover The GPU may be in an unknown state following resume and module load. The previous occupant may have left contexts loaded, or other dangerous state, which can cause an immediate GPU hang for us. The only save course of action is to reset the GPU prior to using it - similarly to how we reset the GPU prior to unload (before a second user may be affected by our leftover state). We need to reset the GPU very early in our load/resume sequence so that any stale HW pointers are revoked prior to any resource allocations we make (that may conflict). A reset should only be a couple of milliseconds on a slow device, a cost we should easily be able to absorb into our initialisation times. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170124110135.6418-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 27 +++++++++++++++++++++++---- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f8d1ffeccc69..507a6f69426f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -950,6 +950,7 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) goto put_bridge; intel_uncore_init(dev_priv); + i915_gem_init_mmio(dev_priv); return 0; @@ -1731,6 +1732,8 @@ static int i915_drm_resume_early(struct drm_device *dev) !(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload)) intel_power_domains_init_hw(dev_priv, true); + i915_gem_sanitize(dev_priv); + enable_rpm_wakeref_asserts(dev_priv); out: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 628f0fd53827..a53f6b30b695 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3120,6 +3120,7 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +void i915_gem_sanitize(struct drm_i915_private *i915); int i915_gem_load_init(struct drm_i915_private *dev_priv); void i915_gem_load_cleanup(struct drm_i915_private *dev_priv); void i915_gem_load_init_fences(struct drm_i915_private *dev_priv); @@ -3335,6 +3336,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); +void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); void i915_gem_init_swizzling(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a07b62732923..2522d4895ff7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4201,6 +4201,23 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) !i915_gem_context_is_kernel(engine->last_retired_context)); } +void i915_gem_sanitize(struct drm_i915_private *i915) +{ + /* + * If we inherit context state from the BIOS or earlier occupants + * of the GPU, the GPU may be in an inconsistent state when we + * try to take over. The only way to remove the earlier state + * is by resetting. However, resetting on earlier gen is tricky as + * it may impact the display and we are uncertain about the stability + * of the reset, so we only reset recent machines with logical + * context support (that must be reset to remove any stray contexts). + */ + if (HAS_HW_CONTEXTS(i915)) { + int reset = intel_gpu_reset(i915, ALL_ENGINES); + WARN_ON(reset && reset != -ENODEV); + } +} + int i915_gem_suspend(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; @@ -4271,10 +4288,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * machines is a good idea, we don't - just in case it leaves the * machine in an unusable condition. */ - if (HAS_HW_CONTEXTS(dev_priv)) { - int reset = intel_gpu_reset(dev_priv, ALL_ENGINES); - WARN_ON(reset && reset != -ENODEV); - } + i915_gem_sanitize(dev_priv); return 0; @@ -4492,6 +4506,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv) return ret; } +void i915_gem_init_mmio(struct drm_i915_private *i915) +{ + i915_gem_sanitize(i915); +} + void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) { From 5ec63bbdf527d26606c8ba9d3a841755d3887fbe Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 20 Jan 2017 19:04:06 +0200 Subject: [PATCH 0024/1299] drm/i915/dp: do not proceed with autotests if we don't ACK them There is no point in setting intel_dp->compliance.test_type, and proceeding with the autotests, if we're about to NAK the request. Some drive-by cleanups while at it. v2: look at the ACK bit, as the result may also contain TEST_EDID_CHECKSUM_WRITE Cc: Manasi Navare Reviewed-by: Manasi Navare Tested-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484931846-25390-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e80d620846c8..247fbf352576 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3985,45 +3985,42 @@ static uint8_t intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp) static void intel_dp_handle_test_request(struct intel_dp *intel_dp) { uint8_t response = DP_TEST_NAK; - uint8_t rxdata = 0; - int status = 0; + uint8_t request = 0; + int status; - status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_REQUEST, &rxdata, 1); + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_REQUEST, &request); if (status <= 0) { DRM_DEBUG_KMS("Could not read test request from sink\n"); goto update_status; } - switch (rxdata) { + switch (request) { case DP_TEST_LINK_TRAINING: DRM_DEBUG_KMS("LINK_TRAINING test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_TRAINING; response = intel_dp_autotest_link_training(intel_dp); break; case DP_TEST_LINK_VIDEO_PATTERN: DRM_DEBUG_KMS("TEST_PATTERN test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_VIDEO_PATTERN; response = intel_dp_autotest_video_pattern(intel_dp); break; case DP_TEST_LINK_EDID_READ: DRM_DEBUG_KMS("EDID test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_EDID_READ; response = intel_dp_autotest_edid(intel_dp); break; case DP_TEST_LINK_PHY_TEST_PATTERN: DRM_DEBUG_KMS("PHY_PATTERN test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_PHY_TEST_PATTERN; response = intel_dp_autotest_phy_pattern(intel_dp); break; default: - DRM_DEBUG_KMS("Invalid test request '%02x'\n", rxdata); + DRM_DEBUG_KMS("Invalid test request '%02x'\n", request); break; } + if (response & DP_TEST_ACK) + intel_dp->compliance.test_type = request; + update_status: - status = drm_dp_dpcd_write(&intel_dp->aux, - DP_TEST_RESPONSE, - &response, 1); + status = drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_RESPONSE, response); if (status <= 0) DRM_DEBUG_KMS("Could not write test response to sink\n"); } From c816e605ffb26ce1d3c06238c7de6662569ecb1e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:00:02 +0000 Subject: [PATCH 0025/1299] drm/i915: Assert that we don't submit to execlists whilst a preempt is pending To complement the check in execlists_elsp_ready(), also assert that we don't submit the same context while it has a lite restore still pending. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170124110009.28947-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 32096d141d57..9dd612a2df16 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -375,6 +375,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); u64 desc[2]; + GEM_BUG_ON(port[0].count > 1); if (!port[0].count) execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_IN); From 816ee798ec2b46b1f92aaebb1c7b5d2e1abdc43e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:00:03 +0000 Subject: [PATCH 0026/1299] drm/i915: Only disable execlist preemption for the duration of the request We need to prevent resubmission of the context immediately following an initial resubmit (which does a lite-restore preemption). Currently we do this by disabling all submission whilst the context is still active, but we can improve this by limiting the restriction to only until we receive notification from the context-switch interrupt that the lite-restore preemption is complete. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170124110009.28947-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 18 ++++++++++++------ drivers/gpu/drm/i915/intel_lrc.c | 14 ++++---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index fa69d72fdcb9..9d7a77ecec3d 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3320,15 +3320,21 @@ static int i915_engine_info(struct seq_file *m, void *unused) rcu_read_lock(); rq = READ_ONCE(engine->execlist_port[0].request); - if (rq) - print_request(m, rq, "\t\tELSP[0] "); - else + if (rq) { + seq_printf(m, "\t\tELSP[0] count=%d, ", + engine->execlist_port[0].count); + print_request(m, rq, "rq: "); + } else { seq_printf(m, "\t\tELSP[0] idle\n"); + } rq = READ_ONCE(engine->execlist_port[1].request); - if (rq) - print_request(m, rq, "\t\tELSP[1] "); - else + if (rq) { + seq_printf(m, "\t\tELSP[1] count=%d, ", + engine->execlist_port[1].count); + print_request(m, rq, "rq: "); + } else { seq_printf(m, "\t\tELSP[1] idle\n"); + } rcu_read_unlock(); spin_lock_irq(&engine->timeline->lock); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9dd612a2df16..9896027880ea 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -380,7 +380,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_IN); desc[0] = execlists_update_context(port[0].request); - engine->preempt_wa = port[0].count++; /* bdw only? fixed on skl? */ + port[0].count++; if (port[1].request) { GEM_BUG_ON(port[1].count); @@ -545,15 +545,11 @@ bool intel_execlists_idle(struct drm_i915_private *dev_priv) return true; } -static bool execlists_elsp_ready(struct intel_engine_cs *engine) +static bool execlists_elsp_ready(const struct intel_engine_cs *engine) { - int port; + const struct execlist_port *port = engine->execlist_port; - port = 1; /* wait for a free slot */ - if (engine->preempt_wa) - port = 0; /* wait for GPU to be idle before continuing */ - - return !engine->execlist_port[port].request; + return port[0].count + port[1].count < 2; } /* @@ -601,8 +597,6 @@ static void intel_lrc_irq_handler(unsigned long data) i915_gem_request_put(port[0].request); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); - - engine->preempt_wa = false; } GEM_BUG_ON(port[0].count == 0 && diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9183d148b36a..dbd32585f27a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -380,7 +380,6 @@ struct intel_engine_cs { struct rb_root execlist_queue; struct rb_node *execlist_first; unsigned int fw_domains; - bool preempt_wa; u32 ctx_desc_template; /* Contexts are pinned whilst they are active on the GPU. The last From 538b257dae83268cc3536fb4c4ab4f57901d449d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 15:18:05 +0000 Subject: [PATCH 0027/1299] drm/i915: Move breadcrumbs irq_posted up a level to engine In the next patch, we will use the irq_posted technique for another engine interrupt, rather than use two members for the atomic updates, we can use two bits of one instead. First, we need to update the breadcrumbs to use the new common engine->irq_posted. v2: Use set_bit() rather than __set_bit() to ensure atomicity with respect to other bits in the mask Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170124151805.26146-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 9 ++++----- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +++- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a53f6b30b695..6eff81fb939c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3963,7 +3963,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) */ if (engine->irq_seqno_barrier && rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current && - cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) { + test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) { struct task_struct *tsk; /* The ordering of irq_posted versus applying the barrier diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6fefc34ef602..7e087c344265 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1033,7 +1033,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { - smp_store_mb(engine->breadcrumbs.irq_posted, true); + set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); if (intel_engine_wakeup(engine)) trace_i915_gem_request_notify(engine); } diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index c6fa77177615..6b24f2544b6b 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -81,7 +81,7 @@ static void irq_enable(struct intel_engine_cs *engine) * we still need to force the barrier before reading the seqno, * just in case. */ - engine->breadcrumbs.irq_posted = true; + set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); /* Caller disables interrupts */ spin_lock(&engine->i915->irq_lock); @@ -95,8 +95,6 @@ static void irq_disable(struct intel_engine_cs *engine) spin_lock(&engine->i915->irq_lock); engine->irq_disable(engine); spin_unlock(&engine->i915->irq_lock); - - engine->breadcrumbs.irq_posted = false; } static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) @@ -257,7 +255,8 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, * in case the seqno passed. */ __intel_breadcrumbs_enable_irq(b); - if (READ_ONCE(b->irq_posted)) + if (test_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted)) wake_up_process(to_wait(next)->tsk); } @@ -610,7 +609,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) if (intel_engine_has_waiter(engine)) { b->timeout = wait_timeout(); __intel_breadcrumbs_enable_irq(b); - if (READ_ONCE(b->irq_posted)) + if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) wake_up_process(b->first_wait->tsk); } else { /* sanitize the IMR and unmask any auxiliary interrupts */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index dbd32585f27a..a9ea84ea3155 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -211,6 +211,9 @@ struct intel_engine_cs { struct intel_render_state *render_state; + unsigned long irq_posted; +#define ENGINE_IRQ_BREADCRUMB 0 + /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the @@ -229,7 +232,6 @@ struct intel_engine_cs { */ struct intel_breadcrumbs { struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */ - bool irq_posted; spinlock_t lock; /* protects the lists of requests; irqsafe */ struct rb_root waiters; /* sorted by retirement, priority */ From f747026c2b350fdb3c2d6fad51b7ebed4851183e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 15:20:21 +0000 Subject: [PATCH 0028/1299] drm/i915: Only run execlist context-switch handler after an interrupt Mark when we run the execlist tasklet following the interrupt, so we don't probe a potentially uninitialised register when submitting the contexts multiple times before the hardware responds. v2: Use a shared engine->irq_posted v3: Always use locked bitops to be sure of atomicity wrt to other bits in the mask. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170124152021.26587-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_irq.c | 7 +++++-- drivers/gpu/drm/i915/intel_lrc.c | 3 ++- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7e087c344265..3f3c9082b0f8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1349,8 +1349,11 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) { if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) notify_ring(engine); - if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) - tasklet_schedule(&engine->irq_tasklet); + + if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { + set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet_hi_schedule(&engine->irq_tasklet); + } } static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9896027880ea..f729568e5e54 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -564,7 +564,7 @@ static void intel_lrc_irq_handler(unsigned long data) intel_uncore_forcewake_get(dev_priv, engine->fw_domains); - if (!execlists_elsp_idle(engine)) { + while (test_and_clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { u32 __iomem *csb_mmio = dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); u32 __iomem *buf = @@ -1297,6 +1297,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name); /* After a GPU reset, we may have requests to replay */ + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); if (!execlists_elsp_idle(engine)) { engine->execlist_port[0].count = 0; engine->execlist_port[1].count = 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a9ea84ea3155..8e872730f8eb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -213,6 +213,7 @@ struct intel_engine_cs { unsigned long irq_posted; #define ENGINE_IRQ_BREADCRUMB 0 +#define ENGINE_IRQ_EXECLIST 1 /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the From a37951ac9f9642819e400814f6f30689db233c24 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:00:06 +0000 Subject: [PATCH 0029/1299] drm/i915: Skip the execlists CSB scan and rewrite if the ring is untouched If the CSB head/tail pointers are unchanged, we can skip the update of the CSB register afterwards. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170124110009.28947-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index f729568e5e54..99ab2d70be37 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -574,9 +574,12 @@ static void intel_lrc_irq_handler(unsigned long data) csb = readl(csb_mmio); head = GEN8_CSB_READ_PTR(csb); tail = GEN8_CSB_WRITE_PTR(csb); + if (head == tail) + break; + if (tail < head) tail += GEN8_CSB_ENTRIES; - while (head < tail) { + do { unsigned int idx = ++head % GEN8_CSB_ENTRIES; unsigned int status = readl(buf + 2 * idx); @@ -601,7 +604,7 @@ static void intel_lrc_irq_handler(unsigned long data) GEM_BUG_ON(port[0].count == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); - } + } while (head < tail); writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, GEN8_CSB_WRITE_PTR(csb) << 8), From 3833281adb0f01ca5b279664d14611d8ed1deb98 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:00:07 +0000 Subject: [PATCH 0030/1299] drm/i915: Only attempt to pass the first request to execlists Only the first request added to the execlist queue can be submitted. If this request is not the first request on the queue, it means that there are already higher priority requests waiting upon the tasklet and kicking it will make no difference. This is more relevant for a later patch, where we more eagerly try and kick the tasklet to handle the submission of new requests. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170124110009.28947-6-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 99ab2d70be37..11568ab9f248 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -651,10 +651,11 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->timeline->lock, flags); - if (insert_request(&request->priotree, &engine->execlist_queue)) + if (insert_request(&request->priotree, &engine->execlist_queue)) { engine->execlist_first = &request->priotree.node; - if (execlists_elsp_idle(engine)) - tasklet_hi_schedule(&engine->irq_tasklet); + if (execlists_elsp_idle(engine)) + tasklet_hi_schedule(&engine->irq_tasklet); + } spin_unlock_irqrestore(&engine->timeline->lock, flags); } From 48ea2554f46e7c1771dba1529a17aa04792871b2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:00:08 +0000 Subject: [PATCH 0031/1299] drm/i915: Dequeue execlists on a new request if any port is available If the second ELSP port is available, schedule the execlists tasklet to see if the new request can occupy it. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170124110009.28947-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 11568ab9f248..bee9d565b8f3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -653,7 +653,7 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) if (insert_request(&request->priotree, &engine->execlist_queue)) { engine->execlist_first = &request->priotree.node; - if (execlists_elsp_idle(engine)) + if (execlists_elsp_ready(engine)) tasklet_hi_schedule(&engine->irq_tasklet); } From 7c9e934ef8a09a1a42f15ce9f0f872fdfdb67b97 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:00:09 +0000 Subject: [PATCH 0032/1299] drm/i915: Emit dma-fence (and execlists submit) first from signaler When introduced, I thought that reducing client latency from the signaler was the priority. Since its inception the signaler has become responsible for keeping the execlists full, via the dma-fence. As this is very important to minimise overall execution time, signal the dma-fence first and then signal any waiting clients. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170124110009.28947-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 6b24f2544b6b..9fd002bcebb6 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -459,16 +459,16 @@ static int intel_breadcrumbs_signaler(void *arg) */ request = READ_ONCE(b->first_signal); if (signal_complete(request)) { + local_bh_disable(); + dma_fence_signal(&request->fence); + local_bh_enable(); /* kick start the tasklets */ + /* Wake up all other completed waiters and select the * next bottom-half for the next user interrupt. */ intel_engine_remove_wait(engine, &request->signaling.wait); - local_bh_disable(); - dma_fence_signal(&request->fence); - local_bh_enable(); /* kick start the tasklets */ - /* Find the next oldest signal. Note that as we have * not been holding the lock, another client may * have installed an even older signal than the one From eb955eee27d9dc176871540c43c9070ee4701642 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 21:29:39 +0000 Subject: [PATCH 0033/1299] drm/i915: Move atomic state free from out of fence release Fences are required to support being released from under an atomic context. The drm_atomic_state struct may take a mutex when being released and so we cannot drop a reference to the drm_atomic_state from the fence release path directly, and so we need to defer that unreference to a worker. [ 326.576697] WARNING: CPU: 2 PID: 366 at kernel/sched/core.c:7737 __might_sleep+0x5d/0x80 [ 326.576816] do not call blocking ops when !TASK_RUNNING; state=1 set at [] intel_breadcrumbs_signaler+0x59/0x270 [i915] [ 326.576818] Modules linked in: rfcomm fuse snd_hda_codec_hdmi bnep snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device snd_timer input_leds led_class snd punit_atom_debug btusb btrtl btbcm btintel intel_rapl bluetooth i915 drm_kms_helper syscopyarea sysfillrect iwlwifi sysimgblt soundcore fb_sys_fops mei_txe cfg80211 drm pwm_lpss_platform pwm_lpss pinctrl_cherryview fjes acpi_pad parport_pc ppdev parport autofs4 [ 326.576899] CPU: 2 PID: 366 Comm: i915/signal:0 Tainted: G U 4.10.0-rc3-patser+ #5030 [ 326.576902] Hardware name: /NUC5PPYB, BIOS PYBSWCEL.86A.0031.2015.0601.1712 06/01/2015 [ 326.576905] Call Trace: [ 326.576920] dump_stack+0x4d/0x6d [ 326.576926] __warn+0xc0/0xe0 [ 326.576931] warn_slowpath_fmt+0x5a/0x80 [ 326.577004] ? intel_breadcrumbs_signaler+0x59/0x270 [i915] [ 326.577075] ? intel_breadcrumbs_signaler+0x59/0x270 [i915] [ 326.577079] __might_sleep+0x5d/0x80 [ 326.577087] mutex_lock+0x1b/0x40 [ 326.577133] drm_property_free_blob+0x1e/0x80 [drm] [ 326.577167] ? drm_property_destroy+0xe0/0xe0 [drm] [ 326.577200] drm_mode_object_unreference+0x5c/0x70 [drm] [ 326.577233] drm_property_unreference_blob+0xe/0x10 [drm] [ 326.577260] __drm_atomic_helper_crtc_destroy_state+0x14/0x40 [drm_kms_helper] [ 326.577278] drm_atomic_helper_crtc_destroy_state+0x10/0x20 [drm_kms_helper] [ 326.577352] intel_crtc_destroy_state+0x9/0x10 [i915] [ 326.577388] drm_atomic_state_default_clear+0xea/0x1d0 [drm] [ 326.577462] intel_atomic_state_clear+0xd/0x20 [i915] [ 326.577497] drm_atomic_state_clear+0x1a/0x30 [drm] [ 326.577532] __drm_atomic_state_free+0x13/0x60 [drm] [ 326.577607] intel_atomic_commit_ready+0x6f/0x78 [i915] [ 326.577670] i915_sw_fence_release+0x3a/0x50 [i915] [ 326.577733] dma_i915_sw_fence_wake+0x39/0x80 [i915] [ 326.577741] dma_fence_signal+0xda/0x120 [ 326.577812] ? intel_breadcrumbs_signaler+0x59/0x270 [i915] [ 326.577884] intel_breadcrumbs_signaler+0xb1/0x270 [i915] [ 326.577889] kthread+0x127/0x130 [ 326.577961] ? intel_engine_remove_wait+0x1a0/0x1a0 [i915] [ 326.577964] ? kthread_stop+0x120/0x120 [ 326.577970] ret_from_fork+0x22/0x30 Fixes: c004a90b7263 ("drm/i915: Restore nonblocking awaits for modesetting") Reported-by: Maarten Lankhorst Signed-off-by: Chris Wilson Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170123212939.30345-1-chris@chris-wilson.co.uk Cc: # v4.10-rc1+ Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 5 +++++ drivers/gpu/drm/i915/intel_display.c | 28 ++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_drv.h | 2 ++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6eff81fb939c..3e07b8b0ec89 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2241,6 +2241,11 @@ struct drm_i915_private { struct i915_frontbuffer_tracking fb_tracking; + struct intel_atomic_helper { + struct llist_head free_list; + struct work_struct free_work; + } atomic_helper; + u16 orig_clock; bool mchbar_need_disable; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0bf8e1bfbe7e..5f55bc37c538 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14570,8 +14570,14 @@ intel_atomic_commit_ready(struct i915_sw_fence *fence, break; case FENCE_FREE: - drm_atomic_state_put(&state->base); - break; + { + struct intel_atomic_helper *helper = + &to_i915(state->base.dev)->atomic_helper; + + if (llist_add(&state->freed, &helper->free_list)) + schedule_work(&helper->free_work); + break; + } } return NOTIFY_DONE; @@ -16594,6 +16600,18 @@ static void sanitize_watermarks(struct drm_device *dev) drm_modeset_acquire_fini(&ctx); } +static void intel_atomic_helper_free_state(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), atomic_helper.free_work); + struct intel_atomic_state *state, *next; + struct llist_node *freed; + + freed = llist_del_all(&dev_priv->atomic_helper.free_list); + llist_for_each_entry_safe(state, next, freed, freed) + drm_atomic_state_put(&state->base); +} + int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -16613,6 +16631,9 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = &intel_mode_funcs; + INIT_WORK(&dev_priv->atomic_helper.free_work, + intel_atomic_helper_free_state); + intel_init_quirks(dev); intel_init_pm(dev_priv); @@ -17270,6 +17291,9 @@ void intel_modeset_cleanup(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + flush_work(&dev_priv->atomic_helper.free_work); + WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list)); + intel_disable_gt_powersave(dev_priv); /* diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 0cec0013ace0..396356ce4206 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -371,6 +371,8 @@ struct intel_atomic_state { struct skl_wm_values wm_results; struct i915_sw_fence commit_ready; + + struct llist_node freed; }; struct intel_plane_state { From 8da53efaa228f12f709b23d7cd08c5644af474c4 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 23 Jan 2017 10:32:36 -0800 Subject: [PATCH 0034/1299] drm/i915/kbl: Apply WaIncreaseDefaultTLBEntries on KBL. According to wa_database this Wa persist on KBL as it was on SKL. Cc: Tim Gore Cc: Mika Kuoppala Cc: Tvrtko Ursulin Signed-off-by: Rodrigo Vivi Reviewed-by: Ander Conselvan de Oliveira Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485196357-30599-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e808aad203d8..cc0bb7bb6775 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2184,12 +2184,12 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) * called on driver load and after a GPU reset, so you can place * workarounds here even if they get overwritten by GPU reset. */ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl */ if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_SKYLAKE(dev_priv)) + else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); else if (IS_BROXTON(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); From b976dc53ec43da887fb5731f2f7e1f770cec6074 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 23 Jan 2017 10:32:37 -0800 Subject: [PATCH 0035/1299] drm/i915: Introduce IS_GEN9_BC for Skylake and Kabylake. Along with GLK it was introduced the .is_lp and IS_GEN9_LP. So, following the same simplification standard we can put Skylake and Kabylake under the same bucket for most of the things. So let's add the IS_GEN9_BC for "Big Core" (non Atom based platforms). The i915_drv.c was let out of this patch on purpose because that is really a decision per platform, just like other cases where IS_KABYLAKE is different from IS_SKYLAKE. v2: fix conflict with IS_LP and 3 new cases for this big core bucket: - intel_ddi.c: intel_ddi_get_link_dpll - intel_fbc.c: find_compression_threshold - i915_gem_gtt.c: gtt_write_workarounds Cc: Anusha Srivatsa Cc: Ander Conselvan de Oliveira Cc: Jani Nikula Signed-off-by: Rodrigo Vivi Reviewed-by: Ander Conselvan de Oliveira Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485196357-30599-2-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 17 +++++++---------- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/intel_audio.c | 2 +- drivers/gpu/drm/i915/intel_color.c | 4 ++-- drivers/gpu/drm/i915/intel_ddi.c | 20 ++++++++++---------- drivers/gpu/drm/i915/intel_display.c | 12 ++++++------ drivers/gpu/drm/i915/intel_dp.c | 5 ++--- drivers/gpu/drm/i915/intel_dpll_mgr.c | 2 +- drivers/gpu/drm/i915/intel_fbc.c | 3 +-- drivers/gpu/drm/i915/intel_i2c.c | 4 ++-- drivers/gpu/drm/i915/intel_mocs.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 13 ++++++------- drivers/gpu/drm/i915/intel_runtime_pm.c | 10 +++++----- 14 files changed, 47 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9d7a77ecec3d..6d9914648970 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1224,21 +1224,18 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -1814,7 +1811,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) if (ret) goto out; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER; @@ -1834,8 +1831,8 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) &ia_freq); seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", intel_gpu_freq(dev_priv, (gpu_freq * - (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1))), + (IS_GEN9_BC(dev_priv) ? + GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); } @@ -4450,7 +4447,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3e07b8b0ec89..3623a6e46283 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2760,8 +2760,9 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_GEN8(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(7))) #define IS_GEN9(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(8))) -#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && INTEL_INFO(dev_priv)->is_lp) #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) +#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && IS_LP(dev_priv)) +#define IS_GEN9_BC(dev_priv) (IS_GEN9(dev_priv) && !IS_LP(dev_priv)) #define ENGINE_MASK(id) BIT(id) #define RENDER_RING ENGINE_MASK(RCS) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index cc0bb7bb6775..40685c68260b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2189,7 +2189,7 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); else if (IS_BROXTON(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 16c202781db0..cd9d20763140 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -702,7 +702,7 @@ static void i915_audio_component_codec_wake_override(struct device *kdev, struct drm_i915_private *dev_priv = kdev_to_i915(kdev); u32 tmp; - if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) + if (!IS_GEN9_BC(dev_priv)) return; i915_audio_component_get_power(kdev); diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index d81232b79f00..34952d04485e 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -536,8 +536,8 @@ void intel_color_init(struct drm_crtc *crtc) } else if (IS_HASWELL(dev_priv)) { dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_luts = haswell_load_luts; - } else if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv) || - IS_BROXTON(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || + IS_BROXTON(dev_priv)) { dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_luts = broadwell_load_luts; } else { diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 66b367d0771a..9a9a670f8549 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -445,7 +445,7 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por if (IS_GEN9_LP(dev_priv)) return hdmi_level; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); hdmi_default_entry = 8; } else if (IS_BROADWELL(dev_priv)) { @@ -518,7 +518,7 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); } - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* If we're boosting the current, set bit 31 of trans1 */ if (dev_priv->vbt.ddi_port_info[port].dp_boost_level) iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; @@ -572,7 +572,7 @@ static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder) hdmi_level = intel_ddi_hdmi_level(dev_priv, port); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); /* If we're boosting the current, set bit 31 of trans1 */ @@ -1089,7 +1089,7 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, if (INTEL_GEN(dev_priv) <= 8) hsw_ddi_clock_get(encoder, pipe_config); - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) skl_ddi_clock_get(encoder, pipe_config); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_clock_get(encoder, pipe_config); @@ -1150,7 +1150,7 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, struct intel_encoder *intel_encoder = intel_ddi_get_crtc_new_encoder(crtc_state); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) return skl_ddi_pll_select(intel_crtc, crtc_state, intel_encoder); else if (IS_GEN9_LP(dev_priv)) @@ -1641,7 +1641,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) level = translate_signal_level(signal_levels); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); @@ -1658,7 +1658,7 @@ void intel_ddi_clk_select(struct intel_encoder *encoder, if (WARN_ON(!pll)) return; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { uint32_t val; /* DDI -> PLL mapping */ @@ -1714,7 +1714,7 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); intel_ddi_clk_select(encoder, pll); intel_prepare_hdmi_ddi_buffers(encoder); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, @@ -1784,7 +1784,7 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, intel_edp_panel_off(intel_dp); } - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) | DPLL_CTRL2_DDI_CLK_OFF(port))); else if (INTEL_GEN(dev_priv) < 9) @@ -2157,7 +2157,7 @@ intel_ddi_get_link_dpll(struct intel_dp *intel_dp, int clock) pll->state = tmp_pll_state; return NULL; } - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { pll = skl_find_link_pll(dev_priv, clock); } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { pll = hsw_ddi_dp_get_dpll(encoder, clock); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5f55bc37c538..5a9da484bb23 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5800,7 +5800,7 @@ static int skl_calc_cdclk(int max_pixclk, int vco); static void intel_update_max_cdclk(struct drm_i915_private *dev_priv) { - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; int max_cdclk, vco; @@ -10673,7 +10673,7 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, port = (tmp & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skylake_get_ddi_pll(dev_priv, port, pipe_config); else if (IS_GEN9_LP(dev_priv)) bxt_get_ddi_pll(dev_priv, port, pipe_config); @@ -15681,7 +15681,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) */ found = I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_INIT_DISPLAY_DETECTED; /* WaIgnoreDDIAStrap: skl */ - if (found || IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (found || IS_GEN9_BC(dev_priv)) intel_ddi_init(dev_priv, PORT_A); /* DDI B, C and D detection is indicated by the SFUSE_STRAP @@ -15697,7 +15697,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) /* * On SKL we don't have a way to detect DDI-E so we rely on VBT. */ - if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && + if (IS_GEN9_BC(dev_priv) && (dev_priv->vbt.ddi_port_info[PORT_E].supports_dp || dev_priv->vbt.ddi_port_info[PORT_E].supports_dvi || dev_priv->vbt.ddi_port_info[PORT_E].supports_hdmi)) @@ -16196,7 +16196,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) } /* Returns the core display clock speed */ - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) dev_priv->display.get_display_clock_speed = skylake_get_display_clock_speed; else if (IS_GEN9_LP(dev_priv)) @@ -16277,7 +16277,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) bxt_modeset_commit_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { dev_priv->display.modeset_commit_cdclk = skl_modeset_commit_cdclk; dev_priv->display.modeset_calc_cdclk = diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 247fbf352576..e0f9b9e49acc 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -226,7 +226,7 @@ intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates) if (IS_GEN9_LP(dev_priv)) { *source_rates = bxt_rates; size = ARRAY_SIZE(bxt_rates); - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { *source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); } else { @@ -1751,8 +1751,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, * DPLL0 VCO may need to be adjusted to get the correct * clock for eDP. This will affect cdclk as well. */ - if (is_edp(intel_dp) && - (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) { + if (is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) { int vco; switch (pipe_config->port_clock / 2) { diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index c92a2558beb4..368eff93a0a6 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2015,7 +2015,7 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) dpll_mgr = &skl_pll_mgr; else if (IS_GEN9_LP(dev_priv)) dpll_mgr = &bxt_pll_mgr; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 89fe5c8464df..51585008fb9d 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -537,8 +537,7 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, * reserved range size, so it always assumes the maximum (8mb) is used. * If we enable FBC using a CFB on that memory range we'll get FIFO * underruns, even if that range is not reserved by the BIOS. */ - if (IS_BROADWELL(dev_priv) || - IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv)) end = ggtt->stolen_size - 8 * 1024 * 1024; else end = U64_MAX; diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index bce1ba80f277..b6401e8f1bd6 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -74,7 +74,7 @@ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv, { if (IS_GEN9_LP(dev_priv)) return &gmbus_pins_bxt[pin]; - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) return &gmbus_pins_skl[pin]; else if (IS_BROADWELL(dev_priv)) return &gmbus_pins_bdw[pin]; @@ -89,7 +89,7 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, if (IS_GEN9_LP(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bxt); - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) size = ARRAY_SIZE(gmbus_pins_skl); else if (IS_BROADWELL(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bdw); diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index c787fc4e6eb9..8f98fc714fe9 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -178,7 +178,7 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->table = skylake_mocs_table; result = true; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 249623d45be0..d4db14b53b61 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2895,8 +2895,7 @@ static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) || - IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) return true; return false; @@ -5294,7 +5293,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || - IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + IS_GEN9_BC(dev_priv)) { u32 ddcc_status = 0; if (sandybridge_pcode_read(dev_priv, @@ -5307,7 +5306,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.max_freq); } - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ @@ -5637,7 +5636,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) /* convert DDR frequency from units of 266.6MHz to bandwidth */ min_ring_freq = mult_frac(min_ring_freq, 8, 3); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; @@ -5655,7 +5654,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) int diff = max_gpu_freq - gpu_freq; unsigned int ia_freq = 0, ring_freq = 0; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* * ring_freq = 2 * GT. ring_freq is in 100MHz units * No floor required for ring frequency on SKL. @@ -6775,7 +6774,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index c0b7e95b5b8e..66aa1bbb42f0 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -732,7 +732,7 @@ gen9_sanitize_power_well_requests(struct drm_i915_private *dev_priv, * other request bits to be set, so WARN for those. */ if (power_well_id == SKL_DISP_PW_1 || - ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && + (IS_GEN9_BC(dev_priv) && power_well_id == SKL_DISP_PW_MISC_IO)) DRM_DEBUG_DRIVER("Clearing auxiliary requests for %s forced on " "by DMC\n", power_well->name); @@ -2323,7 +2323,7 @@ static uint32_t get_allowed_dc_mask(const struct drm_i915_private *dev_priv, int requested_dc; int max_dc; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { max_dc = 2; mask = 0; } else if (IS_GEN9_LP(dev_priv)) { @@ -2398,7 +2398,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) set_power_wells(power_domains, hsw_power_wells); } else if (IS_BROADWELL(dev_priv)) { set_power_wells(power_domains, bdw_power_wells); - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { set_power_wells(power_domains, skl_power_wells); } else if (IS_BROXTON(dev_priv)) { set_power_wells(power_domains, bxt_power_wells); @@ -2730,7 +2730,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) power_domains->initializing = true; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { skl_display_core_init(dev_priv, resume); } else if (IS_GEN9_LP(dev_priv)) { bxt_display_core_init(dev_priv, resume); @@ -2769,7 +2769,7 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv) if (!i915.disable_power_well) intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_display_core_uninit(dev_priv); else if (IS_GEN9_LP(dev_priv)) bxt_display_core_uninit(dev_priv); From 1c044f9bbc107313af5f3c6bfa21300a6d0a08cd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Jan 2017 17:26:01 +0000 Subject: [PATCH 0036/1299] drm/i915: Remove early pre-production RPS workarounds for BXT Remove WaGsvDisableTurbo and WaRsUseTimeoutMode as these were only for pre-production Broxton devices, and this code is now defunct. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_pm.c | 35 +++------------------------------ 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d4db14b53b61..a04d81aa1fc1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4935,10 +4935,6 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - return; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); WARN_ON(val > dev_priv->rps.max_freq); WARN_ON(val < dev_priv->rps.min_freq); @@ -5335,22 +5331,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) { intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - /* - * BIOS could leave the Hw Turbo enabled, so need to explicitly - * clear out the Control register just to avoid inconsitency - * with debugfs interface, which will show Turbo as enabled - * only and that is not expected by the User after adding the - * WaGsvDisableTurbo. Apart from this there is no problem even - * if the Turbo is left enabled in the Control register, as the - * Up/Down interrupts would remain masked. - */ - gen9_disable_rps(dev_priv); - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - return; - } - /* Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RC_VIDEO_FREQ, GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); @@ -5410,18 +5390,9 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) if (intel_enable_rc6() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); - /* WaRsUseTimeoutMode:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */ - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - rc6_mask); - } else { - I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_EI_MODE(1) | - rc6_mask); - } + I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + I915_WRITE(GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_EI_MODE(1) | rc6_mask); /* * 3b: Enable Coarse Power Gating only when RC6 is enabled. From da15f7cb1914aa6ca3efb874e04ae3d96038641a Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Tue, 24 Jan 2017 08:16:34 -0800 Subject: [PATCH 0037/1299] drm/i915: Add support for DP link training compliance This patch adds support to handle automated DP compliance link training test requests. This patch has been tested with Unigraf DPR-120 DP Compliance device for testing Link Training Compliance. After we get a short pulse Compliance test request, test request values are read and hotplug uevent is sent in order to trigger another modeset during which the pipe is configured and link is retrained and enabled for link parameters requested by the test. v5: * Only modify the compliance structure after all validation is done (Jani Nikula) * Remove the variable test_result (Jani Nikula) v4: * Return TEST_NAK for read failures and invalid values (Jani Nikula) * Conver the test link BW to link rate before storing (Jani Nikula) v3: * Validate the test link rate and lane count as soon as the request comes (Jani Nikula) v2: * Validate the test lane count before using it in intel_dp_compute_config (Jani Nikula) Signed-off-by: Manasi Navare Cc: Jani Nikula Cc: Daniel Vetter Cc: Ville Syrjala Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485274594-17361-1-git-send-email-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 65 +++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/intel_drv.h | 2 + 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e0f9b9e49acc..26d7fc1bc541 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1613,6 +1613,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, /* Conveniently, the link BW constants become indices with a shift...*/ int min_clock = 0; int max_clock; + int link_rate_index; int bpp, mode_rate; int link_avail, link_clock; int common_rates[DP_MAX_SUPPORTED_RATES] = {}; @@ -1654,6 +1655,15 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return false; + /* Use values requested by Compliance Test Request */ + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + link_rate_index = intel_dp_link_rate_index(intel_dp, + common_rates, + intel_dp->compliance.test_link_rate); + if (link_rate_index >= 0) + min_clock = max_clock = link_rate_index; + min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; + } DRM_DEBUG_KMS("DP link computation with max lane count %i " "max bw %d pixel clock %iKHz\n", max_lane_count, common_rates[max_clock], @@ -3919,8 +3929,46 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_ACK; - return test_result; + int status = 0; + int min_lane_count = 1; + int common_rates[DP_MAX_SUPPORTED_RATES] = {}; + int link_rate_index, test_link_rate; + uint8_t test_lane_count, test_link_bw; + /* (DP CTS 1.2) + * 4.3.1.11 + */ + /* Read the TEST_LANE_COUNT and TEST_LINK_RTAE fields (DP CTS 3.1.4) */ + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LANE_COUNT, + &test_lane_count); + + if (status <= 0) { + DRM_DEBUG_KMS("Lane count read failed\n"); + return DP_TEST_NAK; + } + test_lane_count &= DP_MAX_LANE_COUNT_MASK; + /* Validate the requested lane count */ + if (test_lane_count < min_lane_count || + test_lane_count > intel_dp->max_sink_lane_count) + return DP_TEST_NAK; + + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LINK_RATE, + &test_link_bw); + if (status <= 0) { + DRM_DEBUG_KMS("Link Rate read failed\n"); + return DP_TEST_NAK; + } + /* Validate the requested link rate */ + test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw); + link_rate_index = intel_dp_link_rate_index(intel_dp, + common_rates, + test_link_rate); + if (link_rate_index < 0) + return DP_TEST_NAK; + + intel_dp->compliance.test_lane_count = test_lane_count; + intel_dp->compliance.test_link_rate = test_link_rate; + + return DP_TEST_ACK; } static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) @@ -4131,9 +4179,8 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) if (!intel_dp->lane_count) return; - /* if link training is requested we should perform it always */ - if ((intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) || - (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count))) { + /* Retrain if Channel EQ or CR not ok */ + if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) { DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n", intel_encoder->base.name); @@ -4158,6 +4205,7 @@ static bool intel_dp_short_pulse(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); + struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; u8 sink_irq_vector = 0; u8 old_sink_count = intel_dp->sink_count; bool ret; @@ -4191,7 +4239,7 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) sink_irq_vector); if (sink_irq_vector & DP_AUTOMATED_TEST_REQUEST) - DRM_DEBUG_DRIVER("Test request in short pulse not handled\n"); + intel_dp_handle_test_request(intel_dp); if (sink_irq_vector & (DP_CP_IRQ | DP_SINK_SPECIFIC_IRQ)) DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n"); } @@ -4199,6 +4247,11 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); intel_dp_check_link_status(intel_dp); drm_modeset_unlock(&dev->mode_config.connection_mutex); + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + DRM_DEBUG_KMS("Link Training Compliance Test requested\n"); + /* Send a Hotplug Uevent to userspace to start modeset */ + drm_kms_helper_hotplug_event(intel_encoder->base.dev); + } return true; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 396356ce4206..fbd20dab5b45 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -896,6 +896,8 @@ struct intel_dp_compliance { unsigned long test_type; struct intel_dp_compliance_data test_data; bool test_active; + int test_link_rate; + u8 test_lane_count; }; struct intel_dp { From b48a5ba9710f83d998f16917d57bfa622991e9d6 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 20 Jan 2017 19:09:28 -0800 Subject: [PATCH 0038/1299] drm/i915: Fixes to support DP Compliance EDID tests This patch addresses a few issues from the original patch for DP Compliance EDID test support submitted by Todd Previte Video Mode requested in the EDID test handler for the EDID Read test (CTS 4.2.2.3) should be set to PREFERRED as per the CTS spec. v2: * Added read debugfs data from test_data.edid if its EDID test (Jani NIkula) Signed-off-by: Manasi Navare Cc: Jani Nikula Cc: Daniel Vetter Cc: Ville Syrjala Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484968170-12467-3-git-send-email-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++++- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6d9914648970..726256fa35ce 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3775,7 +3775,10 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) if (connector->status == connector_status_connected && connector->encoder != NULL) { intel_dp = enc_to_intel_dp(connector->encoder); - seq_printf(m, "%lx", intel_dp->compliance.test_data.edid); + if (intel_dp->compliance.test_type == + DP_TEST_LINK_EDID_READ) + seq_printf(m, "%lx", + intel_dp->compliance.test_data.edid); } else seq_puts(m, "0"); } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 26d7fc1bc541..e245802c5727 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3979,7 +3979,7 @@ static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_NAK; + uint8_t test_result = DP_TEST_ACK; struct intel_connector *intel_connector = intel_dp->attached_connector; struct drm_connector *connector = &intel_connector->base; @@ -4014,7 +4014,7 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) DRM_DEBUG_KMS("Failed to write EDID checksum\n"); test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE; - intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_STANDARD; + intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_PREFERRED; } /* Set test active flag here so userspace doesn't interrupt things */ From 08b79f62a1a3f6774cf27db4d12f3f938486c4f9 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 20 Jan 2017 19:09:29 -0800 Subject: [PATCH 0039/1299] drm: Add definitions for DP compliance Video pattern tests v4: * Remove redundant single bit defs (Jani Nikula) v3: * Fix the conventions in bit definitions (Jani Nikula) v2: * Add all the other DP Complianec TEST register defs (Jani Nikula) Cc: dri-devel@lists.freedesktop.org Cc: Jani Nikula Cc: Daniel Vetter Cc: Ville Syrjala Signed-off-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484968170-12467-4-git-send-email-manasi.d.navare@intel.com --- include/drm/drm_dp_helper.h | 57 +++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 04681359a6f5..ba89295c8651 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -417,6 +417,63 @@ #define DP_TEST_LANE_COUNT 0x220 #define DP_TEST_PATTERN 0x221 +# define DP_NO_TEST_PATTERN 0x0 +# define DP_COLOR_RAMP 0x1 +# define DP_BLACK_AND_WHITE_VERTICAL_LINES 0x2 +# define DP_COLOR_SQUARE 0x3 + +#define DP_TEST_H_TOTAL_HI 0x222 +#define DP_TEST_H_TOTAL_LO 0x223 + +#define DP_TEST_V_TOTAL_HI 0x224 +#define DP_TEST_V_TOTAL_LO 0x225 + +#define DP_TEST_H_START_HI 0x226 +#define DP_TEST_H_START_LO 0x227 + +#define DP_TEST_V_START_HI 0x228 +#define DP_TEST_V_START_LO 0x229 + +#define DP_TEST_HSYNC_HI 0x22A +# define DP_TEST_HSYNC_POLARITY (1 << 7) +# define DP_TEST_HSYNC_WIDTH_HI_MASK (127 << 0) +#define DP_TEST_HSYNC_WIDTH_LO 0x22B + +#define DP_TEST_VSYNC_HI 0x22C +# define DP_TEST_VSYNC_POLARITY (1 << 7) +# define DP_TEST_VSYNC_WIDTH_HI_MASK (127 << 0) +#define DP_TEST_VSYNC_WIDTH_LO 0x22D + +#define DP_TEST_H_WIDTH_HI 0x22E +#define DP_TEST_H_WIDTH_LO 0x22F + +#define DP_TEST_V_HEIGHT_HI 0x230 +#define DP_TEST_V_HEIGHT_LO 0x231 + +#define DP_TEST_MISC0 0x232 +# define DP_TEST_SYNC_CLOCK (1 << 0) +# define DP_TEST_COLOR_FORMAT_MASK (3 << 1) +# define DP_TEST_COLOR_FORMAT_SHIFT 1 +# define DP_COLOR_FORMAT_RGB (0 << 1) +# define DP_COLOR_FORMAT_YCbCr422 (1 << 1) +# define DP_COLOR_FORMAT_YCbCr444 (2 << 1) +# define DP_TEST_DYNAMIC_RANGE_CEA (1 << 3) +# define DP_TEST_YCBCR_COEFFICIENTS (1 << 4) +# define DP_YCBCR_COEFFICIENTS_ITU601 (0 << 4) +# define DP_YCBCR_COEFFICIENTS_ITU709 (1 << 4) +# define DP_TEST_BIT_DEPTH_MASK (7 << 5) +# define DP_TEST_BIT_DEPTH_SHIFT 5 +# define DP_TEST_BIT_DEPTH_6 (0 << 5) +# define DP_TEST_BIT_DEPTH_8 (1 << 5) +# define DP_TEST_BIT_DEPTH_10 (2 << 5) +# define DP_TEST_BIT_DEPTH_12 (3 << 5) +# define DP_TEST_BIT_DEPTH_16 (4 << 5) + +#define DP_TEST_MISC1 0x233 +# define DP_TEST_REFRESH_DENOMINATOR (1 << 0) +# define DP_TEST_INTERLACED (1 << 1) + +#define DP_TEST_REFRESH_RATE_NUMERATOR 0x234 #define DP_TEST_CRC_R_CR 0x240 #define DP_TEST_CRC_G_Y 0x242 From 611032bfa71a7634e801142016f2d41485f8638f Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Tue, 24 Jan 2017 08:21:49 -0800 Subject: [PATCH 0040/1299] drm/i915: Add support for DP Video pattern compliance tests The intel_dp_autotest_video_pattern() function gets invoked through the compliance test handler on a HPD short pulse if the test type is set to DP_TEST_VIDEO_PATTERN. This performs the DPCD registers reads to read the requested test pattern, video pattern resolution, frame rate and bits per color value. The results of this analysis are handed off to userspace so that the userspace app can set the video pattern mode appropriately for the test result/response. When the test is requested with specific BPC value, we read the BPC value from the DPCD register. If this BPC value in intel_dp structure has a non-zero value and we're on a display port connector, then we use the value to calculate the bpp for the pipe. Also in this case if its a 18bpp video pattern request, then we force the dithering on pipe to be disabled since it causes CRC mismatches. The compliance_test_active flag is set at the end of the individual test handling functions. This is so that the kernel-side operations can be completed without the risk of interruption from the userspace app that is polling on that flag. v5: * Remove test_result variable * Populate the compliance test data at the end of the function (Jani Nikula) v4: *Return TEST_NAK on read failures and invalid values (Jani Nikula) * Address CRC mismatch errors v3: * Use the updated properly shifted bit definitions (Jani Nikula) * Force dithering to be disabled on 18bpp compliance test request (Manasi Navare) v2: * Updated the DPCD Register reads based on proper defines in header (Jani Nikula) * Squahsed the patch that forced the pipe bpp to compliance test bpp (Jani Nikula) Signed-off-by: Manasi Navare Cc: Jani Nikula Cc: Daniel Vetter Cc: Ville Syrjala Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485274909-17470-1-git-send-email-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 9 ++++ drivers/gpu/drm/i915/intel_display.c | 7 ++- drivers/gpu/drm/i915/intel_dp.c | 68 +++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_dp_mst.c | 7 ++- drivers/gpu/drm/i915/intel_drv.h | 11 +++++ 5 files changed, 97 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 726256fa35ce..0a39487386ad 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3779,6 +3779,15 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) DP_TEST_LINK_EDID_READ) seq_printf(m, "%lx", intel_dp->compliance.test_data.edid); + else if (intel_dp->compliance.test_type == + DP_TEST_LINK_VIDEO_PATTERN) { + seq_printf(m, "hdisplay: %d\n", + intel_dp->compliance.test_data.hdisplay); + seq_printf(m, "vdisplay: %d\n", + intel_dp->compliance.test_data.vdisplay); + seq_printf(m, "bpc: %u\n", + intel_dp->compliance.test_data.bpc); + } } else seq_puts(m, "0"); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5a9da484bb23..ef65b4be5e56 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13060,8 +13060,11 @@ intel_modeset_pipe_config(struct drm_crtc *crtc, } /* Dithering seems to not pass-through bits correctly when it should, so - * only enable it on 6bpc panels. */ - pipe_config->dither = pipe_config->pipe_bpp == 6*3; + * only enable it on 6bpc panels and when its not a compliance + * test requesting 6bpc video pattern. + */ + pipe_config->dither = (pipe_config->pipe_bpp == 6*3) && + !pipe_config->dither_force_disable; DRM_DEBUG_KMS("hw max bpp: %i, pipe bpp: %i, dithering: %i\n", base_bpp, pipe_config->pipe_bpp, pipe_config->dither); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e245802c5727..0fafbec6dacf 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -28,8 +28,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -1593,6 +1595,13 @@ static int intel_dp_compute_bpp(struct intel_dp *intel_dp, if (bpc > 0) bpp = min(bpp, 3*bpc); + /* For DP Compliance we override the computed bpp for the pipe */ + if (intel_dp->compliance.test_data.bpc != 0) { + pipe_config->pipe_bpp = 3*intel_dp->compliance.test_data.bpc; + pipe_config->dither_force_disable = pipe_config->pipe_bpp == 6*3; + DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", + pipe_config->pipe_bpp); + } return bpp; } @@ -3973,8 +3982,63 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_NAK; - return test_result; + uint8_t test_pattern; + uint16_t test_misc; + __be16 h_width, v_height; + int status = 0; + + /* Read the TEST_PATTERN (DP CTS 3.1.5) */ + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_PATTERN, + &test_pattern, 1); + if (status <= 0) { + DRM_DEBUG_KMS("Test pattern read failed\n"); + return DP_TEST_NAK; + } + if (test_pattern != DP_COLOR_RAMP) + return DP_TEST_NAK; + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_H_WIDTH_HI, + &h_width, 2); + if (status <= 0) { + DRM_DEBUG_KMS("H Width read failed\n"); + return DP_TEST_NAK; + } + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_V_HEIGHT_HI, + &v_height, 2); + if (status <= 0) { + DRM_DEBUG_KMS("V Height read failed\n"); + return DP_TEST_NAK; + } + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_MISC0, + &test_misc, 1); + if (status <= 0) { + DRM_DEBUG_KMS("TEST MISC read failed\n"); + return DP_TEST_NAK; + } + if ((test_misc & DP_TEST_COLOR_FORMAT_MASK) != DP_COLOR_FORMAT_RGB) + return DP_TEST_NAK; + if (test_misc & DP_TEST_DYNAMIC_RANGE_CEA) + return DP_TEST_NAK; + switch (test_misc & DP_TEST_BIT_DEPTH_MASK) { + case DP_TEST_BIT_DEPTH_6: + intel_dp->compliance.test_data.bpc = 6; + break; + case DP_TEST_BIT_DEPTH_8: + intel_dp->compliance.test_data.bpc = 8; + break; + default: + return DP_TEST_NAK; + } + + intel_dp->compliance.test_data.video_pattern = test_pattern; + intel_dp->compliance.test_data.hdisplay = be16_to_cpu(h_width); + intel_dp->compliance.test_data.vdisplay = be16_to_cpu(v_height); + /* Set test active flag here so userspace doesn't interrupt things */ + intel_dp->compliance.test_active = 1; + + return DP_TEST_ACK; } static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 205fe4748ec5..29a9af177734 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -47,6 +47,11 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = false; bpp = 24; + if (intel_dp->compliance.test_data.bpc) { + bpp = intel_dp->compliance.test_data.bpc * 3; + DRM_DEBUG_KMS("Setting pipe bpp to %d\n", + bpp); + } /* * for MST we always configure max link bw - the spec doesn't * seem to suggest we should do otherwise. @@ -55,7 +60,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->lane_count = lane_count; - pipe_config->pipe_bpp = 24; + pipe_config->pipe_bpp = bpp; pipe_config->port_clock = intel_dp_max_link_rate(intel_dp); state = pipe_config->base.state; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index fbd20dab5b45..1fb593ecc60c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -580,6 +580,14 @@ struct intel_crtc_state { */ bool dither; + /* + * Dither gets enabled for 18bpp which causes CRC mismatch errors for + * compliance video pattern tests. + * Disable dither only if it is a compliance test request for + * 18bpp. + */ + bool dither_force_disable; + /* Controls for the clock computation, to override various stages. */ bool clock_set; @@ -890,6 +898,9 @@ struct intel_dp_desc { struct intel_dp_compliance_data { unsigned long edid; + uint8_t video_pattern; + uint16_t hdisplay, vdisplay; + uint8_t bpc; }; struct intel_dp_compliance { From 9fcee2f77e88f1d7c3f72fca418cdae3e79a6e83 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 26 Jan 2017 10:19:19 +0000 Subject: [PATCH 0041/1299] drm/i915: Report the failure to write to the punit The write to the punit may fail, so propagate the error code back to its callers. Of particular interest are the RPS writes, so add appropriate user error codes and logging. v2: Add DEBUG for failed frequency changes during RPS. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170126101919.13211-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++-- drivers/gpu/drm/i915/i915_drv.h | 4 +-- drivers/gpu/drm/i915/i915_irq.c | 5 ++- drivers/gpu/drm/i915/i915_sysfs.c | 9 +++--- drivers/gpu/drm/i915/intel_pm.c | 45 +++++++++++++++++++-------- drivers/gpu/drm/i915/intel_sideband.c | 10 ++++-- 6 files changed, 53 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0a39487386ad..3ae06568df7b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4278,7 +4278,8 @@ i915_max_freq_set(void *data, u64 val) dev_priv->rps.max_freq_softlimit = val; - intel_set_rps(dev_priv, val); + if (intel_set_rps(dev_priv, val)) + DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); mutex_unlock(&dev_priv->rps.hw_lock); @@ -4333,7 +4334,8 @@ i915_min_freq_set(void *data, u64 val) dev_priv->rps.min_freq_softlimit = val; - intel_set_rps(dev_priv, val); + if (intel_set_rps(dev_priv, val)) + DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); mutex_unlock(&dev_priv->rps.hw_lock); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3623a6e46283..457129b64d79 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3701,7 +3701,7 @@ extern void i915_redisable_vga(struct drm_i915_private *dev_priv); extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv); extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val); extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv); -extern void intel_set_rps(struct drm_i915_private *dev_priv, u8 val); +extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val); extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); @@ -3727,7 +3727,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, /* intel_sideband.c */ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr); -void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val); +int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val); u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr); u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg); void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3f3c9082b0f8..059d66b46e1a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1209,7 +1209,10 @@ static void gen6_pm_rps_work(struct work_struct *work) new_delay += adj; new_delay = clamp_t(int, new_delay, min, max); - intel_set_rps(dev_priv, new_delay); + if (intel_set_rps(dev_priv, new_delay)) { + DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); + dev_priv->rps.last_adj = 0; + } mutex_unlock(&dev_priv->rps.hw_lock); } diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 376ac957cd1c..a721ff116101 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -395,13 +395,13 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, /* We still need *_set_rps to process the new max_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. */ - intel_set_rps(dev_priv, val); + ret = intel_set_rps(dev_priv, val); mutex_unlock(&dev_priv->rps.hw_lock); intel_runtime_pm_put(dev_priv); - return count; + return ret ?: count; } static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -448,14 +448,13 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, /* We still need *_set_rps to process the new min_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. */ - intel_set_rps(dev_priv, val); + ret = intel_set_rps(dev_priv, val); mutex_unlock(&dev_priv->rps.hw_lock); intel_runtime_pm_put(dev_priv); - return count; - + return ret ?: count; } static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a04d81aa1fc1..bec4283cc228 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4933,7 +4933,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) /* gen6_set_rps is called to update the frequency request, but should also be * called when the range (min_delay and max_delay) is modified so that we can * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ -static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) +static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); WARN_ON(val > dev_priv->rps.max_freq); @@ -4968,10 +4968,14 @@ static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) dev_priv->rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); + + return 0; } -static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) +static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) { + int err; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); WARN_ON(val > dev_priv->rps.max_freq); WARN_ON(val < dev_priv->rps.min_freq); @@ -4983,13 +4987,18 @@ static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); if (val != dev_priv->rps.cur_freq) { - vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); + err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); + if (err) + return err; + if (!IS_CHERRYVIEW(dev_priv)) gen6_set_rps_thresholds(dev_priv, val); } dev_priv->rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); + + return 0; } /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down @@ -5002,6 +5011,7 @@ static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) { u32 val = dev_priv->rps.idle_freq; + int err; if (dev_priv->rps.cur_freq <= val) return; @@ -5019,8 +5029,11 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) * power than the render powerwell. */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); - valleyview_set_rps(dev_priv, val); + err = valleyview_set_rps(dev_priv, val); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); + + if (err) + DRM_ERROR("Failed to set RPS for idle\n"); } void gen6_rps_busy(struct drm_i915_private *dev_priv) @@ -5035,10 +5048,11 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) gen6_enable_rps_interrupts(dev_priv); /* Ensure we start at the user's desired frequency */ - intel_set_rps(dev_priv, - clamp(dev_priv->rps.cur_freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit)); + if (intel_set_rps(dev_priv, + clamp(dev_priv->rps.cur_freq, + dev_priv->rps.min_freq_softlimit, + dev_priv->rps.max_freq_softlimit))) + DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); } mutex_unlock(&dev_priv->rps.hw_lock); } @@ -5106,12 +5120,16 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, spin_unlock(&dev_priv->rps.client_lock); } -void intel_set_rps(struct drm_i915_private *dev_priv, u8 val) +int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { + int err; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - valleyview_set_rps(dev_priv, val); + err = valleyview_set_rps(dev_priv, val); else - gen6_set_rps(dev_priv, val); + err = gen6_set_rps(dev_priv, val); + + return err; } static void gen9_disable_rc6(struct drm_i915_private *dev_priv) @@ -5315,7 +5333,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) } static void reset_rps(struct drm_i915_private *dev_priv, - void (*set)(struct drm_i915_private *, u8)) + int (*set)(struct drm_i915_private *, u8)) { u8 freq = dev_priv->rps.cur_freq; @@ -5323,7 +5341,8 @@ static void reset_rps(struct drm_i915_private *dev_priv, dev_priv->rps.power = -1; dev_priv->rps.cur_freq = -1; - set(dev_priv, freq); + if (set(dev_priv, freq)) + DRM_ERROR("Failed to reset RPS to initial values\n"); } /* See the Gen9_GT_PM_Programming_Guide doc for the below */ diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 1a840bf92eea..d3d49a09c919 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -93,14 +93,18 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr) return val; } -void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) +int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) { + int err; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); mutex_lock(&dev_priv->sb_lock); - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, - SB_CRWRDA_NP, addr, &val); + err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, + SB_CRWRDA_NP, addr, &val); mutex_unlock(&dev_priv->sb_lock); + + return err; } u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg) From ed576a5ca47fbe374cd762cb1fde4bdc41e233b4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Jan 2017 13:48:08 +0000 Subject: [PATCH 0042/1299] drm/i915: Also clear the punit's PDATA sideband register As the previous punit i/o may have failed, the contents of the PDATA are undefined. Always clear it to 0 prior to sending the command. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170125134808.6152-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_sideband.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index d3d49a09c919..9f782b5eb6e6 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -60,8 +60,7 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, } I915_WRITE(VLV_IOSF_ADDR, addr); - if (!is_read) - I915_WRITE(VLV_IOSF_DATA, *val); + I915_WRITE(VLV_IOSF_DATA, is_read ? 0 : *val); I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd); if (intel_wait_for_register(dev_priv, @@ -74,7 +73,6 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, if (is_read) *val = I915_READ(VLV_IOSF_DATA); - I915_WRITE(VLV_IOSF_DATA, 0); return 0; } From bc384c77e3bb3df2b396f4fb246b452571896c5e Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 24 Jan 2017 16:47:22 -0200 Subject: [PATCH 0043/1299] x86/gpu: GLK uses the same GMS values as SKL So don't forget to reserve its stolen memory bits. Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Ander Conselvan de Oliveira Cc: x86@kernel.org Reviewed-by: Ander Conselvan de Oliveira Signed-off-by: Paulo Zanoni Acked-by: Ingo Molnar Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485283642-14401-1-git-send-email-paulo.r.zanoni@intel.com --- arch/x86/kernel/early-quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 6a08e25a48d8..23c4f1ce0718 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -526,6 +526,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_SKL_IDS(&gen9_early_ops), INTEL_BXT_IDS(&gen9_early_ops), INTEL_KBL_IDS(&gen9_early_ops), + INTEL_GLK_IDS(&gen9_early_ops), }; static void __init From 2eebe4f2d5f4c91edc37801d828ba29edfbc7722 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 23 Jan 2017 11:42:59 +0200 Subject: [PATCH 0044/1299] drm/color: un-inline drm_color_lut_extract() The function is not that big, but it's also not used for anything performance critical. Make it a normal function. As a side effect, this apparently makes sparse smarter about what it's doing, and gets rid of the warning: ./include/drm/drm_color_mgmt.h:53:28: warning: shift too big (4294967295) for type unsigned long ./include/drm/drm_color_mgmt.h:53:28: warning: cast truncates bits from constant value (8000000000000000 becomes 0) Cc: Lionel Landwerlin Reviewed-by: Daniel Vetter Reviewed-by: Lionel Landwerlin Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485164579-16250-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/drm_color_mgmt.c | 24 ++++++++++++++++++++++++ include/drm/drm_color_mgmt.h | 27 ++------------------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 789b4c65cd69..5618f60c7690 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -87,6 +87,30 @@ * "GAMMA_LUT" property above. */ +/** + * drm_color_lut_extract - clamp&round LUT entries + * @user_input: input value + * @bit_precision: number of bits the hw LUT supports + * + * Extract a degamma/gamma LUT value provided by user (in the form of + * &drm_color_lut entries) and round it to the precision supported by the + * hardware. + */ +uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision) +{ + uint32_t val = user_input; + uint32_t max = 0xffff >> (16 - bit_precision); + + /* Round only if we're not using full precision. */ + if (bit_precision < 16) { + val += 1UL << (16 - bit_precision - 1); + val >>= 16 - bit_precision; + } + + return clamp_val(val, 0, max); +} +EXPORT_SYMBOL(drm_color_lut_extract); + /** * drm_crtc_enable_color_mgmt - enable color management properties * @crtc: DRM CRTC diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index c767238ac9d5..bce4a532836d 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -25,6 +25,8 @@ #include +uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision); + void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size, bool has_ctm, @@ -33,29 +35,4 @@ void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, int drm_mode_crtc_set_gamma_size(struct drm_crtc *crtc, int gamma_size); -/** - * drm_color_lut_extract - clamp&round LUT entries - * @user_input: input value - * @bit_precision: number of bits the hw LUT supports - * - * Extract a degamma/gamma LUT value provided by user (in the form of - * &drm_color_lut entries) and round it to the precision supported by the - * hardware. - */ -static inline uint32_t drm_color_lut_extract(uint32_t user_input, - uint32_t bit_precision) -{ - uint32_t val = user_input; - uint32_t max = 0xffff >> (16 - bit_precision); - - /* Round only if we're not using full precision. */ - if (bit_precision < 16) { - val += 1UL << (16 - bit_precision - 1); - val >>= 16 - bit_precision; - } - - return clamp_val(val, 0, max); -} - - #endif From 920bcd1820a6966b6224f62eadcb4e931bb72e8e Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 26 Jan 2017 18:19:07 -0200 Subject: [PATCH 0045/1299] drm/i915: make i915_stolen_to_physical() return phys_addr_t The i915_stolen_to_physical() function has 'unsigned long' as its return type but it returns the 'base' variable, which is of type 'u32'. The only place where this function is called assigns the returned value to dev_priv->mm.stolen_base, which is of type 'phys_addr_t'. The return value is actually a physical address and everything else in the stolen memory code seems to be using phys_addr_t, so fix i915_stolen_to_physical() to use phys_addr_t. v2: Add missing blank lines after declarations (Chris, checkpatch.pl). Reviewed-by: Chris Wilson Signed-off-by: Paulo Zanoni Link: http://patchwork.freedesktop.org/patch/msgid/1485461947-16030-1-git-send-email-paulo.r.zanoni@intel.com --- drivers/gpu/drm/i915/i915_gem_stolen.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 127d698e7c84..729dba017b78 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -79,12 +79,12 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->mm.stolen_lock); } -static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) +static phys_addr_t i915_stolen_to_physical(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; struct i915_ggtt *ggtt = &dev_priv->ggtt; struct resource *r; - u32 base; + phys_addr_t base; /* Almost universally we can find the Graphics Base of Stolen Memory * at register BSM (0x5c) in the igfx configuration space. On a few @@ -196,7 +196,7 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) <= 4 && !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { struct { - u32 start, end; + phys_addr_t start, end; } stolen[2] = { { .start = base, .end = base + ggtt->stolen_size, }, { .start = base, .end = base + ggtt->stolen_size, }, @@ -228,11 +228,13 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) if (stolen[0].start != stolen[1].start || stolen[0].end != stolen[1].end) { + phys_addr_t end = base + ggtt->stolen_size - 1; + DRM_DEBUG_KMS("GTT within stolen memory at 0x%llx-0x%llx\n", (unsigned long long)ggtt_start, (unsigned long long)ggtt_end - 1); - DRM_DEBUG_KMS("Stolen memory adjusted to 0x%x-0x%x\n", - base, base + (u32)ggtt->stolen_size - 1); + DRM_DEBUG_KMS("Stolen memory adjusted to %pa - %pa\n", + &base, &end); } } @@ -261,8 +263,10 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) * range. Apparently this works. */ if (r == NULL && !IS_GEN3(dev_priv)) { - DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n", - base, base + (uint32_t)ggtt->stolen_size); + phys_addr_t end = base + ggtt->stolen_size; + + DRM_ERROR("conflict detected with stolen region: [%pa - %pa]\n", + &base, &end); base = 0; } } From 05c41f926fcc7ef838c80a6a99d84f67b4e0b824 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 26 Jan 2017 17:32:11 +0300 Subject: [PATCH 0046/1299] drm/i915: fix use-after-free in page_flip_completed() page_flip_completed() dereferences 'work' variable after executing queue_work(). This is not safe as the 'work' item might be already freed by queued work: BUG: KASAN: use-after-free in page_flip_completed+0x3ff/0x490 at addr ffff8803dc010f90 Call Trace: __asan_report_load8_noabort+0x59/0x80 page_flip_completed+0x3ff/0x490 intel_finish_page_flip_mmio+0xe3/0x130 intel_pipe_handle_vblank+0x2d/0x40 gen8_irq_handler+0x4a7/0xed0 __handle_irq_event_percpu+0xf6/0x860 handle_irq_event_percpu+0x6b/0x160 handle_irq_event+0xc7/0x1b0 handle_edge_irq+0x1f4/0xa50 handle_irq+0x41/0x70 do_IRQ+0x9a/0x200 common_interrupt+0x89/0x89 Freed: kfree+0x113/0x4d0 intel_unpin_work_fn+0x29a/0x3b0 process_one_work+0x79e/0x1b70 worker_thread+0x611/0x1460 kthread+0x241/0x3a0 ret_from_fork+0x27/0x40 Move queue_work() after trace_i915_flip_complete() to fix this. Fixes: e5510fac98a7 ("drm/i915: add tracepoints for flip requests & completions") Signed-off-by: Andrey Ryabinin Cc: # v2.6.36+ Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170126143211.24013-1-aryabinin@virtuozzo.com --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ef65b4be5e56..37f71e992e63 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4253,10 +4253,10 @@ static void page_flip_completed(struct intel_crtc *intel_crtc) drm_crtc_vblank_put(&intel_crtc->base); wake_up_all(&dev_priv->pending_flip_queue); - queue_work(dev_priv->wq, &work->unpin_work); - trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj); + + queue_work(dev_priv->wq, &work->unpin_work); } static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc) From 40f62bb75e354dddfd8d7b6d7b96c4ab93cb00f1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 27 Jan 2017 16:11:29 +0200 Subject: [PATCH 0047/1299] Revert "drm/color: un-inline drm_color_lut_extract()" This reverts commit 2eebe4f2d5f4c91edc37801d828ba29edfbc7722, because I screwed up and applied it to the wrong branch. Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_color_mgmt.c | 24 ------------------------ include/drm/drm_color_mgmt.h | 27 +++++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 5618f60c7690..789b4c65cd69 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -87,30 +87,6 @@ * "GAMMA_LUT" property above. */ -/** - * drm_color_lut_extract - clamp&round LUT entries - * @user_input: input value - * @bit_precision: number of bits the hw LUT supports - * - * Extract a degamma/gamma LUT value provided by user (in the form of - * &drm_color_lut entries) and round it to the precision supported by the - * hardware. - */ -uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision) -{ - uint32_t val = user_input; - uint32_t max = 0xffff >> (16 - bit_precision); - - /* Round only if we're not using full precision. */ - if (bit_precision < 16) { - val += 1UL << (16 - bit_precision - 1); - val >>= 16 - bit_precision; - } - - return clamp_val(val, 0, max); -} -EXPORT_SYMBOL(drm_color_lut_extract); - /** * drm_crtc_enable_color_mgmt - enable color management properties * @crtc: DRM CRTC diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index bce4a532836d..c767238ac9d5 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -25,8 +25,6 @@ #include -uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision); - void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size, bool has_ctm, @@ -35,4 +33,29 @@ void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, int drm_mode_crtc_set_gamma_size(struct drm_crtc *crtc, int gamma_size); +/** + * drm_color_lut_extract - clamp&round LUT entries + * @user_input: input value + * @bit_precision: number of bits the hw LUT supports + * + * Extract a degamma/gamma LUT value provided by user (in the form of + * &drm_color_lut entries) and round it to the precision supported by the + * hardware. + */ +static inline uint32_t drm_color_lut_extract(uint32_t user_input, + uint32_t bit_precision) +{ + uint32_t val = user_input; + uint32_t max = 0xffff >> (16 - bit_precision); + + /* Round only if we're not using full precision. */ + if (bit_precision < 16) { + val += 1UL << (16 - bit_precision - 1); + val >>= 16 - bit_precision; + } + + return clamp_val(val, 0, max); +} + + #endif From f5b98461cb8167ba362ad9f74c41d126b7becea7 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 6 Jan 2017 19:32:01 +0100 Subject: [PATCH 0048/1299] random: use chacha20 for get_random_int/long Now that our crng uses chacha20, we can rely on its speedy characteristics for replacing MD5, while simultaneously achieving a higher security guarantee. Before the idea was to use these functions if you wanted random integers that aren't stupidly insecure but aren't necessarily secure either, a vague gray zone, that hopefully was "good enough" for its users. With chacha20, we can strengthen this claim, since either we're using an rdrand-like instruction, or we're using the same crng as /dev/urandom. And it's faster than what was before. We could have chosen to replace this with a SipHash-derived function, which might be slightly faster, but at the cost of having yet another RNG construction in the kernel. By moving to chacha20, we have a single RNG to analyze and verify, and we also already get good performance improvements on all platforms. Implementation-wise, rather than use a generic buffer for both get_random_int/long and memcpy based on the size needs, we use a specific buffer for 32-bit reads and for 64-bit reads. This way, we're guaranteed to always have aligned accesses on all platforms. While slightly more verbose in C, the assembly this generates is a lot simpler than otherwise. Finally, on 32-bit platforms where longs and ints are the same size, we simply alias get_random_int to get_random_long. Signed-off-by: Jason A. Donenfeld Suggested-by: Theodore Ts'o Cc: Theodore Ts'o Cc: Hannes Frederic Sowa Cc: Andy Lutomirski Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 90 +++++++++++++++++++++--------------------- include/linux/random.h | 1 - init/main.c | 1 - 3 files changed, 46 insertions(+), 46 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 9d147d456598..b800e5479b7d 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2016,64 +2016,66 @@ struct ctl_table random_table[] = { }; #endif /* CONFIG_SYSCTL */ -static u32 random_int_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; - -int random_int_secret_init(void) -{ - get_random_bytes(random_int_secret, sizeof(random_int_secret)); - return 0; -} - -static DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash) - __aligned(sizeof(unsigned long)); +struct batched_entropy { + union { + unsigned long entropy_long[CHACHA20_BLOCK_SIZE / sizeof(unsigned long)]; + unsigned int entropy_int[CHACHA20_BLOCK_SIZE / sizeof(unsigned int)]; + }; + unsigned int position; +}; /* - * Get a random word for internal kernel use only. Similar to urandom but - * with the goal of minimal entropy pool depletion. As a result, the random - * value is not cryptographically secure but for several uses the cost of - * depleting entropy is too high - */ -unsigned int get_random_int(void) -{ - __u32 *hash; - unsigned int ret; - - if (arch_get_random_int(&ret)) - return ret; - - hash = get_cpu_var(get_random_int_hash); - - hash[0] += current->pid + jiffies + random_get_entropy(); - md5_transform(hash, random_int_secret); - ret = hash[0]; - put_cpu_var(get_random_int_hash); - - return ret; -} -EXPORT_SYMBOL(get_random_int); - -/* - * Same as get_random_int(), but returns unsigned long. + * Get a random word for internal kernel use only. The quality of the random + * number is either as good as RDRAND or as good as /dev/urandom, with the + * goal of being quite fast and not depleting entropy. */ +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long); unsigned long get_random_long(void) { - __u32 *hash; unsigned long ret; + struct batched_entropy *batch; if (arch_get_random_long(&ret)) return ret; - hash = get_cpu_var(get_random_int_hash); - - hash[0] += current->pid + jiffies + random_get_entropy(); - md5_transform(hash, random_int_secret); - ret = *(unsigned long *)hash; - put_cpu_var(get_random_int_hash); - + batch = &get_cpu_var(batched_entropy_long); + if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) { + extract_crng((u8 *)batch->entropy_long); + batch->position = 0; + } + ret = batch->entropy_long[batch->position++]; + put_cpu_var(batched_entropy_long); return ret; } EXPORT_SYMBOL(get_random_long); +#if BITS_PER_LONG == 32 +unsigned int get_random_int(void) +{ + return get_random_long(); +} +#else +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int); +unsigned int get_random_int(void) +{ + unsigned int ret; + struct batched_entropy *batch; + + if (arch_get_random_int(&ret)) + return ret; + + batch = &get_cpu_var(batched_entropy_int); + if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) { + extract_crng((u8 *)batch->entropy_int); + batch->position = 0; + } + ret = batch->entropy_int[batch->position++]; + put_cpu_var(batched_entropy_int); + return ret; +} +#endif +EXPORT_SYMBOL(get_random_int); + /** * randomize_page - Generate a random, page aligned address * @start: The smallest acceptable address the caller will take. diff --git a/include/linux/random.h b/include/linux/random.h index 7bd2403e4fef..16ab429735a7 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -37,7 +37,6 @@ extern void get_random_bytes(void *buf, int nbytes); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); extern void get_random_bytes_arch(void *buf, int nbytes); -extern int random_int_secret_init(void); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; diff --git a/init/main.c b/init/main.c index b0c9d6facef9..09beb7fc6e8c 100644 --- a/init/main.c +++ b/init/main.c @@ -879,7 +879,6 @@ static void __init do_basic_setup(void) do_ctors(); usermodehelper_enable(); do_initcalls(); - random_int_secret_init(); } static void __init do_pre_smp_initcalls(void) From c440408cf6901eeb2c09563397e24a9097907078 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 22 Jan 2017 16:34:08 +0100 Subject: [PATCH 0049/1299] random: convert get_random_int/long into get_random_u32/u64 Many times, when a user wants a random number, he wants a random number of a guaranteed size. So, thinking of get_random_int and get_random_long in terms of get_random_u32 and get_random_u64 makes it much easier to achieve this. It also makes the code simpler. On 32-bit platforms, get_random_int and get_random_long are both aliased to get_random_u32. On 64-bit platforms, int->u32 and long->u64. Signed-off-by: Jason A. Donenfeld Cc: Greg Kroah-Hartman Cc: Theodore Ts'o Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 55 +++++++++++++++++++++--------------------- include/linux/random.h | 17 +++++++++++-- 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index b800e5479b7d..066ae125f2c8 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2018,8 +2018,8 @@ struct ctl_table random_table[] = { struct batched_entropy { union { - unsigned long entropy_long[CHACHA20_BLOCK_SIZE / sizeof(unsigned long)]; - unsigned int entropy_int[CHACHA20_BLOCK_SIZE / sizeof(unsigned int)]; + u64 entropy_u64[CHACHA20_BLOCK_SIZE / sizeof(u64)]; + u32 entropy_u32[CHACHA20_BLOCK_SIZE / sizeof(u32)]; }; unsigned int position; }; @@ -2029,52 +2029,51 @@ struct batched_entropy { * number is either as good as RDRAND or as good as /dev/urandom, with the * goal of being quite fast and not depleting entropy. */ -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_long); -unsigned long get_random_long(void) +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u64); +u64 get_random_u64(void) { - unsigned long ret; + u64 ret; struct batched_entropy *batch; - if (arch_get_random_long(&ret)) +#if BITS_PER_LONG == 64 + if (arch_get_random_long((unsigned long *)&ret)) return ret; +#else + if (arch_get_random_long((unsigned long *)&ret) && + arch_get_random_long((unsigned long *)&ret + 1)) + return ret; +#endif - batch = &get_cpu_var(batched_entropy_long); - if (batch->position % ARRAY_SIZE(batch->entropy_long) == 0) { - extract_crng((u8 *)batch->entropy_long); + batch = &get_cpu_var(batched_entropy_u64); + if (batch->position % ARRAY_SIZE(batch->entropy_u64) == 0) { + extract_crng((u8 *)batch->entropy_u64); batch->position = 0; } - ret = batch->entropy_long[batch->position++]; - put_cpu_var(batched_entropy_long); + ret = batch->entropy_u64[batch->position++]; + put_cpu_var(batched_entropy_u64); return ret; } -EXPORT_SYMBOL(get_random_long); +EXPORT_SYMBOL(get_random_u64); -#if BITS_PER_LONG == 32 -unsigned int get_random_int(void) +static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_u32); +u32 get_random_u32(void) { - return get_random_long(); -} -#else -static DEFINE_PER_CPU(struct batched_entropy, batched_entropy_int); -unsigned int get_random_int(void) -{ - unsigned int ret; + u32 ret; struct batched_entropy *batch; if (arch_get_random_int(&ret)) return ret; - batch = &get_cpu_var(batched_entropy_int); - if (batch->position % ARRAY_SIZE(batch->entropy_int) == 0) { - extract_crng((u8 *)batch->entropy_int); + batch = &get_cpu_var(batched_entropy_u32); + if (batch->position % ARRAY_SIZE(batch->entropy_u32) == 0) { + extract_crng((u8 *)batch->entropy_u32); batch->position = 0; } - ret = batch->entropy_int[batch->position++]; - put_cpu_var(batched_entropy_int); + ret = batch->entropy_u32[batch->position++]; + put_cpu_var(batched_entropy_u32); return ret; } -#endif -EXPORT_SYMBOL(get_random_int); +EXPORT_SYMBOL(get_random_u32); /** * randomize_page - Generate a random, page aligned address diff --git a/include/linux/random.h b/include/linux/random.h index 16ab429735a7..ed5c3838780d 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -42,8 +42,21 @@ extern void get_random_bytes_arch(void *buf, int nbytes); extern const struct file_operations random_fops, urandom_fops; #endif -unsigned int get_random_int(void); -unsigned long get_random_long(void); +u32 get_random_u32(void); +u64 get_random_u64(void); +static inline unsigned int get_random_int(void) +{ + return get_random_u32(); +} +static inline unsigned long get_random_long(void) +{ +#if BITS_PER_LONG == 64 + return get_random_u64(); +#else + return get_random_u32(); +#endif +} + unsigned long randomize_page(unsigned long start, unsigned long range); u32 prandom_u32(void); From 77ae9957897df86e627089688265e0db029dd0df Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Jan 2017 09:40:07 +0000 Subject: [PATCH 0050/1299] drm/i915: Enable userspace to opt-out of implicit fencing Userspace is faced with a dilemma. The kernel requires implicit fencing to manage resource usage (we always must wait for the GPU to finish before releasing its PTE) and for third parties. However, userspace may wish to avoid this serialisation if it is either using explicit fencing between parties and wants more fine-grained access to buffers (e.g. it may partition the buffer between uses and track fences on ranges rather than the implicit fences tracking the whole object). It follows that userspace needs a mechanism to avoid the kernel's serialisation on its implicit fences before execbuf execution. The next question is whether this is an object, execbuf or context flag. Hybrid users (such as using explicit EGL_ANDROID_native_sync fencing on shared winsys buffers, but implicit fencing on internal surfaces) require a per-object level flag. Given that this flag need to be only set once for the lifetime of the object, this reduces the convenience of having an execbuf or context level flag (and avoids having multiple pieces of uABI controlling the same feature). Incorrect use of this flag will result in rendering corruption and GPU hangs - but will not result in use-after-free or similar resource tracking issues. Serious caveat: write ordering is not strictly correct after setting this flag on a render target on multiple engines. This affects all subsequent GEM operations (execbuf, set-domain, pread) and shared dma-buf operations. A fix is possible - but costly (both in terms of further ABI changes and runtime overhead). Testcase: igt/gem_exec_async Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Chad Versace Link: http://patchwork.freedesktop.org/patch/msgid/20170127094008.27489-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +++ include/uapi/drm/i915_drm.h | 29 +++++++++++++++++++++- 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 507a6f69426f..6c9c2037fadf 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -349,6 +349,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_HANDLE_LUT: case I915_PARAM_HAS_COHERENT_PHYS_GTT: case I915_PARAM_HAS_EXEC_SOFTPIN: + case I915_PARAM_HAS_EXEC_ASYNC: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c66e90571031..6fd60682bf93 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1111,6 +1111,9 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; + if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) + continue; + ret = i915_gem_request_await_object (req, obj, obj->base.pending_write_domain); if (ret) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 57093b455db6..6620b6ad76ed 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -397,6 +397,12 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_SCHEDULER 41 #define I915_PARAM_HUC_STATUS 42 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of + * synchronisation with implicit fencing on individual objects. + * See EXEC_OBJECT_ASYNC. + */ +#define I915_PARAM_HAS_EXEC_ASYNC 43 + typedef struct drm_i915_getparam { __s32 param; /* @@ -737,8 +743,29 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) #define EXEC_OBJECT_PINNED (1<<4) #define EXEC_OBJECT_PAD_TO_SIZE (1<<5) +/* The kernel implicitly tracks GPU activity on all GEM objects, and + * synchronises operations with outstanding rendering. This includes + * rendering on other devices if exported via dma-buf. However, sometimes + * this tracking is too coarse and the user knows better. For example, + * if the object is split into non-overlapping ranges shared between different + * clients or engines (i.e. suballocating objects), the implicit tracking + * by kernel assumes that each operation affects the whole object rather + * than an individual range, causing needless synchronisation between clients. + * The kernel will also forgo any CPU cache flushes prior to rendering from + * the object as the client is expected to be also handling such domain + * tracking. + * + * The kernel maintains the implicit tracking in order to manage resources + * used by the GPU - this flag only disables the synchronisation prior to + * rendering with this object in this execbuf. + * + * Opting out of implicit synhronisation requires the user to do its own + * explicit tracking to avoid rendering corruption. See, for example, + * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously. + */ +#define EXEC_OBJECT_ASYNC (1<<6) /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1) __u64 flags; union { From fec0445caa273209d2809760ac7c63e743d6f512 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Jan 2017 09:40:08 +0000 Subject: [PATCH 0051/1299] drm/i915: Support explicit fencing for execbuf Now that the user can opt-out of implicit fencing, we need to give them back control over the fencing. We employ sync_file to wrap our drm_i915_gem_request and provide an fd that userspace can merge with other sync_file fds and pass back to the kernel to wait upon before future execution. Testcase: igt/gem_exec_fence Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Chad Versace Link: http://patchwork.freedesktop.org/patch/msgid/20170127094008.27489-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig | 1 + drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 54 ++++++++++++++++++++-- include/uapi/drm/i915_drm.h | 36 ++++++++++++++- 4 files changed, 87 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 183f5dc1c3f2..1ae0bb91ee60 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -19,6 +19,7 @@ config DRM_I915 select INPUT if ACPI select ACPI_VIDEO if ACPI select ACPI_BUTTON if ACPI + select SYNC_FILE help Choose this option if you have a system that has "Intel Graphics Media Accelerator" or "HD Graphics" integrated graphics, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6c9c2037fadf..3aa5bf58cf32 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_COHERENT_PHYS_GTT: case I915_PARAM_HAS_EXEC_SOFTPIN: case I915_PARAM_HAS_EXEC_ASYNC: + case I915_PARAM_HAS_EXEC_FENCE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -2550,7 +2551,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6fd60682bf93..91c2393199a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -1595,6 +1596,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_execbuffer_params *params = ¶ms_master; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 dispatch_flags; + struct dma_fence *in_fence = NULL; + struct sync_file *out_fence = NULL; + int out_fence_fd = -1; int ret; bool need_relocs; @@ -1638,6 +1642,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, dispatch_flags |= I915_DISPATCH_RS; } + if (args->flags & I915_EXEC_FENCE_IN) { + in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); + if (!in_fence) { + ret = -EINVAL; + goto pre_mutex_err; + } + } + + if (args->flags & I915_EXEC_FENCE_OUT) { + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); + if (out_fence_fd < 0) { + ret = out_fence_fd; + out_fence_fd = -1; + goto pre_mutex_err; + } + } + /* Take a local wakeref for preparing to dispatch the execbuf as * we expect to access the hardware fairly frequently in the * process. Upon first dispatch, we acquire another prolonged @@ -1782,6 +1803,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err_batch_unpin; } + if (in_fence) { + ret = i915_gem_request_await_dma_fence(params->request, + in_fence); + if (ret < 0) + goto err_request; + } + + if (out_fence_fd != -1) { + out_fence = sync_file_create(¶ms->request->fence); + if (!out_fence) { + ret = -ENOMEM; + goto err_request; + } + } + /* Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this * request is retired will the the batch_obj be moved onto the @@ -1809,6 +1845,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ret = execbuf_submit(params, args, &eb->vmas); err_request: __i915_add_request(params->request, ret == 0); + if (out_fence) { + if (ret == 0) { + fd_install(out_fence_fd, out_fence->file); + args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ + args->rsvd2 |= (u64)out_fence_fd << 32; + out_fence_fd = -1; + } else { + fput(out_fence->file); + } + } err_batch_unpin: /* @@ -1830,6 +1876,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* intel_gpu_busy should also get a ref, so it will free when the device * is really idle. */ intel_runtime_pm_put(dev_priv); + if (out_fence_fd != -1) + put_unused_fd(out_fence_fd); + dma_fence_put(in_fence); return ret; } @@ -1937,11 +1986,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EINVAL; } - if (args->rsvd2 != 0) { - DRM_DEBUG("dirty rvsd2 field\n"); - return -EINVAL; - } - exec2_list = drm_malloc_gfp(args->buffer_count, sizeof(*exec2_list), GFP_TEMPORARY); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6620b6ad76ed..3554495bef13 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_OVERLAY_PUT_IMAGE 0x27 #define DRM_I915_OVERLAY_ATTRS 0x28 #define DRM_I915_GEM_EXECBUFFER2 0x29 +#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2 #define DRM_I915_GET_SPRITE_COLORKEY 0x2a #define DRM_I915_SET_SPRITE_COLORKEY 0x2b #define DRM_I915_GEM_WAIT 0x2c @@ -280,6 +281,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) #define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2) #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) #define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) @@ -403,6 +405,13 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_ASYNC 43 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support - + * both being able to pass in a sync_file fd to wait upon before executing, + * and being able to return a new sync_file fd that is signaled when the + * current request is complete. See I915_EXEC_FENCE_IN and I915_EXEC_FENCE_OUT. + */ +#define I915_PARAM_HAS_EXEC_FENCE 44 + typedef struct drm_i915_getparam { __s32 param; /* @@ -855,7 +864,32 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_RESOURCE_STREAMER (1<<15) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1) +/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing + * the batch. + * + * Returns -EINVAL if the sync_file fd cannot be found. + */ +#define I915_EXEC_FENCE_IN (1<<16) + +/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd + * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given + * to the caller, and it should be close() after use. (The fd is a regular + * file descriptor and will be cleaned up on process termination. It holds + * a reference to the request, but nothing else.) + * + * The sync_file fd can be combined with other sync_file and passed either + * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip + * will only occur after this request completes), or to other devices. + * + * Using I915_EXEC_FENCE_OUT requires use of + * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written + * back to userspace. Failure to do so will cause the out-fence to always + * be reported as zero, and the real fence fd to be leaked. + */ +#define I915_EXEC_FENCE_OUT (1<<17) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ From 9fb5026f857dc3145cf13eedabadb28ed028d093 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 26 Jan 2017 11:16:58 +0200 Subject: [PATCH 0052/1299] drm/i915/glk: Turn on workarounds that apply to Geminilake too Apply workarounds to Geminilake, and annotate those that are applied unconditionally when they apply to GLK based on the workaround database. v2: Fix commit message typos. (David) v3: Rebase. Cc: David Weinehall Cc: Rodrigo Vivi Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: David Weinehall Link: http://patchwork.freedesktop.org/patch/msgid/1485422218-9102-1-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +-- drivers/gpu/drm/i915/intel_lrc.c | 6 ++-- drivers/gpu/drm/i915/intel_mocs.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 23 ++++++++++++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 37 +++++++++++++++++++------ 5 files changed, 53 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 40685c68260b..048040efc3f0 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2184,14 +2184,14 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) * called on driver load and after a GPU reset, so you can place * workarounds here even if they get overwritten by GPU reset. */ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk */ if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); else if (IS_GEN9_BC(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); - else if (IS_BROXTON(dev_priv)) + else if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index bee9d565b8f3..0e7b950bceac 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1101,13 +1101,13 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, struct drm_i915_private *dev_priv = engine->i915; uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */ + /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ ret = gen8_emit_flush_coherentl3_wa(engine, batch, index); if (ret < 0) return ret; index = ret; - /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl */ + /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2); wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE( @@ -1131,7 +1131,7 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, wa_ctx_emit(batch, index, 0); } - /* WaMediaPoolStateCmdInWABB:bxt */ + /* WaMediaPoolStateCmdInWABB:bxt,glk */ if (HAS_POOLED_EU(engine->i915)) { /* * EU pool configuration is setup along with golden context diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 8f98fc714fe9..773e36253e7c 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -191,7 +191,7 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, "Platform that should have a MOCS table does not.\n"); } - /* WaDisableSkipCaching:skl,bxt,kbl */ + /* WaDisableSkipCaching:skl,bxt,kbl,glk */ if (IS_GEN9(dev_priv)) { int i; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index bec4283cc228..ec16f3d6dd2e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -65,12 +65,12 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_CONFIG0, I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES); - /* WaEnableChickenDCPR:skl,bxt,kbl */ + /* WaEnableChickenDCPR:skl,bxt,kbl,glk */ I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */ - /* WaFbcWakeMemOn:skl,bxt,kbl */ + /* WaFbcWakeMemOn:skl,bxt,kbl,glk */ I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS | DISP_FBC_MEMORY_WAKE); @@ -104,6 +104,19 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) PWM1_GATING_DIS | PWM2_GATING_DIS); } +static void glk_init_clock_gating(struct drm_i915_private *dev_priv) +{ + gen9_init_clock_gating(dev_priv); + + /* + * WaDisablePWMClockGating:glk + * Backlight PWM may stop in the asserted state, causing backlight + * to stay fully on. + */ + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + PWM1_GATING_DIS | PWM2_GATING_DIS); +} + static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv) { u32 tmp; @@ -2173,7 +2186,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, } /* - * WaWmMemoryReadLatency:skl + * WaWmMemoryReadLatency:skl,glk * * punit doesn't take into account the read latency so we need * to add 2us to the various latency levels we retrieve from the @@ -7645,8 +7658,10 @@ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) dev_priv->display.init_clock_gating = skylake_init_clock_gating; else if (IS_KABYLAKE(dev_priv)) dev_priv->display.init_clock_gating = kabylake_init_clock_gating; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_BROXTON(dev_priv)) dev_priv->display.init_clock_gating = bxt_init_clock_gating; + else if (IS_GEMINILAKE(dev_priv)) + dev_priv->display.init_clock_gating = glk_init_clock_gating; else if (IS_BROADWELL(dev_priv)) dev_priv->display.init_clock_gating = broadwell_init_clock_gating; else if (IS_CHERRYVIEW(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 69035e4f9b3b..d32cbba25d98 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -812,10 +812,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; int ret; - /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl */ + /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk */ I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); - /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl */ + /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk */ I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); @@ -823,8 +823,8 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | ECOCHK_DIS_TLB); - /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl */ - /* WaDisablePartialInstShootdown:skl,bxt,kbl */ + /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk */ + /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, FLOW_CONTROL_ENABLE | PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); @@ -853,12 +853,12 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, GEN9_ENABLE_GPGPU_PREEMPTION); - /* Wa4x4STCOptimizationDisable:skl,bxt,kbl */ + /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */ /* WaDisablePartialResolveInVc:skl,bxt,kbl */ WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); - /* WaCcsTlbPrefetchDisable:skl,bxt,kbl */ + /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk */ WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, GEN9_CCS_TLB_PREFETCH_ENABLE); @@ -900,14 +900,14 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN8_SAMPLER_POWER_BYPASS_DIS); - /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl */ + /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk */ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); /* WaOCLCoherentLineFlush:skl,bxt,kbl */ I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES)); - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt */ + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk */ ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); if (ret) return ret; @@ -917,7 +917,7 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) if (ret) return ret; - /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl */ + /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk */ ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); if (ret) return ret; @@ -1120,6 +1120,22 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) return 0; } +static int glk_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaToEnableHwFixForPushConstHWBug:glk */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + return 0; +} + int init_workarounds_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1144,6 +1160,9 @@ int init_workarounds_ring(struct intel_engine_cs *engine) if (IS_KABYLAKE(dev_priv)) return kbl_init_workarounds(engine); + if (IS_GEMINILAKE(dev_priv)) + return glk_init_workarounds(engine); + return 0; } From bafbcc2fd148610e897ba49a4681f088a0eb58f9 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 26 Jan 2017 13:24:21 +0200 Subject: [PATCH 0053/1299] drm/i915: Disable plane gamma in SKL+ sprite planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The plane gamma tables are never programmed, so just disable it, like it is done for the primary plane. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1485429865-10687-2-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_sprite.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 9ef54688872a..c05545f64f81 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -226,6 +226,7 @@ skl_update_plane(struct drm_plane *drm_plane, plane_ctl |= skl_plane_ctl_format(fb->format->format); plane_ctl |= skl_plane_ctl_tiling(fb->modifier); + plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; plane_ctl |= skl_plane_ctl_rotation(rotation); if (key->flags) { From 47f9ea8b9143890c9b98ad9e16e8ad793be2d9e3 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 26 Jan 2017 13:24:22 +0200 Subject: [PATCH 0054/1299] drm/i915/glk: Plane color correction register changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Geminilake, the bits for enabling pipe csc, pipe gamma and plane gamma moved to a new register. So update the plane update functions to set the right bits. Pipe CSC is kept disabled though, since enabling that also enables the dedicated degamma table, and that is not properly programmed yet, leading to a black screen. v2: Use plane_id. (Ville) Remove unnecessary variable. (Ville) Keep registers in offset order. (Ville) Don't set plane gamma disable twice. (Ander) Cc: Ville Syrjälä Signed-off-by: Ander Conselvan De Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1485429865-10687-3-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 19 ++++++++++++++++++- drivers/gpu/drm/i915/intel_display.c | 16 ++++++++++++---- drivers/gpu/drm/i915/intel_sprite.c | 17 ++++++++++++----- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 72f9f36ae5ce..c220736363fe 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5869,11 +5869,18 @@ enum { #define _PLANE_KEYMSK_2_A 0x70298 #define _PLANE_KEYMAX_1_A 0x701a0 #define _PLANE_KEYMAX_2_A 0x702a0 +#define _PLANE_COLOR_CTL_1_A 0x701CC /* GLK+ */ +#define _PLANE_COLOR_CTL_2_A 0x702CC /* GLK+ */ +#define _PLANE_COLOR_CTL_3_A 0x703CC /* GLK+ */ +#define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) +#define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) +#define PLANE_COLOR_PLANE_GAMMA_DISABLE (1 << 13) #define _PLANE_BUF_CFG_1_A 0x7027c #define _PLANE_BUF_CFG_2_A 0x7037c #define _PLANE_NV12_BUF_CFG_1_A 0x70278 #define _PLANE_NV12_BUF_CFG_2_A 0x70378 + #define _PLANE_CTL_1_B 0x71180 #define _PLANE_CTL_2_B 0x71280 #define _PLANE_CTL_3_B 0x71380 @@ -5968,7 +5975,17 @@ enum { #define PLANE_NV12_BUF_CFG(pipe, plane) \ _MMIO_PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe)) -/* SKL new cursor registers */ +#define _PLANE_COLOR_CTL_1_B 0x711CC +#define _PLANE_COLOR_CTL_2_B 0x712CC +#define _PLANE_COLOR_CTL_3_B 0x713CC +#define _PLANE_COLOR_CTL_1(pipe) \ + _PIPE(pipe, _PLANE_COLOR_CTL_1_A, _PLANE_COLOR_CTL_1_B) +#define _PLANE_COLOR_CTL_2(pipe) \ + _PIPE(pipe, _PLANE_COLOR_CTL_2_A, _PLANE_COLOR_CTL_2_B) +#define PLANE_COLOR_CTL(pipe, plane) \ + _MMIO_PLANE(plane, _PLANE_COLOR_CTL_1(pipe), _PLANE_COLOR_CTL_2(pipe)) + +#/* SKL new cursor registers */ #define _CUR_BUF_CFG_A 0x7017c #define _CUR_BUF_CFG_B 0x7117c #define CUR_BUF_CFG(pipe) _MMIO_PIPE(pipe, _CUR_BUF_CFG_A, _CUR_BUF_CFG_B) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 37f71e992e63..ac25706b7d4d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3386,13 +3386,21 @@ static void skylake_update_primary_plane(struct drm_plane *plane, int dst_w = drm_rect_width(&plane_state->base.dst); int dst_h = drm_rect_height(&plane_state->base.dst); - plane_ctl = PLANE_CTL_ENABLE | - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE; + plane_ctl = PLANE_CTL_ENABLE; + + if (IS_GEMINILAKE(dev_priv)) { + I915_WRITE(PLANE_COLOR_CTL(pipe, plane_id), + PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PLANE_GAMMA_DISABLE); + } else { + plane_ctl |= + PLANE_CTL_PIPE_GAMMA_ENABLE | + PLANE_CTL_PIPE_CSC_ENABLE | + PLANE_CTL_PLANE_GAMMA_DISABLE; + } plane_ctl |= skl_plane_ctl_format(fb->format->format); plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; plane_ctl |= skl_plane_ctl_rotation(rotation); /* Sizes are 0 based */ diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index c05545f64f81..b16a29591803 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -219,14 +219,21 @@ skl_update_plane(struct drm_plane *drm_plane, uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; - plane_ctl = PLANE_CTL_ENABLE | - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE; + plane_ctl = PLANE_CTL_ENABLE; + + if (IS_GEMINILAKE(dev_priv)) { + I915_WRITE(PLANE_COLOR_CTL(pipe, plane_id), + PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PLANE_GAMMA_DISABLE); + } else { + plane_ctl |= + PLANE_CTL_PIPE_GAMMA_ENABLE | + PLANE_CTL_PIPE_CSC_ENABLE | + PLANE_CTL_PLANE_GAMMA_DISABLE; + } plane_ctl |= skl_plane_ctl_format(fb->format->format); plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - - plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; plane_ctl |= skl_plane_ctl_rotation(rotation); if (key->flags) { From 2fcb206654e8b3826424ffa7ef65bd74cc65ff16 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 26 Jan 2017 13:24:23 +0200 Subject: [PATCH 0055/1299] drm/i915: Split broadwell_load_luts() into smaller functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the logic for progamming each LUT out of broadwell_load_luts(), so we can reuse part of it for geminilake. Cc: Ville Syrjälä Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1485429865-10687-4-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_color.c | 43 ++++++++++++++++++++++-------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c220736363fe..d16627a72ccb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8166,6 +8166,7 @@ enum { #define PAL_PREC_10_12_BIT (0 << 31) #define PAL_PREC_SPLIT_MODE (1 << 31) #define PAL_PREC_AUTO_INCREMENT (1 << 15) +#define PAL_PREC_INDEX_VALUE_MASK (0x3ff << 0) #define _PAL_PREC_DATA_A 0x4A404 #define _PAL_PREC_DATA_B 0x4AC04 #define _PAL_PREC_DATA_C 0x4B404 diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 34952d04485e..82a3bc9befa8 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -340,20 +340,12 @@ static void haswell_load_luts(struct drm_crtc_state *crtc_state) hsw_enable_ips(intel_crtc); } -/* Loads the palette/gamma unit for the CRTC on Broadwell+. */ -static void broadwell_load_luts(struct drm_crtc_state *state) +static void bdw_load_degamma_lut(struct drm_crtc_state *state) { - struct drm_crtc *crtc = state->crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc_state *intel_state = to_intel_crtc_state(state); - enum pipe pipe = to_intel_crtc(crtc)->pipe; + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; - if (crtc_state_is_legacy(state)) { - haswell_load_luts(state); - return; - } - I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT); @@ -377,6 +369,18 @@ static void broadwell_load_luts(struct drm_crtc_state *state) (v << 20) | (v << 10) | v); } } +} + +static void bdw_load_gamma_lut(struct drm_crtc_state *state, u32 offset) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + + WARN_ON(offset & ~PAL_PREC_INDEX_VALUE_MASK); + + I915_WRITE(PREC_PAL_INDEX(pipe), + PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT | offset); if (state->gamma_lut) { struct drm_color_lut *lut = @@ -410,6 +414,23 @@ static void broadwell_load_luts(struct drm_crtc_state *state) I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16) - 1); I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16) - 1); } +} + +/* Loads the palette/gamma unit for the CRTC on Broadwell+. */ +static void broadwell_load_luts(struct drm_crtc_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + struct intel_crtc_state *intel_state = to_intel_crtc_state(state); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + + if (crtc_state_is_legacy(state)) { + haswell_load_luts(state); + return; + } + + bdw_load_degamma_lut(state); + bdw_load_gamma_lut(state, + INTEL_INFO(dev_priv)->color.degamma_lut_size); intel_state->gamma_mode = GAMMA_MODE_MODE_SPLIT; I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_SPLIT); From 9751bafc43d599262d9de7e6d988804dc5f5d4aa Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 27 Jan 2017 11:02:30 +0200 Subject: [PATCH 0056/1299] drm/i915/glk: Program pipe gamma and degamma tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gamma tables in Geminilake were changed. There is no split-gamma mode. Instead, there is a dedicated degamma table that is enabled whenever pipe CSC is enabled. The dedicated gamma table has 16 bit precision but doesn't support separate channels. Since that doesn't match the per-channel format of the degamma LUT property, for now only a linear table is loaded and the property ignored. v2: Remove empty line. (Ville) Reuse broadwell code. (Ville) v3: Don't write PIPE_CSC_MODE. (Ville) Cc: Ville Syrjälä Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170127090230.20302-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/i915_reg.h | 14 ++++++++ drivers/gpu/drm/i915/intel_color.c | 58 +++++++++++++++++++++++++++++- 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ecb487b5356f..df2051b41fa1 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -403,6 +403,7 @@ static const struct intel_device_info intel_geminilake_info = { .platform = INTEL_GEMINILAKE, .is_alpha_support = 1, .ddb_size = 1024, + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } }; static const struct intel_device_info intel_kabylake_info = { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d16627a72ccb..4322c67388f6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8176,12 +8176,26 @@ enum { #define _PAL_PREC_EXT_GC_MAX_A 0x4A420 #define _PAL_PREC_EXT_GC_MAX_B 0x4AC20 #define _PAL_PREC_EXT_GC_MAX_C 0x4B420 +#define _PAL_PREC_EXT2_GC_MAX_A 0x4A430 +#define _PAL_PREC_EXT2_GC_MAX_B 0x4AC30 +#define _PAL_PREC_EXT2_GC_MAX_C 0x4B430 #define PREC_PAL_INDEX(pipe) _MMIO_PIPE(pipe, _PAL_PREC_INDEX_A, _PAL_PREC_INDEX_B) #define PREC_PAL_DATA(pipe) _MMIO_PIPE(pipe, _PAL_PREC_DATA_A, _PAL_PREC_DATA_B) #define PREC_PAL_GC_MAX(pipe, i) _MMIO(_PIPE(pipe, _PAL_PREC_GC_MAX_A, _PAL_PREC_GC_MAX_B) + (i) * 4) #define PREC_PAL_EXT_GC_MAX(pipe, i) _MMIO(_PIPE(pipe, _PAL_PREC_EXT_GC_MAX_A, _PAL_PREC_EXT_GC_MAX_B) + (i) * 4) +#define _PRE_CSC_GAMC_INDEX_A 0x4A484 +#define _PRE_CSC_GAMC_INDEX_B 0x4AC84 +#define _PRE_CSC_GAMC_INDEX_C 0x4B484 +#define PRE_CSC_GAMC_AUTO_INCREMENT (1 << 10) +#define _PRE_CSC_GAMC_DATA_A 0x4A488 +#define _PRE_CSC_GAMC_DATA_B 0x4AC88 +#define _PRE_CSC_GAMC_DATA_C 0x4B488 + +#define PRE_CSC_GAMC_INDEX(pipe) _MMIO_PIPE(pipe, _PRE_CSC_GAMC_INDEX_A, _PRE_CSC_GAMC_INDEX_B) +#define PRE_CSC_GAMC_DATA(pipe) _MMIO_PIPE(pipe, _PRE_CSC_GAMC_DATA_A, _PRE_CSC_GAMC_DATA_B) + /* pipe CSC & degamma/gamma LUTs on CHV */ #define _CGM_PIPE_A_CSC_COEFF01 (VLV_DISPLAY_BASE + 0x67900) #define _CGM_PIPE_A_CSC_COEFF23 (VLV_DISPLAY_BASE + 0x67904) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 82a3bc9befa8..0627eee1cb64 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -380,7 +380,9 @@ static void bdw_load_gamma_lut(struct drm_crtc_state *state, u32 offset) WARN_ON(offset & ~PAL_PREC_INDEX_VALUE_MASK); I915_WRITE(PREC_PAL_INDEX(pipe), - PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT | offset); + (offset ? PAL_PREC_SPLIT_MODE : 0) | + PAL_PREC_AUTO_INCREMENT | + offset); if (state->gamma_lut) { struct drm_color_lut *lut = @@ -443,6 +445,57 @@ static void broadwell_load_luts(struct drm_crtc_state *state) I915_WRITE(PREC_PAL_INDEX(pipe), 0); } +static void glk_load_degamma_lut(struct drm_crtc_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + const uint32_t lut_size = 33; + uint32_t i; + + /* + * When setting the auto-increment bit, the hardware seems to + * ignore the index bits, so we need to reset it to index 0 + * separately. + */ + I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), 0); + I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), PRE_CSC_GAMC_AUTO_INCREMENT); + + /* + * FIXME: The pipe degamma table in geminilake doesn't support + * different values per channel, so this just loads a linear table. + */ + for (i = 0; i < lut_size; i++) { + uint32_t v = (i * ((1 << 16) - 1)) / (lut_size - 1); + + I915_WRITE(PRE_CSC_GAMC_DATA(pipe), v); + } + + /* Clamp values > 1.0. */ + while (i++ < 35) + I915_WRITE(PRE_CSC_GAMC_DATA(pipe), (1 << 16) - 1); +} + +static void glk_load_luts(struct drm_crtc_state *state) +{ + struct drm_crtc *crtc = state->crtc; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc_state *intel_state = to_intel_crtc_state(state); + enum pipe pipe = to_intel_crtc(crtc)->pipe; + + if (crtc_state_is_legacy(state)) { + haswell_load_luts(state); + return; + } + + glk_load_degamma_lut(state); + bdw_load_gamma_lut(state, 0); + + intel_state->gamma_mode = GAMMA_MODE_MODE_10BIT; + I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_10BIT); + POSTING_READ(GAMMA_MODE(pipe)); +} + /* Loads the palette/gamma unit for the CRTC on CherryView. */ static void cherryview_load_luts(struct drm_crtc_state *state) { @@ -561,6 +614,9 @@ void intel_color_init(struct drm_crtc *crtc) IS_BROXTON(dev_priv)) { dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_luts = broadwell_load_luts; + } else if (IS_GEMINILAKE(dev_priv)) { + dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; + dev_priv->display.load_luts = glk_load_luts; } else { dev_priv->display.load_luts = i9xx_load_luts; } From 2355cf088d469f65d180618c24400ded38895b92 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 27 Jan 2017 15:03:09 +0200 Subject: [PATCH 0057/1299] drm/i915: Create context desc template when context is created Move the invariant parts of context desc setup from execlist init to context creation. This is advantageous when we need to create different templates based on the context parametrization, ie. for svm capable contexts. v2: s/create/default, remove engine->ctx_desc_template Cc: Chris Wilson Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1485522189-31984-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_gem_context.c | 23 ++++++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 15 +++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 35 ++----------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 4 files changed, 38 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 77458da9627d..680105421bb9 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -236,6 +236,26 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) return 0; } +static u32 default_desc_template(const struct drm_i915_private *dev_priv) +{ + u32 desc; + + desc = GEN8_CTX_VALID | + GEN8_CTX_PRIVILEGE | + GEN8_CTX_ADDRESSING_MODE(dev_priv) << + GEN8_CTX_ADDRESSING_MODE_SHIFT; + + if (IS_GEN8(dev_priv)) + desc |= GEN8_CTX_L3LLC_COHERENT; + + /* TODO: WaDisableLiteRestore when we start using semaphore + * signalling between Command Streamers + * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; + */ + + return desc; +} + static struct i915_gem_context * __create_hw_context(struct drm_i915_private *dev_priv, struct drm_i915_file_private *file_priv) @@ -309,8 +329,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, i915_gem_context_set_bannable(ctx); ctx->ring_size = 4 * PAGE_SIZE; - ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) << - GEN8_CTX_ADDRESSING_MODE_SHIFT; + ctx->desc_template = default_desc_template(dev_priv); ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier); /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4322c67388f6..feefbee0a717 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3358,11 +3358,26 @@ enum { INTEL_LEGACY_64B_CONTEXT }; +enum { + FAULT_AND_HANG = 0, + FAULT_AND_HALT, /* Debug only */ + FAULT_AND_STREAM, + FAULT_AND_CONTINUE /* Unsupported */ +}; + +#define GEN8_CTX_VALID (1<<0) +#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) +#define GEN8_CTX_FORCE_RESTORE (1<<2) +#define GEN8_CTX_L3LLC_COHERENT (1<<5) +#define GEN8_CTX_PRIVILEGE (1<<8) #define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 #define GEN8_CTX_ADDRESSING_MODE(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\ INTEL_LEGACY_64B_CONTEXT : \ INTEL_LEGACY_32B_CONTEXT) +#define GEN8_CTX_ID_SHIFT 32 +#define GEN8_CTX_ID_WIDTH 21 + #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) #define CLK_CTL2_CZCOUNT_30NS_SHIFT 28 diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0e7b950bceac..0e99d53d5523 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -190,12 +190,6 @@ #define CTX_R_PWR_CLK_STATE 0x42 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 -#define GEN8_CTX_VALID (1<<0) -#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) -#define GEN8_CTX_FORCE_RESTORE (1<<2) -#define GEN8_CTX_L3LLC_COHERENT (1<<5) -#define GEN8_CTX_PRIVILEGE (1<<8) - #define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \ (reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \ (reg_state)[(pos)+1] = (val); \ @@ -212,14 +206,6 @@ reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \ } while (0) -enum { - FAULT_AND_HANG = 0, - FAULT_AND_HALT, /* Debug only */ - FAULT_AND_STREAM, - FAULT_AND_CONTINUE /* Unsupported */ -}; -#define GEN8_CTX_ID_SHIFT 32 -#define GEN8_CTX_ID_WIDTH 21 #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 @@ -267,21 +253,6 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enabl return 0; } -static void -logical_ring_init_platform_invariants(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - engine->ctx_desc_template = GEN8_CTX_VALID; - if (IS_GEN8(dev_priv)) - engine->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT; - engine->ctx_desc_template |= GEN8_CTX_PRIVILEGE; - - /* TODO: WaDisableLiteRestore when we start using semaphore - * signalling between Command Streamers */ - /* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */ -} - /** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context @@ -295,7 +266,7 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) * * This is what a descriptor looks like, from LSB to MSB:: * - * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) + * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) * bits 12-31: LRCA, GTT address of (the HWSP of) this context * bits 32-52: ctx ID, a globally unique tag * bits 53-54: mbz, reserved for use by hardware @@ -310,8 +281,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<desc_template; /* bits 3-4 */ - desc |= engine->ctx_desc_template; /* bits 0-11 */ + desc = ctx->desc_template; /* bits 0-11 */ desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE; /* bits 12-31 */ desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ @@ -1805,7 +1775,6 @@ logical_ring_setup(struct intel_engine_cs *engine) tasklet_init(&engine->irq_tasklet, intel_lrc_irq_handler, (unsigned long)engine); - logical_ring_init_platform_invariants(engine); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8e872730f8eb..b9c15cd40fbf 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -383,7 +383,6 @@ struct intel_engine_cs { struct rb_root execlist_queue; struct rb_node *execlist_first; unsigned int fw_domains; - u32 ctx_desc_template; /* Contexts are pinned whilst they are active on the GPU. The last * context executed remains active whilst the GPU is idle - the From ce64645d86ac5550559f3dca07fd9ba42166e196 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 27 Jan 2017 17:57:06 +0200 Subject: [PATCH 0058/1299] drm/i915: use variadic macros and arrays to choose port/pipe based registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows the use of more than 3 ports/pipes/whatever without tricks, even if the register offsets are not evenly spaced. There's the risk of out of bounds access if we're not careful; currently that would "just" lead to the wrong register offset being used. It might be possible to add build bug ons for build time constant indexing. We already have ports defined up to E, not sure if we might have bugs related to them and the current _PORT3() macro. text data bss dec hex filename 1239868 46199 4096 1290163 13afb3 drivers/gpu/drm/i915/i915.ko 1238828 46199 4096 1289123 13aba3 drivers/gpu/drm/i915/i915.ko Cc: Chris Wilson Cc: Ville Syrjälä Cc: Tvrtko Ursulin Reviewed-by: Chris Wilson Reviewed-by: Ville Syrjälä Acked-by: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485532626-20923-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index feefbee0a717..02a65ddae3a3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -48,6 +48,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG); } +#define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index]) + #define _PIPE(pipe, a, b) ((a) + (pipe)*((b)-(a))) #define _MMIO_PIPE(pipe, a, b) _MMIO(_PIPE(pipe, a, b)) #define _PLANE(plane, a, b) _PIPE(plane, a, b) @@ -56,14 +58,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_TRANS(tran, a, b) _MMIO(_TRANS(tran, a, b)) #define _PORT(port, a, b) ((a) + (port)*((b)-(a))) #define _MMIO_PORT(port, a, b) _MMIO(_PORT(port, a, b)) -#define _PIPE3(pipe, a, b, c) ((pipe) == PIPE_A ? (a) : \ - (pipe) == PIPE_B ? (b) : (c)) +#define _PIPE3(pipe, ...) _PICK(pipe, __VA_ARGS__) #define _MMIO_PIPE3(pipe, a, b, c) _MMIO(_PIPE3(pipe, a, b, c)) -#define _PORT3(port, a, b, c) ((port) == PORT_A ? (a) : \ - (port) == PORT_B ? (b) : (c)) +#define _PORT3(port, ...) _PICK(port, __VA_ARGS__) #define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PORT3(pipe, a, b, c)) -#define _PHY3(phy, a, b, c) ((phy) == DPIO_PHY0 ? (a) : \ - (phy) == DPIO_PHY1 ? (b) : (c)) +#define _PHY3(phy, ...) _PICK(phy, __VA_ARGS__) #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c)) #define _MASKED_FIELD(mask, value) ({ \ From 248a124d6fc0fde44b8fafc695ca4a2c24eb0856 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Jan 2017 10:44:56 +0000 Subject: [PATCH 0059/1299] drm/i915: Make intel_detect_preproduction_hw easier to extend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we add new generations, we should keep detecting new pre-production system development platforms that were temporarily enabled to facilitate initial development and now superseded by production systems. To make it easier to add more platforms, split the if into a series of logical operations. v2: s/sdv/pre/ - not all system development vehicles are for preproduction usage. Signed-off-by: Chris Wilson Cc: "Zanoni, Paulo R" Cc: Rodrigo Vivi Cc: Jani Nikula Cc: Ville Syrjälä Cc: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170130104458.2653-1-chris@chris-wilson.co.uk Acked-by: Jani Nikula Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3aa5bf58cf32..b960aa929070 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -779,8 +779,12 @@ static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) */ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) { - if (IS_HSW_EARLY_SDV(dev_priv) || - IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0)) + bool pre = false; + + pre |= IS_HSW_EARLY_SDV(dev_priv); + pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); + + if (pre) DRM_ERROR("This is a pre-production stepping. " "It may not be fully functional.\n"); } From 7c5ff4a2c588c89d00ab0e0dd61f44942233d63d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Jan 2017 10:44:57 +0000 Subject: [PATCH 0060/1299] drm/i915: Mark the kernel as tainted if we fail the preproduction check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Preproduction sdv are not supported beyond the release of production hardware, and continued use is ill-advised. Mark the kernel as tainted to reinforce the error. Signed-off-by: Chris Wilson Cc: "Zanoni, Paulo R" Cc: Rodrigo Vivi Cc: Jani Nikula Cc: Ville Syrjälä Cc: Matt Roper Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170130104458.2653-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b960aa929070..0f5dbd434991 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -784,9 +784,11 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) pre |= IS_HSW_EARLY_SDV(dev_priv); pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); - if (pre) + if (pre) { DRM_ERROR("This is a pre-production stepping. " "It may not be fully functional.\n"); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK); + } } /** From 0102ba1fd8af8c2719436eaadc743f940ab525c2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Jan 2017 10:44:58 +0000 Subject: [PATCH 0061/1299] drm/i915: Add early BXT sdv to the list of preproduction machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend intel_detect_preproduction_hw() to include BXT A and B steppings. Signed-off-by: Chris Wilson Cc: "Zanoni, Paulo R" Cc: Rodrigo Vivi Cc: Jani Nikula Cc: Ville Syrjälä Cc: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170130104458.2653-3-chris@chris-wilson.co.uk Acked-by: Jani Nikula Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 0f5dbd434991..0e99cb22362c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -783,6 +783,7 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) pre |= IS_HSW_EARLY_SDV(dev_priv); pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); + pre |= IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST); if (pre) { DRM_ERROR("This is a pre-production stepping. " From 4fa6053efdc7a30720fffddb9df353b0273ce17f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 29 Jan 2017 09:24:33 +0000 Subject: [PATCH 0062/1299] drm/i915: Record more information about the hanging contexts Include extra information such as the user_handle and hw_id so that userspace can identify which of their contexts hung, useful if they are performing self-diagnositics. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170129092433.10483-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 14 +++-- drivers/gpu/drm/i915/i915_gpu_error.c | 77 +++++++++++++++++---------- 2 files changed, 59 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 457129b64d79..448e5d85d1e0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -969,6 +969,16 @@ struct drm_i915_error_state { u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; struct intel_instdone instdone; + struct drm_i915_error_context { + char comm[TASK_COMM_LEN]; + pid_t pid; + u32 handle; + u32 hw_id; + int ban_score; + int active; + int guilty; + } context; + struct drm_i915_error_object { u64 gtt_offset; u64 gtt_size; @@ -1002,10 +1012,6 @@ struct drm_i915_error_state { u32 pp_dir_base; }; } vm_info; - - pid_t pid; - char comm[TASK_COMM_LEN]; - int context_bans; } engine[I915_NUM_ENGINES]; struct drm_i915_error_buffer { diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index e5375323eb06..5283fe815a4d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -384,6 +384,15 @@ static void error_print_request(struct drm_i915_error_state_buf *m, erq->head, erq->tail); } +static void error_print_context(struct drm_i915_error_state_buf *m, + const char *header, + struct drm_i915_error_context *ctx) +{ + err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n", + header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, + ctx->ban_score, ctx->guilty, ctx->active); +} + static void error_print_engine(struct drm_i915_error_state_buf *m, struct drm_i915_error_engine *ee) { @@ -457,6 +466,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, error_print_request(m, " ELSP[0]: ", &ee->execlist[0]); error_print_request(m, " ELSP[1]: ", &ee->execlist[1]); + error_print_context(m, " Active context: ", &ee->context); } void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) @@ -562,12 +572,12 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].hangcheck_stalled && - error->engine[i].pid != -1) { - err_printf(m, "Active process (on ring %s): %s [%d], context bans %d\n", + error->engine[i].context.pid) { + err_printf(m, "Active process (on ring %s): %s [%d], score %d\n", engine_str(i), - error->engine[i].comm, - error->engine[i].pid, - error->engine[i].context_bans); + error->engine[i].context.comm, + error->engine[i].context.pid, + error->engine[i].context.ban_score); } } err_printf(m, "Reset count: %u\n", error->reset_count); @@ -658,11 +668,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, obj = ee->batchbuffer; if (obj) { err_puts(m, dev_priv->engine[i]->name); - if (ee->pid != -1) - err_printf(m, " (submitted by %s [%d], bans %d)", - ee->comm, - ee->pid, - ee->context_bans); + if (ee->context.pid) + err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d)", + ee->context.comm, + ee->context.pid, + ee->context.handle, + ee->context.hw_id, + ee->context.ban_score); err_printf(m, " --- gtt_offset = 0x%08x %08x\n", upper_32_bits(obj->gtt_offset), lower_32_bits(obj->gtt_offset)); @@ -1267,6 +1279,28 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine, &ee->execlist[n]); } +static void record_context(struct drm_i915_error_context *e, + struct i915_gem_context *ctx) +{ + if (ctx->pid) { + struct task_struct *task; + + rcu_read_lock(); + task = pid_task(ctx->pid, PIDTYPE_PID); + if (task) { + strcpy(e->comm, task->comm); + e->pid = task->pid; + } + rcu_read_unlock(); + } + + e->handle = ctx->user_handle; + e->hw_id = ctx->hw_id; + e->ban_score = ctx->ban_score; + e->guilty = ctx->guilty_count; + e->active = ctx->active_count; +} + static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error) { @@ -1281,7 +1315,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct drm_i915_error_engine *ee = &error->engine[i]; struct drm_i915_gem_request *request; - ee->pid = -1; ee->engine_id = -1; if (!engine) @@ -1296,11 +1329,12 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, request = i915_gem_find_active_request(engine); if (request) { struct intel_ring *ring; - struct pid *pid; ee->vm = request->ctx->ppgtt ? &request->ctx->ppgtt->base : &ggtt->base; + record_context(&ee->context, request->ctx); + /* We need to copy these to an anonymous buffer * as the simplest method to avoid being overwritten * by userspace. @@ -1318,19 +1352,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, i915_error_object_create(dev_priv, request->ctx->engine[i].state); - pid = request->ctx->pid; - if (pid) { - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(pid, PIDTYPE_PID); - if (task) { - strcpy(ee->comm, task->comm); - ee->pid = task->pid; - } - rcu_read_unlock(); - } - error->simulated |= i915_gem_context_no_error_capture(request->ctx); @@ -1534,12 +1555,12 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, "GPU HANG: ecode %d:%d:0x%08x", INTEL_GEN(dev_priv), engine_id, ecode); - if (engine_id != -1 && error->engine[engine_id].pid != -1) + if (engine_id != -1 && error->engine[engine_id].context.pid) len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->engine[engine_id].comm, - error->engine[engine_id].pid); + error->engine[engine_id].context.comm, + error->engine[engine_id].context.pid); scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", reason: %s, action: %s", From 517f188c6be5bcc71f92c049d91a26539482a6f8 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Thu, 26 Jan 2017 13:20:13 +0800 Subject: [PATCH 0063/1299] drm/i915: noop forcewake get/put when vgpu activated For a virtualized GPU, the host maintains the forcewake state on the real device. As we don't control forcewake ourselves, we can simply set force_wake_get() and force_wake_put() to be no-ops. By setting the vfuncs, we adjust both the manual control of forcewake and around the mmio accessors (making our vgpu specific mmio routines redundant and to be removed in the next patch). Signed-off-by: Weinan Li Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1485408013-12780-1-git-send-email-weinan.z.li@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uncore.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index abe08885a5ba..da81874dd4a5 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -132,6 +132,13 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma } } +static void +vgpu_fw_domains_nop(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains) +{ + /* Guest driver doesn't need to takes care forcewake. */ +} + static void fw_domains_posting_read(struct drm_i915_private *dev_priv) { @@ -1375,6 +1382,11 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) FORCEWAKE, FORCEWAKE_ACK); } + if (intel_vgpu_active(dev_priv)) { + dev_priv->uncore.funcs.force_wake_get = vgpu_fw_domains_nop; + dev_priv->uncore.funcs.force_wake_put = vgpu_fw_domains_nop; + } + /* All future platforms are expected to require complex power gating */ WARN_ON(dev_priv->uncore.fw_domains == 0); } From 9b51b105f2a52a873107d7b28f0aca94e52ad7bd Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Thu, 26 Jan 2017 13:23:48 +0800 Subject: [PATCH 0064/1299] drm/i915: clean up unused vgpu_read/write Having converted the force_wake_get/_put routines for a vGPU to be no-op, we can use the common mmio accessors and remove our specialised routines that simply skipped the calls to control force_wake. Signed-off-by: Weinan Li Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1485408228-12932-1-git-send-email-weinan.z.li@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uncore.c | 58 ----------------------------- 1 file changed, 58 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index da81874dd4a5..3d243fefe09b 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1052,34 +1052,6 @@ __gen6_read(64) #undef GEN6_READ_FOOTER #undef GEN6_READ_HEADER -#define VGPU_READ_HEADER(x) \ - unsigned long irqflags; \ - u##x val = 0; \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags) - -#define VGPU_READ_FOOTER \ - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \ - trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \ - return val - -#define __vgpu_read(x) \ -static u##x \ -vgpu_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ - VGPU_READ_HEADER(x); \ - val = __raw_i915_read##x(dev_priv, reg); \ - VGPU_READ_FOOTER; \ -} - -__vgpu_read(8) -__vgpu_read(16) -__vgpu_read(32) -__vgpu_read(64) - -#undef __vgpu_read -#undef VGPU_READ_FOOTER -#undef VGPU_READ_HEADER - #define GEN2_WRITE_HEADER \ trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ assert_rpm_wakelock_held(dev_priv); \ @@ -1202,31 +1174,6 @@ __gen6_write(32) #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER -#define VGPU_WRITE_HEADER \ - unsigned long irqflags; \ - trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags) - -#define VGPU_WRITE_FOOTER \ - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags) - -#define __vgpu_write(x) \ -static void vgpu_write##x(struct drm_i915_private *dev_priv, \ - i915_reg_t reg, u##x val, bool trace) { \ - VGPU_WRITE_HEADER; \ - __raw_i915_write##x(dev_priv, reg, val); \ - VGPU_WRITE_FOOTER; \ -} - -__vgpu_write(8) -__vgpu_write(16) -__vgpu_write(32) - -#undef __vgpu_write -#undef VGPU_WRITE_FOOTER -#undef VGPU_WRITE_HEADER - #define ASSIGN_WRITE_MMIO_VFUNCS(x) \ do { \ dev_priv->uncore.funcs.mmio_writeb = x##_write8; \ @@ -1461,11 +1408,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 8) intel_shadow_table_check(); - if (intel_vgpu_active(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(vgpu); - ASSIGN_READ_MMIO_VFUNCS(vgpu); - } - i915_check_and_clear_faults(dev_priv); } #undef ASSIGN_WRITE_MMIO_VFUNCS From 4703b0472e126c715019a9671ea0fe38556114bb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 31 Jan 2017 10:46:30 +0000 Subject: [PATCH 0065/1299] drm/i915: Be defensive when cleaning up i915_gem_internal pages If we abort the i915_gem_internal get_pages, we mark the failing sg as the last. However, that means we iterate upto and including the failing sg element and results in us trying to free the unallocated sg_page(). Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170131104630.3074-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem_internal.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 17ce53d0d092..64d8fb3fd764 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -35,8 +35,10 @@ static void internal_free_pages(struct sg_table *st) { struct scatterlist *sg; - for (sg = st->sgl; sg; sg = __sg_next(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); + for (sg = st->sgl; sg; sg = __sg_next(sg)) { + if (sg_page(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + } sg_free_table(st); kfree(st); @@ -116,6 +118,7 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) return st; err: + sg_set_page(sg, NULL, 0, 0); sg_mark_end(sg); internal_free_pages(st); return ERR_PTR(-ENOMEM); From c88473878d47131ccfc67a00ba688d4d7d0f4519 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Jan 2017 16:55:30 +0000 Subject: [PATCH 0066/1299] drm/i915: Treat stolen memory as DMA addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The conversion of stolen to use phys_addr_t (from essentially u32) sparked an interesting discussion. We treat stolen memory as only accessible from the GPU (the DMA device) - an attempt to use it from the CPU will generate a MCE on gen6 onwards, although it is in theory a physical address that can be dereferenced from the CPU as demonstrated by earlier generations. As such, using phys_addr_t has the wrong connotations and as we pass the address into the DMA device via dma_addr_t (through the scatterlists used to program the GTT entries), we should treat it as dma_addr_t throughout. Signed-off-by: Chris Wilson Cc: Paulo Zanoni Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170127165531.28135-1-chris@chris-wilson.co.uk Reviewed-by: Paulo Zanoni --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 36 +++++++++++++------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 448e5d85d1e0..15f0fc48c442 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1461,7 +1461,7 @@ struct i915_gem_mm { struct work_struct free_work; /** Usable portion of the GTT for GEM */ - phys_addr_t stolen_base; /* limited to low memory (32-bit) */ + dma_addr_t stolen_base; /* limited to low memory (32-bit) */ /** PPGTT used for aliasing the PPGTT with the GTT */ struct i915_hw_ppgtt *aliasing_ppgtt; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 729dba017b78..a75de8c6ece0 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -79,12 +79,12 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->mm.stolen_lock); } -static phys_addr_t i915_stolen_to_physical(struct drm_i915_private *dev_priv) +static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; struct i915_ggtt *ggtt = &dev_priv->ggtt; struct resource *r; - phys_addr_t base; + dma_addr_t base; /* Almost universally we can find the Graphics Base of Stolen Memory * at register BSM (0x5c) in the igfx configuration space. On a few @@ -196,7 +196,7 @@ static phys_addr_t i915_stolen_to_physical(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) <= 4 && !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { struct { - phys_addr_t start, end; + dma_addr_t start, end; } stolen[2] = { { .start = base, .end = base + ggtt->stolen_size, }, { .start = base, .end = base + ggtt->stolen_size, }, @@ -228,12 +228,12 @@ static phys_addr_t i915_stolen_to_physical(struct drm_i915_private *dev_priv) if (stolen[0].start != stolen[1].start || stolen[0].end != stolen[1].end) { - phys_addr_t end = base + ggtt->stolen_size - 1; + dma_addr_t end = base + ggtt->stolen_size - 1; DRM_DEBUG_KMS("GTT within stolen memory at 0x%llx-0x%llx\n", (unsigned long long)ggtt_start, (unsigned long long)ggtt_end - 1); - DRM_DEBUG_KMS("Stolen memory adjusted to %pa - %pa\n", + DRM_DEBUG_KMS("Stolen memory adjusted to %pad - %pad\n", &base, &end); } } @@ -263,9 +263,9 @@ static phys_addr_t i915_stolen_to_physical(struct drm_i915_private *dev_priv) * range. Apparently this works. */ if (r == NULL && !IS_GEN3(dev_priv)) { - phys_addr_t end = base + ggtt->stolen_size; + dma_addr_t end = base + ggtt->stolen_size; - DRM_ERROR("conflict detected with stolen region: [%pa - %pa]\n", + DRM_ERROR("conflict detected with stolen region: [%pad - %pad]\n", &base, &end); base = 0; } @@ -285,13 +285,13 @@ void i915_gem_cleanup_stolen(struct drm_device *dev) } static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ? CTG_STOLEN_RESERVED : ELK_STOLEN_RESERVED); - phys_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + dma_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; @@ -308,7 +308,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -334,7 +334,7 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -354,7 +354,7 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -380,11 +380,11 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); - phys_addr_t stolen_top; + dma_addr_t stolen_top; stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; @@ -403,7 +403,7 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - phys_addr_t reserved_base, stolen_top; + dma_addr_t reserved_base, stolen_top; u32 reserved_total, reserved_size; u32 stolen_usable_start; @@ -419,7 +419,7 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) if (ggtt->stolen_size == 0) return 0; - dev_priv->mm.stolen_base = i915_stolen_to_physical(dev_priv); + dev_priv->mm.stolen_base = i915_stolen_to_dma(dev_priv); if (dev_priv->mm.stolen_base == 0) return 0; @@ -468,8 +468,8 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) if (reserved_base < dev_priv->mm.stolen_base || reserved_base + reserved_size > stolen_top) { - phys_addr_t reserved_top = reserved_base + reserved_size; - DRM_DEBUG_KMS("Stolen reserved area [%pa - %pa] outside stolen memory [%pa - %pa]\n", + dma_addr_t reserved_top = reserved_base + reserved_size; + DRM_DEBUG_KMS("Stolen reserved area [%pad - %pad] outside stolen memory [%pad - %pad]\n", &reserved_base, &reserved_top, &dev_priv->mm.stolen_base, &stolen_top); return 0; From 1692cd60d999b00a0491692dab0286e6011abd36 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Jan 2017 13:47:21 +0000 Subject: [PATCH 0067/1299] drm/i915: Sanity check the computed size and base of stolen memory Just do a quick check that the stolen memory address range doesn't overflow our chosen integer type. v2: Add add_overflows() to utils with the promise that gcc7 can do this better than C and then maybe it will have a proper definition in core. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170130134721.5159-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_stolen.c | 2 +- drivers/gpu/drm/i915/i915_utils.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index a75de8c6ece0..cba12ee90cbf 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -189,7 +189,7 @@ static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) base = tom - tseg_size - ggtt->stolen_size; } - if (base == 0) + if (base == 0 || add_overflows(base, ggtt->stolen_size)) return 0; /* make sure we don't clobber the GTT if it's within stolen memory */ diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 34020873e1f6..b8ba0f2f92af 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -25,6 +25,17 @@ #ifndef __I915_UTILS_H #define __I915_UTILS_H +#if GCC_VERSION >= 70000 +#define add_overflows(A, B) \ + __builtin_add_overflow_p((A), (B), (typeof((A) + (B)))0) +#else +#define add_overflows(A, B) ({ \ + typeof(A) a = (A); \ + typeof(B) b = (B); \ + a + b < a; \ +}) +#endif + #define range_overflows(start, size, max) ({ \ typeof(start) start__ = (start); \ typeof(size) size__ = (size); \ From 69aeafeae9b30d797c439a30d1a4ccc8dc5b0eb0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jan 2017 11:19:32 +0000 Subject: [PATCH 0068/1299] drm/i915: Flush untouched framebuffers before display on !llc On a non-llc system, the objects are created with .cache_level = CACHE_NONE and so the transition to uncached for scanout is a no-op. However, if the object was never written to, it will still be in the CPU domain (having been zeroed out by shmemfs). Those cachelines need to be flushed prior to display. Reported-and-tested-by: Vito Caputo Fixes: a6a7cc4b7db6 ("drm/i915: Always flush the dirty CPU cache when pinning the scanout") Signed-off-by: Chris Wilson Cc: # v4.10-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170109111932.6342-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2522d4895ff7..f3b5cb497921 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3519,7 +3519,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ - if (obj->cache_dirty) { + if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) { i915_gem_clflush_object(obj, true); intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); } From 1881a4234ef03751daf55b62b17e6bb0dbf7792a Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Wed, 25 Jan 2017 19:43:23 +0530 Subject: [PATCH 0069/1299] drm/i915: Add MIPI_IO WA and program DSI regulators Enable MIPI IO WA for BXT DSI as per bspec and program the DSI regulators. v2: Moved IO enable to pre-enable as per Mika's review comments. Also reused the existing register definition for BXT_P_CR_GT_DISP_PWRON. v3: Added Programming the DSI regulators as per disable/enable sequences. v4: Restricting regulator changes to BXT as suggested by Jani/Mika v5: Removed redundant read/modify for regulator register as per Jani's comment. Maintain enable/disable symmetry as per spec. Signed-off-by: Uma Shankar Signed-off-by: Vidya Srinivas Acked-by: Mika Kahola Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485353603-11260-1-git-send-email-vidya.srinivas@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 7 +++++++ drivers/gpu/drm/i915/intel_dsi.c | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 02a65ddae3a3..a9538ae81673 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1552,6 +1552,7 @@ enum skl_disp_power_wells { _MMIO(_BXT_PHY_CH(phy, ch, reg_ch0, reg_ch1)) #define BXT_P_CR_GT_DISP_PWRON _MMIO(0x138090) +#define MIPIO_RST_CTRL (1 << 2) #define _BXT_PHY_CTL_DDI_A 0x64C00 #define _BXT_PHY_CTL_DDI_B 0x64C10 @@ -8361,6 +8362,12 @@ enum { #define _BXT_MIPIC_PORT_CTRL 0x6B8C0 #define BXT_MIPI_PORT_CTRL(tc) _MMIO_MIPI(tc, _BXT_MIPIA_PORT_CTRL, _BXT_MIPIC_PORT_CTRL) +#define BXT_P_DSI_REGULATOR_CFG _MMIO(0x160020) +#define STAP_SELECT (1 << 0) + +#define BXT_P_DSI_REGULATOR_TX_CTRL _MMIO(0x160054) +#define HS_IO_CTRL_SELECT (1 << 0) + #define DPI_ENABLE (1 << 31) /* A + C */ #define MIPIA_MIPI4DPHY_DELAY_COUNT_SHIFT 27 #define MIPIA_MIPI4DPHY_DELAY_COUNT_MASK (0xf << 27) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 16732e7bc08e..c98234eca2a6 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -548,6 +548,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; + u32 val; DRM_DEBUG_KMS("\n"); @@ -558,6 +559,17 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, intel_disable_dsi_pll(encoder); intel_enable_dsi_pll(encoder, pipe_config); + if (IS_BROXTON(dev_priv)) { + /* Add MIPI IO reset programming for modeset */ + val = I915_READ(BXT_P_CR_GT_DISP_PWRON); + I915_WRITE(BXT_P_CR_GT_DISP_PWRON, + val | MIPIO_RST_CTRL); + + /* Power up DSI regulator */ + I915_WRITE(BXT_P_DSI_REGULATOR_CFG, STAP_SELECT); + I915_WRITE(BXT_P_DSI_REGULATOR_TX_CTRL, 0); + } + intel_dsi_prepare(encoder, pipe_config); /* Panel Enable over CRC PMIC */ @@ -707,6 +719,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + u32 val; DRM_DEBUG_KMS("\n"); @@ -714,6 +727,17 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, intel_dsi_clear_device_ready(encoder); + if (IS_BROXTON(dev_priv)) { + /* Power down DSI regulator to save power */ + I915_WRITE(BXT_P_DSI_REGULATOR_CFG, STAP_SELECT); + I915_WRITE(BXT_P_DSI_REGULATOR_TX_CTRL, HS_IO_CTRL_SELECT); + + /* Add MIPI IO reset programming for modeset */ + val = I915_READ(BXT_P_CR_GT_DISP_PWRON); + I915_WRITE(BXT_P_CR_GT_DISP_PWRON, + val & ~MIPIO_RST_CTRL); + } + intel_disable_dsi_pll(encoder); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { From e2de845e0b84ebb7e910bb898febbf813b7df483 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Feb 2017 12:53:38 +0000 Subject: [PATCH 0070/1299] drm/i915/execlists: Skip resetting RING_CONTEXT_STATUS_PTR As we now flag when the GPU signals a context-switch and do not read the status register before we see that signal, we do not have to ensure that it is cleared upon reset (and can leave it to the GPU to reset it from the power context). Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170201125338.12932-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0e99d53d5523..753458452997 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1317,7 +1317,6 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) static void reset_common_ring(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - struct drm_i915_private *dev_priv = engine->i915; struct execlist_port *port = engine->execlist_port; struct intel_context *ce = &request->ctx->engine[engine->id]; @@ -1344,7 +1343,6 @@ static void reset_common_ring(struct intel_engine_cs *engine, return; /* Catch up with any missed context-switch interrupts */ - I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0)); if (request->ctx != port[0].request->ctx) { i915_gem_request_put(port[0].request); port[0] = port[1]; From 453cfe21711f644e744e834d5c61042b2fac218d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Feb 2017 13:12:22 +0000 Subject: [PATCH 0071/1299] drm/i915/execlists: Add interrupt-pending check to intel_execlists_idle() Primarily this serves as a sanity check that the bit has been cleared before we suspend (and hasn't reappeared after resume). Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170201131222.11882-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 753458452997..44a92ea464ba 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -508,9 +508,15 @@ bool intel_execlists_idle(struct drm_i915_private *dev_priv) if (!i915.enable_execlists) return true; - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + /* Interrupt/tasklet pending? */ + if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) + return false; + + /* Both ports drained, no more ELSP submission? */ if (!execlists_elsp_idle(engine)) return false; + } return true; } From a667fb402c1e856209bf9e77ba41fc1cf356b867 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 15 Dec 2016 15:29:44 +0100 Subject: [PATCH 0072/1299] drm/i915: Disable all crtcs during driver unload, v2. We may keep the crtc's enabled when userspace unsets all framebuffers but keeps the crtc active. This exposes a WARN in fbc_global disable, and a lot of bugs in our hardware readout code. Solve this by disabling all crtc's for now. Changes since v1: - Use lock_all_ctx instead of lock_all. Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1481812185-19098-4-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 0e99cb22362c..0667f480df97 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -43,6 +43,7 @@ #include #include +#include #include #include "i915_drv.h" @@ -1307,6 +1308,8 @@ void i915_driver_unload(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = dev_priv->drm.pdev; + struct drm_modeset_acquire_ctx ctx; + int ret; intel_fbdev_fini(dev); @@ -1315,6 +1318,24 @@ void i915_driver_unload(struct drm_device *dev) intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + drm_modeset_acquire_init(&ctx, 0); + while (1) { + ret = drm_modeset_lock_all_ctx(dev, &ctx); + if (!ret) + ret = drm_atomic_helper_disable_all(dev, &ctx); + + if (ret != -EDEADLK) + break; + + drm_modeset_backoff(&ctx); + } + + if (ret) + DRM_ERROR("Disabling all crtc's during unload failed with %i\n", ret); + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + i915_driver_unregister(dev_priv); drm_vblank_cleanup(dev); From 37255d8d3f4d0c88fc9f60f50bb4c3c30e6688cc Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 15 Dec 2016 15:29:43 +0100 Subject: [PATCH 0073/1299] drm/i915: Fix POWER_DOMAIN_AUDIO refcounting. If the crtc was brought up with audio before the driver loads, then crtc_disable will remove a refcount to audio that doesn't exist before. Fortunately we already set power domains on readout, so we can just add the power domain handling to get_crtc_power_domains, which will update the power domains correctly in all cases. This was found when testing module reload on CI with the crtc enabled, which resulted in the following warn after module reload + modeset: [ 24.197041] ------------[ cut here ]------------ [ 24.197075] WARNING: CPU: 0 PID: 99 at drivers/gpu/drm/i915/intel_runtime_pm.c:1790 intel_display_power_put+0x134/0x140 [i915] [ 24.197076] Use count on domain AUDIO is already zero [ 24.197098] CPU: 0 PID: 99 Comm: kworker/u8:2 Not tainted 4.9.0-CI-Trybot_393+ #1 [ 24.197099] Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0042.2016.0409.1246 04/09/2016 [ 24.197102] Workqueue: events_unbound async_run_entry_fn [ 24.197105] ffffc900003c7688 ffffffff81435b35 ffffc900003c76d8 0000000000000000 [ 24.197107] ffffc900003c76c8 ffffffff8107e4d6 000006fe5dc36f28 ffff88025dc30054 [ 24.197109] ffff88025dc36f28 ffff88025dc30000 ffff88025dc30000 0000000000000015 [ 24.197110] Call Trace: [ 24.197113] [] dump_stack+0x67/0x92 [ 24.197116] [] __warn+0xc6/0xe0 [ 24.197118] [] warn_slowpath_fmt+0x4a/0x50 [ 24.197149] [] intel_display_power_put+0x134/0x140 [i915] [ 24.197187] [] intel_disable_ddi+0x4d/0x80 [i915] [ 24.197223] [] intel_encoders_disable.isra.74+0x7f/0x90 [i915] [ 24.197257] [] haswell_crtc_disable+0x55/0x170 [i915] [ 24.197292] [] intel_atomic_commit_tail+0x108/0xfd0 [i915] [ 24.197295] [] ? __lock_is_held+0x66/0x90 [ 24.197330] [] intel_atomic_commit+0x429/0x560 [i915] [ 24.197332] [] ?drm_atomic_add_affected_connectors+0x56/0xf0 [ 24.197334] [] drm_atomic_commit+0x46/0x50 [ 24.197336] [] restore_fbdev_mode+0x147/0x270 [ 24.197337] [] drm_fb_helper_restore_fbdev_mode_unlocked+0x2e/0x70 [ 24.197339] [] drm_fb_helper_set_par+0x28/0x50 [ 24.197374] [] intel_fbdev_set_par+0x13/0x70 [i915] [ 24.197376] [] fbcon_init+0x57a/0x600 [ 24.197379] [] visual_init+0xd1/0x130 [ 24.197381] [] do_bind_con_driver+0x1bc/0x3a0 [ 24.197384] [] do_take_over_console+0x111/0x180 [ 24.197386] [] do_fbcon_takeover+0x52/0xb0 [ 24.197387] [] fbcon_event_notify+0x723/0x850 [ 24.197390] [] ?__blocking_notifier_call_chain+0x30/0x70 [ 24.197392] [] notifier_call_chain+0x34/0xa0 [ 24.197394] [] __blocking_notifier_call_chain+0x48/0x70 [ 24.197397] [] blocking_notifier_call_chain+0x11/0x20 [ 24.197398] [] fb_notifier_call_chain+0x16/0x20 [ 24.197400] [] register_framebuffer+0x24c/0x330 [ 24.197402] [] drm_fb_helper_initial_config+0x219/0x3c0 [ 24.197436] [] intel_fbdev_initial_config+0x13/0x30 [i915] [ 24.197438] [] async_run_entry_fn+0x34/0x140 [ 24.197440] [] process_one_work+0x1ec/0x6b0 [ 24.197442] [] ? process_one_work+0x166/0x6b0 [ 24.197445] [] worker_thread+0x49/0x490 [ 24.197447] [] ? process_one_work+0x6b0/0x6b0 [ 24.197448] [] kthread+0xeb/0x110 [ 24.197451] [] ? kthread_park+0x60/0x60 [ 24.197453] [] ret_from_fork+0x27/0x40 [ 24.197476] ---[ end trace bda64b683b8e8162 ]--- Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1481812185-19098-3-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 14 ++------------ drivers/gpu/drm/i915/intel_display.c | 4 ++++ drivers/gpu/drm/i915/intel_dp_mst.c | 9 ++------- 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 9a9a670f8549..d957211c76ad 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1835,8 +1835,6 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, struct drm_connector_state *conn_state) { struct drm_encoder *encoder = &intel_encoder->base; - struct drm_crtc *crtc = encoder->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(encoder->dev); enum port port = intel_ddi_get_encoder_port(intel_encoder); int type = intel_encoder->type; @@ -1863,10 +1861,8 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, intel_edp_drrs_enable(intel_dp, pipe_config); } - if (intel_crtc->config->has_audio) { - intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); + if (pipe_config->has_audio) intel_audio_codec_enable(intel_encoder, pipe_config, conn_state); - } } static void intel_disable_ddi(struct intel_encoder *intel_encoder, @@ -1874,16 +1870,10 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder, struct drm_connector_state *old_conn_state) { struct drm_encoder *encoder = &intel_encoder->base; - struct drm_crtc *crtc = encoder->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int type = intel_encoder->type; - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - if (intel_crtc->config->has_audio) { + if (old_crtc_state->has_audio) intel_audio_codec_disable(intel_encoder); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO); - } if (type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b44e9466d394..88689a0b4183 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5730,6 +5730,7 @@ static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_encoder *encoder; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; @@ -5751,6 +5752,9 @@ static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, mask |= BIT(intel_display_port_power_domain(intel_encoder)); } + if (HAS_DDI(dev_priv) && crtc_state->has_audio) + mask |= BIT(POWER_DOMAIN_AUDIO); + if (crtc_state->shared_dpll) mask |= BIT(POWER_DOMAIN_PLLS); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 95267fcef71b..6a85d388f936 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -92,7 +92,6 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, struct intel_dp *intel_dp = &intel_dig_port->dp; struct intel_connector *connector = to_intel_connector(old_conn_state->connector); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int ret; DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); @@ -103,10 +102,8 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, if (ret) { DRM_ERROR("failed to update payload %d\n", ret); } - if (old_crtc_state->has_audio) { + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO); - } } static void intel_mst_post_disable_dp(struct intel_encoder *encoder, @@ -219,10 +216,8 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder, ret = drm_dp_check_act_status(&intel_dp->mst_mgr); ret = drm_dp_update_payload_part2(&intel_dp->mst_mgr); - if (pipe_config->has_audio) { - intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); + if (pipe_config->has_audio) intel_audio_codec_enable(encoder, pipe_config, conn_state); - } } static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, From 5584f1b1d73e9cc95092734c316e467c6c4468f9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 2 Feb 2017 10:47:11 +0100 Subject: [PATCH 0074/1299] drm/i915: fix i915 running as dom0 under Xen Commit 920cf4194954ec ("drm/i915: Introduce an internal allocator for disposable private objects") introduced a regression for the kernel running as Xen dom0: when switching to graphics mode a GPU HANG occurred. Reason seems to be a missing adaption similar to that done in commit 7453c549f5f648 ("swiotlb: Export swiotlb_max_segment to users") to i915_gem_object_get_pages_internal(). So limit the maximum page order to be used according to the maximum swiotlb segment size instead to the complete swiotlb size. Fixes: 920cf4194954 ("drm/i915: Introduce an internal allocator for disposable private objects") Signed-off-by: Juergen Gross Link: http://patchwork.freedesktop.org/patch/msgid/20170202094711.939-1-jgross@suse.com Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.10-rc1+ Reviewed-by: Tvrtko Ursulin Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_internal.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 64d8fb3fd764..2b9d5e94a8ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -68,8 +68,16 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) max_order = MAX_ORDER; #ifdef CONFIG_SWIOTLB - if (swiotlb_nr_tbl()) /* minimum max swiotlb size is IO_TLB_SEGSIZE */ - max_order = min(max_order, ilog2(IO_TLB_SEGPAGES)); + if (swiotlb_nr_tbl()) { + unsigned int max_segment; + + max_segment = swiotlb_max_segment(); + if (max_segment) { + max_segment = max_t(unsigned int, max_segment, + PAGE_SIZE) >> PAGE_SHIFT; + max_order = min(max_order, ilog2(max_segment)); + } + } #endif gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; From 170b03c4a88298e71f3a76ab79f6895f6949f3a1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Jan 2017 10:11:02 +0000 Subject: [PATCH 0075/1299] drm/i915: Build DRM range manager selftests for CI Build the struct drm_mm selftests so that we can trivially run them within our CI. "Enable debug, become developer." - Joonas Lahtinen Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170125101102.9010-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 597648c7a645..598551dbf62c 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -24,6 +24,7 @@ config DRM_I915_DEBUG select X86_MSR # used by igt/pm_rpm select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y + select DRM_DEBUG_MM_SELFTEST select DRM_I915_SW_FENCE_DEBUG_OBJECTS default n help From 3aac4acb89710fe782c9e78e7b1febf76e112c6c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 1 Feb 2017 15:46:09 +0200 Subject: [PATCH 0076/1299] drm/i915: don't warn about Skylake CPU - KabyPoint PCH combo Apparently there are machines out there with Skylake CPU and KabyPoint PCH. Judging from our driver code, there doesn't seem to be any code paths that would do anything different between SunrisePoint and KabyPoint PCHs, so it would seem okay to accept the combo without warnings. Fixes: 22dea0be50b2 ("drm/i915: Introduce Kabypoint PCH for Kabylake H/DT.") References: https://lists.freedesktop.org/archives/intel-gfx/2017-February/118611.html Reported-by: Rainer Koenig Cc: Rainer Koenig Cc: Rodrigo Vivi Cc: # v4.8+ Reviewed-by: Rodrigo Vivi Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485956769-26015-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 0667f480df97..276676b2bbc8 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -214,7 +214,8 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) } else if (id == INTEL_PCH_KBP_DEVICE_ID_TYPE) { dev_priv->pch_type = PCH_KBP; DRM_DEBUG_KMS("Found KabyPoint PCH\n"); - WARN_ON(!IS_KABYLAKE(dev_priv)); + WARN_ON(!IS_SKYLAKE(dev_priv) && + !IS_KABYLAKE(dev_priv)); } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) || (id == INTEL_PCH_P3X_DEVICE_ID_TYPE) || ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) && From bb96dcf5830e5d81a1da2e2a14e6c0f7dfc64348 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Feb 2017 13:27:21 +0000 Subject: [PATCH 0077/1299] drm/i915: Recreate internal objects with single page segments if dmar fails If we fail to dma-map the object, the most common cause is lack of space inside the SW-IOTLB due to fragmentation. If we recreate the_sg_table using segments of PAGE_SIZE (and single page allocations), we may succeed in remapping the scatterlist. First became a significant problem for the mock selftests after commit 5584f1b1d73e ("drm/i915: fix i915 running as dom0 under Xen") increased the max_order. Fixes: 920cf4194954 ("drm/i915: Introduce an internal allocator for disposable private objects") Fixes: 5584f1b1d73e ("drm/i915: fix i915 running as dom0 under Xen") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170202132721.12711-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin Cc: # v4.10-rc1+ --- drivers/gpu/drm/i915/i915_gem_internal.c | 37 +++++++++++++++--------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 2b9d5e94a8ae..fc950abbe400 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -48,24 +48,12 @@ static struct sg_table * i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - unsigned int npages = obj->base.size / PAGE_SIZE; struct sg_table *st; struct scatterlist *sg; + unsigned int npages; int max_order; gfp_t gfp; - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return ERR_PTR(-ENOMEM); - - if (sg_alloc_table(st, npages, GFP_KERNEL)) { - kfree(st); - return ERR_PTR(-ENOMEM); - } - - sg = st->sgl; - st->nents = 0; - max_order = MAX_ORDER; #ifdef CONFIG_SWIOTLB if (swiotlb_nr_tbl()) { @@ -87,6 +75,20 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) gfp |= __GFP_DMA32; } +create_st: + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + npages = obj->base.size / PAGE_SIZE; + if (sg_alloc_table(st, npages, GFP_KERNEL)) { + kfree(st); + return ERR_PTR(-ENOMEM); + } + + sg = st->sgl; + st->nents = 0; + do { int order = min(fls(npages) - 1, max_order); struct page *page; @@ -114,8 +116,15 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) sg = __sg_next(sg); } while (1); - if (i915_gem_gtt_prepare_pages(obj, st)) + if (i915_gem_gtt_prepare_pages(obj, st)) { + /* Failed to dma-map try again with single page sg segments */ + if (get_order(st->sgl->length)) { + internal_free_pages(st); + max_order = 0; + goto create_st; + } goto err; + } /* Mark the pages as dontneed whilst they are still pinned. As soon * as they are unpinned they are allowed to be reaped by the shrinker, From e32e836afe39691848270f1ac2a9d0e09ad173fd Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 2 Feb 2017 14:55:00 +0000 Subject: [PATCH 0078/1299] drm/i915: remove 512GB allocation warning Now that we have selftests in place exercising truly huge allocations we will start to hit the 512GB warning, so now seems like a good time to remove this user-triggerable WARN. Cc: Joonas Lahtinen Cc: Chris Wilson Signed-off-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/1486047300-13198-1-git-send-email-matthew.auld@intel.com Reviewed-by: Joonas Lahtinen Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 048040efc3f0..c567b34800cf 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1423,10 +1423,6 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, if (ret) return ret; - WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, - "The allocation has spanned more than 512GB. " - "It is highly likely this is incorrect."); - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { WARN_ON(!pdp); From 0aab2c721d81590012a5021a516f00666646741f Mon Sep 17 00:00:00 2001 From: "Lee, Shawn C" Date: Fri, 3 Feb 2017 12:32:09 +0800 Subject: [PATCH 0079/1299] drm/i915/bxt: Add MST support when do DPLL calculation Add the missing INTEL_OUTPUT_DP_MST case in bxt_get_dpll() to correctly initialize the crtc_state and port plls when link training a DP MST monitor on BXT/APL devices. Fixes: a277ca7dc01d ("drm/i915: Split bxt_ddi_pll_select()") Bugs: https://bugs.freedesktop.org/show_bug.cgi?id=99572 Reviewed-by: Cooper Chiou Reviewed-by: Gary C Wang Reviewed-by: Ciobanu, Nathan D Reviewed-by: Herbert, Marc Reviewed-by: Bride, Jim Reviewed-by: Navare, Manasi D Cc: Jani Nikula Cc: # v4.9+ Signed-off-by: Lee, Shawn C Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1486096329-6255-1-git-send-email-shawn.c.lee@intel.com --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 368eff93a0a6..79f656ca593d 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -1855,7 +1855,8 @@ bxt_get_dpll(struct intel_crtc *crtc, return NULL; if ((encoder->type == INTEL_OUTPUT_DP || - encoder->type == INTEL_OUTPUT_EDP) && + encoder->type == INTEL_OUTPUT_EDP || + encoder->type == INTEL_OUTPUT_DP_MST) && !bxt_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state)) return NULL; From 52da22e7aba155be238faff4f6e97b2eb9de64f3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Feb 2017 10:56:52 +0000 Subject: [PATCH 0080/1299] drm/i915: Reject set-tiling-ioctl with stride==0 and a tiling mode In commit 957870f93412 ("drm/i915: Split out i915_gem_object_set_tiling()"), I swapped an alignment check for IS_ALIGNED and in the process removed the less-than check. That check turns out to be important as it was the only rejection for stride == 0. Tvrtko did spot it, but I was overconfident in the IS_ALIGNED() conversion. Fixes: 957870f93412 ("drm/i915: Split out i915_gem_object_set_tiling()") Testcase: igt/gem_tiling_max_stride Signed-off-by: Chris Wilson Cc: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170203105652.27819-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index b1361cfd4c5c..974ac08df473 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -173,7 +173,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, else tile_width = 512; - if (!IS_ALIGNED(stride, tile_width)) + if (!stride || !IS_ALIGNED(stride, tile_width)) return false; /* 965+ just needs multiples of tile width */ From a3a1e5336c41ab3763c64152df5f868cb91d20bf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Feb 2017 11:50:35 +0000 Subject: [PATCH 0081/1299] drm/i915: Tidy the tail of i915_tiling_ok() The current tail breaks the pattern of if (check) return false, which can catch the reader out. If we move the gen2/3 power-of-two test into the earlier gen2/3 branch, we can eliminate the contrary tail. Suggested-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170203115036.24743-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_tiling.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 974ac08df473..152bed2dcb11 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -158,6 +158,9 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, if (stride > 8192) return false; + if (!is_power_of_2(stride)) + return false; + if (IS_GEN3(i915)) { if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 20) return false; @@ -176,12 +179,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, if (!stride || !IS_ALIGNED(stride, tile_width)) return false; - /* 965+ just needs multiples of tile width */ - if (INTEL_GEN(i915) >= 4) - return true; - - /* Pre-965 needs power of two tile widths */ - return is_power_of_2(stride); + return true; } static bool i915_vma_fence_prepare(struct i915_vma *vma, From 955b8e9ca8b924f8c21d43797f8bf8f39908efd3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Feb 2017 11:46:00 +0000 Subject: [PATCH 0082/1299] drm/i915: Allow large objects to be tiled on gen2/3 We now have partial VMA support to break large objects into fence sized regions and no longer have to restrict tiling to small objects on gen2/3 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170203115036.24743-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_tiling.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 152bed2dcb11..8b2b507bdf7e 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -160,14 +160,6 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, if (!is_power_of_2(stride)) return false; - - if (IS_GEN3(i915)) { - if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 20) - return false; - } else { - if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 19) - return false; - } } if (IS_GEN2(i915) || From 4a04e371228937d104fd03d5219f8c25fda29757 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 3 Feb 2017 14:45:29 -0800 Subject: [PATCH 0083/1299] drm/i915: fix pm refcounting on fence error in execbuf Fences are creted/checked before the pm ref is taken, so if we jump to pre_mutex_err we will uncorrectly call intel_runtime_pm_put. v2: Massage unwind error paths Fixes: fec0445caa27 (drm/i915: Support explicit fencing for execbuf) Testcase: igt/gem_exec_params Cc: Chris Wilson Signed-off-by: Daniele Ceraolo Spurio Link: http://patchwork.freedesktop.org/patch/msgid/1486161930-11764-1-git-send-email-daniele.ceraolospurio@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 91c2393199a3..26c26a6675df 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1644,18 +1644,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args->flags & I915_EXEC_FENCE_IN) { in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); - if (!in_fence) { - ret = -EINVAL; - goto pre_mutex_err; - } + if (!in_fence) + return -EINVAL; } if (args->flags & I915_EXEC_FENCE_OUT) { out_fence_fd = get_unused_fd_flags(O_CLOEXEC); if (out_fence_fd < 0) { ret = out_fence_fd; - out_fence_fd = -1; - goto pre_mutex_err; + goto err_in_fence; } } @@ -1878,6 +1875,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, intel_runtime_pm_put(dev_priv); if (out_fence_fd != -1) put_unused_fd(out_fence_fd); +err_in_fence: dma_fence_put(in_fence); return ret; } From ccfceda22cc018ce78c8523c102937c39272e543 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 3 Feb 2017 17:23:29 -0800 Subject: [PATCH 0084/1299] drm/i915: refactor register fw read/write macros for recent GENs The only difference for the more recent of those macros is the version of the *_reg__fw_domains function. Passing the function prefix in allows us to re-use the same macro to generate functions for different GENs and will make it easier to add new accessors in the future Cc: Tvrtko Ursulin Signed-off-by: Daniele Ceraolo Spurio Link: http://patchwork.freedesktop.org/patch/msgid/1486171409-21542-1-git-send-email-daniele.ceraolospurio@intel.com Reviewed-by: Tvrtko Ursulin Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uncore.c | 40 ++++++++--------------------- 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 3d243fefe09b..1ff8fd9911d7 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -992,29 +992,19 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, ___force_wake_auto(dev_priv, fw_domains); } -#define __gen6_read(x) \ +#define __gen_read(func, x) \ static u##x \ -gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ +func##_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ enum forcewake_domains fw_engine; \ GEN6_READ_HEADER(x); \ - fw_engine = __gen6_reg_read_fw_domains(offset); \ - if (fw_engine) \ - __force_wake_auto(dev_priv, fw_engine); \ - val = __raw_i915_read##x(dev_priv, reg); \ - GEN6_READ_FOOTER; \ -} - -#define __fwtable_read(x) \ -static u##x \ -fwtable_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ - enum forcewake_domains fw_engine; \ - GEN6_READ_HEADER(x); \ - fw_engine = __fwtable_reg_read_fw_domains(offset); \ + fw_engine = __##func##_reg_read_fw_domains(offset); \ if (fw_engine) \ __force_wake_auto(dev_priv, fw_engine); \ val = __raw_i915_read##x(dev_priv, reg); \ GEN6_READ_FOOTER; \ } +#define __gen6_read(x) __gen_read(gen6, x) +#define __fwtable_read(x) __gen_read(fwtable, x) #define __gen9_decoupled_read(x) \ static u##x \ @@ -1115,29 +1105,19 @@ gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool GEN6_WRITE_FOOTER; \ } -#define __gen8_write(x) \ +#define __gen_write(func, x) \ static void \ -gen8_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ +func##_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ enum forcewake_domains fw_engine; \ GEN6_WRITE_HEADER; \ - fw_engine = __gen8_reg_write_fw_domains(offset); \ - if (fw_engine) \ - __force_wake_auto(dev_priv, fw_engine); \ - __raw_i915_write##x(dev_priv, reg, val); \ - GEN6_WRITE_FOOTER; \ -} - -#define __fwtable_write(x) \ -static void \ -fwtable_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ - enum forcewake_domains fw_engine; \ - GEN6_WRITE_HEADER; \ - fw_engine = __fwtable_reg_write_fw_domains(offset); \ + fw_engine = __##func##_reg_write_fw_domains(offset); \ if (fw_engine) \ __force_wake_auto(dev_priv, fw_engine); \ __raw_i915_write##x(dev_priv, reg, val); \ GEN6_WRITE_FOOTER; \ } +#define __gen8_write(x) __gen_write(gen8, x) +#define __fwtable_write(x) __gen_write(fwtable, x) #define __gen9_decoupled_write(x) \ static void \ From 28b6def6ec6408e08970a0f1107b483961bfc7ae Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 6 Feb 2017 10:23:13 +0100 Subject: [PATCH 0085/1299] drm/i915: Update DRIVER_DATE to 20170206 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bd5a38be5b83..1e0d9561fd67 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,8 +79,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170123" -#define DRIVER_TIMESTAMP 1485156432 +#define DRIVER_DATE "20170206" +#define DRIVER_TIMESTAMP 1486372993 #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ From ba318c61a9719577b6f451c055f364e4116874b2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Feb 2017 20:47:41 +0000 Subject: [PATCH 0086/1299] drm/i915: Drain the freed state from the tail of the next commit If we have any residual freed atomic state from earlier commits, flush the freed list after performing the current modeset. This prevents the freed list from ever-growing if userspace manages to starve the kernel threads (i.e. we are never able to run our free state worker and eventually the system may even oom). Fixes: eb955eee27d9 ("drm/i915: Move atomic state free from out of fence release") Testcase: igt/kms_cursor/legacy/all-pipes-single-bo Reported-by: Maarten Lankhorst Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170202204741.18231-1-chris@chris-wilson.co.uk Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_display.c | 34 +++++++++++++++++----------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 88689a0b4183..45e587496886 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14395,6 +14395,24 @@ static void skl_update_crtcs(struct drm_atomic_state *state, } while (progress); } +static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) +{ + struct intel_atomic_state *state, *next; + struct llist_node *freed; + + freed = llist_del_all(&dev_priv->atomic_helper.free_list); + llist_for_each_entry_safe(state, next, freed, freed) + drm_atomic_state_put(&state->base); +} + +static void intel_atomic_helper_free_state_worker(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), atomic_helper.free_work); + + intel_atomic_helper_free_state(dev_priv); +} + static void intel_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; @@ -14561,6 +14579,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * can happen also when the device is completely off. */ intel_uncore_arm_unclaimed_mmio_detection(dev_priv); + + intel_atomic_helper_free_state(dev_priv); } static void intel_atomic_commit_work(struct work_struct *work) @@ -16615,18 +16635,6 @@ static void sanitize_watermarks(struct drm_device *dev) drm_modeset_acquire_fini(&ctx); } -static void intel_atomic_helper_free_state(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), atomic_helper.free_work); - struct intel_atomic_state *state, *next; - struct llist_node *freed; - - freed = llist_del_all(&dev_priv->atomic_helper.free_list); - llist_for_each_entry_safe(state, next, freed, freed) - drm_atomic_state_put(&state->base); -} - int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -16647,7 +16655,7 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = &intel_mode_funcs; INIT_WORK(&dev_priv->atomic_helper.free_work, - intel_atomic_helper_free_state); + intel_atomic_helper_free_state_worker); intel_init_quirks(dev); From 22cc440eae52d805ced3b9c3fcd42713d748140f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 4 Feb 2017 11:05:19 +0000 Subject: [PATCH 0087/1299] drm/i915: Print execlists restart after reset After resetting, show the requests that each engine restarts from in the debug log. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170204110519.7645-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 44a92ea464ba..754f77c394fb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1254,6 +1254,11 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } +static u32 port_seqno(struct execlist_port *port) +{ + return port->request ? port->request->global_seqno : 0; +} + static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1279,6 +1284,10 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) /* After a GPU reset, we may have requests to replay */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); if (!execlists_elsp_idle(engine)) { + DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n", + engine->name, + port_seqno(&engine->execlist_port[0]), + port_seqno(&engine->execlist_port[1])); engine->execlist_port[0].count = 0; engine->execlist_port[1].count = 0; execlists_submit_ports(engine); From b196fbc719871e2746e71ac9e62cde2262555c9b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 08:45:45 +0000 Subject: [PATCH 0088/1299] drm/i915: Manipulate the Global GTT size using I915_GTT_PAGE_SIZE I incorrectly converted the exclusion of the last 4096 bytes (that avoids any potential prefetching past the end of the GTT) to PAGE_SIZE and not to I915_GTT_PAGE_SIZE as it should be. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170206084547.27921-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c567b34800cf..5cf1ac4f070b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3243,9 +3243,9 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) * shrink the range used by drm_mm. */ mutex_lock(&dev_priv->drm.struct_mutex); - ggtt->base.total -= PAGE_SIZE; + ggtt->base.total -= I915_GTT_PAGE_SIZE; i915_address_space_init(&ggtt->base, dev_priv, "[global]"); - ggtt->base.total += PAGE_SIZE; + ggtt->base.total += I915_GTT_PAGE_SIZE; if (!HAS_LLC(dev_priv)) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); From 47db922fa1c8498c61cbcae4e5faba84713cae70 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 08:45:46 +0000 Subject: [PATCH 0089/1299] drm/i915: Assign I915_COLOR_UNEVICTABLE to the address space head_node The drm_mm range manager (within i915_address_space) uses a special drm_mm_node that excludes the unavailable range (beyond the end of the drm_mm). However, we play games with the global GTT to use the head_node to exclude the tail page but tell ourselves that the whole range is available. This causes an issue when we try to evict using the full range of the global GTT which is wider than the drm_mm, resulting in complete confusion and catastrophe. One way to resolve this would be to use a reserved node to exclude the guard page, or we can treat the drm_mm's head_node as our guard page and assign it the appropriate colour. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170206084547.27921-2-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 5cf1ac4f070b..9f1f3bfead59 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2160,10 +2160,14 @@ static void i915_address_space_init(struct i915_address_space *vm, const char *name) { i915_gem_timeline_init(dev_priv, &vm->timeline, name); + drm_mm_init(&vm->mm, vm->start, vm->total); + vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; + INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); INIT_LIST_HEAD(&vm->unbound_list); + list_add_tail(&vm->global_link, &dev_priv->vm_list); } From a6508ded2a6601fea903185034adc3622d94da0b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 08:45:47 +0000 Subject: [PATCH 0090/1299] drm/i915: Use page coloring to provide the guard page at the end of the GTT As we now mark the reserved hole (drm_mm.head_node) with the special UNEVICTABLE color, we can use the page coloring to avoid prefetching of the CS beyond the end of the GTT. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170206084547.27921-3-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem_evict.c | 10 ++++++++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 19 ++++++++++++------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index a43e44e18042..6d2fcfafd3c5 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -253,6 +253,9 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, int ret = 0; lockdep_assert_held(&vm->i915->drm.struct_mutex); + GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); + trace_i915_gem_evict_node(vm, target, flags); /* Retire before we search the active list. Although we have @@ -268,9 +271,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, /* Expand search to cover neighbouring guard pages (or lack!) */ if (start > vm->start) start -= I915_GTT_PAGE_SIZE; - if (end < vm->start + vm->total) - end += I915_GTT_PAGE_SIZE; + + /* Always look at the page afterwards to avoid the end-of-GTT */ + end += I915_GTT_PAGE_SIZE; } + GEM_BUG_ON(start >= end); drm_mm_for_each_node_in_range(node, &vm->mm, start, end) { /* If we find any non-objects (!vma), we cannot evict them */ @@ -279,6 +284,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, break; } + GEM_BUG_ON(!node->allocated); vma = container_of(node, typeof(*vma), node); /* If we are using coloring to insert guard pages between diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9f1f3bfead59..850d40ec77af 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2718,11 +2718,16 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node, u64 *start, u64 *end) { - if (node->color != color) + if (node->allocated && node->color != color) *start += I915_GTT_PAGE_SIZE; + /* Also leave a space between the unallocated reserved node after the + * GTT and any objects within the GTT, i.e. we use the color adjustment + * to insert a guard page to prevent prefetches crossing over the + * GTT boundary. + */ node = list_next_entry(node, node_list); - if (node->allocated && node->color != color) + if (node->color != color) *end -= I915_GTT_PAGE_SIZE; } @@ -3243,14 +3248,14 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->vm_list); - /* Subtract the guard page before address space initialization to - * shrink the range used by drm_mm. + /* Note that we use page colouring to enforce a guard page at the + * end of the address space. This is required as the CS may prefetch + * beyond the end of the batch buffer, across the page boundary, + * and beyond the end of the GTT if we do not provide a guard. */ mutex_lock(&dev_priv->drm.struct_mutex); - ggtt->base.total -= I915_GTT_PAGE_SIZE; i915_address_space_init(&ggtt->base, dev_priv, "[global]"); - ggtt->base.total += I915_GTT_PAGE_SIZE; - if (!HAS_LLC(dev_priv)) + if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv)) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); From 7fff8126d9cc902b2636d05d5d34894a75174993 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 27 Jan 2017 11:39:18 +0200 Subject: [PATCH 0091/1299] drm/i915/gen9+: Enable hotplug detection early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For LSPCON resume time initialization we need to sample the corresponding pin's HPD level, but this is only available when HPD detection is enabled. Currently we enable detection only when enabling HPD interrupts which is too late, so bring the enabling of detection earlier. This is needed by the next patch. Cc: Shashank Sharma Cc: Jani Nikula Cc: Ville Syrjälä Cc: Daniel Vetter Cc: # v4.9+ Signed-off-by: Imre Deak Reviewed-by: Shashank Sharma Link: http://patchwork.freedesktop.org/patch/msgid/1485509961-9010-2-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 71 +++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 059d66b46e1a..8b868405e99b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3129,24 +3129,33 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) I915_WRITE(PCH_PORT_HOTPLUG, hotplug); } +static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug; + + /* Enable digital hotplug on the PCH */ + hotplug = I915_READ(PCH_PORT_HOTPLUG); + hotplug |= PORTA_HOTPLUG_ENABLE | + PORTB_HOTPLUG_ENABLE | + PORTC_HOTPLUG_ENABLE | + PORTD_HOTPLUG_ENABLE; + I915_WRITE(PCH_PORT_HOTPLUG, hotplug); + + hotplug = I915_READ(PCH_PORT_HOTPLUG2); + hotplug |= PORTE_HOTPLUG_ENABLE; + I915_WRITE(PCH_PORT_HOTPLUG2, hotplug); +} + static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, hotplug, enabled_irqs; + u32 hotplug_irqs, enabled_irqs; hotplug_irqs = SDE_HOTPLUG_MASK_SPT; enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_spt); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); - /* Enable digital hotplug on the PCH */ - hotplug = I915_READ(PCH_PORT_HOTPLUG); - hotplug |= PORTD_HOTPLUG_ENABLE | PORTC_HOTPLUG_ENABLE | - PORTB_HOTPLUG_ENABLE | PORTA_HOTPLUG_ENABLE; - I915_WRITE(PCH_PORT_HOTPLUG, hotplug); - - hotplug = I915_READ(PCH_PORT_HOTPLUG2); - hotplug |= PORTE_HOTPLUG_ENABLE; - I915_WRITE(PCH_PORT_HOTPLUG2, hotplug); + spt_hpd_detection_setup(dev_priv); } static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) @@ -3183,18 +3192,15 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) ibx_hpd_irq_setup(dev_priv); } -static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) +static void __bxt_hpd_detection_setup(struct drm_i915_private *dev_priv, + u32 enabled_irqs) { - u32 hotplug_irqs, hotplug, enabled_irqs; - - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bxt); - hotplug_irqs = BXT_DE_PORT_HOTPLUG_MASK; - - bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs); + u32 hotplug; hotplug = I915_READ(PCH_PORT_HOTPLUG); - hotplug |= PORTC_HOTPLUG_ENABLE | PORTB_HOTPLUG_ENABLE | - PORTA_HOTPLUG_ENABLE; + hotplug |= PORTA_HOTPLUG_ENABLE | + PORTB_HOTPLUG_ENABLE | + PORTC_HOTPLUG_ENABLE; DRM_DEBUG_KMS("Invert bit setting: hp_ctl:%x hp_port:%x\n", hotplug, enabled_irqs); @@ -3204,7 +3210,6 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) * For BXT invert bit has to be set based on AOB design * for HPD detection logic, update it based on VBT fields. */ - if ((enabled_irqs & BXT_DE_PORT_HP_DDIA) && intel_bios_is_port_hpd_inverted(dev_priv, PORT_A)) hotplug |= BXT_DDIA_HPD_INVERT; @@ -3218,6 +3223,23 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) I915_WRITE(PCH_PORT_HOTPLUG, hotplug); } +static void bxt_hpd_detection_setup(struct drm_i915_private *dev_priv) +{ + __bxt_hpd_detection_setup(dev_priv, BXT_DE_PORT_HOTPLUG_MASK); +} + +static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug_irqs, enabled_irqs; + + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bxt); + hotplug_irqs = BXT_DE_PORT_HOTPLUG_MASK; + + bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs); + + __bxt_hpd_detection_setup(dev_priv, enabled_irqs); +} + static void ibx_irq_postinstall(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -3233,6 +3255,12 @@ static void ibx_irq_postinstall(struct drm_device *dev) gen5_assert_iir_is_zero(dev_priv, SDEIIR); I915_WRITE(SDEIMR, ~mask); + + if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || + HAS_PCH_LPT(dev_priv)) + ; /* TODO: Enable HPD detection on older PCH platforms too */ + else + spt_hpd_detection_setup(dev_priv); } static void gen5_gt_irq_postinstall(struct drm_device *dev) @@ -3444,6 +3472,9 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) GEN5_IRQ_INIT(GEN8_DE_PORT_, ~de_port_masked, de_port_enables); GEN5_IRQ_INIT(GEN8_DE_MISC_, ~de_misc_masked, de_misc_masked); + + if (IS_GEN9_LP(dev_priv)) + bxt_hpd_detection_setup(dev_priv); } static int gen8_irq_postinstall(struct drm_device *dev) From 390b4e00241ce14ca3967c4698c8f6a158c5a674 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 27 Jan 2017 11:39:19 +0200 Subject: [PATCH 0092/1299] drm/i915/lspcon: Fix resume time initialization due to unasserted HPD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During system resume time initialization the HPD level on LSPCON ports can stay low for an extended amount of time, leading to failed AUX transfers and LSPCON initialization. Fix this by waiting for HPD to get asserted. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99178 Cc: Shashank Sharma Cc: Jani Nikula Cc: Ville Syrjälä Cc: Daniel Vetter Cc: # v4.9+ Signed-off-by: Imre Deak Reviewed-by: Shashank Sharma Link: http://patchwork.freedesktop.org/patch/msgid/1485509961-9010-3-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- drivers/gpu/drm/i915/intel_drv.h | 2 ++ drivers/gpu/drm/i915/intel_lspcon.c | 5 ++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 2ab0192595c2..21924cf4c8ad 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4519,8 +4519,8 @@ static bool bxt_digital_port_connected(struct drm_i915_private *dev_priv, * * Return %true if @port is connected, %false otherwise. */ -static bool intel_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port) +bool intel_digital_port_connected(struct drm_i915_private *dev_priv, + struct intel_digital_port *port) { if (HAS_PCH_IBX(dev_priv)) return ibx_digital_port_connected(dev_priv, port); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 1fb593ecc60c..424f7ea04b68 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1500,6 +1500,8 @@ bool __intel_dp_read_desc(struct intel_dp *intel_dp, bool intel_dp_read_desc(struct intel_dp *intel_dp); int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); +bool intel_digital_port_connected(struct drm_i915_private *dev_priv, + struct intel_digital_port *port); /* intel_dp_aux_backlight.c */ int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector); diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index f6d4e6940257..c300647ef604 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -158,6 +158,8 @@ static bool lspcon_probe(struct intel_lspcon *lspcon) static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); unsigned long start = jiffies; if (!lspcon->desc_valid) @@ -173,7 +175,8 @@ static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) if (!__intel_dp_read_desc(intel_dp, &desc)) return; - if (!memcmp(&intel_dp->desc, &desc, sizeof(desc))) { + if (intel_digital_port_connected(dev_priv, dig_port) && + !memcmp(&intel_dp->desc, &desc, sizeof(desc))) { DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n", jiffies_to_msecs(jiffies - start)); return; From f123959594c000a43b267c75695bd3ffd0748eb1 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 27 Jan 2017 11:39:20 +0200 Subject: [PATCH 0093/1299] drm/i915/lspcon: Remove DPCD compare based resume time workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This effectively reverts commit 489375c866c111f16cea93b2467ebe59c9022cc7 Author: Imre Deak Date: Mon Oct 24 19:33:31 2016 +0300 drm/i915/lspcon: Add workaround for resuming in PCON mode The workaround was added without considering that HPD is low during the failed AUX transfers the WA fixed. Since the previous patch we wait for HPD to get asserted. My tests also show that this happens _after_ the DPCD reads start to return correct values. This suggests that we don't need this WA any more, let's try to remove it to reduce the clutter. Cc: Shashank Sharma Cc: Jani Nikula Cc: Ville Syrjälä Cc: Daniel Vetter Signed-off-by: Imre Deak Reviewed-by: Shashank Sharma Link: http://patchwork.freedesktop.org/patch/msgid/1485509961-9010-4-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_drv.h | 1 - drivers/gpu/drm/i915/intel_lspcon.c | 17 ++--------------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 424f7ea04b68..973d7b6a7d5c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1002,7 +1002,6 @@ struct intel_dp { struct intel_lspcon { bool active; enum drm_lspcon_mode mode; - bool desc_valid; }; struct intel_digital_port { diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index c300647ef604..71cbe9c08932 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -162,21 +162,8 @@ static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); unsigned long start = jiffies; - if (!lspcon->desc_valid) - return; - while (1) { - struct intel_dp_desc desc; - - /* - * The w/a only applies in PCON mode and we don't expect any - * AUX errors. - */ - if (!__intel_dp_read_desc(intel_dp, &desc)) - return; - - if (intel_digital_port_connected(dev_priv, dig_port) && - !memcmp(&intel_dp->desc, &desc, sizeof(desc))) { + if (intel_digital_port_connected(dev_priv, dig_port)) { DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n", jiffies_to_msecs(jiffies - start)); return; @@ -253,7 +240,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port) return false; } - lspcon->desc_valid = intel_dp_read_desc(dp); + intel_dp_read_desc(dp); DRM_DEBUG_KMS("Success: LSPCON init\n"); return true; From 1a56b1a2db2d0c6422973fd4145d05135d8fb97c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 27 Jan 2017 11:39:21 +0200 Subject: [PATCH 0094/1299] drm/i915/gen5+, pch: Enable hotplug detection early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be consistent with the recent change to enable hotplug detection early on GEN9 platforms do the same on all non-GMCH platforms starting from GEN5. On GMCH platforms enabling detection without interrupts isn't trivial, since AUX and HPD have a shared interrupt line. It could be done there too by using a SW interrupt mask, but I punt on that for now. Cc: Shashank Sharma Cc: Jani Nikula Cc: Ville Syrjälä Cc: Daniel Vetter Signed-off-by: Imre Deak Reviewed-by: Shashank Sharma Link: http://patchwork.freedesktop.org/patch/msgid/1485509961-9010-5-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 79 +++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8b868405e99b..9eb4c9c8a10b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3096,9 +3096,34 @@ static u32 intel_hpd_enabled_irqs(struct drm_i915_private *dev_priv, return enabled_irqs; } +static void ibx_hpd_detection_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug; + + /* + * Enable digital hotplug on the PCH, and configure the DP short pulse + * duration to 2ms (which is the minimum in the Display Port spec). + * The pulse duration bits are reserved on LPT+. + */ + hotplug = I915_READ(PCH_PORT_HOTPLUG); + hotplug &= ~(PORTB_PULSE_DURATION_MASK | + PORTC_PULSE_DURATION_MASK | + PORTD_PULSE_DURATION_MASK); + hotplug |= PORTB_HOTPLUG_ENABLE | PORTB_PULSE_DURATION_2ms; + hotplug |= PORTC_HOTPLUG_ENABLE | PORTC_PULSE_DURATION_2ms; + hotplug |= PORTD_HOTPLUG_ENABLE | PORTD_PULSE_DURATION_2ms; + /* + * When CPU and PCH are on the same package, port A + * HPD must be enabled in both north and south. + */ + if (HAS_PCH_LPT_LP(dev_priv)) + hotplug |= PORTA_HOTPLUG_ENABLE; + I915_WRITE(PCH_PORT_HOTPLUG, hotplug); +} + static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, hotplug, enabled_irqs; + u32 hotplug_irqs, enabled_irqs; if (HAS_PCH_IBX(dev_priv)) { hotplug_irqs = SDE_HOTPLUG_MASK; @@ -3110,23 +3135,7 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); - /* - * Enable digital hotplug on the PCH, and configure the DP short pulse - * duration to 2ms (which is the minimum in the Display Port spec). - * The pulse duration bits are reserved on LPT+. - */ - hotplug = I915_READ(PCH_PORT_HOTPLUG); - hotplug &= ~(PORTD_PULSE_DURATION_MASK|PORTC_PULSE_DURATION_MASK|PORTB_PULSE_DURATION_MASK); - hotplug |= PORTD_HOTPLUG_ENABLE | PORTD_PULSE_DURATION_2ms; - hotplug |= PORTC_HOTPLUG_ENABLE | PORTC_PULSE_DURATION_2ms; - hotplug |= PORTB_HOTPLUG_ENABLE | PORTB_PULSE_DURATION_2ms; - /* - * When CPU and PCH are on the same package, port A - * HPD must be enabled in both north and south. - */ - if (HAS_PCH_LPT_LP(dev_priv)) - hotplug |= PORTA_HOTPLUG_ENABLE; - I915_WRITE(PCH_PORT_HOTPLUG, hotplug); + ibx_hpd_detection_setup(dev_priv); } static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) @@ -3158,9 +3167,25 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) spt_hpd_detection_setup(dev_priv); } +static void ilk_hpd_detection_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug; + + /* + * Enable digital hotplug on the CPU, and configure the DP short pulse + * duration to 2ms (which is the minimum in the Display Port spec) + * The pulse duration bits are reserved on HSW+. + */ + hotplug = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL); + hotplug &= ~DIGITAL_PORTA_PULSE_DURATION_MASK; + hotplug |= DIGITAL_PORTA_HOTPLUG_ENABLE | + DIGITAL_PORTA_PULSE_DURATION_2ms; + I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, hotplug); +} + static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, hotplug, enabled_irqs; + u32 hotplug_irqs, enabled_irqs; if (INTEL_GEN(dev_priv) >= 8) { hotplug_irqs = GEN8_PORT_DP_A_HOTPLUG; @@ -3179,15 +3204,7 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs); } - /* - * Enable digital hotplug on the CPU, and configure the DP short pulse - * duration to 2ms (which is the minimum in the Display Port spec) - * The pulse duration bits are reserved on HSW+. - */ - hotplug = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL); - hotplug &= ~DIGITAL_PORTA_PULSE_DURATION_MASK; - hotplug |= DIGITAL_PORTA_HOTPLUG_ENABLE | DIGITAL_PORTA_PULSE_DURATION_2ms; - I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, hotplug); + ilk_hpd_detection_setup(dev_priv); ibx_hpd_irq_setup(dev_priv); } @@ -3258,7 +3275,7 @@ static void ibx_irq_postinstall(struct drm_device *dev) if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || HAS_PCH_LPT(dev_priv)) - ; /* TODO: Enable HPD detection on older PCH platforms too */ + ibx_hpd_detection_setup(dev_priv); else spt_hpd_detection_setup(dev_priv); } @@ -3335,6 +3352,8 @@ static int ironlake_irq_postinstall(struct drm_device *dev) gen5_gt_irq_postinstall(dev); + ilk_hpd_detection_setup(dev_priv); + ibx_irq_postinstall(dev); if (IS_IRONLAKE_M(dev_priv)) { @@ -3475,6 +3494,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (IS_GEN9_LP(dev_priv)) bxt_hpd_detection_setup(dev_priv); + else if (IS_BROADWELL(dev_priv)) + ilk_hpd_detection_setup(dev_priv); } static int gen8_irq_postinstall(struct drm_device *dev) From 04da811b3d821567e7a9a8a0baf48a6c1718b582 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Mon, 6 Feb 2017 18:37:16 +0800 Subject: [PATCH 0095/1299] drm/i915: Let execlist_update_context() cover !FULL_PPGTT mode. execlist_update_context() will try to update PDPs in a context before a ELSP submission only for full PPGTT mode, while PDPs was populated during context initialization. Now the latter code path is removed. Let execlist_update_context() also cover !FULL_PPGTT mode. Fixes: 34869776c76b ("drm/i915: check ppgtt validity when init reg state") Cc: Tvrtko Ursulin Cc: Michal Winiarski Cc: Michel Thierry Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Zhenyu Wang Cc: Zhiyuan Lv Signed-off-by: Zhi Wang Link: http://patchwork.freedesktop.org/patch/msgid/1486377436-15380-1-git-send-email-zhi.a.wang@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_lrc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 754f77c394fb..15b8a132b8e0 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -321,7 +321,8 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) static u64 execlists_update_context(struct drm_i915_gem_request *rq) { struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; - struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; + struct i915_hw_ppgtt *ppgtt = + rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; reg_state[CTX_RING_TAIL+1] = rq->tail; From eca56a35111c9e6663fbcd7dc37bcc572367efa3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 17:05:01 +0000 Subject: [PATCH 0096/1299] drm/i915: Mark the end of intel_ring_begin() and check in intel_ring_advance() It is required that the caller declare the exact number of dwords they wish to write into the ring. This is required for two reasons, we need to allocate sufficient space for the entire command packet and we need to be sure that the contents are completely written to avoid executing stale data. The current interface requires for any bug to be caught in review, the reader has to carefully count the number of intel_ring_emit() between intel_ring_begin() and intel_ring_advance(). If we record the end of the packet of each intel_ring_begin() we can also have CI check for us. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170206170502.30944-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.h | 9 +++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ 3 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index a585d47c420a..412fa2794cbb 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -28,9 +28,18 @@ #ifdef CONFIG_DRM_I915_DEBUG_GEM #define GEM_BUG_ON(expr) BUG_ON(expr) #define GEM_WARN_ON(expr) WARN_ON(expr) + +#define GEM_BUG_ONLY(expr) expr +#define GEM_BUG_ONLY_DECLARE(var) var +#define GEM_BUG_ONLY_ON(expr) GEM_BUG_ON(expr) + #else #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0) + +#define GEM_BUG_ONLY(expr) do { } while (0) +#define GEM_BUG_ONLY_DECLARE(var) +#define GEM_BUG_ONLY_ON(expr) #endif #define I915_NUM_ENGINES 5 diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d32cbba25d98..383083ef2210 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2238,6 +2238,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) ring->space -= bytes; GEM_BUG_ON(ring->space < 0); + GEM_BUG_ONLY(ring->advance = ring->tail + bytes); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index b9c15cd40fbf..2c6d3655985e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -144,6 +144,8 @@ struct intel_ring { u32 head; u32 tail; + GEM_BUG_ONLY_DECLARE(u32 advance); + int space; int size; int effective_size; @@ -516,6 +518,7 @@ static inline void intel_ring_advance(struct intel_ring *ring) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ + GEM_BUG_ONLY_ON(ring->tail != ring->advance); } static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) From 2ffe80aa442461eb2fa3cd4c5dda81832e5dd291 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 17:05:02 +0000 Subject: [PATCH 0097/1299] drm/i915: Avoid unguarded reads from the request pointer In commit 86aa7e760a67 ("drm/i915: Assert that the context-switch completion matches our context") I added a read to the irq tasklet handler that compared the on-chip status with that of our sw tracking, using an unguarded read of the request pointer to get the context and beyond. Whilst we hold a reference to the request, we do not hold anything on the context and if we are unlucky it may be reaped from a second thread retiring the request (since it may retire the request as soon as the breadcrumb is complete, even before we finish processing the context switch) as we try to read from the context pointer. Avoid the racy read from underneath the request by storing the expected result in the execlist_port[]. v2: Include commentary about port[].request being unprotected. Fixes: 86aa7e760a67 ("drm/i915: Assert that the context-switch completion matches our context") Reported-by: Mika Kuoppala Testcase: igt/gem_ctx_create Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170206170502.30944-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 24 +++++++++++++++++++++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 15b8a132b8e0..df8e6f74dc7e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -351,6 +351,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_IN); desc[0] = execlists_update_context(port[0].request); + GEM_BUG_ONLY(port[0].context_id = upper_32_bits(desc[0])); port[0].count++; if (port[1].request) { @@ -358,6 +359,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_context_status_change(port[1].request, INTEL_CONTEXT_SCHEDULE_IN); desc[1] = execlists_update_context(port[1].request); + GEM_BUG_ONLY(port[1].context_id = upper_32_bits(desc[1])); port[1].count = 1; } else { desc[1] = 0; @@ -560,13 +562,29 @@ static void intel_lrc_irq_handler(unsigned long data) unsigned int idx = ++head % GEN8_CSB_ENTRIES; unsigned int status = readl(buf + 2 * idx); + /* We are flying near dragons again. + * + * We hold a reference to the request in execlist_port[] + * but no more than that. We are operating in softirq + * context and so cannot hold any mutex or sleep. That + * prevents us stopping the requests we are processing + * in port[] from being retired simultaneously (the + * breadcrumb will be complete before we see the + * context-switch). As we only hold the reference to the + * request, any pointer chasing underneath the request + * is subject to a potential use-after-free. Thus we + * store all of the bookkeeping within port[] as + * required, and avoid using unguarded pointers beneath + * request itself. The same applies to the atomic + * status notifier. + */ + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; /* Check the context/desc id for this event matches */ - GEM_BUG_ON(readl(buf + 2 * idx + 1) != - upper_32_bits(intel_lr_context_descriptor(port[0].request->ctx, - engine))); + GEM_BUG_ONLY_ON(readl(buf + 2 * idx + 1) != + port[0].context_id); GEM_BUG_ON(port[0].count == 0); if (--port[0].count == 0) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2c6d3655985e..896838ca502c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -381,6 +381,7 @@ struct intel_engine_cs { struct execlist_port { struct drm_i915_gem_request *request; unsigned int count; + GEM_BUG_ONLY_DECLARE(u32 context_id); } execlist_port[2]; struct rb_root execlist_queue; struct rb_node *execlist_first; From b8cf691e28fa48f36f3a318385e1edc9a0a291fd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 21:36:05 +0000 Subject: [PATCH 0098/1299] drm/i915: Generate i915_params {} using a macro I want to print the struct from the error state and so would like to use the existing struct definition as the template ala DEV_INFO* v2: Use MEMBER() rather than p(). Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Jani Nikula Acked-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170206213608.31328-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_params.h | 81 ++++++++++++++++-------------- 1 file changed, 43 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 8e433de04679..9a8c60342a82 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -27,46 +27,51 @@ #include /* for __read_mostly */ +#define I915_PARAMS_FOR_EACH(func) \ + func(int, modeset); \ + func(int, panel_ignore_lid); \ + func(int, semaphores); \ + func(int, lvds_channel_mode); \ + func(int, panel_use_ssc); \ + func(int, vbt_sdvo_panel_type); \ + func(int, enable_rc6); \ + func(int, enable_dc); \ + func(int, enable_fbc); \ + func(int, enable_ppgtt); \ + func(int, enable_execlists); \ + func(int, enable_psr); \ + func(unsigned int, alpha_support); \ + func(int, disable_power_well); \ + func(int, enable_ips); \ + func(int, invert_brightness); \ + func(int, enable_guc_loading); \ + func(int, enable_guc_submission); \ + func(int, guc_log_level); \ + func(int, use_mmio_flip); \ + func(int, mmio_debug); \ + func(int, edp_vswing); \ + func(unsigned int, inject_load_failure); \ + /* leave bools at the end to not create holes */ \ + func(bool, enable_cmd_parser); \ + func(bool, enable_hangcheck); \ + func(bool, fastboot); \ + func(bool, prefault_disable); \ + func(bool, load_detect_test); \ + func(bool, force_reset_modeset_test); \ + func(bool, reset); \ + func(bool, error_capture); \ + func(bool, disable_display); \ + func(bool, verbose_state_checks); \ + func(bool, nuclear_pageflip); \ + func(bool, enable_dp_mst); \ + func(bool, enable_dpcd_backlight); \ + func(bool, enable_gvt) + +#define MEMBER(T, member) T member struct i915_params { - int modeset; - int panel_ignore_lid; - int semaphores; - int lvds_channel_mode; - int panel_use_ssc; - int vbt_sdvo_panel_type; - int enable_rc6; - int enable_dc; - int enable_fbc; - int enable_ppgtt; - int enable_execlists; - int enable_psr; - unsigned int alpha_support; - int disable_power_well; - int enable_ips; - int invert_brightness; - int enable_guc_loading; - int enable_guc_submission; - int guc_log_level; - int use_mmio_flip; - int mmio_debug; - int edp_vswing; - unsigned int inject_load_failure; - /* leave bools at the end to not create holes */ - bool enable_cmd_parser; - bool enable_hangcheck; - bool fastboot; - bool prefault_disable; - bool load_detect_test; - bool force_reset_modeset_test; - bool reset; - bool error_capture; - bool disable_display; - bool verbose_state_checks; - bool nuclear_pageflip; - bool enable_dp_mst; - bool enable_dpcd_backlight; - bool enable_gvt; + I915_PARAMS_FOR_EACH(MEMBER); }; +#undef MEMBER extern struct i915_params i915 __read_mostly; From 1a2010ca52f951b7a9e0cadb92d4bbcee643194c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 21:36:06 +0000 Subject: [PATCH 0099/1299] drm/i915: Use bool i915_param.alpha_support The alpha_support module option can only take one of two values, so assign it to a boolean type. The only advantage is in pretty printing via /sys/module/i915/parameters/alpha_support and elsewhere. Signed-off-by: Chris Wilson Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Daniel Vetter Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170206213608.31328-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_params.c | 2 +- drivers/gpu/drm/i915/i915_params.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 0e280fbd52f1..c2679fa7ed11 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -145,7 +145,7 @@ MODULE_PARM_DESC(enable_psr, "Enable PSR " "(0=disabled, 1=enabled - link mode chosen per-platform, 2=force link-standby mode, 3=force link-off mode) " "Default: -1 (use per-chip default)"); -module_param_named_unsafe(alpha_support, i915.alpha_support, int, 0400); +module_param_named_unsafe(alpha_support, i915.alpha_support, bool, 0400); MODULE_PARM_DESC(alpha_support, "Enable alpha quality driver support for latest hardware. " "See also CONFIG_DRM_I915_ALPHA_SUPPORT."); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 9a8c60342a82..55d47eea172e 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -40,7 +40,6 @@ func(int, enable_ppgtt); \ func(int, enable_execlists); \ func(int, enable_psr); \ - func(unsigned int, alpha_support); \ func(int, disable_power_well); \ func(int, enable_ips); \ func(int, invert_brightness); \ @@ -52,6 +51,7 @@ func(int, edp_vswing); \ func(unsigned int, inject_load_failure); \ /* leave bools at the end to not create holes */ \ + func(bool, alpha_support); \ func(bool, enable_cmd_parser); \ func(bool, enable_hangcheck); \ func(bool, fastboot); \ From 642c8a72533f26b1614d9f11361947f368fb3e57 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 21:36:07 +0000 Subject: [PATCH 0100/1299] drm/i915: Capture module parameters for the GPU error state They include useful material such as what mode the VM address space is running in, what submission mode, extra quirks, etc. v2: Undef the right macro, use type specific pretty printers v3: Use strcmp(TYPENAME) rather than creating per-type pretty printers v4: Use __always_inline to force GCC to eliminate the calls to strcmp and generate the right call to seq_printf for each parameter. v5: With the strcmp elimination, we can now use BUILD_BUG to ensure there are no unhandled types, also use __builtin_strcmp to make it look even more magic. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Mika Kuoppala Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170206213608.31328-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gpu_error.c | 42 ++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1e0d9561fd67..6006694bffb5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -904,6 +904,7 @@ struct drm_i915_error_state { u32 reset_count; u32 suspend_count; struct intel_device_info device_info; + struct i915_params params; /* Generic register state */ u32 eir; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5283fe815a4d..c28ccfef84ab 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -546,6 +546,29 @@ static void err_print_capabilities(struct drm_i915_error_state_buf *m, #undef PRINT_FLAG } +static __always_inline void err_print_param(struct drm_i915_error_state_buf *m, + const char *name, + const char *type, + const void *x) +{ + if (!__builtin_strcmp(type, "bool")) + err_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); + else if (!__builtin_strcmp(type, "int")) + err_printf(m, "i915.%s=%d\n", name, *(const int *)x); + else if (!__builtin_strcmp(type, "unsigned int")) + err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else + BUILD_BUG(); +} + +static void err_print_params(struct drm_i915_error_state_buf *m, + const struct i915_params *p) +{ +#define PRINT(T, x) err_print_param(m, #x, #T, &p->x); + I915_PARAMS_FOR_EACH(PRINT); +#undef PRINT +} + int i915_error_state_to_str(struct drm_i915_error_state_buf *m, const struct i915_error_state_file_priv *error_priv) { @@ -568,7 +591,6 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->boottime.tv_sec, error->boottime.tv_usec); err_printf(m, "Uptime: %ld s %ld us\n", error->uptime.tv_sec, error->uptime.tv_usec); - err_print_capabilities(m, &error->device_info); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].hangcheck_stalled && @@ -588,6 +610,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, "PCI Subsystem: %04x:%04x\n", pdev->subsystem_vendor, pdev->subsystem_device); + err_printf(m, "IOMMU enabled?: %d\n", error->iommu); if (HAS_CSR(dev_priv)) { @@ -730,6 +753,9 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if (error->display) intel_display_print_error_state(m, dev_priv, error->display); + err_print_capabilities(m, &error->device_info); + err_print_params(m, &error->params); + out: if (m->bytes == 0 && m->err) return m->err; @@ -1587,6 +1613,14 @@ static int capture(void *data) { struct drm_i915_error_state *error = data; + do_gettimeofday(&error->time); + error->boottime = ktime_to_timeval(ktime_get_boottime()); + error->uptime = + ktime_to_timeval(ktime_sub(ktime_get(), + error->i915->gt.last_init_time)); + + error->params = i915; + i915_capture_gen_state(error->i915, error); i915_capture_reg_state(error->i915, error); i915_gem_record_fences(error->i915, error); @@ -1595,12 +1629,6 @@ static int capture(void *data) i915_capture_pinned_buffers(error->i915, error); i915_gem_capture_guc_log_buffer(error->i915, error); - do_gettimeofday(&error->time); - error->boottime = ktime_to_timeval(ktime_get_boottime()); - error->uptime = - ktime_to_timeval(ktime_sub(ktime_get(), - error->i915->gt.last_init_time)); - error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); From 418e3cd8005121601facd46dfc452ef9e40894f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 21:36:08 +0000 Subject: [PATCH 0101/1299] drm/i915: Show the current i915_params in debugfs/i915_capabilites Alongside the hw capabilities, it is useful to know which of those have been overridden by the user setting module parameters. v2: Use __always_inline and BUILD_BUG magic Signed-off-by: Chris Wilson Acked-by: Jani Nikula Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170206213608.31328-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3ae06568df7b..152f23db4ec2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -61,6 +61,21 @@ drm_add_fake_info_node(struct drm_minor *minor, return 0; } +static __always_inline void seq_print_param(struct seq_file *m, + const char *name, + const char *type, + const void *x) +{ + if (!__builtin_strcmp(type, "bool")) + seq_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); + else if (!__builtin_strcmp(type, "int")) + seq_printf(m, "i915.%s=%d\n", name, *(const int *)x); + else if (!__builtin_strcmp(type, "unsigned int")) + seq_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else + BUILD_BUG(); +} + static int i915_capabilities(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -69,10 +84,17 @@ static int i915_capabilities(struct seq_file *m, void *data) seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv)); seq_printf(m, "platform: %s\n", intel_platform_name(info->platform)); seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv)); + #define PRINT_FLAG(x) seq_printf(m, #x ": %s\n", yesno(info->x)) DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); #undef PRINT_FLAG + kernel_param_lock(THIS_MODULE); +#define PRINT_PARAM(T, x) seq_print_param(m, #x, #T, &i915.x); + I915_PARAMS_FOR_EACH(PRINT_PARAM); +#undef PRINT_PARAM + kernel_param_unlock(THIS_MODULE); + return 0; } From db61ffe3a71c697aaa91c42c862a5f7557a0e562 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 31 Jan 2017 14:36:07 -0200 Subject: [PATCH 0102/1299] random: move random_min_urandom_seed into CONFIG_SYSCTL ifdef block Building arm allnodefconfig causes the following build warning: drivers/char/random.c:318:12: warning: 'random_min_urandom_seed' defined but not used [-Wunused-variable] Fix the warning by moving 'random_min_urandom_seed' declaration inside the CONFIG_SYSCTL ifdef block, where it is actually used. While at it, remove the comment prior to the variable declaration. Signed-off-by: Fabio Estevam Signed-off-by: Theodore Ts'o --- drivers/char/random.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/char/random.c b/drivers/char/random.c index 066ae125f2c8..0ab024918907 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -312,11 +312,6 @@ static int random_read_wakeup_bits = 64; */ static int random_write_wakeup_bits = 28 * OUTPUT_POOL_WORDS; -/* - * Variable is currently unused by left for user space compatibility. - */ -static int random_min_urandom_seed = 60; - /* * Originally, we used a primitive polynomial of degree .poolwords * over GF(2). The taps for various sizes are defined below. They @@ -1886,6 +1881,7 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, static int min_read_thresh = 8, min_write_thresh; static int max_read_thresh = OUTPUT_POOL_WORDS * 32; static int max_write_thresh = INPUT_POOL_WORDS * 32; +static int random_min_urandom_seed = 60; static char sysctl_bootid[16]; /* From e0ec3ec698851a6c97a12d696407b3ff77700c23 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Feb 2017 12:57:17 +0000 Subject: [PATCH 0103/1299] drm/i915: Remove overzealous fence warn on runtime suspend The goal of the WARN was to catch when we are still actively using the fence as we go into the runtime suspend. However, the reg->pin_count is too coarse as it does not distinguish between exclusive ownership of the fence register from activity. I've not improved on the WARN, nor have we captured this WARN in an exact igt, but it is showing up regularly in the wild: [ 1915.935332] WARNING: CPU: 1 PID: 10861 at drivers/gpu/drm/i915/i915_gem.c:2022 i915_gem_runtime_suspend+0x116/0x130 [i915] [ 1915.935383] WARN_ON(reg->pin_count)[ 1915.935399] Modules linked in: snd_hda_intel i915 drm_kms_helper vgem netconsole scsi_transport_iscsi fuse vfat fat x86_pkg_temp_thermal coretemp intel_cstate intel_uncore snd_hda_codec_hdmi snd_hda_codec_generic snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mei_me mei serio_raw intel_rapl_perf intel_pch_thermal soundcore wmi acpi_pad i2c_algo_bit syscopyarea sysfillrect sysimgblt fb_sys_fops drm r8169 mii video [last unloaded: drm_kms_helper] [ 1915.935785] CPU: 1 PID: 10861 Comm: kworker/1:0 Tainted: G U W 4.9.0-rc5+ #170 [ 1915.935799] Hardware name: LENOVO 80MX/Lenovo E31-80, BIOS DCCN34WW(V2.03) 12/01/2015 [ 1915.935822] Workqueue: pm pm_runtime_work [ 1915.935845] ffffc900044fbbf0 ffffffffac3220bc ffffc900044fbc40 0000000000000000 [ 1915.935890] ffffc900044fbc30 ffffffffac059bcb 000007e6044fbc60 ffff8801626e3198 [ 1915.935937] ffff8801626e0000 0000000000000002 ffffffffc05e5d4e 0000000000000000 [ 1915.935985] Call Trace: [ 1915.936013] [] dump_stack+0x4f/0x73 [ 1915.936038] [] __warn+0xcb/0xf0 [ 1915.936060] [] warn_slowpath_fmt+0x5f/0x80 [ 1915.936158] [] i915_gem_runtime_suspend+0x116/0x130 [i915] [ 1915.936251] [] intel_runtime_suspend+0x64/0x280 [i915] [ 1915.936277] [] ? dequeue_entity+0x241/0xbc0 [ 1915.936298] [] pci_pm_runtime_suspend+0x55/0x180 [ 1915.936317] [] ? pci_pm_runtime_resume+0xa0/0xa0 [ 1915.936339] [] __rpm_callback+0x32/0x70 [ 1915.936356] [] rpm_callback+0x24/0x80 [ 1915.936375] [] ? pci_pm_runtime_resume+0xa0/0xa0 [ 1915.936392] [] rpm_suspend+0x12d/0x680 [ 1915.936415] [] ? _raw_spin_unlock_irq+0x17/0x30 [ 1915.936435] [] ? finish_task_switch+0x88/0x220 [ 1915.936455] [] pm_runtime_work+0x6f/0xb0 [ 1915.936477] [] process_one_work+0x1f3/0x4d0 [ 1915.936501] [] worker_thread+0x48/0x4e0 [ 1915.936523] [] ? process_one_work+0x4d0/0x4d0 [ 1915.936542] [] ? process_one_work+0x4d0/0x4d0 [ 1915.936559] [] kthread+0xd9/0xf0 [ 1915.936580] [] ? kthread_park+0x60/0x60 [ 1915.936600] [] ret_from_fork+0x22/0x30 In the case the register is pinned, it should be present and we will need to invalidate them to be restored upon resume as we cannot expect the owner of the pin to call get_fence prior to use after resume. Fixes: 7c108fd8feac ("drm/i915: Move fence cancellation to runtime suspend") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98804 Reported-by: Lionel Landwerlin Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Imre Deak Cc: Jani Nikula Cc: # v4.10-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170203125717.8431-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_gem.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 276676b2bbc8..78191e00a00c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2475,6 +2475,7 @@ static int intel_runtime_resume(struct device *kdev) * we can do is to hope that things will still work (and disable RPM). */ i915_gem_init_swizzling(dev_priv); + i915_gem_restore_fences(dev_priv); intel_runtime_pm_enable_interrupts(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f3b5cb497921..495f1176d45e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2011,8 +2011,16 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; - if (WARN_ON(reg->pin_count)) - continue; + /* Ideally we want to assert that the fence register is not + * live at this point (i.e. that no piece of code will be + * trying to write through fence + GTT, as that both violates + * our tracking of activity and associated locking/barriers, + * but also is illegal given that the hw is powered down). + * + * Previously we used reg->pin_count as a "liveness" indicator. + * That is not sufficient, and we need a more fine-grained + * tool if we want to have a sanity check here. + */ if (!reg->vma) continue; From 6248017ae5301ccb51cda92c4117b573b9aff6bb Mon Sep 17 00:00:00 2001 From: Arthur Heymans Date: Wed, 1 Feb 2017 00:50:26 +0100 Subject: [PATCH 0104/1299] drm/i915: Get correct display clock on 945gm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is according to Mobile Intel® 945 Express Chipset Family datasheet. Signed-off-by: Arthur Heymans Link: http://patchwork.freedesktop.org/patch/msgid/20170131235026.26003-1-arthur@aheymans.xyz Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_reg.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 27 +++++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a9538ae81673..4b5761d7716c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -119,7 +119,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GCFGC 0xf0 /* 915+ only */ #define GC_LOW_FREQUENCY_ENABLE (1 << 7) #define GC_DISPLAY_CLOCK_190_200_MHZ (0 << 4) -#define GC_DISPLAY_CLOCK_333_MHZ (4 << 4) +#define GC_DISPLAY_CLOCK_333_320_MHZ (4 << 4) #define GC_DISPLAY_CLOCK_267_MHZ_PNV (0 << 4) #define GC_DISPLAY_CLOCK_333_MHZ_PNV (1 << 4) #define GC_DISPLAY_CLOCK_444_MHZ_PNV (2 << 4) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 45e587496886..d0d042495dc7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7411,6 +7411,26 @@ static int i945_get_display_clock_speed(struct drm_i915_private *dev_priv) return 400000; } +static int i945gm_get_display_clock_speed(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) + return 133333; + else { + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_320_MHZ: + return 320000; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + return 200000; + } + } +} + static int i915_get_display_clock_speed(struct drm_i915_private *dev_priv) { return 333333; @@ -7457,7 +7477,7 @@ static int i915gm_get_display_clock_speed(struct drm_i915_private *dev_priv) return 133333; else { switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_333_MHZ: + case GC_DISPLAY_CLOCK_333_320_MHZ: return 333333; default: case GC_DISPLAY_CLOCK_190_200_MHZ: @@ -16268,9 +16288,12 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) else if (IS_I915G(dev_priv)) dev_priv->display.get_display_clock_speed = i915_get_display_clock_speed; - else if (IS_I945GM(dev_priv) || IS_I845G(dev_priv)) + else if (IS_I845G(dev_priv)) dev_priv->display.get_display_clock_speed = i9xx_misc_get_display_clock_speed; + else if (IS_I945GM(dev_priv)) + dev_priv->display.get_display_clock_speed = + i945gm_get_display_clock_speed; else if (IS_I915GM(dev_priv)) dev_priv->display.get_display_clock_speed = i915gm_get_display_clock_speed; From c0dcb203fb009678e5be9e7782329dcfbbf16439 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Feb 2017 15:24:37 +0000 Subject: [PATCH 0105/1299] drm/i915: Restore context and pd for ringbuffer submission after reset Following a reset, the context and page directory registers are lost. However, the queue of requests that we resubmit after the reset may depend upon them - the registers are restored from a context image, but that restore may be inhibited and may simply be absent from the request if it was in the middle of a sequence using the same context. If we prime the CCID/PD registers with the first request in the queue (even for the hung request), we prevent invalid memory access for the following requests (and continually hung engines). v2: Magic BIT(8), reserved for future use but still appears unused. v3: Some commentary on handling innocent vs guilty requests v4: Add a wait for PD_BASE fetch. The reload appears to be instant on my Ivybridge, but this bit probably exists for a reason. Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170207152437.4252-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 18 +++----- drivers/gpu/drm/i915/i915_reg.h | 6 ++- drivers/gpu/drm/i915/intel_lrc.c | 16 ++++++- drivers/gpu/drm/i915/intel_ringbuffer.c | 58 +++++++++++++++++++++++-- 4 files changed, 81 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 495f1176d45e..d1841bc1cb50 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2745,21 +2745,17 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) engine->irq_seqno_barrier(engine); request = i915_gem_find_active_request(engine); - if (!request) - return; + if (request && i915_gem_reset_request(request)) { + DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", + engine->name, request->global_seqno); - if (!i915_gem_reset_request(request)) - return; - - DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", - engine->name, request->global_seqno); + /* If this context is now banned, skip all pending requests. */ + if (i915_gem_context_is_banned(request->ctx)) + engine_skip_context(request); + } /* Setup the CS to resume from the breadcrumb of the hung request */ engine->reset_hw(engine, request); - - /* If this context is now banned, skip all of its pending requests. */ - if (i915_gem_context_is_banned(request->ctx)) - engine_skip_context(request); } void i915_gem_reset_finish(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4b5761d7716c..bd2ff1f87f32 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3307,8 +3307,10 @@ enum skl_disp_power_wells { /* * Logical Context regs */ -#define CCID _MMIO(0x2180) -#define CCID_EN (1<<0) +#define CCID _MMIO(0x2180) +#define CCID_EN BIT(0) +#define CCID_EXTENDED_STATE_RESTORE BIT(2) +#define CCID_EXTENDED_STATE_SAVE BIT(3) /* * Notes on SNB/IVB/VLV context size: * - Power context is saved elsewhere (LLC or stolen) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index df8e6f74dc7e..e42990b56fa8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1352,7 +1352,20 @@ static void reset_common_ring(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { struct execlist_port *port = engine->execlist_port; - struct intel_context *ce = &request->ctx->engine[engine->id]; + struct intel_context *ce; + + /* If the request was innocent, we leave the request in the ELSP + * and will try to replay it on restarting. The context image may + * have been corrupted by the reset, in which case we may have + * to service a new GPU hang, but more likely we can continue on + * without impact. + * + * If the request was guilty, we presume the context is corrupt + * and have to at least restore the RING register in the context + * image back to the expected values to skip over the guilty request. + */ + if (!request || request->fence.error != -EIO) + return; /* We want a simple context + ring to execute the breadcrumb update. * We cannot rely on the context being intact across the GPU hang, @@ -1361,6 +1374,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ + ce = &request->ctx->engine[engine->id]; execlists_init_reg_state(ce->lrc_reg_state, request->ctx, engine, ce->ring); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 383083ef2210..d3d1e64f2498 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -599,10 +599,62 @@ static int init_ring_common(struct intel_engine_cs *engine) static void reset_ring_common(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - struct intel_ring *ring = request->ring; + /* Try to restore the logical GPU state to match the continuation + * of the request queue. If we skip the context/PD restore, then + * the next request may try to execute assuming that its context + * is valid and loaded on the GPU and so may try to access invalid + * memory, prompting repeated GPU hangs. + * + * If the request was guilty, we still restore the logical state + * in case the next request requires it (e.g. the aliasing ppgtt), + * but skip over the hung batch. + * + * If the request was innocent, we try to replay the request with + * the restored context. + */ + if (request) { + struct drm_i915_private *dev_priv = request->i915; + struct intel_context *ce = &request->ctx->engine[engine->id]; + struct i915_hw_ppgtt *ppgtt; - ring->head = request->postfix; - ring->last_retired_head = -1; + /* FIXME consider gen8 reset */ + + if (ce->state) { + I915_WRITE(CCID, + i915_ggtt_offset(ce->state) | + BIT(8) /* must be set! */ | + CCID_EXTENDED_STATE_SAVE | + CCID_EXTENDED_STATE_RESTORE | + CCID_EN); + } + + ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt; + if (ppgtt) { + u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10; + + I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); + I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset); + + /* Wait for the PD reload to complete */ + if (intel_wait_for_register(dev_priv, + RING_PP_DIR_BASE(engine), + BIT(0), 0, + 10)) + DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n"); + + ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); + } + + /* If the rq hung, jump to its breadcrumb and skip the batch */ + if (request->fence.error == -EIO) { + struct intel_ring *ring = request->ring; + + ring->head = request->postfix; + ring->last_retired_head = -1; + } + } else { + engine->legacy_active_context = NULL; + } } static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) From 038c95a313e4ca954ee5ab8a0c7559a646b0f462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Tue, 7 Feb 2017 20:55:59 +0100 Subject: [PATCH 0106/1299] drm/i915: Always convert incoming exec offsets to non-canonical MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're using non-canonical addresses in drm_mm, and we're making sure that userspace is using canonical addressing - both in case of softpin (verifying incoming offset) and when relocating (converting to canonical when updating offset returned to userspace). Unfortunately when considering the need for relocations, we're comparing offset from userspace (in canonical form) with drm_mm node (in non-canonical form), and as a result, we end up always relocating if our offsets are in the "problematic" range. Let's always convert the offsets to avoid the performance impact of relocations. Fixes: a5f0edf63bdf ("drm/i915: Avoid writing relocs with addresses in non-canonical form") Cc: Chris Wilson Cc: Michel Thierry Reported-by: Michał Pyrzowski Signed-off-by: Michał Winiarski Link: http://patchwork.freedesktop.org/patch/msgid/20170207195559.18798-1-michal.winiarski@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 26c26a6675df..ae31065ab708 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1185,14 +1185,14 @@ validate_exec_list(struct drm_device *dev, if (exec[i].offset != gen8_canonical_addr(exec[i].offset & PAGE_MASK)) return -EINVAL; - - /* From drm_mm perspective address space is continuous, - * so from this point we're always using non-canonical - * form internally. - */ - exec[i].offset = gen8_noncanonical_addr(exec[i].offset); } + /* From drm_mm perspective address space is continuous, + * so from this point we're always using non-canonical + * form internally. + */ + exec[i].offset = gen8_noncanonical_addr(exec[i].offset); + if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) return -EINVAL; From 3c779a49bd7ce6c9ecc35a679f28f94fe3cfaeab Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Wed, 8 Feb 2017 15:12:09 +0200 Subject: [PATCH 0107/1299] drm/i915: Avoid BIT(max) - 1 and use GENMASK(max - 1, 0) "BIT(max) - 1" will overflow when max = 32, and GCC will complain. We already have GENMASK for generating the mask, use it! v2: Majestic off by one spotted (Chris) Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_device_info.c | 2 +- drivers/gpu/drm/i915/intel_fbdev.c | 2 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index fcf81815daff..0891cc0e8626 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -234,7 +234,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) * The subslice disable field is global, i.e. it applies * to each of the enabled slices. */ - sseu->subslice_mask = BIT(ss_max) - 1; + sseu->subslice_mask = GENMASK(ss_max - 1, 0); sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> GEN8_F2_SS_DIS_SHIFT); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index e0d9e72cf3d1..03b7391fb566 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -371,7 +371,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, return false; memcpy(save_enabled, enabled, count); - mask = BIT(count) - 1; + mask = GENMASK(count - 1, 0); conn_configured = 0; retry: for (i = 0; i < count; i++) { diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 66aa1bbb42f0..94df466a4801 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1249,7 +1249,7 @@ static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, vlv_set_power_well(dev_priv, power_well, false); } -#define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1) +#define POWER_DOMAIN_MASK (GENMASK(POWER_DOMAIN_NUM - 1, 0)) static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, int power_well_id) From 8385c2ecd47941072bdb3900bc272553ceb957e3 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Wed, 8 Feb 2017 15:12:10 +0200 Subject: [PATCH 0108/1299] drm/i915: Use for_each_power_domain() in i915_power_domain_info() Macro seems to do exactly the same thing. Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1486559530-15141-1-git-send-email-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 152f23db4ec2..6400f83d3ad1 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2822,15 +2822,10 @@ static int i915_power_domain_info(struct seq_file *m, void *unused) seq_printf(m, "%-25s %d\n", power_well->name, power_well->count); - for (power_domain = 0; power_domain < POWER_DOMAIN_NUM; - power_domain++) { - if (!(BIT(power_domain) & power_well->domains)) - continue; - + for_each_power_domain(power_domain, power_well->domains) seq_printf(m, " %-23s %d\n", intel_display_power_domain_str(power_domain), power_domains->domain_use_count[power_domain]); - } } mutex_unlock(&power_domains->lock); From fb51ff40956ddef8f7d9448265ad3edc67eb0b2b Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 7 Feb 2017 08:50:25 +0000 Subject: [PATCH 0109/1299] drm/i915/guc: Log significant events at the info level Currently to establish whether GuC firmware has been loaded or submission enabled (default DRM log level), one has to detect the absence of the message saying that the load has been skipped and infer the opposite. It is better to log the fact GuC firmware has been loaded and/or submission enabled explicitly to avoid any guesswork when looking at the logs. v2: * Log message polish. (Chris) * Future proof by reporting found firmware version. (Michal) Signed-off-by: Tvrtko Ursulin Cc: Arkadiusz Hiler Cc: Chris Wilson Cc: Michal Wajdeczko Reviewed-by: Chris Wilson Reviewed-by: Michal Wajdeczko (v1) Link: http://patchwork.freedesktop.org/patch/msgid/1486457425-32548-1-git-send-email-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_guc_loader.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 2f1cf9aea04e..8ef33d88d5a0 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -520,10 +520,6 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) guc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; - DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", - intel_uc_fw_status_repr(guc_fw->fetch_status), - intel_uc_fw_status_repr(guc_fw->load_status)); - intel_guc_auth_huc(dev_priv); if (i915.enable_guc_submission) { @@ -536,6 +532,11 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) guc_interrupts_capture(dev_priv); } + DRM_INFO("GuC %s (firmware %s [version %u.%u])\n", + i915.enable_guc_submission ? "submission enabled" : "loaded", + guc_fw->path, + guc_fw->major_ver_found, guc_fw->minor_ver_found); + return 0; fail: From a7d1b3f41a2d3246736816ed1b966e1904760f7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 26 Jan 2017 21:50:31 +0200 Subject: [PATCH 0110/1299] drm/i915: Store the pipe pixel rate in the crtc state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than recomputing the pipe pixel rate on demand everywhere, let's just stick the precomputed value into the crtc state. v2: Rebase due to min_pixclk[] code movement Document the new pixel_rate struct member (Ander) Combine vlv/chv with bdw+ in intel_modeset_readout_hw_state() v3: Fix typos in commit message (David) Cc: Ander Conselvan De Oliveira Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170126195031.32343-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 36 +++++++++++++++++++++------- drivers/gpu/drm/i915/intel_drv.h | 6 +++++ drivers/gpu/drm/i915/intel_fbc.c | 3 +-- drivers/gpu/drm/i915/intel_pm.c | 14 +++++------ 4 files changed, 42 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d0d042495dc7..cd1248bc8753 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7184,7 +7184,7 @@ static bool pipe_config_supports_ips(struct drm_i915_private *dev_priv, * * Should measure whether using a lower cdclk w/o IPS */ - return ilk_pipe_pixel_rate(pipe_config) <= + return pipe_config->pixel_rate <= dev_priv->max_cdclk_freq * 95 / 100; } @@ -7208,6 +7208,19 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); } +static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + if (HAS_GMCH_DISPLAY(dev_priv)) + /* FIXME calculate proper pipe pixel rate for GMCH pfit */ + crtc_state->pixel_rate = + crtc_state->base.adjusted_mode.crtc_clock; + else + crtc_state->pixel_rate = + ilk_pipe_pixel_rate(crtc_state); +} + static int intel_crtc_compute_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { @@ -7254,6 +7267,8 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, adjusted_mode->crtc_hsync_start == adjusted_mode->crtc_hdisplay) return -EINVAL; + intel_crtc_compute_pixel_rate(pipe_config); + if (HAS_IPS(dev_priv)) hsw_compute_ips_config(crtc, pipe_config); @@ -10322,7 +10337,7 @@ static int ilk_max_pixel_rate(struct drm_atomic_state *state) continue; } - pixel_rate = ilk_pipe_pixel_rate(crtc_state); + pixel_rate = crtc_state->pixel_rate; if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state, @@ -12832,9 +12847,10 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, DRM_DEBUG_KMS("adjusted mode:\n"); drm_mode_debug_printmodeline(&pipe_config->base.adjusted_mode); intel_dump_crtc_timings(&pipe_config->base.adjusted_mode); - DRM_DEBUG_KMS("port clock: %d, pipe src size: %dx%d\n", + DRM_DEBUG_KMS("port clock: %d, pipe src size: %dx%d, pixel rate %d\n", pipe_config->port_clock, - pipe_config->pipe_src_w, pipe_config->pipe_src_h); + pipe_config->pipe_src_w, pipe_config->pipe_src_h, + pipe_config->pixel_rate); if (INTEL_GEN(dev_priv) >= 9) DRM_DEBUG_KMS("num_scalers: %d, scaler_users: 0x%x, scaler_id: %d\n", @@ -13410,6 +13426,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, } PIPE_CONF_CHECK_I(scaler_state.scaler_id); + PIPE_CONF_CHECK_CLOCK_FUZZY(pixel_rate); } /* BDW+ don't expose a synchronous way to read the state */ @@ -13701,6 +13718,8 @@ verify_crtc_state(struct drm_crtc *crtc, } } + intel_crtc_compute_pixel_rate(pipe_config); + if (!new_crtc_state->active) return; @@ -17177,10 +17196,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) */ crtc_state->base.mode.private_flags = I915_MODE_FLAG_INHERITED; - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - pixclk = ilk_pipe_pixel_rate(crtc_state); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - pixclk = crtc_state->base.adjusted_mode.crtc_clock; + intel_crtc_compute_pixel_rate(crtc_state); + + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + pixclk = crtc_state->pixel_rate; else WARN_ON(dev_priv->display.modeset_calc_cdclk); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 973d7b6a7d5c..e1dbd9aa5701 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -544,6 +544,12 @@ struct intel_crtc_state { * and get clipped at the edges. */ int pipe_src_w, pipe_src_h; + /* + * Pipe pixel rate, adjusted for + * panel fitter/pipe scaler downscaling. + */ + unsigned int pixel_rate; + /* Whether to set up the PCH/FDI. Note that we never allow sharing * between pch encoders and cpu encoders. */ bool has_pch_encoder; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 51585008fb9d..c66ba7e36289 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -742,8 +742,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - cache->crtc.hsw_bdw_pixel_rate = - ilk_pipe_pixel_rate(crtc_state); + cache->crtc.hsw_bdw_pixel_rate = crtc_state->pixel_rate; cache->plane.rotation = plane_state->base.rotation; cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ec16f3d6dd2e..79cbe5736ed1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1820,12 +1820,12 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, cpp = pstate->base.fb->format->cpp[0]; - method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); + method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); if (!is_lp) return method1; - method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + method2 = ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, drm_rect_width(&pstate->base.dst), cpp, mem_value); @@ -1849,8 +1849,8 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, cpp = pstate->base.fb->format->cpp[0]; - method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); - method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); + method2 = ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, drm_rect_width(&pstate->base.dst), cpp, mem_value); @@ -1876,7 +1876,7 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, if (!cstate->base.active) return 0; - return ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + return ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, width, cpp, mem_value); } @@ -3559,7 +3559,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst * Adjusted plane pixel rate is just the pipe's adjusted pixel rate * with additional adjustments for plane-specific scaling. */ - adjusted_pixel_rate = ilk_pipe_pixel_rate(cstate); + adjusted_pixel_rate = cstate->pixel_rate; downscale_amount = skl_plane_downscale_amount(pstate); pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; @@ -3787,7 +3787,7 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate) if (!cstate->base.active) return 0; - pixel_rate = ilk_pipe_pixel_rate(cstate); + pixel_rate = cstate->pixel_rate; if (WARN_ON(pixel_rate == 0)) return 0; From 4e841ecd4e185af2c6353f6d4ef480fb1f533d8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 20 Jan 2017 20:21:53 +0200 Subject: [PATCH 0111/1299] drm/i915: Nuke intel_mode_max_pixclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ilk_max_pixel_rate() will now give the "correct" pixel rate for all platforms, so let's rename it to intel_max_pixel_rate() and kill off intel_mode_max_pixclk(). v2: Fix typo in commit message (Ander) Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 41 ++++------------------------ 1 file changed, 6 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index cd1248bc8753..07d74e8e4ce7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -122,7 +122,7 @@ static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force); static void ironlake_pfit_enable(struct intel_crtc *crtc); static void intel_modeset_setup_hw_state(struct drm_device *dev); static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc); -static int ilk_max_pixel_rate(struct drm_atomic_state *state); +static int intel_max_pixel_rate(struct drm_atomic_state *state); static int glk_calc_cdclk(int max_pixclk); static int bxt_calc_cdclk(int max_pixclk); @@ -6566,40 +6566,11 @@ static int bxt_calc_cdclk(int max_pixclk) return 144000; } -/* Compute the max pixel clock for new configuration. */ -static int intel_mode_max_pixclk(struct drm_device *dev, - struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; - unsigned max_pixclk = 0, i; - enum pipe pipe; - - memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, - sizeof(intel_state->min_pixclk)); - - for_each_crtc_in_state(state, crtc, crtc_state, i) { - int pixclk = 0; - - if (crtc_state->enable) - pixclk = crtc_state->adjusted_mode.crtc_clock; - - intel_state->min_pixclk[i] = pixclk; - } - - for_each_pipe(dev_priv, pipe) - max_pixclk = max(intel_state->min_pixclk[pipe], max_pixclk); - - return max_pixclk; -} - static int valleyview_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; struct drm_i915_private *dev_priv = to_i915(dev); - int max_pixclk = intel_mode_max_pixclk(dev, state); + int max_pixclk = intel_max_pixel_rate(state); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); @@ -6615,7 +6586,7 @@ static int valleyview_modeset_calc_cdclk(struct drm_atomic_state *state) static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->dev); - int max_pixclk = ilk_max_pixel_rate(state); + int max_pixclk = intel_max_pixel_rate(state); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); int cdclk; @@ -10315,7 +10286,7 @@ static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, } /* compute the max rate for new configuration */ -static int ilk_max_pixel_rate(struct drm_atomic_state *state) +static int intel_max_pixel_rate(struct drm_atomic_state *state) { struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct drm_i915_private *dev_priv = to_i915(state->dev); @@ -10447,7 +10418,7 @@ static int broadwell_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - int max_pixclk = ilk_max_pixel_rate(state); + int max_pixclk = intel_max_pixel_rate(state); int cdclk; /* @@ -10483,7 +10454,7 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) { struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct drm_i915_private *dev_priv = to_i915(state->dev); - const int max_pixclk = ilk_max_pixel_rate(state); + const int max_pixclk = intel_max_pixel_rate(state); int vco = intel_state->cdclk_pll_vco; int cdclk; From c49a0d054a054cc07f9ef4982a72e55a710e87d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 7 Feb 2017 20:31:46 +0200 Subject: [PATCH 0112/1299] drm/i915: s/get_display_clock_speed/get_cdclk/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the .get_display_clock_speed() hook to .get_cdclk(). .get_cdclk() is more specific (which clock) and it's much shorter. v2: Deal with IS_GEN9_BC() v3: Deal with i945gm_get_display_clock_speed() Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170207183146.19420-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 98 ++++++++++--------------- drivers/gpu/drm/i915/intel_runtime_pm.c | 3 +- 3 files changed, 41 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6006694bffb5..f41496405484 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -602,7 +602,7 @@ struct intel_limit; struct dpll; struct drm_i915_display_funcs { - int (*get_display_clock_speed)(struct drm_i915_private *dev_priv); + int (*get_cdclk)(struct drm_i915_private *dev_priv); int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); int (*compute_intermediate_wm)(struct drm_device *dev, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 07d74e8e4ce7..8f560737c309 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5873,7 +5873,7 @@ static void intel_update_max_cdclk(struct drm_i915_private *dev_priv) static void intel_update_cdclk(struct drm_i915_private *dev_priv) { - dev_priv->cdclk_freq = dev_priv->display.get_display_clock_speed(dev_priv); + dev_priv->cdclk_freq = dev_priv->display.get_cdclk(dev_priv); if (INTEL_GEN(dev_priv) >= 9) DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", @@ -6411,8 +6411,7 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; - WARN_ON(dev_priv->display.get_display_clock_speed(dev_priv) - != dev_priv->cdclk_freq); + WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ cmd = 2; @@ -6476,8 +6475,7 @@ static void cherryview_set_cdclk(struct drm_device *dev, int cdclk) struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; - WARN_ON(dev_priv->display.get_display_clock_speed(dev_priv) - != dev_priv->cdclk_freq); + WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); switch (cdclk) { case 333333: @@ -7249,7 +7247,7 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, return 0; } -static int skylake_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int skylake_get_cdclk(struct drm_i915_private *dev_priv) { u32 cdctl; @@ -7310,7 +7308,7 @@ static void bxt_de_pll_update(struct drm_i915_private *dev_priv) dev_priv->cdclk_pll.ref; } -static int broxton_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int broxton_get_cdclk(struct drm_i915_private *dev_priv) { u32 divider; int div, vco; @@ -7345,7 +7343,7 @@ static int broxton_get_display_clock_speed(struct drm_i915_private *dev_priv) return DIV_ROUND_CLOSEST(vco, div); } -static int broadwell_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int broadwell_get_cdclk(struct drm_i915_private *dev_priv) { uint32_t lcpll = I915_READ(LCPLL_CTL); uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; @@ -7364,7 +7362,7 @@ static int broadwell_get_display_clock_speed(struct drm_i915_private *dev_priv) return 675000; } -static int haswell_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int haswell_get_cdclk(struct drm_i915_private *dev_priv) { uint32_t lcpll = I915_READ(LCPLL_CTL); uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; @@ -7381,23 +7379,23 @@ static int haswell_get_display_clock_speed(struct drm_i915_private *dev_priv) return 540000; } -static int valleyview_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int valleyview_get_cdclk(struct drm_i915_private *dev_priv) { return vlv_get_cck_clock_hpll(dev_priv, "cdclk", CCK_DISPLAY_CLOCK_CONTROL); } -static int ilk_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int ilk_get_cdclk(struct drm_i915_private *dev_priv) { return 450000; } -static int i945_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i945_get_cdclk(struct drm_i915_private *dev_priv) { return 400000; } -static int i945gm_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i945gm_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; @@ -7417,17 +7415,17 @@ static int i945gm_get_display_clock_speed(struct drm_i915_private *dev_priv) } } -static int i915_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i915_get_cdclk(struct drm_i915_private *dev_priv) { return 333333; } -static int i9xx_misc_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i9xx_misc_get_cdclk(struct drm_i915_private *dev_priv) { return 200000; } -static int pnv_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int pnv_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; @@ -7452,7 +7450,7 @@ static int pnv_get_display_clock_speed(struct drm_i915_private *dev_priv) } } -static int i915gm_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i915gm_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; @@ -7472,12 +7470,12 @@ static int i915gm_get_display_clock_speed(struct drm_i915_private *dev_priv) } } -static int i865_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i865_get_cdclk(struct drm_i915_private *dev_priv) { return 266667; } -static int i85x_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i85x_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 hpllcc = 0; @@ -7515,7 +7513,7 @@ static int i85x_get_display_clock_speed(struct drm_i915_private *dev_priv) return 0; } -static int i830_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i830_get_cdclk(struct drm_i915_private *dev_priv) { return 133333; } @@ -7588,7 +7586,7 @@ static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) return vco; } -static int gm45_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int gm45_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); @@ -7611,7 +7609,7 @@ static int gm45_get_display_clock_speed(struct drm_i915_private *dev_priv) } } -static int i965gm_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i965gm_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; static const uint8_t div_3200[] = { 16, 10, 8 }; @@ -7649,7 +7647,7 @@ static int i965gm_get_display_clock_speed(struct drm_i915_private *dev_priv) return 200000; } -static int g33_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int g33_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; @@ -16242,61 +16240,43 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) /* Returns the core display clock speed */ if (IS_GEN9_BC(dev_priv)) - dev_priv->display.get_display_clock_speed = - skylake_get_display_clock_speed; + dev_priv->display.get_cdclk = skylake_get_cdclk; else if (IS_GEN9_LP(dev_priv)) - dev_priv->display.get_display_clock_speed = - broxton_get_display_clock_speed; + dev_priv->display.get_cdclk = broxton_get_cdclk; else if (IS_BROADWELL(dev_priv)) - dev_priv->display.get_display_clock_speed = - broadwell_get_display_clock_speed; + dev_priv->display.get_cdclk = broadwell_get_cdclk; else if (IS_HASWELL(dev_priv)) - dev_priv->display.get_display_clock_speed = - haswell_get_display_clock_speed; + dev_priv->display.get_cdclk = haswell_get_cdclk; else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - dev_priv->display.get_display_clock_speed = - valleyview_get_display_clock_speed; + dev_priv->display.get_cdclk = valleyview_get_cdclk; else if (IS_GEN5(dev_priv)) - dev_priv->display.get_display_clock_speed = - ilk_get_display_clock_speed; + dev_priv->display.get_cdclk = ilk_get_cdclk; else if (IS_I945G(dev_priv) || IS_I965G(dev_priv) || IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) - dev_priv->display.get_display_clock_speed = - i945_get_display_clock_speed; + dev_priv->display.get_cdclk = i945_get_cdclk; else if (IS_GM45(dev_priv)) - dev_priv->display.get_display_clock_speed = - gm45_get_display_clock_speed; + dev_priv->display.get_cdclk = gm45_get_cdclk; else if (IS_I965GM(dev_priv)) - dev_priv->display.get_display_clock_speed = - i965gm_get_display_clock_speed; + dev_priv->display.get_cdclk = i965gm_get_cdclk; else if (IS_PINEVIEW(dev_priv)) - dev_priv->display.get_display_clock_speed = - pnv_get_display_clock_speed; + dev_priv->display.get_cdclk = pnv_get_cdclk; else if (IS_G33(dev_priv) || IS_G4X(dev_priv)) - dev_priv->display.get_display_clock_speed = - g33_get_display_clock_speed; + dev_priv->display.get_cdclk = g33_get_cdclk; else if (IS_I915G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i915_get_display_clock_speed; + dev_priv->display.get_cdclk = i915_get_cdclk; else if (IS_I845G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i9xx_misc_get_display_clock_speed; + dev_priv->display.get_cdclk = i9xx_misc_get_cdclk; else if (IS_I945GM(dev_priv)) - dev_priv->display.get_display_clock_speed = - i945gm_get_display_clock_speed; + dev_priv->display.get_cdclk = i945gm_get_cdclk; else if (IS_I915GM(dev_priv)) - dev_priv->display.get_display_clock_speed = - i915gm_get_display_clock_speed; + dev_priv->display.get_cdclk = i915gm_get_cdclk; else if (IS_I865G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i865_get_display_clock_speed; + dev_priv->display.get_cdclk = i865_get_cdclk; else if (IS_I85X(dev_priv)) - dev_priv->display.get_display_clock_speed = - i85x_get_display_clock_speed; + dev_priv->display.get_cdclk = i85x_get_cdclk; else { /* 830 */ WARN(!IS_I830(dev_priv), "Unknown platform. Assuming 133 MHz CDCLK\n"); - dev_priv->display.get_display_clock_speed = - i830_get_display_clock_speed; + dev_priv->display.get_cdclk = i830_get_cdclk; } if (IS_GEN5(dev_priv)) { diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 94df466a4801..915914f9444c 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -966,8 +966,7 @@ static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, { gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); - WARN_ON(dev_priv->cdclk_freq != - dev_priv->display.get_display_clock_speed(dev_priv)); + WARN_ON(dev_priv->cdclk_freq != dev_priv->display.get_cdclk(dev_priv)); gen9_assert_dbuf_enabled(dev_priv); From 4717e8bb7341a00f44da3d1814ac53c5a2a1ce75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 7 Feb 2017 20:32:26 +0200 Subject: [PATCH 0113/1299] drm/i915: Clean up the .get_cdclk() assignment if ladder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's clean up the mess we have in the if ladder that assigns the .get_cdclk() hooks. The grouping of the platforms by the function results in a thing that's not really legible, so let's do it the other way around and order the if ladder by platform and duplicate whatever assignments we need. To further avoid confusion with the function names let's rename them to just fixed__get_cdclk(). The other option would be to duplicate the functions entirely but it seems quite pointless to do that since each one just returns a fixed value. v2: Deal with i945gm_get_cdclk() Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170207183226.19537-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 39 ++++++++++++++++------------ 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8f560737c309..c407cc41c0c9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7385,12 +7385,12 @@ static int valleyview_get_cdclk(struct drm_i915_private *dev_priv) CCK_DISPLAY_CLOCK_CONTROL); } -static int ilk_get_cdclk(struct drm_i915_private *dev_priv) +static int fixed_450mhz_get_cdclk(struct drm_i915_private *dev_priv) { return 450000; } -static int i945_get_cdclk(struct drm_i915_private *dev_priv) +static int fixed_400mhz_get_cdclk(struct drm_i915_private *dev_priv) { return 400000; } @@ -7415,12 +7415,12 @@ static int i945gm_get_cdclk(struct drm_i915_private *dev_priv) } } -static int i915_get_cdclk(struct drm_i915_private *dev_priv) +static int fixed_333mhz_get_cdclk(struct drm_i915_private *dev_priv) { return 333333; } -static int i9xx_misc_get_cdclk(struct drm_i915_private *dev_priv) +static int fixed_200mhz_get_cdclk(struct drm_i915_private *dev_priv) { return 200000; } @@ -7470,7 +7470,7 @@ static int i915gm_get_cdclk(struct drm_i915_private *dev_priv) } } -static int i865_get_cdclk(struct drm_i915_private *dev_priv) +static int fixed_266mhz_get_cdclk(struct drm_i915_private *dev_priv) { return 266667; } @@ -7513,7 +7513,7 @@ static int i85x_get_cdclk(struct drm_i915_private *dev_priv) return 0; } -static int i830_get_cdclk(struct drm_i915_private *dev_priv) +static int fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv) { return 133333; } @@ -16249,34 +16249,39 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.get_cdclk = haswell_get_cdclk; else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) dev_priv->display.get_cdclk = valleyview_get_cdclk; + else if (IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; else if (IS_GEN5(dev_priv)) - dev_priv->display.get_cdclk = ilk_get_cdclk; - else if (IS_I945G(dev_priv) || IS_I965G(dev_priv) || - IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) - dev_priv->display.get_cdclk = i945_get_cdclk; + dev_priv->display.get_cdclk = fixed_450mhz_get_cdclk; else if (IS_GM45(dev_priv)) dev_priv->display.get_cdclk = gm45_get_cdclk; + else if (IS_G4X(dev_priv)) + dev_priv->display.get_cdclk = g33_get_cdclk; else if (IS_I965GM(dev_priv)) dev_priv->display.get_cdclk = i965gm_get_cdclk; + else if (IS_I965G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; else if (IS_PINEVIEW(dev_priv)) dev_priv->display.get_cdclk = pnv_get_cdclk; - else if (IS_G33(dev_priv) || IS_G4X(dev_priv)) + else if (IS_G33(dev_priv)) dev_priv->display.get_cdclk = g33_get_cdclk; - else if (IS_I915G(dev_priv)) - dev_priv->display.get_cdclk = i915_get_cdclk; - else if (IS_I845G(dev_priv)) - dev_priv->display.get_cdclk = i9xx_misc_get_cdclk; else if (IS_I945GM(dev_priv)) dev_priv->display.get_cdclk = i945gm_get_cdclk; + else if (IS_I945G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; else if (IS_I915GM(dev_priv)) dev_priv->display.get_cdclk = i915gm_get_cdclk; + else if (IS_I915G(dev_priv)) + dev_priv->display.get_cdclk = fixed_333mhz_get_cdclk; else if (IS_I865G(dev_priv)) - dev_priv->display.get_cdclk = i865_get_cdclk; + dev_priv->display.get_cdclk = fixed_266mhz_get_cdclk; else if (IS_I85X(dev_priv)) dev_priv->display.get_cdclk = i85x_get_cdclk; + else if (IS_I845G(dev_priv)) + dev_priv->display.get_cdclk = fixed_200mhz_get_cdclk; else { /* 830 */ WARN(!IS_I830(dev_priv), "Unknown platform. Assuming 133 MHz CDCLK\n"); - dev_priv->display.get_cdclk = i830_get_cdclk; + dev_priv->display.get_cdclk = fixed_133mhz_get_cdclk; } if (IS_GEN5(dev_priv)) { From 7ff89ca2135814db74d7a1706b9abfb1448b8bed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 7 Feb 2017 20:33:05 +0200 Subject: [PATCH 0114/1299] drm/i915: Move most cdclk/rawclk related code to intel_cdclk.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's try to shrink intel_display.c a bit by moving the cdclk/rawclk stuff to a new file. It's all reasonably self contained so we don't even have to add that many non-static symbols. We'll also take the opportunity to shuffle around the functions a bit to get things in a more consistent order based on the platform. v2: Add kernel-docs (Ander) v3: Deal with IS_GEN9_BC() v4: Deal with i945gm_get_cdclk() Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170207183305.19656-1-ville.syrjala@linux.intel.com --- Documentation/gpu/i915.rst | 9 + drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/intel_cdclk.c | 1794 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_display.c | 1697 +----------------------- drivers/gpu/drm/i915/intel_drv.h | 9 +- 5 files changed, 1816 insertions(+), 1694 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_cdclk.c diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 104296dffad1..1cd0ee518dee 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -213,6 +213,15 @@ Video BIOS Table (VBT) .. kernel-doc:: drivers/gpu/drm/i915/intel_vbt_defs.h :internal: +Display clocks +-------------- + +.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c + :doc: CDCLK / RAWCLK + +.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c + :internal: + Display PLLs ------------ diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 74ca2e8b2494..e58fc8f1b83b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -72,6 +72,7 @@ i915-y += intel_audio.o \ intel_atomic.o \ intel_atomic_plane.o \ intel_bios.o \ + intel_cdclk.o \ intel_color.o \ intel_display.o \ intel_dpio_phy.o \ diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c new file mode 100644 index 000000000000..04a46c14c898 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -0,0 +1,1794 @@ +/* + * Copyright © 2006-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "intel_drv.h" + +/** + * DOC: CDCLK / RAWCLK + * + * The display engine uses several different clocks to do its work. There + * are two main clocks involved that aren't directly related to the actual + * pixel clock or any symbol/bit clock of the actual output port. These + * are the core display clock (CDCLK) and RAWCLK. + * + * CDCLK clocks most of the display pipe logic, and thus its frequency + * must be high enough to support the rate at which pixels are flowing + * through the pipes. Downscaling must also be accounted as that increases + * the effective pixel rate. + * + * On several platforms the CDCLK frequency can be changed dynamically + * to minimize power consumption for a given display configuration. + * Typically changes to the CDCLK frequency require all the display pipes + * to be shut down while the frequency is being changed. + * + * On SKL+ the DMC will toggle the CDCLK off/on during DC5/6 entry/exit. + * DMC will not change the active CDCLK frequency however, so that part + * will still be performed by the driver directly. + * + * RAWCLK is a fixed frequency clock, often used by various auxiliary + * blocks such as AUX CH or backlight PWM. Hence the only thing we + * really need to know about RAWCLK is its frequency so that various + * dividers can be programmed correctly. + */ + +static int fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv) +{ + return 133333; +} + +static int fixed_200mhz_get_cdclk(struct drm_i915_private *dev_priv) +{ + return 200000; +} + +static int fixed_266mhz_get_cdclk(struct drm_i915_private *dev_priv) +{ + return 266667; +} + +static int fixed_333mhz_get_cdclk(struct drm_i915_private *dev_priv) +{ + return 333333; +} + +static int fixed_400mhz_get_cdclk(struct drm_i915_private *dev_priv) +{ + return 400000; +} + +static int fixed_450mhz_get_cdclk(struct drm_i915_private *dev_priv) +{ + return 450000; +} + +static int i85x_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 hpllcc = 0; + + /* + * 852GM/852GMV only supports 133 MHz and the HPLLCC + * encoding is different :( + * FIXME is this the right way to detect 852GM/852GMV? + */ + if (pdev->revision == 0x1) + return 133333; + + pci_bus_read_config_word(pdev->bus, + PCI_DEVFN(0, 3), HPLLCC, &hpllcc); + + /* Assume that the hardware is in the high speed state. This + * should be the default. + */ + switch (hpllcc & GC_CLOCK_CONTROL_MASK) { + case GC_CLOCK_133_200: + case GC_CLOCK_133_200_2: + case GC_CLOCK_100_200: + return 200000; + case GC_CLOCK_166_250: + return 250000; + case GC_CLOCK_100_133: + return 133333; + case GC_CLOCK_133_266: + case GC_CLOCK_133_266_2: + case GC_CLOCK_166_266: + return 266667; + } + + /* Shouldn't happen */ + return 0; +} + +static int i915gm_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) + return 133333; + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_320_MHZ: + return 333333; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + return 190000; + } +} + +static int i945gm_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) + return 133333; + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_320_MHZ: + return 320000; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + return 200000; + } +} + +static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) +{ + static const unsigned int blb_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + [4] = 6400000, + }; + static const unsigned int pnv_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + [4] = 2666667, + }; + static const unsigned int cl_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 6400000, + [4] = 3333333, + [5] = 3566667, + [6] = 4266667, + }; + static const unsigned int elk_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + }; + static const unsigned int ctg_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 6400000, + [4] = 2666667, + [5] = 4266667, + }; + const unsigned int *vco_table; + unsigned int vco; + uint8_t tmp = 0; + + /* FIXME other chipsets? */ + if (IS_GM45(dev_priv)) + vco_table = ctg_vco; + else if (IS_G4X(dev_priv)) + vco_table = elk_vco; + else if (IS_I965GM(dev_priv)) + vco_table = cl_vco; + else if (IS_PINEVIEW(dev_priv)) + vco_table = pnv_vco; + else if (IS_G33(dev_priv)) + vco_table = blb_vco; + else + return 0; + + tmp = I915_READ(IS_MOBILE(dev_priv) ? HPLLVCO_MOBILE : HPLLVCO); + + vco = vco_table[tmp & 0x7]; + if (vco == 0) + DRM_ERROR("Bad HPLL VCO (HPLLVCO=0x%02x)\n", tmp); + else + DRM_DEBUG_KMS("HPLL VCO %u kHz\n", vco); + + return vco; +} + +static int g33_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; + static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; + static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; + static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; + const uint8_t *div_table; + unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); + uint16_t tmp = 0; + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = (tmp >> 4) & 0x7; + + if (cdclk_sel >= ARRAY_SIZE(div_3200)) + goto fail; + + switch (vco) { + case 3200000: + div_table = div_3200; + break; + case 4000000: + div_table = div_4000; + break; + case 4800000: + div_table = div_4800; + break; + case 5333333: + div_table = div_5333; + break; + default: + goto fail; + } + + return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); + +fail: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%08x\n", + vco, tmp); + return 190476; +} + +static int pnv_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_267_MHZ_PNV: + return 266667; + case GC_DISPLAY_CLOCK_333_MHZ_PNV: + return 333333; + case GC_DISPLAY_CLOCK_444_MHZ_PNV: + return 444444; + case GC_DISPLAY_CLOCK_200_MHZ_PNV: + return 200000; + default: + DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); + case GC_DISPLAY_CLOCK_133_MHZ_PNV: + return 133333; + case GC_DISPLAY_CLOCK_167_MHZ_PNV: + return 166667; + } +} + +static int i965gm_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + static const uint8_t div_3200[] = { 16, 10, 8 }; + static const uint8_t div_4000[] = { 20, 12, 10 }; + static const uint8_t div_5333[] = { 24, 16, 14 }; + const uint8_t *div_table; + unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); + uint16_t tmp = 0; + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = ((tmp >> 8) & 0x1f) - 1; + + if (cdclk_sel >= ARRAY_SIZE(div_3200)) + goto fail; + + switch (vco) { + case 3200000: + div_table = div_3200; + break; + case 4000000: + div_table = div_4000; + break; + case 5333333: + div_table = div_5333; + break; + default: + goto fail; + } + + return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); + +fail: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%04x\n", + vco, tmp); + return 200000; +} + +static int gm45_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); + uint16_t tmp = 0; + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = (tmp >> 12) & 0x1; + + switch (vco) { + case 2666667: + case 4000000: + case 5333333: + return cdclk_sel ? 333333 : 222222; + case 3200000: + return cdclk_sel ? 320000 : 228571; + default: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u, CFGC=0x%04x\n", + vco, tmp); + return 222222; + } +} + +static int hsw_get_cdclk(struct drm_i915_private *dev_priv) +{ + uint32_t lcpll = I915_READ(LCPLL_CTL); + uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + return 800000; + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + return 450000; + else if (freq == LCPLL_CLK_FREQ_450) + return 450000; + else if (IS_HSW_ULT(dev_priv)) + return 337500; + else + return 540000; +} + +static int vlv_calc_cdclk(struct drm_i915_private *dev_priv, + int max_pixclk) +{ + int freq_320 = (dev_priv->hpll_freq << 1) % 320000 != 0 ? + 333333 : 320000; + int limit = IS_CHERRYVIEW(dev_priv) ? 95 : 90; + + /* + * We seem to get an unstable or solid color picture at 200MHz. + * Not sure what's wrong. For now use 200MHz only when all pipes + * are off. + */ + if (!IS_CHERRYVIEW(dev_priv) && + max_pixclk > freq_320*limit/100) + return 400000; + else if (max_pixclk > 266667*limit/100) + return freq_320; + else if (max_pixclk > 0) + return 266667; + else + return 200000; +} + +static int vlv_get_cdclk(struct drm_i915_private *dev_priv) +{ + return vlv_get_cck_clock_hpll(dev_priv, "cdclk", + CCK_DISPLAY_CLOCK_CONTROL); +} + +static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) +{ + unsigned int credits, default_credits; + + if (IS_CHERRYVIEW(dev_priv)) + default_credits = PFI_CREDIT(12); + else + default_credits = PFI_CREDIT(8); + + if (dev_priv->cdclk_freq >= dev_priv->czclk_freq) { + /* CHV suggested value is 31 or 63 */ + if (IS_CHERRYVIEW(dev_priv)) + credits = PFI_CREDIT_63; + else + credits = PFI_CREDIT(15); + } else { + credits = default_credits; + } + + /* + * WA - write default credits before re-programming + * FIXME: should we also set the resend bit here? + */ + I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | + default_credits); + + I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | + credits | PFI_CREDIT_RESEND); + + /* + * FIXME is this guaranteed to clear + * immediately or should we poll for it? + */ + WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); +} + +static void vlv_set_cdclk(struct drm_device *dev, int cdclk) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + u32 val, cmd; + + WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); + + if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ + cmd = 2; + else if (cdclk == 266667) + cmd = 1; + else + cmd = 0; + + mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + val &= ~DSPFREQGUAR_MASK; + val |= (cmd << DSPFREQGUAR_SHIFT); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); + if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & + DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), + 50)) { + DRM_ERROR("timed out waiting for CDclk change\n"); + } + mutex_unlock(&dev_priv->rps.hw_lock); + + mutex_lock(&dev_priv->sb_lock); + + if (cdclk == 400000) { + u32 divider; + + divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, + cdclk) - 1; + + /* adjust cdclk divider */ + val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); + val &= ~CCK_FREQUENCY_VALUES; + val |= divider; + vlv_cck_write(dev_priv, CCK_DISPLAY_CLOCK_CONTROL, val); + + if (wait_for((vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL) & + CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), + 50)) + DRM_ERROR("timed out waiting for CDclk change\n"); + } + + /* adjust self-refresh exit latency value */ + val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC); + val &= ~0x7f; + + /* + * For high bandwidth configs, we set a higher latency in the bunit + * so that the core display fetch happens in time to avoid underruns. + */ + if (cdclk == 400000) + val |= 4500 / 250; /* 4.5 usec */ + else + val |= 3000 / 250; /* 3.0 usec */ + vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); + + mutex_unlock(&dev_priv->sb_lock); + + intel_update_cdclk(dev_priv); +} + +static void chv_set_cdclk(struct drm_device *dev, int cdclk) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + u32 val, cmd; + + WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); + + switch (cdclk) { + case 333333: + case 320000: + case 266667: + case 200000: + break; + default: + MISSING_CASE(cdclk); + return; + } + + /* + * Specs are full of misinformation, but testing on actual + * hardware has shown that we just need to write the desired + * CCK divider into the Punit register. + */ + cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; + + mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + val &= ~DSPFREQGUAR_MASK_CHV; + val |= (cmd << DSPFREQGUAR_SHIFT_CHV); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); + if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & + DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), + 50)) { + DRM_ERROR("timed out waiting for CDclk change\n"); + } + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_update_cdclk(dev_priv); +} + +static int bdw_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 540000) + return 675000; + else if (max_pixclk > 450000) + return 540000; + else if (max_pixclk > 337500) + return 450000; + else + return 337500; +} + +static int bdw_get_cdclk(struct drm_i915_private *dev_priv) +{ + uint32_t lcpll = I915_READ(LCPLL_CTL); + uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + return 800000; + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + return 450000; + else if (freq == LCPLL_CLK_FREQ_450) + return 450000; + else if (freq == LCPLL_CLK_FREQ_54O_BDW) + return 540000; + else if (freq == LCPLL_CLK_FREQ_337_5_BDW) + return 337500; + else + return 675000; +} + +static void bdw_set_cdclk(struct drm_device *dev, int cdclk) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + uint32_t val, data; + int ret; + + if (WARN((I915_READ(LCPLL_CTL) & + (LCPLL_PLL_DISABLE | LCPLL_PLL_LOCK | + LCPLL_CD_CLOCK_DISABLE | LCPLL_ROOT_CD_CLOCK_DISABLE | + LCPLL_CD2X_CLOCK_DISABLE | LCPLL_POWER_DOWN_ALLOW | + LCPLL_CD_SOURCE_FCLK)) != LCPLL_PLL_LOCK, + "trying to change cdclk frequency with cdclk not enabled\n")) + return; + + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, + BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("failed to inform pcode about cdclk change\n"); + return; + } + + val = I915_READ(LCPLL_CTL); + val |= LCPLL_CD_SOURCE_FCLK; + I915_WRITE(LCPLL_CTL, val); + + if (wait_for_us(I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE, 1)) + DRM_ERROR("Switching to FCLK failed\n"); + + val = I915_READ(LCPLL_CTL); + val &= ~LCPLL_CLK_FREQ_MASK; + + switch (cdclk) { + case 450000: + val |= LCPLL_CLK_FREQ_450; + data = 0; + break; + case 540000: + val |= LCPLL_CLK_FREQ_54O_BDW; + data = 1; + break; + case 337500: + val |= LCPLL_CLK_FREQ_337_5_BDW; + data = 2; + break; + case 675000: + val |= LCPLL_CLK_FREQ_675_BDW; + data = 3; + break; + default: + WARN(1, "invalid cdclk frequency\n"); + return; + } + + I915_WRITE(LCPLL_CTL, val); + + val = I915_READ(LCPLL_CTL); + val &= ~LCPLL_CD_SOURCE_FCLK; + I915_WRITE(LCPLL_CTL, val); + + if (wait_for_us((I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) + DRM_ERROR("Switching back to LCPLL failed\n"); + + mutex_lock(&dev_priv->rps.hw_lock); + sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); + mutex_unlock(&dev_priv->rps.hw_lock); + + I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); + + intel_update_cdclk(dev_priv); + + WARN(cdclk != dev_priv->cdclk_freq, + "cdclk requested %d kHz but got %d kHz\n", + cdclk, dev_priv->cdclk_freq); +} + +static int skl_calc_cdclk(int max_pixclk, int vco) +{ + if (vco == 8640000) { + if (max_pixclk > 540000) + return 617143; + else if (max_pixclk > 432000) + return 540000; + else if (max_pixclk > 308571) + return 432000; + else + return 308571; + } else { + if (max_pixclk > 540000) + return 675000; + else if (max_pixclk > 450000) + return 540000; + else if (max_pixclk > 337500) + return 450000; + else + return 337500; + } +} + +static void skl_dpll0_update(struct drm_i915_private *dev_priv) +{ + u32 val; + + dev_priv->cdclk_pll.ref = 24000; + dev_priv->cdclk_pll.vco = 0; + + val = I915_READ(LCPLL1_CTL); + if ((val & LCPLL_PLL_ENABLE) == 0) + return; + + if (WARN_ON((val & LCPLL_PLL_LOCK) == 0)) + return; + + val = I915_READ(DPLL_CTRL1); + + if (WARN_ON((val & (DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | + DPLL_CTRL1_SSC(SKL_DPLL0) | + DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) != + DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) + return; + + switch (val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)) { + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, SKL_DPLL0): + dev_priv->cdclk_pll.vco = 8100000; + break; + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, SKL_DPLL0): + dev_priv->cdclk_pll.vco = 8640000; + break; + default: + MISSING_CASE(val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); + break; + } +} + +static int skl_get_cdclk(struct drm_i915_private *dev_priv) +{ + u32 cdctl; + + skl_dpll0_update(dev_priv); + + if (dev_priv->cdclk_pll.vco == 0) + return dev_priv->cdclk_pll.ref; + + cdctl = I915_READ(CDCLK_CTL); + + if (dev_priv->cdclk_pll.vco == 8640000) { + switch (cdctl & CDCLK_FREQ_SEL_MASK) { + case CDCLK_FREQ_450_432: + return 432000; + case CDCLK_FREQ_337_308: + return 308571; + case CDCLK_FREQ_540: + return 540000; + case CDCLK_FREQ_675_617: + return 617143; + default: + MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + } + } else { + switch (cdctl & CDCLK_FREQ_SEL_MASK) { + case CDCLK_FREQ_450_432: + return 450000; + case CDCLK_FREQ_337_308: + return 337500; + case CDCLK_FREQ_540: + return 540000; + case CDCLK_FREQ_675_617: + return 675000; + default: + MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + } + } + + return dev_priv->cdclk_pll.ref; +} + +/* convert from kHz to .1 fixpoint MHz with -1MHz offset */ +static int skl_cdclk_decimal(int cdclk) +{ + return DIV_ROUND_CLOSEST(cdclk - 1000, 500); +} + +static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, + int vco) +{ + bool changed = dev_priv->skl_preferred_vco_freq != vco; + + dev_priv->skl_preferred_vco_freq = vco; + + if (changed) + intel_update_max_cdclk(dev_priv); +} + +static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) +{ + int min_cdclk = skl_calc_cdclk(0, vco); + u32 val; + + WARN_ON(vco != 8100000 && vco != 8640000); + + /* select the minimum CDCLK before enabling DPLL 0 */ + val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); + I915_WRITE(CDCLK_CTL, val); + POSTING_READ(CDCLK_CTL); + + /* + * We always enable DPLL0 with the lowest link rate possible, but still + * taking into account the VCO required to operate the eDP panel at the + * desired frequency. The usual DP link rates operate with a VCO of + * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640. + * The modeset code is responsible for the selection of the exact link + * rate later on, with the constraint of choosing a frequency that + * works with vco. + */ + val = I915_READ(DPLL_CTRL1); + + val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) | + DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); + val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0); + if (vco == 8640000) + val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, + SKL_DPLL0); + else + val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, + SKL_DPLL0); + + I915_WRITE(DPLL_CTRL1, val); + POSTING_READ(DPLL_CTRL1); + + I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE); + + if (intel_wait_for_register(dev_priv, + LCPLL1_CTL, LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, + 5)) + DRM_ERROR("DPLL0 not locked\n"); + + dev_priv->cdclk_pll.vco = vco; + + /* We'll want to keep using the current vco from now on. */ + skl_set_preferred_cdclk_vco(dev_priv, vco); +} + +static void skl_dpll0_disable(struct drm_i915_private *dev_priv) +{ + I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE); + if (intel_wait_for_register(dev_priv, + LCPLL1_CTL, LCPLL_PLL_LOCK, 0, + 1)) + DRM_ERROR("Couldn't disable DPLL0\n"); + + dev_priv->cdclk_pll.vco = 0; +} + +static void skl_set_cdclk(struct drm_i915_private *dev_priv, + int cdclk, int vco) +{ + u32 freq_select, pcu_ack; + int ret; + + WARN_ON((cdclk == 24000) != (vco == 0)); + + DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", + cdclk, vco); + + mutex_lock(&dev_priv->rps.hw_lock); + ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", + ret); + return; + } + + /* set CDCLK_CTL */ + switch (cdclk) { + case 450000: + case 432000: + freq_select = CDCLK_FREQ_450_432; + pcu_ack = 1; + break; + case 540000: + freq_select = CDCLK_FREQ_540; + pcu_ack = 2; + break; + case 308571: + case 337500: + default: + freq_select = CDCLK_FREQ_337_308; + pcu_ack = 0; + break; + case 617143: + case 675000: + freq_select = CDCLK_FREQ_675_617; + pcu_ack = 3; + break; + } + + if (dev_priv->cdclk_pll.vco != 0 && + dev_priv->cdclk_pll.vco != vco) + skl_dpll0_disable(dev_priv); + + if (dev_priv->cdclk_pll.vco != vco) + skl_dpll0_enable(dev_priv, vco); + + I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); + POSTING_READ(CDCLK_CTL); + + /* inform PCU of the change */ + mutex_lock(&dev_priv->rps.hw_lock); + sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_update_cdclk(dev_priv); +} + +static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) +{ + uint32_t cdctl, expected; + + /* + * check if the pre-os initialized the display + * There is SWF18 scratchpad register defined which is set by the + * pre-os which can be used by the OS drivers to check the status + */ + if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0) + goto sanitize; + + intel_update_cdclk(dev_priv); + /* Is PLL enabled and locked ? */ + if (dev_priv->cdclk_pll.vco == 0 || + dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) + goto sanitize; + + /* DPLL okay; verify the cdclock + * + * Noticed in some instances that the freq selection is correct but + * decimal part is programmed wrong from BIOS where pre-os does not + * enable display. Verify the same as well. + */ + cdctl = I915_READ(CDCLK_CTL); + expected = (cdctl & CDCLK_FREQ_SEL_MASK) | + skl_cdclk_decimal(dev_priv->cdclk_freq); + if (cdctl == expected) + /* All well; nothing to sanitize */ + return; + +sanitize: + DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); + + /* force cdclk programming */ + dev_priv->cdclk_freq = 0; + /* force full PLL disable + enable */ + dev_priv->cdclk_pll.vco = -1; +} + +/** + * skl_init_cdclk - Initialize CDCLK on SKL + * @dev_priv: i915 device + * + * Initialize CDCLK for SKL and derivatives. This is generally + * done only during the display core initialization sequence, + * after which the DMC will take care of turning CDCLK off/on + * as needed. + */ +void skl_init_cdclk(struct drm_i915_private *dev_priv) +{ + int cdclk, vco; + + skl_sanitize_cdclk(dev_priv); + + if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) { + /* + * Use the current vco as our initial + * guess as to what the preferred vco is. + */ + if (dev_priv->skl_preferred_vco_freq == 0) + skl_set_preferred_cdclk_vco(dev_priv, + dev_priv->cdclk_pll.vco); + return; + } + + vco = dev_priv->skl_preferred_vco_freq; + if (vco == 0) + vco = 8100000; + cdclk = skl_calc_cdclk(0, vco); + + skl_set_cdclk(dev_priv, cdclk, vco); +} + +/** + * skl_uninit_cdclk - Uninitialize CDCLK on SKL + * @dev_priv: i915 device + * + * Uninitialize CDCLK for SKL and derivatives. This is done only + * during the display core uninitialization sequence. + */ +void skl_uninit_cdclk(struct drm_i915_private *dev_priv) +{ + skl_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref, 0); +} + +static int bxt_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 576000) + return 624000; + else if (max_pixclk > 384000) + return 576000; + else if (max_pixclk > 288000) + return 384000; + else if (max_pixclk > 144000) + return 288000; + else + return 144000; +} + +static int glk_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 2 * 158400) + return 316800; + else if (max_pixclk > 2 * 79200) + return 158400; + else + return 79200; +} + +static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +{ + int ratio; + + if (cdclk == dev_priv->cdclk_pll.ref) + return 0; + + switch (cdclk) { + default: + MISSING_CASE(cdclk); + case 144000: + case 288000: + case 384000: + case 576000: + ratio = 60; + break; + case 624000: + ratio = 65; + break; + } + + return dev_priv->cdclk_pll.ref * ratio; +} + +static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +{ + int ratio; + + if (cdclk == dev_priv->cdclk_pll.ref) + return 0; + + switch (cdclk) { + default: + MISSING_CASE(cdclk); + case 79200: + case 158400: + case 316800: + ratio = 33; + break; + } + + return dev_priv->cdclk_pll.ref * ratio; +} + +static void bxt_de_pll_update(struct drm_i915_private *dev_priv) +{ + u32 val; + + dev_priv->cdclk_pll.ref = 19200; + dev_priv->cdclk_pll.vco = 0; + + val = I915_READ(BXT_DE_PLL_ENABLE); + if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) + return; + + if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) + return; + + val = I915_READ(BXT_DE_PLL_CTL); + dev_priv->cdclk_pll.vco = (val & BXT_DE_PLL_RATIO_MASK) * + dev_priv->cdclk_pll.ref; +} + +static int bxt_get_cdclk(struct drm_i915_private *dev_priv) +{ + u32 divider; + int div, vco; + + bxt_de_pll_update(dev_priv); + + vco = dev_priv->cdclk_pll.vco; + if (vco == 0) + return dev_priv->cdclk_pll.ref; + + divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; + + switch (divider) { + case BXT_CDCLK_CD2X_DIV_SEL_1: + div = 2; + break; + case BXT_CDCLK_CD2X_DIV_SEL_1_5: + WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + div = 3; + break; + case BXT_CDCLK_CD2X_DIV_SEL_2: + div = 4; + break; + case BXT_CDCLK_CD2X_DIV_SEL_4: + div = 8; + break; + default: + MISSING_CASE(divider); + return dev_priv->cdclk_pll.ref; + } + + return DIV_ROUND_CLOSEST(vco, div); +} + +static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) +{ + I915_WRITE(BXT_DE_PLL_ENABLE, 0); + + /* Timeout 200us */ + if (intel_wait_for_register(dev_priv, + BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 0, + 1)) + DRM_ERROR("timeout waiting for DE PLL unlock\n"); + + dev_priv->cdclk_pll.vco = 0; +} + +static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) +{ + int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk_pll.ref); + u32 val; + + val = I915_READ(BXT_DE_PLL_CTL); + val &= ~BXT_DE_PLL_RATIO_MASK; + val |= BXT_DE_PLL_RATIO(ratio); + I915_WRITE(BXT_DE_PLL_CTL, val); + + I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE); + + /* Timeout 200us */ + if (intel_wait_for_register(dev_priv, + BXT_DE_PLL_ENABLE, + BXT_DE_PLL_LOCK, + BXT_DE_PLL_LOCK, + 1)) + DRM_ERROR("timeout waiting for DE PLL lock\n"); + + dev_priv->cdclk_pll.vco = vco; +} + +static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) +{ + u32 val, divider; + int vco, ret; + + if (IS_GEMINILAKE(dev_priv)) + vco = glk_de_pll_vco(dev_priv, cdclk); + else + vco = bxt_de_pll_vco(dev_priv, cdclk); + + DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", + cdclk, vco); + + /* cdclk = vco / 2 / div{1,1.5,2,4} */ + switch (DIV_ROUND_CLOSEST(vco, cdclk)) { + case 8: + divider = BXT_CDCLK_CD2X_DIV_SEL_4; + break; + case 4: + divider = BXT_CDCLK_CD2X_DIV_SEL_2; + break; + case 3: + WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; + break; + case 2: + divider = BXT_CDCLK_CD2X_DIV_SEL_1; + break; + default: + WARN_ON(cdclk != dev_priv->cdclk_pll.ref); + WARN_ON(vco != 0); + + divider = BXT_CDCLK_CD2X_DIV_SEL_1; + break; + } + + /* Inform power controller of upcoming frequency change */ + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, + 0x80000000); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret) { + DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", + ret, cdclk); + return; + } + + if (dev_priv->cdclk_pll.vco != 0 && + dev_priv->cdclk_pll.vco != vco) + bxt_de_pll_disable(dev_priv); + + if (dev_priv->cdclk_pll.vco != vco) + bxt_de_pll_enable(dev_priv, vco); + + val = divider | skl_cdclk_decimal(cdclk); + /* + * FIXME if only the cd2x divider needs changing, it could be done + * without shutting off the pipe (if only one pipe is active). + */ + val |= BXT_CDCLK_CD2X_PIPE_NONE; + /* + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. + */ + if (cdclk >= 500000) + val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; + I915_WRITE(CDCLK_CTL, val); + + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, + DIV_ROUND_UP(cdclk, 25000)); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret) { + DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", + ret, cdclk); + return; + } + + intel_update_cdclk(dev_priv); +} + +static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) +{ + u32 cdctl, expected; + + intel_update_cdclk(dev_priv); + + if (dev_priv->cdclk_pll.vco == 0 || + dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) + goto sanitize; + + /* DPLL okay; verify the cdclock + * + * Some BIOS versions leave an incorrect decimal frequency value and + * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, + * so sanitize this register. + */ + cdctl = I915_READ(CDCLK_CTL); + /* + * Let's ignore the pipe field, since BIOS could have configured the + * dividers both synching to an active pipe, or asynchronously + * (PIPE_NONE). + */ + cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; + + expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | + skl_cdclk_decimal(dev_priv->cdclk_freq); + /* + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. + */ + if (dev_priv->cdclk_freq >= 500000) + expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; + + if (cdctl == expected) + /* All well; nothing to sanitize */ + return; + +sanitize: + DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); + + /* force cdclk programming */ + dev_priv->cdclk_freq = 0; + + /* force full PLL disable + enable */ + dev_priv->cdclk_pll.vco = -1; +} + +/** + * bxt_init_cdclk - Initialize CDCLK on BXT + * @dev_priv: i915 device + * + * Initialize CDCLK for BXT and derivatives. This is generally + * done only during the display core initialization sequence, + * after which the DMC will take care of turning CDCLK off/on + * as needed. + */ +void bxt_init_cdclk(struct drm_i915_private *dev_priv) +{ + int cdclk; + + bxt_sanitize_cdclk(dev_priv); + + if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) + return; + + /* + * FIXME: + * - The initial CDCLK needs to be read from VBT. + * Need to make this change after VBT has changes for BXT. + */ + if (IS_GEMINILAKE(dev_priv)) + cdclk = glk_calc_cdclk(0); + else + cdclk = bxt_calc_cdclk(0); + + bxt_set_cdclk(dev_priv, cdclk); +} + +/** + * bxt_uninit_cdclk - Uninitialize CDCLK on BXT + * @dev_priv: i915 device + * + * Uninitialize CDCLK for BXT and derivatives. This is done only + * during the display core uninitialization sequence. + */ +void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) +{ + bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref); +} + +static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, + int pixel_rate) +{ + struct drm_i915_private *dev_priv = + to_i915(crtc_state->base.crtc->dev); + + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ + if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) + pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); + + /* BSpec says "Do not use DisplayPort with CDCLK less than + * 432 MHz, audio enabled, port width x4, and link rate + * HBR2 (5.4 GHz), or else there may be audio corruption or + * screen corruption." + */ + if (intel_crtc_has_dp_encoder(crtc_state) && + crtc_state->has_audio && + crtc_state->port_clock >= 540000 && + crtc_state->lane_count == 4) + pixel_rate = max(432000, pixel_rate); + + return pixel_rate; +} + +/* compute the max rate for new configuration */ +static int intel_max_pixel_rate(struct drm_atomic_state *state) +{ + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_i915_private *dev_priv = to_i915(state->dev); + struct drm_crtc *crtc; + struct drm_crtc_state *cstate; + struct intel_crtc_state *crtc_state; + unsigned int max_pixel_rate = 0, i; + enum pipe pipe; + + memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, + sizeof(intel_state->min_pixclk)); + + for_each_crtc_in_state(state, crtc, cstate, i) { + int pixel_rate; + + crtc_state = to_intel_crtc_state(cstate); + if (!crtc_state->base.enable) { + intel_state->min_pixclk[i] = 0; + continue; + } + + pixel_rate = crtc_state->pixel_rate; + + if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) + pixel_rate = + bdw_adjust_min_pipe_pixel_rate(crtc_state, + pixel_rate); + + intel_state->min_pixclk[i] = pixel_rate; + } + + for_each_pipe(dev_priv, pipe) + max_pixel_rate = max(intel_state->min_pixclk[pipe], + max_pixel_rate); + + return max_pixel_rate; +} + +static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + int max_pixclk = intel_max_pixel_rate(state); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(state); + + intel_state->cdclk = intel_state->dev_cdclk = + vlv_calc_cdclk(dev_priv, max_pixclk); + + if (!intel_state->active_crtcs) + intel_state->dev_cdclk = vlv_calc_cdclk(dev_priv, 0); + + return 0; +} + +static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) +{ + struct drm_device *dev = old_state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); + unsigned int req_cdclk = old_intel_state->dev_cdclk; + + /* + * FIXME: We can end up here with all power domains off, yet + * with a CDCLK frequency other than the minimum. To account + * for this take the PIPE-A power domain, which covers the HW + * blocks needed for the following programming. This can be + * removed once it's guaranteed that we get here either with + * the minimum CDCLK set, or the required power domains + * enabled. + */ + intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); + + if (IS_CHERRYVIEW(dev_priv)) + chv_set_cdclk(dev, req_cdclk); + else + vlv_set_cdclk(dev, req_cdclk); + + vlv_program_pfi_credits(dev_priv); + + intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); +} + +static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + int max_pixclk = intel_max_pixel_rate(state); + int cdclk; + + /* + * FIXME should also account for plane ratio + * once 64bpp pixel formats are supported. + */ + cdclk = bdw_calc_cdclk(max_pixclk); + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk = intel_state->dev_cdclk = cdclk; + if (!intel_state->active_crtcs) + intel_state->dev_cdclk = bdw_calc_cdclk(0); + + return 0; +} + +static void bdw_modeset_commit_cdclk(struct drm_atomic_state *old_state) +{ + struct drm_device *dev = old_state->dev; + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); + unsigned int req_cdclk = old_intel_state->dev_cdclk; + + bdw_set_cdclk(dev, req_cdclk); +} + +static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_i915_private *dev_priv = to_i915(state->dev); + const int max_pixclk = intel_max_pixel_rate(state); + int vco = intel_state->cdclk_pll_vco; + int cdclk; + + /* + * FIXME should also account for plane ratio + * once 64bpp pixel formats are supported. + */ + cdclk = skl_calc_cdclk(max_pixclk, vco); + + /* + * FIXME move the cdclk caclulation to + * compute_config() so we can fail gracegully. + */ + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_ERROR("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + cdclk = dev_priv->max_cdclk_freq; + } + + intel_state->cdclk = intel_state->dev_cdclk = cdclk; + if (!intel_state->active_crtcs) + intel_state->dev_cdclk = skl_calc_cdclk(0, vco); + + return 0; +} + +static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state) +{ + struct drm_i915_private *dev_priv = to_i915(old_state->dev); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(old_state); + unsigned int req_cdclk = intel_state->dev_cdclk; + unsigned int req_vco = intel_state->cdclk_pll_vco; + + skl_set_cdclk(dev_priv, req_cdclk, req_vco); +} + +static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + int max_pixclk = intel_max_pixel_rate(state); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(state); + int cdclk; + + if (IS_GEMINILAKE(dev_priv)) + cdclk = glk_calc_cdclk(max_pixclk); + else + cdclk = bxt_calc_cdclk(max_pixclk); + + intel_state->cdclk = intel_state->dev_cdclk = cdclk; + + if (!intel_state->active_crtcs) { + if (IS_GEMINILAKE(dev_priv)) + cdclk = glk_calc_cdclk(0); + else + cdclk = bxt_calc_cdclk(0); + + intel_state->dev_cdclk = cdclk; + } + + return 0; +} + +static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) +{ + struct drm_device *dev = old_state->dev; + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); + unsigned int req_cdclk = old_intel_state->dev_cdclk; + + bxt_set_cdclk(to_i915(dev), req_cdclk); +} + +static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) +{ + int max_cdclk_freq = dev_priv->max_cdclk_freq; + + if (IS_GEMINILAKE(dev_priv)) + return 2 * max_cdclk_freq; + else if (INTEL_INFO(dev_priv)->gen >= 9 || + IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + return max_cdclk_freq; + else if (IS_CHERRYVIEW(dev_priv)) + return max_cdclk_freq*95/100; + else if (INTEL_INFO(dev_priv)->gen < 4) + return 2*max_cdclk_freq*90/100; + else + return max_cdclk_freq*90/100; +} + +/** + * intel_update_max_cdclk - Determine the maximum support CDCLK frequency + * @dev_priv: i915 device + * + * Determine the maximum CDCLK frequency the platform supports, and also + * derive the maximum dot clock frequency the maximum CDCLK frequency + * allows. + */ +void intel_update_max_cdclk(struct drm_i915_private *dev_priv) +{ + if (IS_GEN9_BC(dev_priv)) { + u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; + int max_cdclk, vco; + + vco = dev_priv->skl_preferred_vco_freq; + WARN_ON(vco != 8100000 && vco != 8640000); + + /* + * Use the lower (vco 8640) cdclk values as a + * first guess. skl_calc_cdclk() will correct it + * if the preferred vco is 8100 instead. + */ + if (limit == SKL_DFSM_CDCLK_LIMIT_675) + max_cdclk = 617143; + else if (limit == SKL_DFSM_CDCLK_LIMIT_540) + max_cdclk = 540000; + else if (limit == SKL_DFSM_CDCLK_LIMIT_450) + max_cdclk = 432000; + else + max_cdclk = 308571; + + dev_priv->max_cdclk_freq = skl_calc_cdclk(max_cdclk, vco); + } else if (IS_GEMINILAKE(dev_priv)) { + dev_priv->max_cdclk_freq = 316800; + } else if (IS_BROXTON(dev_priv)) { + dev_priv->max_cdclk_freq = 624000; + } else if (IS_BROADWELL(dev_priv)) { + /* + * FIXME with extra cooling we can allow + * 540 MHz for ULX and 675 Mhz for ULT. + * How can we know if extra cooling is + * available? PCI ID, VTB, something else? + */ + if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + dev_priv->max_cdclk_freq = 450000; + else if (IS_BDW_ULX(dev_priv)) + dev_priv->max_cdclk_freq = 450000; + else if (IS_BDW_ULT(dev_priv)) + dev_priv->max_cdclk_freq = 540000; + else + dev_priv->max_cdclk_freq = 675000; + } else if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->max_cdclk_freq = 320000; + } else if (IS_VALLEYVIEW(dev_priv)) { + dev_priv->max_cdclk_freq = 400000; + } else { + /* otherwise assume cdclk is fixed */ + dev_priv->max_cdclk_freq = dev_priv->cdclk_freq; + } + + dev_priv->max_dotclk_freq = intel_compute_max_dotclk(dev_priv); + + DRM_DEBUG_DRIVER("Max CD clock rate: %d kHz\n", + dev_priv->max_cdclk_freq); + + DRM_DEBUG_DRIVER("Max dotclock rate: %d kHz\n", + dev_priv->max_dotclk_freq); +} + +/** + * intel_update_cdclk - Determine the current CDCLK frequency + * @dev_priv: i915 device + * + * Determine the current CDCLK frequency. + */ +void intel_update_cdclk(struct drm_i915_private *dev_priv) +{ + dev_priv->cdclk_freq = dev_priv->display.get_cdclk(dev_priv); + + if (INTEL_GEN(dev_priv) >= 9) + DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", + dev_priv->cdclk_freq, dev_priv->cdclk_pll.vco, + dev_priv->cdclk_pll.ref); + else + DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz\n", + dev_priv->cdclk_freq); + + /* + * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): + * Programmng [sic] note: bit[9:2] should be programmed to the number + * of cdclk that generates 4MHz reference clock freq which is used to + * generate GMBus clock. This will vary with the cdclk freq. + */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + I915_WRITE(GMBUSFREQ_VLV, + DIV_ROUND_UP(dev_priv->cdclk_freq, 1000)); +} + +static int pch_rawclk(struct drm_i915_private *dev_priv) +{ + return (I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK) * 1000; +} + +static int vlv_hrawclk(struct drm_i915_private *dev_priv) +{ + /* RAWCLK_FREQ_VLV register updated from power well code */ + return vlv_get_cck_clock_hpll(dev_priv, "hrawclk", + CCK_DISPLAY_REF_CLOCK_CONTROL); +} + +static int g4x_hrawclk(struct drm_i915_private *dev_priv) +{ + uint32_t clkcfg; + + /* hrawclock is 1/4 the FSB frequency */ + clkcfg = I915_READ(CLKCFG); + switch (clkcfg & CLKCFG_FSB_MASK) { + case CLKCFG_FSB_400: + return 100000; + case CLKCFG_FSB_533: + return 133333; + case CLKCFG_FSB_667: + return 166667; + case CLKCFG_FSB_800: + return 200000; + case CLKCFG_FSB_1067: + return 266667; + case CLKCFG_FSB_1333: + return 333333; + /* these two are just a guess; one of them might be right */ + case CLKCFG_FSB_1600: + case CLKCFG_FSB_1600_ALT: + return 400000; + default: + return 133333; + } +} + +/** + * intel_update_rawclk - Determine the current RAWCLK frequency + * @dev_priv: i915 device + * + * Determine the current RAWCLK frequency. RAWCLK is a fixed + * frequency clock so this needs to done only once. + */ +void intel_update_rawclk(struct drm_i915_private *dev_priv) +{ + if (HAS_PCH_SPLIT(dev_priv)) + dev_priv->rawclk_freq = pch_rawclk(dev_priv); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->rawclk_freq = vlv_hrawclk(dev_priv); + else if (IS_G4X(dev_priv) || IS_PINEVIEW(dev_priv)) + dev_priv->rawclk_freq = g4x_hrawclk(dev_priv); + else + /* no rawclk on other platforms, or no need to know it */ + return; + + DRM_DEBUG_DRIVER("rawclk rate: %d kHz\n", dev_priv->rawclk_freq); +} + +/** + * intel_init_cdclk_hooks - Initialize CDCLK related modesetting hooks + * @dev_priv: i915 device + */ +void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) +{ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + dev_priv->display.modeset_commit_cdclk = + vlv_modeset_commit_cdclk; + dev_priv->display.modeset_calc_cdclk = + vlv_modeset_calc_cdclk; + } else if (IS_BROADWELL(dev_priv)) { + dev_priv->display.modeset_commit_cdclk = + bdw_modeset_commit_cdclk; + dev_priv->display.modeset_calc_cdclk = + bdw_modeset_calc_cdclk; + } else if (IS_GEN9_LP(dev_priv)) { + dev_priv->display.modeset_commit_cdclk = + bxt_modeset_commit_cdclk; + dev_priv->display.modeset_calc_cdclk = + bxt_modeset_calc_cdclk; + } else if (IS_GEN9_BC(dev_priv)) { + dev_priv->display.modeset_commit_cdclk = + skl_modeset_commit_cdclk; + dev_priv->display.modeset_calc_cdclk = + skl_modeset_calc_cdclk; + } + + if (IS_GEN9_BC(dev_priv)) + dev_priv->display.get_cdclk = skl_get_cdclk; + else if (IS_GEN9_LP(dev_priv)) + dev_priv->display.get_cdclk = bxt_get_cdclk; + else if (IS_BROADWELL(dev_priv)) + dev_priv->display.get_cdclk = bdw_get_cdclk; + else if (IS_HASWELL(dev_priv)) + dev_priv->display.get_cdclk = hsw_get_cdclk; + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->display.get_cdclk = vlv_get_cdclk; + else if (IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_GEN5(dev_priv)) + dev_priv->display.get_cdclk = fixed_450mhz_get_cdclk; + else if (IS_GM45(dev_priv)) + dev_priv->display.get_cdclk = gm45_get_cdclk; + else if (IS_G4X(dev_priv)) + dev_priv->display.get_cdclk = g33_get_cdclk; + else if (IS_I965GM(dev_priv)) + dev_priv->display.get_cdclk = i965gm_get_cdclk; + else if (IS_I965G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_PINEVIEW(dev_priv)) + dev_priv->display.get_cdclk = pnv_get_cdclk; + else if (IS_G33(dev_priv)) + dev_priv->display.get_cdclk = g33_get_cdclk; + else if (IS_I945GM(dev_priv)) + dev_priv->display.get_cdclk = i945gm_get_cdclk; + else if (IS_I945G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_I915GM(dev_priv)) + dev_priv->display.get_cdclk = i915gm_get_cdclk; + else if (IS_I915G(dev_priv)) + dev_priv->display.get_cdclk = fixed_333mhz_get_cdclk; + else if (IS_I865G(dev_priv)) + dev_priv->display.get_cdclk = fixed_266mhz_get_cdclk; + else if (IS_I85X(dev_priv)) + dev_priv->display.get_cdclk = i85x_get_cdclk; + else if (IS_I845G(dev_priv)) + dev_priv->display.get_cdclk = fixed_200mhz_get_cdclk; + else { /* 830 */ + WARN(!IS_I830(dev_priv), + "Unknown platform. Assuming 133 MHz CDCLK\n"); + dev_priv->display.get_cdclk = fixed_133mhz_get_cdclk; + } +} diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c407cc41c0c9..e996f8ec8f08 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -122,9 +122,6 @@ static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force); static void ironlake_pfit_enable(struct intel_crtc *crtc); static void intel_modeset_setup_hw_state(struct drm_device *dev); static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc); -static int intel_max_pixel_rate(struct drm_atomic_state *state); -static int glk_calc_cdclk(int max_pixclk); -static int bxt_calc_cdclk(int max_pixclk); struct intel_limit { struct { @@ -170,8 +167,8 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv, return DIV_ROUND_CLOSEST(ref_freq << 1, divider + 1); } -static int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, - const char *name, u32 reg) +int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, + const char *name, u32 reg) { if (dev_priv->hpll_freq == 0) dev_priv->hpll_freq = valleyview_get_vco(dev_priv); @@ -180,63 +177,6 @@ static int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, dev_priv->hpll_freq); } -static int -intel_pch_rawclk(struct drm_i915_private *dev_priv) -{ - return (I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK) * 1000; -} - -static int -intel_vlv_hrawclk(struct drm_i915_private *dev_priv) -{ - /* RAWCLK_FREQ_VLV register updated from power well code */ - return vlv_get_cck_clock_hpll(dev_priv, "hrawclk", - CCK_DISPLAY_REF_CLOCK_CONTROL); -} - -static int -intel_g4x_hrawclk(struct drm_i915_private *dev_priv) -{ - uint32_t clkcfg; - - /* hrawclock is 1/4 the FSB frequency */ - clkcfg = I915_READ(CLKCFG); - switch (clkcfg & CLKCFG_FSB_MASK) { - case CLKCFG_FSB_400: - return 100000; - case CLKCFG_FSB_533: - return 133333; - case CLKCFG_FSB_667: - return 166667; - case CLKCFG_FSB_800: - return 200000; - case CLKCFG_FSB_1067: - return 266667; - case CLKCFG_FSB_1333: - return 333333; - /* these two are just a guess; one of them might be right */ - case CLKCFG_FSB_1600: - case CLKCFG_FSB_1600_ALT: - return 400000; - default: - return 133333; - } -} - -void intel_update_rawclk(struct drm_i915_private *dev_priv) -{ - if (HAS_PCH_SPLIT(dev_priv)) - dev_priv->rawclk_freq = intel_pch_rawclk(dev_priv); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - dev_priv->rawclk_freq = intel_vlv_hrawclk(dev_priv); - else if (IS_G4X(dev_priv) || IS_PINEVIEW(dev_priv)) - dev_priv->rawclk_freq = intel_g4x_hrawclk(dev_priv); - else - return; /* no rawclk on other platforms, or no need to know it */ - - DRM_DEBUG_DRIVER("rawclk rate: %d kHz\n", dev_priv->rawclk_freq); -} - static void intel_update_czclk(struct drm_i915_private *dev_priv) { if (!(IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))) @@ -5791,888 +5731,6 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv, intel_display_power_put(dev_priv, domain); } -static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) -{ - int max_cdclk_freq = dev_priv->max_cdclk_freq; - - if (IS_GEMINILAKE(dev_priv)) - return 2 * max_cdclk_freq; - else if (INTEL_INFO(dev_priv)->gen >= 9 || - IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - return max_cdclk_freq; - else if (IS_CHERRYVIEW(dev_priv)) - return max_cdclk_freq*95/100; - else if (INTEL_INFO(dev_priv)->gen < 4) - return 2*max_cdclk_freq*90/100; - else - return max_cdclk_freq*90/100; -} - -static int skl_calc_cdclk(int max_pixclk, int vco); - -static void intel_update_max_cdclk(struct drm_i915_private *dev_priv) -{ - if (IS_GEN9_BC(dev_priv)) { - u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; - int max_cdclk, vco; - - vco = dev_priv->skl_preferred_vco_freq; - WARN_ON(vco != 8100000 && vco != 8640000); - - /* - * Use the lower (vco 8640) cdclk values as a - * first guess. skl_calc_cdclk() will correct it - * if the preferred vco is 8100 instead. - */ - if (limit == SKL_DFSM_CDCLK_LIMIT_675) - max_cdclk = 617143; - else if (limit == SKL_DFSM_CDCLK_LIMIT_540) - max_cdclk = 540000; - else if (limit == SKL_DFSM_CDCLK_LIMIT_450) - max_cdclk = 432000; - else - max_cdclk = 308571; - - dev_priv->max_cdclk_freq = skl_calc_cdclk(max_cdclk, vco); - } else if (IS_GEMINILAKE(dev_priv)) { - dev_priv->max_cdclk_freq = 316800; - } else if (IS_BROXTON(dev_priv)) { - dev_priv->max_cdclk_freq = 624000; - } else if (IS_BROADWELL(dev_priv)) { - /* - * FIXME with extra cooling we can allow - * 540 MHz for ULX and 675 Mhz for ULT. - * How can we know if extra cooling is - * available? PCI ID, VTB, something else? - */ - if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - dev_priv->max_cdclk_freq = 450000; - else if (IS_BDW_ULX(dev_priv)) - dev_priv->max_cdclk_freq = 450000; - else if (IS_BDW_ULT(dev_priv)) - dev_priv->max_cdclk_freq = 540000; - else - dev_priv->max_cdclk_freq = 675000; - } else if (IS_CHERRYVIEW(dev_priv)) { - dev_priv->max_cdclk_freq = 320000; - } else if (IS_VALLEYVIEW(dev_priv)) { - dev_priv->max_cdclk_freq = 400000; - } else { - /* otherwise assume cdclk is fixed */ - dev_priv->max_cdclk_freq = dev_priv->cdclk_freq; - } - - dev_priv->max_dotclk_freq = intel_compute_max_dotclk(dev_priv); - - DRM_DEBUG_DRIVER("Max CD clock rate: %d kHz\n", - dev_priv->max_cdclk_freq); - - DRM_DEBUG_DRIVER("Max dotclock rate: %d kHz\n", - dev_priv->max_dotclk_freq); -} - -static void intel_update_cdclk(struct drm_i915_private *dev_priv) -{ - dev_priv->cdclk_freq = dev_priv->display.get_cdclk(dev_priv); - - if (INTEL_GEN(dev_priv) >= 9) - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", - dev_priv->cdclk_freq, dev_priv->cdclk_pll.vco, - dev_priv->cdclk_pll.ref); - else - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz\n", - dev_priv->cdclk_freq); - - /* - * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): - * Programmng [sic] note: bit[9:2] should be programmed to the number - * of cdclk that generates 4MHz reference clock freq which is used to - * generate GMBus clock. This will vary with the cdclk freq. - */ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - I915_WRITE(GMBUSFREQ_VLV, DIV_ROUND_UP(dev_priv->cdclk_freq, 1000)); -} - -/* convert from kHz to .1 fixpoint MHz with -1MHz offset */ -static int skl_cdclk_decimal(int cdclk) -{ - return DIV_ROUND_CLOSEST(cdclk - 1000, 500); -} - -static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; - - if (cdclk == dev_priv->cdclk_pll.ref) - return 0; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - case 144000: - case 288000: - case 384000: - case 576000: - ratio = 60; - break; - case 624000: - ratio = 65; - break; - } - - return dev_priv->cdclk_pll.ref * ratio; -} - -static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; - - if (cdclk == dev_priv->cdclk_pll.ref) - return 0; - - switch (cdclk) { - default: - MISSING_CASE(cdclk); - case 79200: - case 158400: - case 316800: - ratio = 33; - break; - } - - return dev_priv->cdclk_pll.ref * ratio; -} - -static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) -{ - I915_WRITE(BXT_DE_PLL_ENABLE, 0); - - /* Timeout 200us */ - if (intel_wait_for_register(dev_priv, - BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 0, - 1)) - DRM_ERROR("timeout waiting for DE PLL unlock\n"); - - dev_priv->cdclk_pll.vco = 0; -} - -static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) -{ - int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk_pll.ref); - u32 val; - - val = I915_READ(BXT_DE_PLL_CTL); - val &= ~BXT_DE_PLL_RATIO_MASK; - val |= BXT_DE_PLL_RATIO(ratio); - I915_WRITE(BXT_DE_PLL_CTL, val); - - I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE); - - /* Timeout 200us */ - if (intel_wait_for_register(dev_priv, - BXT_DE_PLL_ENABLE, - BXT_DE_PLL_LOCK, - BXT_DE_PLL_LOCK, - 1)) - DRM_ERROR("timeout waiting for DE PLL lock\n"); - - dev_priv->cdclk_pll.vco = vco; -} - -static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) -{ - u32 val, divider; - int vco, ret; - - if (IS_GEMINILAKE(dev_priv)) - vco = glk_de_pll_vco(dev_priv, cdclk); - else - vco = bxt_de_pll_vco(dev_priv, cdclk); - - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); - - /* cdclk = vco / 2 / div{1,1.5,2,4} */ - switch (DIV_ROUND_CLOSEST(vco, cdclk)) { - case 8: - divider = BXT_CDCLK_CD2X_DIV_SEL_4; - break; - case 4: - divider = BXT_CDCLK_CD2X_DIV_SEL_2; - break; - case 3: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); - divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; - break; - case 2: - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - default: - WARN_ON(cdclk != dev_priv->cdclk_pll.ref); - WARN_ON(vco != 0); - - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - } - - /* Inform power controller of upcoming frequency change */ - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - 0x80000000); - mutex_unlock(&dev_priv->rps.hw_lock); - - if (ret) { - DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", - ret, cdclk); - return; - } - - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) - bxt_de_pll_disable(dev_priv); - - if (dev_priv->cdclk_pll.vco != vco) - bxt_de_pll_enable(dev_priv, vco); - - val = divider | skl_cdclk_decimal(cdclk); - /* - * FIXME if only the cd2x divider needs changing, it could be done - * without shutting off the pipe (if only one pipe is active). - */ - val |= BXT_CDCLK_CD2X_PIPE_NONE; - /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. - */ - if (cdclk >= 500000) - val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - I915_WRITE(CDCLK_CTL, val); - - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - DIV_ROUND_UP(cdclk, 25000)); - mutex_unlock(&dev_priv->rps.hw_lock); - - if (ret) { - DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", - ret, cdclk); - return; - } - - intel_update_cdclk(dev_priv); -} - -static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) -{ - u32 cdctl, expected; - - intel_update_cdclk(dev_priv); - - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) - goto sanitize; - - /* DPLL okay; verify the cdclock - * - * Some BIOS versions leave an incorrect decimal frequency value and - * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, - * so sanitize this register. - */ - cdctl = I915_READ(CDCLK_CTL); - /* - * Let's ignore the pipe field, since BIOS could have configured the - * dividers both synching to an active pipe, or asynchronously - * (PIPE_NONE). - */ - cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; - - expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); - /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. - */ - if (dev_priv->cdclk_freq >= 500000) - expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; - -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk_freq = 0; - - /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; -} - -void bxt_init_cdclk(struct drm_i915_private *dev_priv) -{ - int cdclk; - - bxt_sanitize_cdclk(dev_priv); - - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) - return; - - /* - * FIXME: - * - The initial CDCLK needs to be read from VBT. - * Need to make this change after VBT has changes for BXT. - */ - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(0); - else - cdclk = bxt_calc_cdclk(0); - - bxt_set_cdclk(dev_priv, cdclk); -} - -void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref); -} - -static int skl_calc_cdclk(int max_pixclk, int vco) -{ - if (vco == 8640000) { - if (max_pixclk > 540000) - return 617143; - else if (max_pixclk > 432000) - return 540000; - else if (max_pixclk > 308571) - return 432000; - else - return 308571; - } else { - if (max_pixclk > 540000) - return 675000; - else if (max_pixclk > 450000) - return 540000; - else if (max_pixclk > 337500) - return 450000; - else - return 337500; - } -} - -static void -skl_dpll0_update(struct drm_i915_private *dev_priv) -{ - u32 val; - - dev_priv->cdclk_pll.ref = 24000; - dev_priv->cdclk_pll.vco = 0; - - val = I915_READ(LCPLL1_CTL); - if ((val & LCPLL_PLL_ENABLE) == 0) - return; - - if (WARN_ON((val & LCPLL_PLL_LOCK) == 0)) - return; - - val = I915_READ(DPLL_CTRL1); - - if (WARN_ON((val & (DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | - DPLL_CTRL1_SSC(SKL_DPLL0) | - DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) != - DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) - return; - - switch (val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)) { - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8100000; - break; - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8640000; - break; - default: - MISSING_CASE(val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); - break; - } -} - -void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, int vco) -{ - bool changed = dev_priv->skl_preferred_vco_freq != vco; - - dev_priv->skl_preferred_vco_freq = vco; - - if (changed) - intel_update_max_cdclk(dev_priv); -} - -static void -skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) -{ - int min_cdclk = skl_calc_cdclk(0, vco); - u32 val; - - WARN_ON(vco != 8100000 && vco != 8640000); - - /* select the minimum CDCLK before enabling DPLL 0 */ - val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); - I915_WRITE(CDCLK_CTL, val); - POSTING_READ(CDCLK_CTL); - - /* - * We always enable DPLL0 with the lowest link rate possible, but still - * taking into account the VCO required to operate the eDP panel at the - * desired frequency. The usual DP link rates operate with a VCO of - * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640. - * The modeset code is responsible for the selection of the exact link - * rate later on, with the constraint of choosing a frequency that - * works with vco. - */ - val = I915_READ(DPLL_CTRL1); - - val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) | - DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); - val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0); - if (vco == 8640000) - val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, - SKL_DPLL0); - else - val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, - SKL_DPLL0); - - I915_WRITE(DPLL_CTRL1, val); - POSTING_READ(DPLL_CTRL1); - - I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE); - - if (intel_wait_for_register(dev_priv, - LCPLL1_CTL, LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, - 5)) - DRM_ERROR("DPLL0 not locked\n"); - - dev_priv->cdclk_pll.vco = vco; - - /* We'll want to keep using the current vco from now on. */ - skl_set_preferred_cdclk_vco(dev_priv, vco); -} - -static void -skl_dpll0_disable(struct drm_i915_private *dev_priv) -{ - I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE); - if (intel_wait_for_register(dev_priv, - LCPLL1_CTL, LCPLL_PLL_LOCK, 0, - 1)) - DRM_ERROR("Couldn't disable DPLL0\n"); - - dev_priv->cdclk_pll.vco = 0; -} - -static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco) -{ - u32 freq_select, pcu_ack; - int ret; - - WARN_ON((cdclk == 24000) != (vco == 0)); - - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); - - mutex_lock(&dev_priv->rps.hw_lock); - ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->rps.hw_lock); - if (ret) { - DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", - ret); - return; - } - - /* set CDCLK_CTL */ - switch (cdclk) { - case 450000: - case 432000: - freq_select = CDCLK_FREQ_450_432; - pcu_ack = 1; - break; - case 540000: - freq_select = CDCLK_FREQ_540; - pcu_ack = 2; - break; - case 308571: - case 337500: - default: - freq_select = CDCLK_FREQ_337_308; - pcu_ack = 0; - break; - case 617143: - case 675000: - freq_select = CDCLK_FREQ_675_617; - pcu_ack = 3; - break; - } - - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) - skl_dpll0_disable(dev_priv); - - if (dev_priv->cdclk_pll.vco != vco) - skl_dpll0_enable(dev_priv, vco); - - I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); - POSTING_READ(CDCLK_CTL); - - /* inform PCU of the change */ - mutex_lock(&dev_priv->rps.hw_lock); - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); - mutex_unlock(&dev_priv->rps.hw_lock); - - intel_update_cdclk(dev_priv); -} - -static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv); - -void skl_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - skl_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref, 0); -} - -void skl_init_cdclk(struct drm_i915_private *dev_priv) -{ - int cdclk, vco; - - skl_sanitize_cdclk(dev_priv); - - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) { - /* - * Use the current vco as our initial - * guess as to what the preferred vco is. - */ - if (dev_priv->skl_preferred_vco_freq == 0) - skl_set_preferred_cdclk_vco(dev_priv, - dev_priv->cdclk_pll.vco); - return; - } - - vco = dev_priv->skl_preferred_vco_freq; - if (vco == 0) - vco = 8100000; - cdclk = skl_calc_cdclk(0, vco); - - skl_set_cdclk(dev_priv, cdclk, vco); -} - -static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) -{ - uint32_t cdctl, expected; - - /* - * check if the pre-os intialized the display - * There is SWF18 scratchpad register defined which is set by the - * pre-os which can be used by the OS drivers to check the status - */ - if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0) - goto sanitize; - - intel_update_cdclk(dev_priv); - /* Is PLL enabled and locked ? */ - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) - goto sanitize; - - /* DPLL okay; verify the cdclock - * - * Noticed in some instances that the freq selection is correct but - * decimal part is programmed wrong from BIOS where pre-os does not - * enable display. Verify the same as well. - */ - cdctl = I915_READ(CDCLK_CTL); - expected = (cdctl & CDCLK_FREQ_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; - -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk_freq = 0; - /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; -} - -/* Adjust CDclk dividers to allow high res or save power if possible */ -static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, cmd; - - WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); - - if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ - cmd = 2; - else if (cdclk == 266667) - cmd = 1; - else - cmd = 0; - - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); - val &= ~DSPFREQGUAR_MASK; - val |= (cmd << DSPFREQGUAR_SHIFT); - vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & - DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), - 50)) { - DRM_ERROR("timed out waiting for CDclk change\n"); - } - mutex_unlock(&dev_priv->rps.hw_lock); - - mutex_lock(&dev_priv->sb_lock); - - if (cdclk == 400000) { - u32 divider; - - divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - - /* adjust cdclk divider */ - val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); - val &= ~CCK_FREQUENCY_VALUES; - val |= divider; - vlv_cck_write(dev_priv, CCK_DISPLAY_CLOCK_CONTROL, val); - - if (wait_for((vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL) & - CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), - 50)) - DRM_ERROR("timed out waiting for CDclk change\n"); - } - - /* adjust self-refresh exit latency value */ - val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC); - val &= ~0x7f; - - /* - * For high bandwidth configs, we set a higher latency in the bunit - * so that the core display fetch happens in time to avoid underruns. - */ - if (cdclk == 400000) - val |= 4500 / 250; /* 4.5 usec */ - else - val |= 3000 / 250; /* 3.0 usec */ - vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); - - mutex_unlock(&dev_priv->sb_lock); - - intel_update_cdclk(dev_priv); -} - -static void cherryview_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, cmd; - - WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); - - switch (cdclk) { - case 333333: - case 320000: - case 266667: - case 200000: - break; - default: - MISSING_CASE(cdclk); - return; - } - - /* - * Specs are full of misinformation, but testing on actual - * hardware has shown that we just need to write the desired - * CCK divider into the Punit register. - */ - cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); - val &= ~DSPFREQGUAR_MASK_CHV; - val |= (cmd << DSPFREQGUAR_SHIFT_CHV); - vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & - DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), - 50)) { - DRM_ERROR("timed out waiting for CDclk change\n"); - } - mutex_unlock(&dev_priv->rps.hw_lock); - - intel_update_cdclk(dev_priv); -} - -static int valleyview_calc_cdclk(struct drm_i915_private *dev_priv, - int max_pixclk) -{ - int freq_320 = (dev_priv->hpll_freq << 1) % 320000 != 0 ? 333333 : 320000; - int limit = IS_CHERRYVIEW(dev_priv) ? 95 : 90; - - /* - * Really only a few cases to deal with, as only 4 CDclks are supported: - * 200MHz - * 267MHz - * 320/333MHz (depends on HPLL freq) - * 400MHz (VLV only) - * So we check to see whether we're above 90% (VLV) or 95% (CHV) - * of the lower bin and adjust if needed. - * - * We seem to get an unstable or solid color picture at 200MHz. - * Not sure what's wrong. For now use 200MHz only when all pipes - * are off. - */ - if (!IS_CHERRYVIEW(dev_priv) && - max_pixclk > freq_320*limit/100) - return 400000; - else if (max_pixclk > 266667*limit/100) - return freq_320; - else if (max_pixclk > 0) - return 266667; - else - return 200000; -} - -static int glk_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 2 * 158400) - return 316800; - else if (max_pixclk > 2 * 79200) - return 158400; - else - return 79200; -} - -static int bxt_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 576000) - return 624000; - else if (max_pixclk > 384000) - return 576000; - else if (max_pixclk > 288000) - return 384000; - else if (max_pixclk > 144000) - return 288000; - else - return 144000; -} - -static int valleyview_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_device *dev = state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - int max_pixclk = intel_max_pixel_rate(state); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); - - intel_state->cdclk = intel_state->dev_cdclk = - valleyview_calc_cdclk(dev_priv, max_pixclk); - - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = valleyview_calc_cdclk(dev_priv, 0); - - return 0; -} - -static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->dev); - int max_pixclk = intel_max_pixel_rate(state); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); - int cdclk; - - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(max_pixclk); - else - cdclk = bxt_calc_cdclk(max_pixclk); - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - - if (!intel_state->active_crtcs) { - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(0); - else - cdclk = bxt_calc_cdclk(0); - - intel_state->dev_cdclk = cdclk; - } - - return 0; -} - -static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) -{ - unsigned int credits, default_credits; - - if (IS_CHERRYVIEW(dev_priv)) - default_credits = PFI_CREDIT(12); - else - default_credits = PFI_CREDIT(8); - - if (dev_priv->cdclk_freq >= dev_priv->czclk_freq) { - /* CHV suggested value is 31 or 63 */ - if (IS_CHERRYVIEW(dev_priv)) - credits = PFI_CREDIT_63; - else - credits = PFI_CREDIT(15); - } else { - credits = default_credits; - } - - /* - * WA - write default credits before re-programming - * FIXME: should we also set the resend bit here? - */ - I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | - default_credits); - - I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | - credits | PFI_CREDIT_RESEND); - - /* - * FIXME is this guaranteed to clear - * immediately or should we poll for it? - */ - WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); -} - -static void valleyview_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned req_cdclk = old_intel_state->dev_cdclk; - - /* - * FIXME: We can end up here with all power domains off, yet - * with a CDCLK frequency other than the minimum. To account - * for this take the PIPE-A power domain, which covers the HW - * blocks needed for the following programming. This can be - * removed once it's guaranteed that we get here either with - * the minimum CDCLK set, or the required power domains - * enabled. - */ - intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - - if (IS_CHERRYVIEW(dev_priv)) - cherryview_set_cdclk(dev, req_cdclk); - else - valleyview_set_cdclk(dev, req_cdclk); - - vlv_program_pfi_credits(dev_priv); - - intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); -} - static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_atomic_state *old_state) { @@ -7247,448 +6305,6 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, return 0; } -static int skylake_get_cdclk(struct drm_i915_private *dev_priv) -{ - u32 cdctl; - - skl_dpll0_update(dev_priv); - - if (dev_priv->cdclk_pll.vco == 0) - return dev_priv->cdclk_pll.ref; - - cdctl = I915_READ(CDCLK_CTL); - - if (dev_priv->cdclk_pll.vco == 8640000) { - switch (cdctl & CDCLK_FREQ_SEL_MASK) { - case CDCLK_FREQ_450_432: - return 432000; - case CDCLK_FREQ_337_308: - return 308571; - case CDCLK_FREQ_540: - return 540000; - case CDCLK_FREQ_675_617: - return 617143; - default: - MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); - } - } else { - switch (cdctl & CDCLK_FREQ_SEL_MASK) { - case CDCLK_FREQ_450_432: - return 450000; - case CDCLK_FREQ_337_308: - return 337500; - case CDCLK_FREQ_540: - return 540000; - case CDCLK_FREQ_675_617: - return 675000; - default: - MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); - } - } - - return dev_priv->cdclk_pll.ref; -} - -static void bxt_de_pll_update(struct drm_i915_private *dev_priv) -{ - u32 val; - - dev_priv->cdclk_pll.ref = 19200; - dev_priv->cdclk_pll.vco = 0; - - val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) - return; - - if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) - return; - - val = I915_READ(BXT_DE_PLL_CTL); - dev_priv->cdclk_pll.vco = (val & BXT_DE_PLL_RATIO_MASK) * - dev_priv->cdclk_pll.ref; -} - -static int broxton_get_cdclk(struct drm_i915_private *dev_priv) -{ - u32 divider; - int div, vco; - - bxt_de_pll_update(dev_priv); - - vco = dev_priv->cdclk_pll.vco; - if (vco == 0) - return dev_priv->cdclk_pll.ref; - - divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; - - switch (divider) { - case BXT_CDCLK_CD2X_DIV_SEL_1: - div = 2; - break; - case BXT_CDCLK_CD2X_DIV_SEL_1_5: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); - div = 3; - break; - case BXT_CDCLK_CD2X_DIV_SEL_2: - div = 4; - break; - case BXT_CDCLK_CD2X_DIV_SEL_4: - div = 8; - break; - default: - MISSING_CASE(divider); - return dev_priv->cdclk_pll.ref; - } - - return DIV_ROUND_CLOSEST(vco, div); -} - -static int broadwell_get_cdclk(struct drm_i915_private *dev_priv) -{ - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; - - if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; - else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; - else if (freq == LCPLL_CLK_FREQ_450) - return 450000; - else if (freq == LCPLL_CLK_FREQ_54O_BDW) - return 540000; - else if (freq == LCPLL_CLK_FREQ_337_5_BDW) - return 337500; - else - return 675000; -} - -static int haswell_get_cdclk(struct drm_i915_private *dev_priv) -{ - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; - - if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; - else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; - else if (freq == LCPLL_CLK_FREQ_450) - return 450000; - else if (IS_HSW_ULT(dev_priv)) - return 337500; - else - return 540000; -} - -static int valleyview_get_cdclk(struct drm_i915_private *dev_priv) -{ - return vlv_get_cck_clock_hpll(dev_priv, "cdclk", - CCK_DISPLAY_CLOCK_CONTROL); -} - -static int fixed_450mhz_get_cdclk(struct drm_i915_private *dev_priv) -{ - return 450000; -} - -static int fixed_400mhz_get_cdclk(struct drm_i915_private *dev_priv) -{ - return 400000; -} - -static int i945gm_get_cdclk(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 gcfgc = 0; - - pci_read_config_word(pdev, GCFGC, &gcfgc); - - if (gcfgc & GC_LOW_FREQUENCY_ENABLE) - return 133333; - else { - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_333_320_MHZ: - return 320000; - default: - case GC_DISPLAY_CLOCK_190_200_MHZ: - return 200000; - } - } -} - -static int fixed_333mhz_get_cdclk(struct drm_i915_private *dev_priv) -{ - return 333333; -} - -static int fixed_200mhz_get_cdclk(struct drm_i915_private *dev_priv) -{ - return 200000; -} - -static int pnv_get_cdclk(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 gcfgc = 0; - - pci_read_config_word(pdev, GCFGC, &gcfgc); - - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_267_MHZ_PNV: - return 266667; - case GC_DISPLAY_CLOCK_333_MHZ_PNV: - return 333333; - case GC_DISPLAY_CLOCK_444_MHZ_PNV: - return 444444; - case GC_DISPLAY_CLOCK_200_MHZ_PNV: - return 200000; - default: - DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); - case GC_DISPLAY_CLOCK_133_MHZ_PNV: - return 133333; - case GC_DISPLAY_CLOCK_167_MHZ_PNV: - return 166667; - } -} - -static int i915gm_get_cdclk(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 gcfgc = 0; - - pci_read_config_word(pdev, GCFGC, &gcfgc); - - if (gcfgc & GC_LOW_FREQUENCY_ENABLE) - return 133333; - else { - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_333_320_MHZ: - return 333333; - default: - case GC_DISPLAY_CLOCK_190_200_MHZ: - return 190000; - } - } -} - -static int fixed_266mhz_get_cdclk(struct drm_i915_private *dev_priv) -{ - return 266667; -} - -static int i85x_get_cdclk(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 hpllcc = 0; - - /* - * 852GM/852GMV only supports 133 MHz and the HPLLCC - * encoding is different :( - * FIXME is this the right way to detect 852GM/852GMV? - */ - if (pdev->revision == 0x1) - return 133333; - - pci_bus_read_config_word(pdev->bus, - PCI_DEVFN(0, 3), HPLLCC, &hpllcc); - - /* Assume that the hardware is in the high speed state. This - * should be the default. - */ - switch (hpllcc & GC_CLOCK_CONTROL_MASK) { - case GC_CLOCK_133_200: - case GC_CLOCK_133_200_2: - case GC_CLOCK_100_200: - return 200000; - case GC_CLOCK_166_250: - return 250000; - case GC_CLOCK_100_133: - return 133333; - case GC_CLOCK_133_266: - case GC_CLOCK_133_266_2: - case GC_CLOCK_166_266: - return 266667; - } - - /* Shouldn't happen */ - return 0; -} - -static int fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv) -{ - return 133333; -} - -static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) -{ - static const unsigned int blb_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - [4] = 6400000, - }; - static const unsigned int pnv_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - [4] = 2666667, - }; - static const unsigned int cl_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 6400000, - [4] = 3333333, - [5] = 3566667, - [6] = 4266667, - }; - static const unsigned int elk_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - }; - static const unsigned int ctg_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 6400000, - [4] = 2666667, - [5] = 4266667, - }; - const unsigned int *vco_table; - unsigned int vco; - uint8_t tmp = 0; - - /* FIXME other chipsets? */ - if (IS_GM45(dev_priv)) - vco_table = ctg_vco; - else if (IS_G4X(dev_priv)) - vco_table = elk_vco; - else if (IS_I965GM(dev_priv)) - vco_table = cl_vco; - else if (IS_PINEVIEW(dev_priv)) - vco_table = pnv_vco; - else if (IS_G33(dev_priv)) - vco_table = blb_vco; - else - return 0; - - tmp = I915_READ(IS_MOBILE(dev_priv) ? HPLLVCO_MOBILE : HPLLVCO); - - vco = vco_table[tmp & 0x7]; - if (vco == 0) - DRM_ERROR("Bad HPLL VCO (HPLLVCO=0x%02x)\n", tmp); - else - DRM_DEBUG_KMS("HPLL VCO %u kHz\n", vco); - - return vco; -} - -static int gm45_get_cdclk(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = (tmp >> 12) & 0x1; - - switch (vco) { - case 2666667: - case 4000000: - case 5333333: - return cdclk_sel ? 333333 : 222222; - case 3200000: - return cdclk_sel ? 320000 : 228571; - default: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u, CFGC=0x%04x\n", vco, tmp); - return 222222; - } -} - -static int i965gm_get_cdclk(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 16, 10, 8 }; - static const uint8_t div_4000[] = { 20, 12, 10 }; - static const uint8_t div_5333[] = { 24, 16, 14 }; - const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = ((tmp >> 8) & 0x1f) - 1; - - if (cdclk_sel >= ARRAY_SIZE(div_3200)) - goto fail; - - switch (vco) { - case 3200000: - div_table = div_3200; - break; - case 4000000: - div_table = div_4000; - break; - case 5333333: - div_table = div_5333; - break; - default: - goto fail; - } - - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); - -fail: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%04x\n", vco, tmp); - return 200000; -} - -static int g33_get_cdclk(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; - static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; - static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; - static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; - const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = (tmp >> 4) & 0x7; - - if (cdclk_sel >= ARRAY_SIZE(div_3200)) - goto fail; - - switch (vco) { - case 3200000: - div_table = div_3200; - break; - case 4000000: - div_table = div_4000; - break; - case 4800000: - div_table = div_4800; - break; - case 5333333: - div_table = div_5333; - break; - default: - goto fail; - } - - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); - -fail: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%08x\n", vco, tmp); - return 190476; -} - static void intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den) { @@ -10250,245 +8866,6 @@ void hsw_disable_pc8(struct drm_i915_private *dev_priv) } } -static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned int req_cdclk = old_intel_state->dev_cdclk; - - bxt_set_cdclk(to_i915(dev), req_cdclk); -} - -static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, - int pixel_rate) -{ - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) - pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); - - /* BSpec says "Do not use DisplayPort with CDCLK less than - * 432 MHz, audio enabled, port width x4, and link rate - * HBR2 (5.4 GHz), or else there may be audio corruption or - * screen corruption." - */ - if (intel_crtc_has_dp_encoder(crtc_state) && - crtc_state->has_audio && - crtc_state->port_clock >= 540000 && - crtc_state->lane_count == 4) - pixel_rate = max(432000, pixel_rate); - - return pixel_rate; -} - -/* compute the max rate for new configuration */ -static int intel_max_pixel_rate(struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(state->dev); - struct drm_crtc *crtc; - struct drm_crtc_state *cstate; - struct intel_crtc_state *crtc_state; - unsigned max_pixel_rate = 0, i; - enum pipe pipe; - - memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, - sizeof(intel_state->min_pixclk)); - - for_each_crtc_in_state(state, crtc, cstate, i) { - int pixel_rate; - - crtc_state = to_intel_crtc_state(cstate); - if (!crtc_state->base.enable) { - intel_state->min_pixclk[i] = 0; - continue; - } - - pixel_rate = crtc_state->pixel_rate; - - if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) - pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state, - pixel_rate); - - intel_state->min_pixclk[i] = pixel_rate; - } - - for_each_pipe(dev_priv, pipe) - max_pixel_rate = max(intel_state->min_pixclk[pipe], max_pixel_rate); - - return max_pixel_rate; -} - -static void broadwell_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t val, data; - int ret; - - if (WARN((I915_READ(LCPLL_CTL) & - (LCPLL_PLL_DISABLE | LCPLL_PLL_LOCK | - LCPLL_CD_CLOCK_DISABLE | LCPLL_ROOT_CD_CLOCK_DISABLE | - LCPLL_CD2X_CLOCK_DISABLE | LCPLL_POWER_DOWN_ALLOW | - LCPLL_CD_SOURCE_FCLK)) != LCPLL_PLL_LOCK, - "trying to change cdclk frequency with cdclk not enabled\n")) - return; - - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, - BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); - mutex_unlock(&dev_priv->rps.hw_lock); - if (ret) { - DRM_ERROR("failed to inform pcode about cdclk change\n"); - return; - } - - val = I915_READ(LCPLL_CTL); - val |= LCPLL_CD_SOURCE_FCLK; - I915_WRITE(LCPLL_CTL, val); - - if (wait_for_us(I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 1)) - DRM_ERROR("Switching to FCLK failed\n"); - - val = I915_READ(LCPLL_CTL); - val &= ~LCPLL_CLK_FREQ_MASK; - - switch (cdclk) { - case 450000: - val |= LCPLL_CLK_FREQ_450; - data = 0; - break; - case 540000: - val |= LCPLL_CLK_FREQ_54O_BDW; - data = 1; - break; - case 337500: - val |= LCPLL_CLK_FREQ_337_5_BDW; - data = 2; - break; - case 675000: - val |= LCPLL_CLK_FREQ_675_BDW; - data = 3; - break; - default: - WARN(1, "invalid cdclk frequency\n"); - return; - } - - I915_WRITE(LCPLL_CTL, val); - - val = I915_READ(LCPLL_CTL); - val &= ~LCPLL_CD_SOURCE_FCLK; - I915_WRITE(LCPLL_CTL, val); - - if (wait_for_us((I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) - DRM_ERROR("Switching back to LCPLL failed\n"); - - mutex_lock(&dev_priv->rps.hw_lock); - sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); - mutex_unlock(&dev_priv->rps.hw_lock); - - I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); - - intel_update_cdclk(dev_priv); - - WARN(cdclk != dev_priv->cdclk_freq, - "cdclk requested %d kHz but got %d kHz\n", - cdclk, dev_priv->cdclk_freq); -} - -static int broadwell_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 540000) - return 675000; - else if (max_pixclk > 450000) - return 540000; - else if (max_pixclk > 337500) - return 450000; - else - return 337500; -} - -static int broadwell_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - int max_pixclk = intel_max_pixel_rate(state); - int cdclk; - - /* - * FIXME should also account for plane ratio - * once 64bpp pixel formats are supported. - */ - cdclk = broadwell_calc_cdclk(max_pixclk); - - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - return -EINVAL; - } - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = broadwell_calc_cdclk(0); - - return 0; -} - -static void broadwell_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned req_cdclk = old_intel_state->dev_cdclk; - - broadwell_set_cdclk(dev, req_cdclk); -} - -static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(state->dev); - const int max_pixclk = intel_max_pixel_rate(state); - int vco = intel_state->cdclk_pll_vco; - int cdclk; - - /* - * FIXME should also account for plane ratio - * once 64bpp pixel formats are supported. - */ - cdclk = skl_calc_cdclk(max_pixclk, vco); - - /* - * FIXME move the cdclk caclulation to - * compute_config() so we can fail gracegully. - */ - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_ERROR("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - cdclk = dev_priv->max_cdclk_freq; - } - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = skl_calc_cdclk(0, vco); - - return 0; -} - -static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_i915_private *dev_priv = to_i915(old_state->dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(old_state); - unsigned int req_cdclk = intel_state->dev_cdclk; - unsigned int req_vco = intel_state->cdclk_pll_vco; - - skl_set_cdclk(dev_priv, req_cdclk, req_vco); -} - static int haswell_crtc_compute_clock(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state) { @@ -16170,6 +14547,8 @@ static const struct drm_mode_config_funcs intel_mode_funcs = { */ void intel_init_display_hooks(struct drm_i915_private *dev_priv) { + intel_init_cdclk_hooks(dev_priv); + if (INTEL_INFO(dev_priv)->gen >= 9) { dev_priv->display.get_pipe_config = haswell_get_pipe_config; dev_priv->display.get_initial_plane_config = @@ -16238,52 +14617,6 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.crtc_disable = i9xx_crtc_disable; } - /* Returns the core display clock speed */ - if (IS_GEN9_BC(dev_priv)) - dev_priv->display.get_cdclk = skylake_get_cdclk; - else if (IS_GEN9_LP(dev_priv)) - dev_priv->display.get_cdclk = broxton_get_cdclk; - else if (IS_BROADWELL(dev_priv)) - dev_priv->display.get_cdclk = broadwell_get_cdclk; - else if (IS_HASWELL(dev_priv)) - dev_priv->display.get_cdclk = haswell_get_cdclk; - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - dev_priv->display.get_cdclk = valleyview_get_cdclk; - else if (IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) - dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; - else if (IS_GEN5(dev_priv)) - dev_priv->display.get_cdclk = fixed_450mhz_get_cdclk; - else if (IS_GM45(dev_priv)) - dev_priv->display.get_cdclk = gm45_get_cdclk; - else if (IS_G4X(dev_priv)) - dev_priv->display.get_cdclk = g33_get_cdclk; - else if (IS_I965GM(dev_priv)) - dev_priv->display.get_cdclk = i965gm_get_cdclk; - else if (IS_I965G(dev_priv)) - dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; - else if (IS_PINEVIEW(dev_priv)) - dev_priv->display.get_cdclk = pnv_get_cdclk; - else if (IS_G33(dev_priv)) - dev_priv->display.get_cdclk = g33_get_cdclk; - else if (IS_I945GM(dev_priv)) - dev_priv->display.get_cdclk = i945gm_get_cdclk; - else if (IS_I945G(dev_priv)) - dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; - else if (IS_I915GM(dev_priv)) - dev_priv->display.get_cdclk = i915gm_get_cdclk; - else if (IS_I915G(dev_priv)) - dev_priv->display.get_cdclk = fixed_333mhz_get_cdclk; - else if (IS_I865G(dev_priv)) - dev_priv->display.get_cdclk = fixed_266mhz_get_cdclk; - else if (IS_I85X(dev_priv)) - dev_priv->display.get_cdclk = i85x_get_cdclk; - else if (IS_I845G(dev_priv)) - dev_priv->display.get_cdclk = fixed_200mhz_get_cdclk; - else { /* 830 */ - WARN(!IS_I830(dev_priv), "Unknown platform. Assuming 133 MHz CDCLK\n"); - dev_priv->display.get_cdclk = fixed_133mhz_get_cdclk; - } - if (IS_GEN5(dev_priv)) { dev_priv->display.fdi_link_train = ironlake_fdi_link_train; } else if (IS_GEN6(dev_priv)) { @@ -16295,28 +14628,6 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.fdi_link_train = hsw_fdi_link_train; } - if (IS_BROADWELL(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - broadwell_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - broadwell_modeset_calc_cdclk; - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - valleyview_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - valleyview_modeset_calc_cdclk; - } else if (IS_GEN9_LP(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - bxt_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - bxt_modeset_calc_cdclk; - } else if (IS_GEN9_BC(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - skl_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - skl_modeset_calc_cdclk; - } - if (dev_priv->info.gen >= 9) dev_priv->display.update_crtcs = skl_update_crtcs; else diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index e1dbd9aa5701..4bdb1f3cb0b5 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1246,12 +1246,19 @@ void intel_audio_codec_disable(struct intel_encoder *encoder); void i915_audio_component_init(struct drm_i915_private *dev_priv); void i915_audio_component_cleanup(struct drm_i915_private *dev_priv); +/* intel_cdclk.c */ +void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); +void intel_update_max_cdclk(struct drm_i915_private *dev_priv); +void intel_update_cdclk(struct drm_i915_private *dev_priv); +void intel_update_rawclk(struct drm_i915_private *dev_priv); + /* intel_display.c */ enum transcoder intel_crtc_pch_transcoder(struct intel_crtc *crtc); -void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, int vco); void intel_update_rawclk(struct drm_i915_private *dev_priv); int vlv_get_cck_clock(struct drm_i915_private *dev_priv, const char *name, u32 reg, int ref_freq); +int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, + const char *name, u32 reg); void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv); void lpt_disable_iclkip(struct drm_i915_private *dev_priv); extern const struct drm_plane_funcs intel_plane_funcs; From 8f0cfa4d2a628a53e469f98aff7988555d654bc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 20 Jan 2017 20:21:57 +0200 Subject: [PATCH 0115/1299] drm/i915: Pass computed vco to bxt_set_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than compute the vco inside bxt_set_cdclk() let's precompute it outside and pass it in. A small step towards a fully precomputed cdclk state. Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-7-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 33 +++++++++++++++++------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 04a46c14c898..a0736b5f2d47 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1143,15 +1143,11 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) dev_priv->cdclk_pll.vco = vco; } -static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) +static void bxt_set_cdclk(struct drm_i915_private *dev_priv, + int cdclk, int vco) { u32 val, divider; - int vco, ret; - - if (IS_GEMINILAKE(dev_priv)) - vco = glk_de_pll_vco(dev_priv, cdclk); - else - vco = bxt_de_pll_vco(dev_priv, cdclk); + int ret; DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); @@ -1284,7 +1280,7 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) */ void bxt_init_cdclk(struct drm_i915_private *dev_priv) { - int cdclk; + int cdclk, vco; bxt_sanitize_cdclk(dev_priv); @@ -1296,12 +1292,15 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) * - The initial CDCLK needs to be read from VBT. * Need to make this change after VBT has changes for BXT. */ - if (IS_GEMINILAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv)) { cdclk = glk_calc_cdclk(0); - else + vco = glk_de_pll_vco(dev_priv, cdclk); + } else { cdclk = bxt_calc_cdclk(0); + vco = bxt_de_pll_vco(dev_priv, cdclk); + } - bxt_set_cdclk(dev_priv, cdclk); + bxt_set_cdclk(dev_priv, cdclk, vco); } /** @@ -1313,7 +1312,7 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) */ void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) { - bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref); + bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref, 0); } static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, @@ -1533,12 +1532,18 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) { - struct drm_device *dev = old_state->dev; + struct drm_i915_private *dev_priv = to_i915(old_state->dev); struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_state); unsigned int req_cdclk = old_intel_state->dev_cdclk; + unsigned int req_vco; - bxt_set_cdclk(to_i915(dev), req_cdclk); + if (IS_GEMINILAKE(dev_priv)) + req_vco = glk_de_pll_vco(dev_priv, req_cdclk); + else + req_vco = bxt_de_pll_vco(dev_priv, req_cdclk); + + bxt_set_cdclk(dev_priv, req_cdclk, req_vco); } static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) From 49cd97a35d9041b53ecf39d447f6a0f8f2de75eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 7 Feb 2017 20:33:45 +0200 Subject: [PATCH 0116/1299] drm/i915: Start moving the cdclk stuff into a distinct state structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce intel_cdclk state which for now will track the cdclk frequency, the vco frequency and the reference frequency (not sure we want the last one, but I put it there anyway). We'll also make the .get_cdclk() function fill out this state structure rather than just returning the current cdclk frequency. One immediate benefit is that calling .get_cdclk() will no longer clobber state stored under dev_priv unless ex[plicitly told to do so. Previously it clobbered the vco and reference clocks stored there on some platforms. We'll expand the use of this structure to actually precomputing the state and whatnot later. v2: Constify intel_cdclk_state_compare() v3: Document intel_cdclk_state_compare() v4: Deal with i945gm_get_cdclk() Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170207183345.19763-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 14 +- drivers/gpu/drm/i915/intel_audio.c | 2 +- drivers/gpu/drm/i915/intel_cdclk.c | 382 ++++++++++++++---------- drivers/gpu/drm/i915/intel_display.c | 18 +- drivers/gpu/drm/i915/intel_dp.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 3 + drivers/gpu/drm/i915/intel_fbc.c | 2 +- drivers/gpu/drm/i915/intel_panel.c | 4 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 5 +- 10 files changed, 258 insertions(+), 176 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6400f83d3ad1..1ccc2978a21a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1281,7 +1281,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_puts(m, "no P-state info available\n"); } - seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk_freq); + seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk.hw.cdclk); seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq); seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f41496405484..aa59b2153374 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -600,9 +600,11 @@ struct intel_initial_plane_config; struct intel_crtc; struct intel_limit; struct dpll; +struct intel_cdclk_state; struct drm_i915_display_funcs { - int (*get_cdclk)(struct drm_i915_private *dev_priv); + void (*get_cdclk)(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state); int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); int (*compute_intermediate_wm)(struct drm_device *dev, @@ -2060,6 +2062,10 @@ struct i915_oa_ops { bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv); }; +struct intel_cdclk_state { + unsigned int cdclk, vco, ref; +}; + struct drm_i915_private { struct drm_device drm; @@ -2164,7 +2170,7 @@ struct drm_i915_private { unsigned int fsb_freq, mem_freq, is_ddr3; unsigned int skl_preferred_vco_freq; - unsigned int cdclk_freq, max_cdclk_freq; + unsigned int max_cdclk_freq; /* * For reading holding any crtc lock is sufficient, @@ -2178,8 +2184,8 @@ struct drm_i915_private { unsigned int czclk_freq; struct { - unsigned int vco, ref; - } cdclk_pll; + struct intel_cdclk_state hw; + } cdclk; /** * wq - Driver workqueue for GEM. diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index cd9d20763140..1ab401faed34 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -734,7 +734,7 @@ static int i915_audio_component_get_cdclk_freq(struct device *kdev) if (WARN_ON_ONCE(!HAS_DDI(dev_priv))) return -ENODEV; - return dev_priv->cdclk_freq; + return dev_priv->cdclk.hw.cdclk; } /* diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index a0736b5f2d47..4e74e87a8676 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -51,37 +51,44 @@ * dividers can be programmed correctly. */ -static int fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv) +static void fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return 133333; + cdclk_state->cdclk = 133333; } -static int fixed_200mhz_get_cdclk(struct drm_i915_private *dev_priv) +static void fixed_200mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return 200000; + cdclk_state->cdclk = 200000; } -static int fixed_266mhz_get_cdclk(struct drm_i915_private *dev_priv) +static void fixed_266mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return 266667; + cdclk_state->cdclk = 266667; } -static int fixed_333mhz_get_cdclk(struct drm_i915_private *dev_priv) +static void fixed_333mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return 333333; + cdclk_state->cdclk = 333333; } -static int fixed_400mhz_get_cdclk(struct drm_i915_private *dev_priv) +static void fixed_400mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return 400000; + cdclk_state->cdclk = 400000; } -static int fixed_450mhz_get_cdclk(struct drm_i915_private *dev_priv) +static void fixed_450mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return 450000; + cdclk_state->cdclk = 450000; } -static int i85x_get_cdclk(struct drm_i915_private *dev_priv) +static void i85x_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 hpllcc = 0; @@ -91,8 +98,10 @@ static int i85x_get_cdclk(struct drm_i915_private *dev_priv) * encoding is different :( * FIXME is this the right way to detect 852GM/852GMV? */ - if (pdev->revision == 0x1) - return 133333; + if (pdev->revision == 0x1) { + cdclk_state->cdclk = 133333; + return; + } pci_bus_read_config_word(pdev->bus, PCI_DEVFN(0, 3), HPLLCC, &hpllcc); @@ -104,56 +113,67 @@ static int i85x_get_cdclk(struct drm_i915_private *dev_priv) case GC_CLOCK_133_200: case GC_CLOCK_133_200_2: case GC_CLOCK_100_200: - return 200000; + cdclk_state->cdclk = 200000; + break; case GC_CLOCK_166_250: - return 250000; + cdclk_state->cdclk = 250000; + break; case GC_CLOCK_100_133: - return 133333; + cdclk_state->cdclk = 133333; + break; case GC_CLOCK_133_266: case GC_CLOCK_133_266_2: case GC_CLOCK_166_266: - return 266667; + cdclk_state->cdclk = 266667; + break; } - - /* Shouldn't happen */ - return 0; } -static int i915gm_get_cdclk(struct drm_i915_private *dev_priv) +static void i915gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; pci_read_config_word(pdev, GCFGC, &gcfgc); - if (gcfgc & GC_LOW_FREQUENCY_ENABLE) - return 133333; + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) { + cdclk_state->cdclk = 133333; + return; + } switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { case GC_DISPLAY_CLOCK_333_320_MHZ: - return 333333; + cdclk_state->cdclk = 333333; + break; default: case GC_DISPLAY_CLOCK_190_200_MHZ: - return 190000; + cdclk_state->cdclk = 190000; + break; } } -static int i945gm_get_cdclk(struct drm_i915_private *dev_priv) +static void i945gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; pci_read_config_word(pdev, GCFGC, &gcfgc); - if (gcfgc & GC_LOW_FREQUENCY_ENABLE) - return 133333; + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) { + cdclk_state->cdclk = 133333; + return; + } switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { case GC_DISPLAY_CLOCK_333_320_MHZ: - return 320000; + cdclk_state->cdclk = 320000; + break; default: case GC_DISPLAY_CLOCK_190_200_MHZ: - return 200000; + cdclk_state->cdclk = 200000; + break; } } @@ -225,7 +245,8 @@ static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) return vco; } -static int g33_get_cdclk(struct drm_i915_private *dev_priv) +static void g33_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; @@ -233,9 +254,11 @@ static int g33_get_cdclk(struct drm_i915_private *dev_priv) static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); + unsigned int cdclk_sel; uint16_t tmp = 0; + cdclk_state->vco = intel_hpll_vco(dev_priv); + pci_read_config_word(pdev, GCFGC, &tmp); cdclk_sel = (tmp >> 4) & 0x7; @@ -243,7 +266,7 @@ static int g33_get_cdclk(struct drm_i915_private *dev_priv) if (cdclk_sel >= ARRAY_SIZE(div_3200)) goto fail; - switch (vco) { + switch (cdclk_state->vco) { case 3200000: div_table = div_3200; break; @@ -260,15 +283,18 @@ static int g33_get_cdclk(struct drm_i915_private *dev_priv) goto fail; } - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, + div_table[cdclk_sel]); + return; fail: DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%08x\n", - vco, tmp); - return 190476; + cdclk_state->vco, tmp); + cdclk_state->cdclk = 190476; } -static int pnv_get_cdclk(struct drm_i915_private *dev_priv) +static void pnv_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; @@ -277,32 +303,41 @@ static int pnv_get_cdclk(struct drm_i915_private *dev_priv) switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { case GC_DISPLAY_CLOCK_267_MHZ_PNV: - return 266667; + cdclk_state->cdclk = 266667; + break; case GC_DISPLAY_CLOCK_333_MHZ_PNV: - return 333333; + cdclk_state->cdclk = 333333; + break; case GC_DISPLAY_CLOCK_444_MHZ_PNV: - return 444444; + cdclk_state->cdclk = 444444; + break; case GC_DISPLAY_CLOCK_200_MHZ_PNV: - return 200000; + cdclk_state->cdclk = 200000; + break; default: DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); case GC_DISPLAY_CLOCK_133_MHZ_PNV: - return 133333; + cdclk_state->cdclk = 133333; + break; case GC_DISPLAY_CLOCK_167_MHZ_PNV: - return 166667; + cdclk_state->cdclk = 166667; + break; } } -static int i965gm_get_cdclk(struct drm_i915_private *dev_priv) +static void i965gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; static const uint8_t div_3200[] = { 16, 10, 8 }; static const uint8_t div_4000[] = { 20, 12, 10 }; static const uint8_t div_5333[] = { 24, 16, 14 }; const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); + unsigned int cdclk_sel; uint16_t tmp = 0; + cdclk_state->vco = intel_hpll_vco(dev_priv); + pci_read_config_word(pdev, GCFGC, &tmp); cdclk_sel = ((tmp >> 8) & 0x1f) - 1; @@ -310,7 +345,7 @@ static int i965gm_get_cdclk(struct drm_i915_private *dev_priv) if (cdclk_sel >= ARRAY_SIZE(div_3200)) goto fail; - switch (vco) { + switch (cdclk_state->vco) { case 3200000: div_table = div_3200; break; @@ -324,53 +359,62 @@ static int i965gm_get_cdclk(struct drm_i915_private *dev_priv) goto fail; } - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, + div_table[cdclk_sel]); + return; fail: DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%04x\n", - vco, tmp); - return 200000; + cdclk_state->vco, tmp); + cdclk_state->cdclk = 200000; } -static int gm45_get_cdclk(struct drm_i915_private *dev_priv) +static void gm45_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); + unsigned int cdclk_sel; uint16_t tmp = 0; + cdclk_state->vco = intel_hpll_vco(dev_priv); + pci_read_config_word(pdev, GCFGC, &tmp); cdclk_sel = (tmp >> 12) & 0x1; - switch (vco) { + switch (cdclk_state->vco) { case 2666667: case 4000000: case 5333333: - return cdclk_sel ? 333333 : 222222; + cdclk_state->cdclk = cdclk_sel ? 333333 : 222222; + break; case 3200000: - return cdclk_sel ? 320000 : 228571; + cdclk_state->cdclk = cdclk_sel ? 320000 : 228571; + break; default: DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u, CFGC=0x%04x\n", - vco, tmp); - return 222222; + cdclk_state->vco, tmp); + cdclk_state->cdclk = 222222; + break; } } -static int hsw_get_cdclk(struct drm_i915_private *dev_priv) +static void hsw_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { uint32_t lcpll = I915_READ(LCPLL_CTL); uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; + cdclk_state->cdclk = 800000; else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; + cdclk_state->cdclk = 450000; else if (freq == LCPLL_CLK_FREQ_450) - return 450000; + cdclk_state->cdclk = 450000; else if (IS_HSW_ULT(dev_priv)) - return 337500; + cdclk_state->cdclk = 337500; else - return 540000; + cdclk_state->cdclk = 540000; } static int vlv_calc_cdclk(struct drm_i915_private *dev_priv, @@ -396,10 +440,13 @@ static int vlv_calc_cdclk(struct drm_i915_private *dev_priv, return 200000; } -static int vlv_get_cdclk(struct drm_i915_private *dev_priv) +static void vlv_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return vlv_get_cck_clock_hpll(dev_priv, "cdclk", - CCK_DISPLAY_CLOCK_CONTROL); + cdclk_state->vco = vlv_get_hpll_vco(dev_priv); + cdclk_state->cdclk = vlv_get_cck_clock(dev_priv, "cdclk", + CCK_DISPLAY_CLOCK_CONTROL, + cdclk_state->vco); } static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) @@ -411,7 +458,7 @@ static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) else default_credits = PFI_CREDIT(8); - if (dev_priv->cdclk_freq >= dev_priv->czclk_freq) { + if (dev_priv->cdclk.hw.cdclk >= dev_priv->czclk_freq) { /* CHV suggested value is 31 or 63 */ if (IS_CHERRYVIEW(dev_priv)) credits = PFI_CREDIT_63; @@ -443,8 +490,6 @@ static void vlv_set_cdclk(struct drm_device *dev, int cdclk) struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; - WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); - if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ cmd = 2; else if (cdclk == 266667) @@ -508,8 +553,6 @@ static void chv_set_cdclk(struct drm_device *dev, int cdclk) struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; - WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); - switch (cdclk) { case 333333: case 320000: @@ -555,23 +598,24 @@ static int bdw_calc_cdclk(int max_pixclk) return 337500; } -static int bdw_get_cdclk(struct drm_i915_private *dev_priv) +static void bdw_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { uint32_t lcpll = I915_READ(LCPLL_CTL); uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; + cdclk_state->cdclk = 800000; else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; + cdclk_state->cdclk = 450000; else if (freq == LCPLL_CLK_FREQ_450) - return 450000; + cdclk_state->cdclk = 450000; else if (freq == LCPLL_CLK_FREQ_54O_BDW) - return 540000; + cdclk_state->cdclk = 540000; else if (freq == LCPLL_CLK_FREQ_337_5_BDW) - return 337500; + cdclk_state->cdclk = 337500; else - return 675000; + cdclk_state->cdclk = 675000; } static void bdw_set_cdclk(struct drm_device *dev, int cdclk) @@ -648,9 +692,9 @@ static void bdw_set_cdclk(struct drm_device *dev, int cdclk) intel_update_cdclk(dev_priv); - WARN(cdclk != dev_priv->cdclk_freq, + WARN(cdclk != dev_priv->cdclk.hw.cdclk, "cdclk requested %d kHz but got %d kHz\n", - cdclk, dev_priv->cdclk_freq); + cdclk, dev_priv->cdclk.hw.cdclk); } static int skl_calc_cdclk(int max_pixclk, int vco) @@ -676,12 +720,13 @@ static int skl_calc_cdclk(int max_pixclk, int vco) } } -static void skl_dpll0_update(struct drm_i915_private *dev_priv) +static void skl_dpll0_update(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { u32 val; - dev_priv->cdclk_pll.ref = 24000; - dev_priv->cdclk_pll.vco = 0; + cdclk_state->ref = 24000; + cdclk_state->vco = 0; val = I915_READ(LCPLL1_CTL); if ((val & LCPLL_PLL_ENABLE) == 0) @@ -703,11 +748,11 @@ static void skl_dpll0_update(struct drm_i915_private *dev_priv) case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, SKL_DPLL0): case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, SKL_DPLL0): case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8100000; + cdclk_state->vco = 8100000; break; case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0): case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8640000; + cdclk_state->vco = 8640000; break; default: MISSING_CASE(val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); @@ -715,46 +760,57 @@ static void skl_dpll0_update(struct drm_i915_private *dev_priv) } } -static int skl_get_cdclk(struct drm_i915_private *dev_priv) +static void skl_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { u32 cdctl; - skl_dpll0_update(dev_priv); + skl_dpll0_update(dev_priv, cdclk_state); - if (dev_priv->cdclk_pll.vco == 0) - return dev_priv->cdclk_pll.ref; + cdclk_state->cdclk = cdclk_state->ref; + + if (cdclk_state->vco == 0) + return; cdctl = I915_READ(CDCLK_CTL); - if (dev_priv->cdclk_pll.vco == 8640000) { + if (cdclk_state->vco == 8640000) { switch (cdctl & CDCLK_FREQ_SEL_MASK) { case CDCLK_FREQ_450_432: - return 432000; + cdclk_state->cdclk = 432000; + break; case CDCLK_FREQ_337_308: - return 308571; + cdclk_state->cdclk = 308571; + break; case CDCLK_FREQ_540: - return 540000; + cdclk_state->cdclk = 540000; + break; case CDCLK_FREQ_675_617: - return 617143; + cdclk_state->cdclk = 617143; + break; default: MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + break; } } else { switch (cdctl & CDCLK_FREQ_SEL_MASK) { case CDCLK_FREQ_450_432: - return 450000; + cdclk_state->cdclk = 450000; + break; case CDCLK_FREQ_337_308: - return 337500; + cdclk_state->cdclk = 337500; + break; case CDCLK_FREQ_540: - return 540000; + cdclk_state->cdclk = 540000; + break; case CDCLK_FREQ_675_617: - return 675000; + cdclk_state->cdclk = 675000; + break; default: MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + break; } } - - return dev_priv->cdclk_pll.ref; } /* convert from kHz to .1 fixpoint MHz with -1MHz offset */ @@ -817,7 +873,7 @@ static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) 5)) DRM_ERROR("DPLL0 not locked\n"); - dev_priv->cdclk_pll.vco = vco; + dev_priv->cdclk.hw.vco = vco; /* We'll want to keep using the current vco from now on. */ skl_set_preferred_cdclk_vco(dev_priv, vco); @@ -831,7 +887,7 @@ static void skl_dpll0_disable(struct drm_i915_private *dev_priv) 1)) DRM_ERROR("Couldn't disable DPLL0\n"); - dev_priv->cdclk_pll.vco = 0; + dev_priv->cdclk.hw.vco = 0; } static void skl_set_cdclk(struct drm_i915_private *dev_priv, @@ -881,11 +937,11 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, break; } - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) skl_dpll0_disable(dev_priv); - if (dev_priv->cdclk_pll.vco != vco) + if (dev_priv->cdclk.hw.vco != vco) skl_dpll0_enable(dev_priv, vco); I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); @@ -913,8 +969,8 @@ static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) intel_update_cdclk(dev_priv); /* Is PLL enabled and locked ? */ - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) + if (dev_priv->cdclk.hw.vco == 0 || + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) goto sanitize; /* DPLL okay; verify the cdclock @@ -925,7 +981,7 @@ static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) */ cdctl = I915_READ(CDCLK_CTL); expected = (cdctl & CDCLK_FREQ_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); + skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); if (cdctl == expected) /* All well; nothing to sanitize */ return; @@ -934,9 +990,9 @@ static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); /* force cdclk programming */ - dev_priv->cdclk_freq = 0; + dev_priv->cdclk.hw.cdclk = 0; /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; + dev_priv->cdclk.hw.vco = -1; } /** @@ -954,14 +1010,15 @@ void skl_init_cdclk(struct drm_i915_private *dev_priv) skl_sanitize_cdclk(dev_priv); - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) { + if (dev_priv->cdclk.hw.cdclk != 0 && + dev_priv->cdclk.hw.vco != 0) { /* * Use the current vco as our initial * guess as to what the preferred vco is. */ if (dev_priv->skl_preferred_vco_freq == 0) skl_set_preferred_cdclk_vco(dev_priv, - dev_priv->cdclk_pll.vco); + dev_priv->cdclk.hw.vco); return; } @@ -982,7 +1039,7 @@ void skl_init_cdclk(struct drm_i915_private *dev_priv) */ void skl_uninit_cdclk(struct drm_i915_private *dev_priv) { - skl_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref, 0); + skl_set_cdclk(dev_priv, dev_priv->cdclk.hw.ref, 0); } static int bxt_calc_cdclk(int max_pixclk) @@ -1013,7 +1070,7 @@ static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) { int ratio; - if (cdclk == dev_priv->cdclk_pll.ref) + if (cdclk == dev_priv->cdclk.hw.ref) return 0; switch (cdclk) { @@ -1030,14 +1087,14 @@ static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) break; } - return dev_priv->cdclk_pll.ref * ratio; + return dev_priv->cdclk.hw.ref * ratio; } static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) { int ratio; - if (cdclk == dev_priv->cdclk_pll.ref) + if (cdclk == dev_priv->cdclk.hw.ref) return 0; switch (cdclk) { @@ -1050,15 +1107,16 @@ static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) break; } - return dev_priv->cdclk_pll.ref * ratio; + return dev_priv->cdclk.hw.ref * ratio; } -static void bxt_de_pll_update(struct drm_i915_private *dev_priv) +static void bxt_de_pll_update(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { u32 val; - dev_priv->cdclk_pll.ref = 19200; - dev_priv->cdclk_pll.vco = 0; + cdclk_state->ref = 19200; + cdclk_state->vco = 0; val = I915_READ(BXT_DE_PLL_ENABLE); if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) @@ -1068,20 +1126,21 @@ static void bxt_de_pll_update(struct drm_i915_private *dev_priv) return; val = I915_READ(BXT_DE_PLL_CTL); - dev_priv->cdclk_pll.vco = (val & BXT_DE_PLL_RATIO_MASK) * - dev_priv->cdclk_pll.ref; + cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref; } -static int bxt_get_cdclk(struct drm_i915_private *dev_priv) +static void bxt_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { u32 divider; - int div, vco; + int div; - bxt_de_pll_update(dev_priv); + bxt_de_pll_update(dev_priv, cdclk_state); - vco = dev_priv->cdclk_pll.vco; - if (vco == 0) - return dev_priv->cdclk_pll.ref; + cdclk_state->cdclk = cdclk_state->ref; + + if (cdclk_state->vco == 0) + return; divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; @@ -1101,10 +1160,10 @@ static int bxt_get_cdclk(struct drm_i915_private *dev_priv) break; default: MISSING_CASE(divider); - return dev_priv->cdclk_pll.ref; + return; } - return DIV_ROUND_CLOSEST(vco, div); + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); } static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) @@ -1117,12 +1176,12 @@ static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) 1)) DRM_ERROR("timeout waiting for DE PLL unlock\n"); - dev_priv->cdclk_pll.vco = 0; + dev_priv->cdclk.hw.vco = 0; } static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) { - int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk_pll.ref); + int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk.hw.ref); u32 val; val = I915_READ(BXT_DE_PLL_CTL); @@ -1140,7 +1199,7 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) 1)) DRM_ERROR("timeout waiting for DE PLL lock\n"); - dev_priv->cdclk_pll.vco = vco; + dev_priv->cdclk.hw.vco = vco; } static void bxt_set_cdclk(struct drm_i915_private *dev_priv, @@ -1168,7 +1227,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, divider = BXT_CDCLK_CD2X_DIV_SEL_1; break; default: - WARN_ON(cdclk != dev_priv->cdclk_pll.ref); + WARN_ON(cdclk != dev_priv->cdclk.hw.ref); WARN_ON(vco != 0); divider = BXT_CDCLK_CD2X_DIV_SEL_1; @@ -1187,11 +1246,11 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, return; } - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) bxt_de_pll_disable(dev_priv); - if (dev_priv->cdclk_pll.vco != vco) + if (dev_priv->cdclk.hw.vco != vco) bxt_de_pll_enable(dev_priv, vco); val = divider | skl_cdclk_decimal(cdclk); @@ -1228,8 +1287,8 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) intel_update_cdclk(dev_priv); - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) + if (dev_priv->cdclk.hw.vco == 0 || + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) goto sanitize; /* DPLL okay; verify the cdclock @@ -1247,12 +1306,12 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); + skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); /* * Disable SSA Precharge when CD clock frequency < 500 MHz, * enable otherwise. */ - if (dev_priv->cdclk_freq >= 500000) + if (dev_priv->cdclk.hw.cdclk >= 500000) expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; if (cdctl == expected) @@ -1263,10 +1322,10 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); /* force cdclk programming */ - dev_priv->cdclk_freq = 0; + dev_priv->cdclk.hw.cdclk = 0; /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; + dev_priv->cdclk.hw.vco = -1; } /** @@ -1284,7 +1343,8 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) bxt_sanitize_cdclk(dev_priv); - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) + if (dev_priv->cdclk.hw.cdclk != 0 && + dev_priv->cdclk.hw.vco != 0) return; /* @@ -1312,7 +1372,21 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) */ void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) { - bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref, 0); + bxt_set_cdclk(dev_priv, dev_priv->cdclk.hw.ref, 0); +} + +/** + * intel_cdclk_state_compare - Determine if two CDCLK states differ + * @a: first CDCLK state + * @b: second CDCLK state + * + * Returns: + * True if the CDCLK states are identical, false if they differ. + */ +bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) +{ + return memcmp(a, b, sizeof(*a)) == 0; } static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, @@ -1620,7 +1694,7 @@ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) dev_priv->max_cdclk_freq = 400000; } else { /* otherwise assume cdclk is fixed */ - dev_priv->max_cdclk_freq = dev_priv->cdclk_freq; + dev_priv->max_cdclk_freq = dev_priv->cdclk.hw.cdclk; } dev_priv->max_dotclk_freq = intel_compute_max_dotclk(dev_priv); @@ -1640,15 +1714,11 @@ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) */ void intel_update_cdclk(struct drm_i915_private *dev_priv) { - dev_priv->cdclk_freq = dev_priv->display.get_cdclk(dev_priv); + dev_priv->display.get_cdclk(dev_priv, &dev_priv->cdclk.hw); - if (INTEL_GEN(dev_priv) >= 9) - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", - dev_priv->cdclk_freq, dev_priv->cdclk_pll.vco, - dev_priv->cdclk_pll.ref); - else - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz\n", - dev_priv->cdclk_freq); + DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", + dev_priv->cdclk.hw.cdclk, dev_priv->cdclk.hw.vco, + dev_priv->cdclk.hw.ref); /* * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): @@ -1658,7 +1728,7 @@ void intel_update_cdclk(struct drm_i915_private *dev_priv) */ if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) I915_WRITE(GMBUSFREQ_VLV, - DIV_ROUND_UP(dev_priv->cdclk_freq, 1000)); + DIV_ROUND_UP(dev_priv->cdclk.hw.cdclk, 1000)); } static int pch_rawclk(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e996f8ec8f08..691db36d8388 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -135,7 +135,7 @@ struct intel_limit { }; /* returns HPLL frequency in kHz */ -static int valleyview_get_vco(struct drm_i915_private *dev_priv) +int vlv_get_hpll_vco(struct drm_i915_private *dev_priv) { int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; @@ -171,7 +171,7 @@ int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, const char *name, u32 reg) { if (dev_priv->hpll_freq == 0) - dev_priv->hpll_freq = valleyview_get_vco(dev_priv); + dev_priv->hpll_freq = vlv_get_hpll_vco(dev_priv); return vlv_get_cck_clock(dev_priv, name, reg, dev_priv->hpll_freq); @@ -12413,7 +12413,7 @@ static int intel_modeset_checks(struct drm_atomic_state *state) */ if (dev_priv->display.modeset_calc_cdclk) { if (!intel_state->cdclk_pll_vco) - intel_state->cdclk_pll_vco = dev_priv->cdclk_pll.vco; + intel_state->cdclk_pll_vco = dev_priv->cdclk.hw.vco; if (!intel_state->cdclk_pll_vco) intel_state->cdclk_pll_vco = dev_priv->skl_preferred_vco_freq; @@ -12433,8 +12433,8 @@ static int intel_modeset_checks(struct drm_atomic_state *state) } /* All pipes must be switched off while we change the cdclk. */ - if (intel_state->dev_cdclk != dev_priv->cdclk_freq || - intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco) { + if (intel_state->dev_cdclk != dev_priv->cdclk.hw.cdclk || + intel_state->cdclk_pll_vco != dev_priv->cdclk.hw.vco) { ret = intel_modeset_all_pipes(state); if (ret < 0) return ret; @@ -12869,8 +12869,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_update_legacy_modeset_state(state->dev, state); if (dev_priv->display.modeset_commit_cdclk && - (intel_state->dev_cdclk != dev_priv->cdclk_freq || - intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco)) + (intel_state->dev_cdclk != dev_priv->cdclk.hw.cdclk || + intel_state->cdclk_pll_vco != dev_priv->cdclk.hw.vco)) dev_priv->display.modeset_commit_cdclk(state); /* @@ -14855,7 +14855,7 @@ void intel_modeset_init_hw(struct drm_device *dev) intel_update_cdclk(dev_priv); - dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; + dev_priv->atomic_cdclk_freq = dev_priv->cdclk.hw.cdclk; intel_init_clock_gating(dev_priv); } @@ -15031,7 +15031,7 @@ int intel_modeset_init(struct drm_device *dev) intel_update_czclk(dev_priv); intel_update_cdclk(dev_priv); - dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; + dev_priv->atomic_cdclk_freq = dev_priv->cdclk.hw.cdclk; intel_shared_dpll_init(dev); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 21924cf4c8ad..f8c23fed4a2c 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -918,7 +918,7 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * divide by 2000 and use that */ if (intel_dig_port->port == PORT_A) - return DIV_ROUND_CLOSEST(dev_priv->cdclk_freq, 2000); + return DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.cdclk, 2000); else return DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 2000); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 4bdb1f3cb0b5..ba0728ccff75 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1251,10 +1251,13 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); void intel_update_max_cdclk(struct drm_i915_private *dev_priv); void intel_update_cdclk(struct drm_i915_private *dev_priv); void intel_update_rawclk(struct drm_i915_private *dev_priv); +bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b); /* intel_display.c */ enum transcoder intel_crtc_pch_transcoder(struct intel_crtc *crtc); void intel_update_rawclk(struct drm_i915_private *dev_priv); +int vlv_get_hpll_vco(struct drm_i915_private *dev_priv); int vlv_get_cck_clock(struct drm_i915_private *dev_priv, const char *name, u32 reg, int ref_freq); int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index c66ba7e36289..22c56ae086f2 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -817,7 +817,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) /* WaFbcExceedCdClockThreshold:hsw,bdw */ if ((IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) && - cache->crtc.hsw_bdw_pixel_rate >= dev_priv->cdclk_freq * 95 / 100) { + cache->crtc.hsw_bdw_pixel_rate >= dev_priv->cdclk.hw.cdclk * 95 / 100) { fbc->no_fbc_reason = "pixel rate is too big"; return false; } diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 1a6ff26dea20..cb50c527401f 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1315,7 +1315,7 @@ static u32 i9xx_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) if (IS_PINEVIEW(dev_priv)) clock = KHz(dev_priv->rawclk_freq); else - clock = KHz(dev_priv->cdclk_freq); + clock = KHz(dev_priv->cdclk.hw.cdclk); return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 32); } @@ -1333,7 +1333,7 @@ static u32 i965_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) if (IS_G4X(dev_priv)) clock = KHz(dev_priv->rawclk_freq); else - clock = KHz(dev_priv->cdclk_freq); + clock = KHz(dev_priv->cdclk.hw.cdclk); return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 128); } diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 915914f9444c..0f00a5aab69c 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -964,9 +964,12 @@ static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv) static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { + struct intel_cdclk_state cdclk_state = {}; + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); - WARN_ON(dev_priv->cdclk_freq != dev_priv->display.get_cdclk(dev_priv)); + dev_priv->display.get_cdclk(dev_priv, &cdclk_state); + WARN_ON(!intel_cdclk_state_compare(&dev_priv->cdclk.hw, &cdclk_state)); gen9_assert_dbuf_enabled(dev_priv); From bb0f4aab0e7677e91cde443fecc18e71fbb85038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 20 Jan 2017 20:21:59 +0200 Subject: [PATCH 0117/1299] drm/i915: Track full cdclk state for the logical and actual cdclk frequencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current dev_cdclk vs. cdclk vs. atomic_cdclk_freq is quite a mess. So here I'm introducing the "actual" and "logical" naming for our cdclk state. "actual" is what we'll bash into the hardware and "logical" is what everyone should use for state computaion/checking and whatnot. We'll track both using the intel_cdclk_state as both will need other differing parameters than just the actual cdclk frequency. While doing that we can at the same time unify the appearance of the .modeset_calc_cdclk() implementations a little bit. v2: Commit dev_priv->cdclk.actual since that already has the new state by the time .modeset_commit_cdclk() is called. v3: s/locical/logical/ and improve the docs a bit Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-9-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 20 +++-- drivers/gpu/drm/i915/intel_cdclk.c | 123 +++++++++++++++++---------- drivers/gpu/drm/i915/intel_display.c | 39 +++++---- drivers/gpu/drm/i915/intel_dp.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 22 +++-- drivers/gpu/drm/i915/intel_pm.c | 4 +- 6 files changed, 127 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index aa59b2153374..323bf93c3e92 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2172,18 +2172,26 @@ struct drm_i915_private { unsigned int skl_preferred_vco_freq; unsigned int max_cdclk_freq; - /* - * For reading holding any crtc lock is sufficient, - * for writing must hold all of them. - */ - unsigned int atomic_cdclk_freq; - unsigned int max_dotclk_freq; unsigned int rawclk_freq; unsigned int hpll_freq; unsigned int czclk_freq; struct { + /* + * The current logical cdclk state. + * See intel_atomic_state.cdclk.logical + * + * For reading holding any crtc lock is sufficient, + * for writing must hold all of them. + */ + struct intel_cdclk_state logical; + /* + * The current actual cdclk state. + * See intel_atomic_state.cdclk.actual + */ + struct intel_cdclk_state actual; + /* The current hardware cdclk state */ struct intel_cdclk_state hw; } cdclk; diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 4e74e87a8676..6529a2687fdd 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1460,12 +1460,26 @@ static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) int max_pixclk = intel_max_pixel_rate(state); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + int cdclk; - intel_state->cdclk = intel_state->dev_cdclk = - vlv_calc_cdclk(dev_priv, max_pixclk); + cdclk = vlv_calc_cdclk(dev_priv, max_pixclk); - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = vlv_calc_cdclk(dev_priv, 0); + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = vlv_calc_cdclk(dev_priv, 0); + + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } return 0; } @@ -1474,9 +1488,7 @@ static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_device *dev = old_state->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned int req_cdclk = old_intel_state->dev_cdclk; + unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; /* * FIXME: We can end up here with all power domains off, yet @@ -1518,9 +1530,16 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) return -EINVAL; } - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = bdw_calc_cdclk(0); + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = bdw_calc_cdclk(0); + + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } return 0; } @@ -1528,9 +1547,7 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) static void bdw_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_device *dev = old_state->dev; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned int req_cdclk = old_intel_state->dev_cdclk; + unsigned int req_cdclk = to_i915(dev)->cdclk.actual.cdclk; bdw_set_cdclk(dev, req_cdclk); } @@ -1540,8 +1557,11 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct drm_i915_private *dev_priv = to_i915(state->dev); const int max_pixclk = intel_max_pixel_rate(state); - int vco = intel_state->cdclk_pll_vco; - int cdclk; + int cdclk, vco; + + vco = intel_state->cdclk.logical.vco; + if (!vco) + vco = dev_priv->skl_preferred_vco_freq; /* * FIXME should also account for plane ratio @@ -1549,19 +1569,24 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) */ cdclk = skl_calc_cdclk(max_pixclk, vco); - /* - * FIXME move the cdclk caclulation to - * compute_config() so we can fail gracegully. - */ if (cdclk > dev_priv->max_cdclk_freq) { - DRM_ERROR("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - cdclk = dev_priv->max_cdclk_freq; + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; } - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = skl_calc_cdclk(0, vco); + intel_state->cdclk.logical.vco = vco; + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = skl_calc_cdclk(0, vco); + + intel_state->cdclk.actual.vco = vco; + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } return 0; } @@ -1569,10 +1594,8 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(old_state); - unsigned int req_cdclk = intel_state->dev_cdclk; - unsigned int req_vco = intel_state->cdclk_pll_vco; + unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; + unsigned int req_vco = dev_priv->cdclk.actual.vco; skl_set_cdclk(dev_priv, req_cdclk, req_vco); } @@ -1583,22 +1606,39 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) int max_pixclk = intel_max_pixel_rate(state); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - int cdclk; + int cdclk, vco; - if (IS_GEMINILAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv)) { cdclk = glk_calc_cdclk(max_pixclk); - else + vco = glk_de_pll_vco(dev_priv, cdclk); + } else { cdclk = bxt_calc_cdclk(max_pixclk); + vco = bxt_de_pll_vco(dev_priv, cdclk); + } - intel_state->cdclk = intel_state->dev_cdclk = cdclk; + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk.logical.vco = vco; + intel_state->cdclk.logical.cdclk = cdclk; if (!intel_state->active_crtcs) { - if (IS_GEMINILAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv)) { cdclk = glk_calc_cdclk(0); - else + vco = glk_de_pll_vco(dev_priv, cdclk); + } else { cdclk = bxt_calc_cdclk(0); + vco = bxt_de_pll_vco(dev_priv, cdclk); + } - intel_state->dev_cdclk = cdclk; + intel_state->cdclk.actual.vco = vco; + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; } return 0; @@ -1607,15 +1647,8 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned int req_cdclk = old_intel_state->dev_cdclk; - unsigned int req_vco; - - if (IS_GEMINILAKE(dev_priv)) - req_vco = glk_de_pll_vco(dev_priv, req_cdclk); - else - req_vco = bxt_de_pll_vco(dev_priv, req_cdclk); + unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; + unsigned int req_vco = dev_priv->cdclk.actual.vco; bxt_set_cdclk(dev_priv, req_cdclk, req_vco); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 691db36d8388..c0ad2ddaed9b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12393,6 +12393,8 @@ static int intel_modeset_checks(struct drm_atomic_state *state) intel_state->modeset = true; intel_state->active_crtcs = dev_priv->active_crtcs; + intel_state->cdclk.logical = dev_priv->cdclk.logical; + intel_state->cdclk.actual = dev_priv->cdclk.actual; for_each_crtc_in_state(state, crtc, crtc_state, i) { if (crtc_state->active) @@ -12412,38 +12414,35 @@ static int intel_modeset_checks(struct drm_atomic_state *state) * adjusted_mode bits in the crtc directly. */ if (dev_priv->display.modeset_calc_cdclk) { - if (!intel_state->cdclk_pll_vco) - intel_state->cdclk_pll_vco = dev_priv->cdclk.hw.vco; - if (!intel_state->cdclk_pll_vco) - intel_state->cdclk_pll_vco = dev_priv->skl_preferred_vco_freq; - ret = dev_priv->display.modeset_calc_cdclk(state); if (ret < 0) return ret; /* - * Writes to dev_priv->atomic_cdclk_freq must protected by + * Writes to dev_priv->cdclk.logical must protected by * holding all the crtc locks, even if we don't end up * touching the hardware */ - if (intel_state->cdclk != dev_priv->atomic_cdclk_freq) { + if (!intel_cdclk_state_compare(&dev_priv->cdclk.logical, + &intel_state->cdclk.logical)) { ret = intel_lock_all_pipes(state); if (ret < 0) return ret; } /* All pipes must be switched off while we change the cdclk. */ - if (intel_state->dev_cdclk != dev_priv->cdclk.hw.cdclk || - intel_state->cdclk_pll_vco != dev_priv->cdclk.hw.vco) { + if (!intel_cdclk_state_compare(&dev_priv->cdclk.actual, + &intel_state->cdclk.actual)) { ret = intel_modeset_all_pipes(state); if (ret < 0) return ret; } - DRM_DEBUG_KMS("New cdclk calculated to be atomic %u, actual %u\n", - intel_state->cdclk, intel_state->dev_cdclk); + DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", + intel_state->cdclk.logical.cdclk, + intel_state->cdclk.actual.cdclk); } else { - to_intel_atomic_state(state)->cdclk = dev_priv->atomic_cdclk_freq; + to_intel_atomic_state(state)->cdclk.logical = dev_priv->cdclk.logical; } intel_modeset_clear_plls(state); @@ -12546,7 +12545,7 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) return ret; } else { - intel_state->cdclk = dev_priv->atomic_cdclk_freq; + intel_state->cdclk.logical = dev_priv->cdclk.logical; } ret = drm_atomic_helper_check_planes(dev, state); @@ -12869,8 +12868,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_update_legacy_modeset_state(state->dev, state); if (dev_priv->display.modeset_commit_cdclk && - (intel_state->dev_cdclk != dev_priv->cdclk.hw.cdclk || - intel_state->cdclk_pll_vco != dev_priv->cdclk.hw.vco)) + !intel_cdclk_state_compare(&dev_priv->cdclk.hw, + &dev_priv->cdclk.actual)) dev_priv->display.modeset_commit_cdclk(state); /* @@ -13059,7 +13058,8 @@ static int intel_atomic_commit(struct drm_device *dev, memcpy(dev_priv->min_pixclk, intel_state->min_pixclk, sizeof(intel_state->min_pixclk)); dev_priv->active_crtcs = intel_state->active_crtcs; - dev_priv->atomic_cdclk_freq = intel_state->cdclk; + dev_priv->cdclk.logical = intel_state->cdclk.logical; + dev_priv->cdclk.actual = intel_state->cdclk.actual; } drm_atomic_state_get(state); @@ -13297,7 +13297,7 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state return DRM_PLANE_HELPER_NO_SCALING; crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; - cdclk = to_intel_atomic_state(crtc_state->base.state)->cdclk; + cdclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; if (WARN_ON_ONCE(!crtc_clock || cdclk < crtc_clock)) return DRM_PLANE_HELPER_NO_SCALING; @@ -14854,8 +14854,7 @@ void intel_modeset_init_hw(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); intel_update_cdclk(dev_priv); - - dev_priv->atomic_cdclk_freq = dev_priv->cdclk.hw.cdclk; + dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; intel_init_clock_gating(dev_priv); } @@ -15031,7 +15030,7 @@ int intel_modeset_init(struct drm_device *dev) intel_update_czclk(dev_priv); intel_update_cdclk(dev_priv); - dev_priv->atomic_cdclk_freq = dev_priv->cdclk.hw.cdclk; + dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; intel_shared_dpll_init(dev); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index f8c23fed4a2c..0f14e97e519b 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1785,7 +1785,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, break; } - to_intel_atomic_state(pipe_config->base.state)->cdclk_pll_vco = vco; + to_intel_atomic_state(pipe_config->base.state)->cdclk.logical.vco = vco; } if (!HAS_DDI(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index ba0728ccff75..97d848197ff3 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -333,13 +333,20 @@ struct dpll { struct intel_atomic_state { struct drm_atomic_state base; - unsigned int cdclk; + struct { + /* + * Logical state of cdclk (used for all scaling, watermark, + * etc. calculations and checks). This is computed as if all + * enabled crtcs were active. + */ + struct intel_cdclk_state logical; - /* - * Calculated device cdclk, can be different from cdclk - * only when all crtc's are DPMS off. - */ - unsigned int dev_cdclk; + /* + * Actual state of cdclk, can be different from the logical + * state only when all crtc's are DPMS off. + */ + struct intel_cdclk_state actual; + } cdclk; bool dpll_set, modeset; @@ -356,9 +363,6 @@ struct intel_atomic_state { unsigned int active_crtcs; unsigned int min_pixclk[I915_MAX_PIPES]; - /* SKL/KBL Only */ - unsigned int cdclk_pll_vco; - struct intel_shared_dpll_state shared_dpll[I915_NUM_PLLS]; /* diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 79cbe5736ed1..b1d07e63ff8b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2108,7 +2108,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) return 0; if (WARN_ON(adjusted_mode->crtc_clock == 0)) return 0; - if (WARN_ON(intel_state->cdclk == 0)) + if (WARN_ON(intel_state->cdclk.logical.cdclk == 0)) return 0; /* The WM are computed with base on how long it takes to fill a single @@ -2117,7 +2117,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, adjusted_mode->crtc_clock); ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, - intel_state->cdclk); + intel_state->cdclk.logical.cdclk); return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | PIPE_WM_LINETIME_TIME(linetime); From 3d5dbb10f34ad0521bfb7091bd5b47f6c984b9aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 20 Jan 2017 20:22:00 +0200 Subject: [PATCH 0118/1299] drm/i915: Pass dev_priv to remainder of the cdclk functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean up the dev vs. dev_priv straggles that are making things look inconsistentt. v2: Deal with churn Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-10-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 6529a2687fdd..9d1e2a4859ab 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -485,9 +485,8 @@ static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); } -static void vlv_set_cdclk(struct drm_device *dev, int cdclk) +static void vlv_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) { - struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ @@ -548,9 +547,8 @@ static void vlv_set_cdclk(struct drm_device *dev, int cdclk) intel_update_cdclk(dev_priv); } -static void chv_set_cdclk(struct drm_device *dev, int cdclk) +static void chv_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) { - struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; switch (cdclk) { @@ -618,9 +616,8 @@ static void bdw_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = 675000; } -static void bdw_set_cdclk(struct drm_device *dev, int cdclk) +static void bdw_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t val, data; int ret; @@ -1455,8 +1452,7 @@ static int intel_max_pixel_rate(struct drm_atomic_state *state) static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) { - struct drm_device *dev = state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(state->dev); int max_pixclk = intel_max_pixel_rate(state); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); @@ -1486,8 +1482,7 @@ static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) { - struct drm_device *dev = old_state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(old_state->dev); unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; /* @@ -1502,9 +1497,9 @@ static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); if (IS_CHERRYVIEW(dev_priv)) - chv_set_cdclk(dev, req_cdclk); + chv_set_cdclk(dev_priv, req_cdclk); else - vlv_set_cdclk(dev, req_cdclk); + vlv_set_cdclk(dev_priv, req_cdclk); vlv_program_pfi_credits(dev_priv); @@ -1546,10 +1541,10 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) static void bdw_modeset_commit_cdclk(struct drm_atomic_state *old_state) { - struct drm_device *dev = old_state->dev; - unsigned int req_cdclk = to_i915(dev)->cdclk.actual.cdclk; + struct drm_i915_private *dev_priv = to_i915(old_state->dev); + unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; - bdw_set_cdclk(dev, req_cdclk); + bdw_set_cdclk(dev_priv, req_cdclk); } static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) From 83c5fda74f98307a19189414889363341ede8067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 20 Jan 2017 20:22:01 +0200 Subject: [PATCH 0119/1299] drm/i915: Pass the cdclk state to the set_cdclk() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than passing all the different parameters (cdclk,vco so far) sparately to the set_cdclk() functions, just pass the entire cdclk state. v2: Deal with churn v3: Drop the usless .ref assignment (Ander) Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-11-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 78 ++++++++++++++++++------------ 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 9d1e2a4859ab..d0dc6f94277c 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -485,8 +485,10 @@ static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); } -static void vlv_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) +static void vlv_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) { + int cdclk = cdclk_state->cdclk; u32 val, cmd; if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ @@ -547,8 +549,10 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) intel_update_cdclk(dev_priv); } -static void chv_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) +static void chv_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) { + int cdclk = cdclk_state->cdclk; u32 val, cmd; switch (cdclk) { @@ -616,8 +620,10 @@ static void bdw_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = 675000; } -static void bdw_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) +static void bdw_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) { + int cdclk = cdclk_state->cdclk; uint32_t val, data; int ret; @@ -888,8 +894,10 @@ static void skl_dpll0_disable(struct drm_i915_private *dev_priv) } static void skl_set_cdclk(struct drm_i915_private *dev_priv, - int cdclk, int vco) + const struct intel_cdclk_state *cdclk_state) { + int cdclk = cdclk_state->cdclk; + int vco = cdclk_state->vco; u32 freq_select, pcu_ack; int ret; @@ -1003,7 +1011,7 @@ static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) */ void skl_init_cdclk(struct drm_i915_private *dev_priv) { - int cdclk, vco; + struct intel_cdclk_state cdclk_state; skl_sanitize_cdclk(dev_priv); @@ -1019,12 +1027,14 @@ void skl_init_cdclk(struct drm_i915_private *dev_priv) return; } - vco = dev_priv->skl_preferred_vco_freq; - if (vco == 0) - vco = 8100000; - cdclk = skl_calc_cdclk(0, vco); + cdclk_state = dev_priv->cdclk.hw; - skl_set_cdclk(dev_priv, cdclk, vco); + cdclk_state.vco = dev_priv->skl_preferred_vco_freq; + if (cdclk_state.vco == 0) + cdclk_state.vco = 8100000; + cdclk_state.cdclk = skl_calc_cdclk(0, cdclk_state.vco); + + skl_set_cdclk(dev_priv, &cdclk_state); } /** @@ -1036,7 +1046,12 @@ void skl_init_cdclk(struct drm_i915_private *dev_priv) */ void skl_uninit_cdclk(struct drm_i915_private *dev_priv) { - skl_set_cdclk(dev_priv, dev_priv->cdclk.hw.ref, 0); + struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.vco = 0; + + skl_set_cdclk(dev_priv, &cdclk_state); } static int bxt_calc_cdclk(int max_pixclk) @@ -1200,8 +1215,10 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) } static void bxt_set_cdclk(struct drm_i915_private *dev_priv, - int cdclk, int vco) + const struct intel_cdclk_state *cdclk_state) { + int cdclk = cdclk_state->cdclk; + int vco = cdclk_state->vco; u32 val, divider; int ret; @@ -1336,7 +1353,7 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) */ void bxt_init_cdclk(struct drm_i915_private *dev_priv) { - int cdclk, vco; + struct intel_cdclk_state cdclk_state; bxt_sanitize_cdclk(dev_priv); @@ -1344,20 +1361,22 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) dev_priv->cdclk.hw.vco != 0) return; + cdclk_state = dev_priv->cdclk.hw; + /* * FIXME: * - The initial CDCLK needs to be read from VBT. * Need to make this change after VBT has changes for BXT. */ if (IS_GEMINILAKE(dev_priv)) { - cdclk = glk_calc_cdclk(0); - vco = glk_de_pll_vco(dev_priv, cdclk); + cdclk_state.cdclk = glk_calc_cdclk(0); + cdclk_state.vco = glk_de_pll_vco(dev_priv, cdclk_state.cdclk); } else { - cdclk = bxt_calc_cdclk(0); - vco = bxt_de_pll_vco(dev_priv, cdclk); + cdclk_state.cdclk = bxt_calc_cdclk(0); + cdclk_state.vco = bxt_de_pll_vco(dev_priv, cdclk_state.cdclk); } - bxt_set_cdclk(dev_priv, cdclk, vco); + bxt_set_cdclk(dev_priv, &cdclk_state); } /** @@ -1369,7 +1388,12 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) */ void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) { - bxt_set_cdclk(dev_priv, dev_priv->cdclk.hw.ref, 0); + struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.vco = 0; + + bxt_set_cdclk(dev_priv, &cdclk_state); } /** @@ -1483,7 +1507,6 @@ static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; /* * FIXME: We can end up here with all power domains off, yet @@ -1497,9 +1520,9 @@ static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); if (IS_CHERRYVIEW(dev_priv)) - chv_set_cdclk(dev_priv, req_cdclk); + chv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); else - vlv_set_cdclk(dev_priv, req_cdclk); + vlv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); vlv_program_pfi_credits(dev_priv); @@ -1542,9 +1565,8 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) static void bdw_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; - bdw_set_cdclk(dev_priv, req_cdclk); + bdw_set_cdclk(dev_priv, &dev_priv->cdclk.actual); } static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) @@ -1589,10 +1611,8 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; - unsigned int req_vco = dev_priv->cdclk.actual.vco; - skl_set_cdclk(dev_priv, req_cdclk, req_vco); + skl_set_cdclk(dev_priv, &dev_priv->cdclk.actual); } static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) @@ -1642,10 +1662,8 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; - unsigned int req_vco = dev_priv->cdclk.actual.vco; - bxt_set_cdclk(dev_priv, req_cdclk, req_vco); + bxt_set_cdclk(dev_priv, &dev_priv->cdclk.actual); } static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) From 1a5301a58e5fca15598a1b3e79706b522d763574 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 26 Jan 2017 21:57:19 +0200 Subject: [PATCH 0120/1299] drm/i915: Move PFI credit reprogramming into vlv/chv_set_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the vlv_program_pfi_credits() into vlv_set_cdclk() and chv_set_cdclk() so that we can neuter vlv_modeset_commit_cdclk(). v2: Do the PFI programming after cdclk readout since it currently depends on the readout to fill dev_priv->cdclk.hw Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170126195719.309-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index d0dc6f94277c..305c07820cae 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -547,6 +547,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->sb_lock); intel_update_cdclk(dev_priv); + + vlv_program_pfi_credits(dev_priv); } static void chv_set_cdclk(struct drm_i915_private *dev_priv, @@ -586,6 +588,8 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->rps.hw_lock); intel_update_cdclk(dev_priv); + + vlv_program_pfi_credits(dev_priv); } static int bdw_calc_cdclk(int max_pixclk) @@ -1524,7 +1528,6 @@ static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) else vlv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); - vlv_program_pfi_credits(dev_priv); intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); } From 63ff30442519bb40307f940c5a89a95ca904723a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 20 Jan 2017 20:22:03 +0200 Subject: [PATCH 0121/1299] drm/i915: Nuke the VLV/CHV PFI programming power domain workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hack to grab the pipe A power domain around VLV/CHV cdclk programming has surely outlived its usefulness. We should be holding sufficient power domains during any modeset, so let's just nuke this hack. v2: Fix typo in commit message (Ander) Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-13-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 305c07820cae..1d4799124e39 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1512,24 +1512,10 @@ static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - /* - * FIXME: We can end up here with all power domains off, yet - * with a CDCLK frequency other than the minimum. To account - * for this take the PIPE-A power domain, which covers the HW - * blocks needed for the following programming. This can be - * removed once it's guaranteed that we get here either with - * the minimum CDCLK set, or the required power domains - * enabled. - */ - intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - if (IS_CHERRYVIEW(dev_priv)) chv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); else vlv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); - - - intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); } static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) From b0587e4d20dcf8af3e350b010b4d2a21f95f2702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 26 Jan 2017 21:52:01 +0200 Subject: [PATCH 0122/1299] drm/i915: Replace the .modeset_commit_cdclk() hook with a more direct .set_cdclk() hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the cdclk state, all the .modeset_commit_cdclk() hooks are now pointless wrappers. Let's replace them with just a .set_cdclk() function pointer. However let's wrap that in a small helper that does the state comparison and prints a unified debug message across all platforms. We didn't even have the debug print on all platforms previously. This reduces the clutter in intel_atomic_commit_tail() a little bit. v2: Wrap .set_cdclk() in intel_set_cdclk() v3: Add kernel-docs v4: Deal with IS_GEN9_BC() Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170126195201.32638-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/intel_cdclk.c | 79 ++++++++++++---------------- drivers/gpu/drm/i915/intel_display.c | 5 +- drivers/gpu/drm/i915/intel_drv.h | 2 + 4 files changed, 38 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 323bf93c3e92..7fb362f1fba5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -605,6 +605,8 @@ struct intel_cdclk_state; struct drm_i915_display_funcs { void (*get_cdclk)(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state); + void (*set_cdclk)(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state); int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); int (*compute_intermediate_wm)(struct drm_device *dev, @@ -619,7 +621,6 @@ struct drm_i915_display_funcs { int (*compute_global_watermarks)(struct drm_atomic_state *state); void (*update_wm)(struct intel_crtc *crtc); int (*modeset_calc_cdclk)(struct drm_atomic_state *state); - void (*modeset_commit_cdclk)(struct drm_atomic_state *state); /* Returns the active state of the crtc, and if the crtc is active, * fills out the pipe-config with the hw state. */ bool (*get_pipe_config)(struct intel_crtc *, diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 1d4799124e39..d643c0c5321b 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -907,9 +907,6 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, WARN_ON((cdclk == 24000) != (vco == 0)); - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", - cdclk, vco); - mutex_lock(&dev_priv->rps.hw_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, @@ -1226,9 +1223,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, u32 val, divider; int ret; - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", - cdclk, vco); - /* cdclk = vco / 2 / div{1,1.5,2,4} */ switch (DIV_ROUND_CLOSEST(vco, cdclk)) { case 8: @@ -1414,6 +1408,30 @@ bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, return memcmp(a, b, sizeof(*a)) == 0; } +/** + * intel_set_cdclk - Push the CDCLK state to the hardware + * @dev_priv: i915 device + * @cdclk_state: new CDCLK state + * + * Program the hardware based on the passed in CDCLK state, + * if necessary. + */ +void intel_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + if (intel_cdclk_state_compare(&dev_priv->cdclk.hw, cdclk_state)) + return; + + if (WARN_ON_ONCE(!dev_priv->display.set_cdclk)) + return; + + DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz, VCO %d kHz, ref %d kHz\n", + cdclk_state->cdclk, cdclk_state->vco, + cdclk_state->ref); + + dev_priv->display.set_cdclk(dev_priv, cdclk_state); +} + static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, int pixel_rate) { @@ -1508,16 +1526,6 @@ static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } -static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_i915_private *dev_priv = to_i915(old_state->dev); - - if (IS_CHERRYVIEW(dev_priv)) - chv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); - else - vlv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); -} - static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->dev); @@ -1551,13 +1559,6 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } -static void bdw_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_i915_private *dev_priv = to_i915(old_state->dev); - - bdw_set_cdclk(dev_priv, &dev_priv->cdclk.actual); -} - static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) { struct intel_atomic_state *intel_state = to_intel_atomic_state(state); @@ -1597,13 +1598,6 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } -static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_i915_private *dev_priv = to_i915(old_state->dev); - - skl_set_cdclk(dev_priv, &dev_priv->cdclk.actual); -} - static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->dev); @@ -1648,13 +1642,6 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } -static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_i915_private *dev_priv = to_i915(old_state->dev); - - bxt_set_cdclk(dev_priv, &dev_priv->cdclk.actual); -} - static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) { int max_cdclk_freq = dev_priv->max_cdclk_freq; @@ -1834,24 +1821,24 @@ void intel_update_rawclk(struct drm_i915_private *dev_priv) */ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) { - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - vlv_modeset_commit_cdclk; + if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->display.set_cdclk = chv_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + vlv_modeset_calc_cdclk; + } else if (IS_VALLEYVIEW(dev_priv)) { + dev_priv->display.set_cdclk = vlv_set_cdclk; dev_priv->display.modeset_calc_cdclk = vlv_modeset_calc_cdclk; } else if (IS_BROADWELL(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - bdw_modeset_commit_cdclk; + dev_priv->display.set_cdclk = bdw_set_cdclk; dev_priv->display.modeset_calc_cdclk = bdw_modeset_calc_cdclk; } else if (IS_GEN9_LP(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - bxt_modeset_commit_cdclk; + dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; } else if (IS_GEN9_BC(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - skl_modeset_commit_cdclk; + dev_priv->display.set_cdclk = skl_set_cdclk; dev_priv->display.modeset_calc_cdclk = skl_modeset_calc_cdclk; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c0ad2ddaed9b..ff764878dd6b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12867,10 +12867,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (intel_state->modeset) { drm_atomic_helper_update_legacy_modeset_state(state->dev, state); - if (dev_priv->display.modeset_commit_cdclk && - !intel_cdclk_state_compare(&dev_priv->cdclk.hw, - &dev_priv->cdclk.actual)) - dev_priv->display.modeset_commit_cdclk(state); + intel_set_cdclk(dev_priv, &dev_priv->cdclk.actual); /* * SKL workaround: bspec recommends we disable the SAGV when we diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 97d848197ff3..41fc4274afa4 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1257,6 +1257,8 @@ void intel_update_cdclk(struct drm_i915_private *dev_priv); void intel_update_rawclk(struct drm_i915_private *dev_priv); bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, const struct intel_cdclk_state *b); +void intel_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state); /* intel_display.c */ enum transcoder intel_crtc_pch_transcoder(struct intel_crtc *crtc); From ceb993201cf270a178671611a15a3b2037434ef1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 20 Jan 2017 20:22:05 +0200 Subject: [PATCH 0123/1299] drm/i915: Move ilk_pipe_pixel_rate() to intel_display.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move ilk_pipe_pixel_rate() next to its only caller (intel_crtc_compute_pixel_rate()). Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-15-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 35 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_drv.h | 1 - drivers/gpu/drm/i915/intel_pm.c | 33 -------------------------- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ff764878dd6b..f6259c949da2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6235,6 +6235,41 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); } +static uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) +{ + uint32_t pixel_rate; + + pixel_rate = pipe_config->base.adjusted_mode.crtc_clock; + + /* + * We only use IF-ID interlacing. If we ever use + * PF-ID we'll need to adjust the pixel_rate here. + */ + + if (pipe_config->pch_pfit.enabled) { + uint64_t pipe_w, pipe_h, pfit_w, pfit_h; + uint32_t pfit_size = pipe_config->pch_pfit.size; + + pipe_w = pipe_config->pipe_src_w; + pipe_h = pipe_config->pipe_src_h; + + pfit_w = (pfit_size >> 16) & 0xFFFF; + pfit_h = pfit_size & 0xFFFF; + if (pipe_w < pfit_w) + pipe_w = pfit_w; + if (pipe_h < pfit_h) + pipe_h = pfit_h; + + if (WARN_ON(!pfit_w || !pfit_h)) + return pixel_rate; + + pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, + pfit_w * pfit_h); + } + + return pixel_rate; +} + static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 41fc4274afa4..321ba45967aa 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1822,7 +1822,6 @@ bool skl_wm_level_equals(const struct skl_wm_level *l1, bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, const struct skl_ddb_entry *ddb, int ignore); -uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config); bool ilk_disable_lp_wm(struct drm_device *dev); int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6); static inline int intel_enable_rc6(void) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b1d07e63ff8b..c0b0f5a4b9f1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1714,39 +1714,6 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) I915_WRITE(FW_BLC, fwater_lo); } -uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) -{ - uint32_t pixel_rate; - - pixel_rate = pipe_config->base.adjusted_mode.crtc_clock; - - /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to - * adjust the pixel_rate here. */ - - if (pipe_config->pch_pfit.enabled) { - uint64_t pipe_w, pipe_h, pfit_w, pfit_h; - uint32_t pfit_size = pipe_config->pch_pfit.size; - - pipe_w = pipe_config->pipe_src_w; - pipe_h = pipe_config->pipe_src_h; - - pfit_w = (pfit_size >> 16) & 0xFFFF; - pfit_h = pfit_size & 0xFFFF; - if (pipe_w < pfit_w) - pipe_w = pfit_w; - if (pipe_h < pfit_h) - pipe_h = pfit_h; - - if (WARN_ON(!pfit_w || !pfit_h)) - return pixel_rate; - - pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, - pfit_w * pfit_h); - } - - return pixel_rate; -} - /* latency must be in 0.1us units. */ static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency) { From 519d52498156b07c4bc69ad28ab8f784cc124628 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Feb 2017 10:47:10 +0000 Subject: [PATCH 0124/1299] drm/i915: i915_gem_shrink_all() needs an awake device Since to unbind an object, we may need a powered up device to access the GTT entries, we only shrink bound objects if awake. Callers to i915_gem_shrink_all() had to take this into account and take the rpm wakeref, but we can move this wakeref into the shrink_all itself for convenience and making the function live up to its name. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170208104710.18089-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 4 ---- drivers/gpu/drm/i915/i915_gem_shrinker.c | 5 +++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d1841bc1cb50..ee62f66ea540 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4648,14 +4648,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) int i915_gem_freeze(struct drm_i915_private *dev_priv) { - intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_shrink_all(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 401006b4c6a3..f249a1eb46ac 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -259,10 +259,13 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) { unsigned long freed; + intel_runtime_pm_get(dev_priv); freed = i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE); + intel_runtime_pm_put(dev_priv); + rcu_barrier(); /* wait until our RCU delayed slab frees are completed */ return freed; @@ -380,9 +383,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000)) return NOTIFY_DONE; - intel_runtime_pm_get(dev_priv); freed_pages = i915_gem_shrink_all(dev_priv); - intel_runtime_pm_put(dev_priv); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not From 20a8a74ad9473e498da47e3094fa73057f3246d1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Feb 2017 14:30:31 +0000 Subject: [PATCH 0125/1299] drm/i915: Move calling engine->init_hw() to its own function Just a simple refactor to move a loop and some support code out of i915_gem_init_hw(). This is in preparation for avoiding a race between the tasklet writing to ELSP whilst simultaneously being written by engine->init_hw() following a GPU reset. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170208143033.11651-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ee62f66ea540..85251964ed8d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4367,11 +4367,24 @@ static void init_unused_rings(struct drm_i915_private *dev_priv) } } -int -i915_gem_init_hw(struct drm_i915_private *dev_priv) +static int __i915_gem_restart_engines(void *data) { + struct drm_i915_private *i915 = data; struct intel_engine_cs *engine; enum intel_engine_id id; + int err; + + for_each_engine(engine, i915, id) { + err = engine->init_hw(engine); + if (err) + return err; + } + + return 0; +} + +int i915_gem_init_hw(struct drm_i915_private *dev_priv) +{ int ret; dev_priv->gt.last_init_time = ktime_get(); @@ -4417,11 +4430,9 @@ i915_gem_init_hw(struct drm_i915_private *dev_priv) } /* Need to do basic initialisation of all rings first: */ - for_each_engine(engine, dev_priv, id) { - ret = engine->init_hw(engine); - if (ret) - goto out; - } + ret = __i915_gem_restart_engines(dev_priv); + if (ret) + goto out; intel_mocs_init_l3cc_table(dev_priv); From d80270931314a88d79d9bd5e0a5df93c12196375 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Feb 2017 14:30:32 +0000 Subject: [PATCH 0126/1299] drm/i915: Split GEM resetting into 3 phases Currently we do a reset prepare/finish around the call to reset the GPU, but it looks like we need a later stage after the hw has been reinitialised to allow GEM to restart itself. Start by splitting the 2 GEM phases into 3: prepare - before the reset, check if GEM recovered, then stop GEM reset - after the reset, update GEM bookkeeping finish - after the re-initialisation following the reset, restart GEM Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170208143033.11651-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 3 ++- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 7 ++++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 78191e00a00c..d48c02a7933f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1836,7 +1836,7 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } - i915_gem_reset_finish(dev_priv); + i915_gem_reset(dev_priv); intel_overlay_reset(dev_priv); /* Ok, now get things going again... */ @@ -1859,6 +1859,7 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } + i915_gem_reset_finish(dev_priv); i915_queue_hangcheck(dev_priv); wakeup: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7fb362f1fba5..604b20ff0390 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3362,6 +3362,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) } int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); +void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 85251964ed8d..e8129935d78b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2758,7 +2758,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) engine->reset_hw(engine, request); } -void i915_gem_reset_finish(struct drm_i915_private *dev_priv) +void i915_gem_reset(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -2780,6 +2780,11 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) } } +void i915_gem_reset_finish(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->drm.struct_mutex); +} + static void nop_submit_request(struct drm_i915_gem_request *request) { dma_fence_set_error(&request->fence, -EIO); From 1f7b847d72c3583df5048d83bd945d0c2c524c28 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Feb 2017 14:30:33 +0000 Subject: [PATCH 0127/1299] drm/i915: Disable engine->irq_tasklet around resets When we restart the engines, and we have active requests, a request on the first engine may complete and queue a request to the second engine before we try to restart the second engine. That queueing of the request may race with the engine to restart, and so may corrupt the current state. Disabling the engine->irq_tasklet prevents the two paths from writing into ELSP simultaneously (and modifyin the execlists_port[] at the same time). Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests") Testcase: igt/gem_exec_fence/await-hang Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170208143033.11651-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e8129935d78b..239e5144dbda 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2643,6 +2643,15 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *request; + /* Prevent request submission to the hardware until we have + * completed the reset in i915_gem_reset_finish(). If a request + * is completed by one engine, it may then queue a request + * to a second via its engine->irq_tasklet *just* as we are + * calling engine->init_hw() and also writing the ELSP. + * Turning off the engine->irq_tasklet until the reset is over + * prevents the race. + */ + tasklet_disable(&engine->irq_tasklet); tasklet_kill(&engine->irq_tasklet); if (engine_stalled(engine)) { @@ -2782,7 +2791,13 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) void i915_gem_reset_finish(struct drm_i915_private *dev_priv) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + for_each_engine(engine, dev_priv, id) + tasklet_enable(&engine->irq_tasklet); } static void nop_submit_request(struct drm_i915_gem_request *request) From ca4c38909fabb1bf18e2cb805569c9559d7a56dc Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 3 Feb 2017 16:03:13 +0200 Subject: [PATCH 0128/1299] drm/i915: Remove WA for swapped HPD pins in broxton A stepping Remove workaround for swapped HPD pins in broxton A stepping, which is pre-production hardware. Cc: Jani Nikula Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170203140316.20792-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 9 +-------- drivers/gpu/drm/i915/intel_dp.c | 2 -- drivers/gpu/drm/i915/intel_hdmi.c | 9 +-------- 3 files changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index d957211c76ad..b5b3065cb60c 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2264,14 +2264,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) goto err; intel_dig_port->hpd_pulse = intel_dp_hpd_pulse; - /* - * On BXT A0/A1, sw needs to activate DDIA HPD logic and - * interrupts to check the external panel connection. - */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) && port == PORT_B) - dev_priv->hotplug.irq_port[PORT_A] = intel_dig_port; - else - dev_priv->hotplug.irq_port[port] = intel_dig_port; + dev_priv->hotplug.irq_port[port] = intel_dig_port; } /* In theory we don't need the encoder->type check, but leave it just in diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0f14e97e519b..dee5fc758109 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5995,8 +5995,6 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, break; case PORT_B: intel_encoder->hpd_pin = HPD_PORT_B; - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - intel_encoder->hpd_pin = HPD_PORT_A; break; case PORT_C: intel_encoder->hpd_pin = HPD_PORT_C; diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index af16b0fa6b69..6c83442e2152 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1868,14 +1868,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, switch (port) { case PORT_B: - /* - * On BXT A0/A1, sw needs to activate DDIA HPD logic and - * interrupts to check the external panel connection. - */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - intel_encoder->hpd_pin = HPD_PORT_A; - else - intel_encoder->hpd_pin = HPD_PORT_B; + intel_encoder->hpd_pin = HPD_PORT_B; break; case PORT_C: intel_encoder->hpd_pin = HPD_PORT_C; From b71953a16da6fd2df2cb73824064f885e9a582d0 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 3 Feb 2017 16:03:14 +0200 Subject: [PATCH 0129/1299] drm/i915/dp: Move initialization of hpd_pin to a new function This shaves a few lines from intel_dp_init_connector() and will serve as a good place to add other port specific information in a follow up patch. While at it, convert BUG() to MISSING_CASE() in the default case. v2: s/BUG/MISSING_CASE. (Chris) Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170203140316.20792-2-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 48 +++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index dee5fc758109..fa77e96c49c7 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5903,6 +5903,33 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, return false; } +static void +intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) +{ + struct intel_encoder *encoder = &intel_dig_port->base; + + /* Set up the hotplug pin. */ + switch (intel_dig_port->port) { + case PORT_A: + encoder->hpd_pin = HPD_PORT_A; + break; + case PORT_B: + encoder->hpd_pin = HPD_PORT_B; + break; + case PORT_C: + encoder->hpd_pin = HPD_PORT_C; + break; + case PORT_D: + encoder->hpd_pin = HPD_PORT_D; + break; + case PORT_E: + encoder->hpd_pin = HPD_PORT_E; + break; + default: + MISSING_CASE(intel_dig_port->port); + } +} + bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector) @@ -5988,26 +6015,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, else intel_connector->get_hw_state = intel_connector_get_hw_state; - /* Set up the hotplug pin. */ - switch (port) { - case PORT_A: - intel_encoder->hpd_pin = HPD_PORT_A; - break; - case PORT_B: - intel_encoder->hpd_pin = HPD_PORT_B; - break; - case PORT_C: - intel_encoder->hpd_pin = HPD_PORT_C; - break; - case PORT_D: - intel_encoder->hpd_pin = HPD_PORT_D; - break; - case PORT_E: - intel_encoder->hpd_pin = HPD_PORT_E; - break; - default: - BUG(); - } + intel_dp_init_connector_port_info(intel_dig_port); /* init MST on ports that can support it */ if (HAS_DP_MST(dev_priv) && !is_edp(intel_dp) && From c282222a45cb9503cbfbebfdb60491f06ae84b49 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 8 Feb 2017 11:52:29 +0100 Subject: [PATCH 0130/1299] xfrm: policy: init locks early Dmitry reports following splat: INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 13059 Comm: syz-executor1 Not tainted 4.10.0-rc7-next-20170207 #1 [..] spin_lock_bh include/linux/spinlock.h:304 [inline] xfrm_policy_flush+0x32/0x470 net/xfrm/xfrm_policy.c:963 xfrm_policy_fini+0xbf/0x560 net/xfrm/xfrm_policy.c:3041 xfrm_net_init+0x79f/0x9e0 net/xfrm/xfrm_policy.c:3091 ops_init+0x10a/0x530 net/core/net_namespace.c:115 setup_net+0x2ed/0x690 net/core/net_namespace.c:291 copy_net_ns+0x26c/0x530 net/core/net_namespace.c:396 create_new_namespaces+0x409/0x860 kernel/nsproxy.c:106 unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205 SYSC_unshare kernel/fork.c:2281 [inline] Problem is that when we get error during xfrm_net_init we will call xfrm_policy_fini which will acquire xfrm_policy_lock before it was initialized. Just move it around so locks get set up first. Reported-by: Dmitry Vyukov Fixes: 283bc9f35bbbcb0e9 ("xfrm: Namespacify xfrm state/policy locks") Signed-off-by: Florian Westphal Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 177e208e8ff5..3c8f5b70abf8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3062,6 +3062,11 @@ static int __net_init xfrm_net_init(struct net *net) { int rv; + /* Initialize the per-net locks here */ + spin_lock_init(&net->xfrm.xfrm_state_lock); + spin_lock_init(&net->xfrm.xfrm_policy_lock); + mutex_init(&net->xfrm.xfrm_cfg_mutex); + rv = xfrm_statistics_init(net); if (rv < 0) goto out_statistics; @@ -3078,11 +3083,6 @@ static int __net_init xfrm_net_init(struct net *net) if (rv < 0) goto out; - /* Initialize the per-net locks here */ - spin_lock_init(&net->xfrm.xfrm_state_lock); - spin_lock_init(&net->xfrm.xfrm_policy_lock); - mutex_init(&net->xfrm.xfrm_cfg_mutex); - return 0; out: From e81ecb5e31db6c2a259d694738cf620d9fa70861 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Wed, 8 Feb 2017 21:03:33 +0800 Subject: [PATCH 0131/1299] drm/i915: A hotfix for making aliasing PPGTT work for GVT-g This patch makes PPGTT page table non-shrinkable when using aliasing PPGTT mode. It's just a temporary solution for making GVT-g work. Fixes: 2ce5179fe826 ("drm/i915/gtt: Free unused lower-level page tables") Cc: Tvrtko Ursulin Cc: Michal Winiarski Cc: Michel Thierry Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Daniel Vetter Cc: Zhenyu Wang Cc: Zhiyuan Lv Signed-off-by: Zhi Wang Link: http://patchwork.freedesktop.org/patch/msgid/1486559013-25251-2-git-send-email-zhi.a.wang@intel.com Reviewed-by: Chris Wilson Cc: # v4.10-rc1+ Cc: stable@vger.kernel.org Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_gtt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 850d40ec77af..90acddbaaa5e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -755,9 +755,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, GEM_BUG_ON(pte_end > GEN8_PTES); bitmap_clear(pt->used_ptes, pte, num_entries); - - if (bitmap_empty(pt->used_ptes, GEN8_PTES)) - return true; + if (USES_FULL_PPGTT(vm->i915)) { + if (bitmap_empty(pt->used_ptes, GEN8_PTES)) + return true; + } pt_vaddr = kmap_px(pt); From 969bb72cbfd906d347cf76dc9b8c8dbaf83ba27a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Feb 2017 18:12:38 +0000 Subject: [PATCH 0132/1299] drm/i915: Check for timeout completion when waiting for the rq to submitted We first wait for a request to be submitted to hw and assigned a seqno, before we can wait for the hw to signal completion (otherwise we don't know the hw id we need to wait upon). Whilst waiting for the request to be submitted, we may exceed the user's timeout and need to propagate the error back. v2: Make ETIME into an error from wait_for_execute for consistent exit handling. Reported-by: Tvrtko Ursulin Fixes: 4680816be336 ("drm/i915: Wait first for submission, before waiting for request completion") Testcase: igt/gem_wait/basic-await Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: # v4.10-rc1+ Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20170208181238.7232-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_request.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 72b7f7d9461d..f31deeb72703 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1025,8 +1025,13 @@ __i915_request_wait_for_execute(struct drm_i915_gem_request *request, break; } + if (!timeout) { + timeout = -ETIME; + break; + } + timeout = io_schedule_timeout(timeout); - } while (timeout); + } while (1); finish_wait(&request->execute.wait, &wait); if (flags & I915_WAIT_LOCKED) From e1cc3db020c7ef864b7aa02d5e193a110f8f3450 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Feb 2017 11:19:33 +0000 Subject: [PATCH 0133/1299] drm/i915: Assert that we never create a vma for the aliasing_ppgtt The aliasing_ppgtt is just a container for the HW context that mirrors the global gtt. It should never be used directly, so assert if we make the mistake of trying to allocate a VMA for it. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170209111933.12420-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_vma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 155906e84812..e75494c1ef52 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -78,6 +78,9 @@ vma_create(struct drm_i915_gem_object *obj, struct rb_node *rb, **p; int i; + /* The aliasing_ppgtt should never be used directly! */ + GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); + vma = kmem_cache_zalloc(vm->i915->vmas, GFP_KERNEL); if (vma == NULL) return ERR_PTR(-ENOMEM); From 6d1f9fb3120c9865c28f56ceba8d706a9ecb4b56 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Thu, 9 Feb 2017 13:34:25 +0200 Subject: [PATCH 0134/1299] drm/i915: Add __destroy_hw_context __create_hw_context can use a good counterpart. Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1486640065-13695-1-git-send-email-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_context.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 680105421bb9..0c9acd29c4d1 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -351,6 +351,13 @@ __create_hw_context(struct drm_i915_private *dev_priv, return ERR_PTR(ret); } +static void __destroy_hw_context(struct i915_gem_context *ctx, + struct drm_i915_file_private *file_priv) +{ + idr_remove(&file_priv->context_idr, ctx->user_handle); + context_close(ctx); +} + /** * The default context needs to exist per ring that uses contexts. It stores the * context state of the GPU for applications that don't utilize HW contexts, as @@ -375,8 +382,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); - idr_remove(&file_priv->context_idr, ctx->user_handle); - context_close(ctx); + __destroy_hw_context(ctx, file_priv); return ERR_CAST(ppgtt); } @@ -1038,8 +1044,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, return PTR_ERR(ctx); } - idr_remove(&file_priv->context_idr, ctx->user_handle); - context_close(ctx); + __destroy_hw_context(ctx, file_priv); mutex_unlock(&dev->struct_mutex); DRM_DEBUG("HW context %d destroyed\n", args->ctx_id); From 8d2b47dde8a097e6fef2ebb5042cbb267cc75adf Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 2 Feb 2017 08:41:42 +0100 Subject: [PATCH 0135/1299] drm/i915: Enable atomic support by default on supported platforms. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915 is pretty much feature complete. Support for atomic i915-specific connector properties is still missing; those properties can (for now) only be set through the legacy ioctl. ILK style watermarks and gen9+ watermarks are handled atomically, and nonblocking modesets work. FBC has also been made to work with atomic. gen4x- and vlv/chv watermarks still need to be fixed, so disable atomic by default there for now. Flip the switch!! Signed-off-by: Maarten Lankhorst Cc: Ander Conselvan de Oliveira Cc: Chris Wilson Cc: Daniel Vetter Cc: Lyude Cc: Matt Roper Cc: Paulo Zanoni Cc: Rodrigo Vivi Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1486021302-24910-1-git-send-email-maarten.lankhorst@linux.intel.com [mlankhorst: Fix checkpatch warning about extra space in match_info] Acked-by: Daniel Stone Reviewed-by: Lyude --- drivers/gpu/drm/i915/i915_drv.c | 10 +++++++--- drivers/gpu/drm/i915/i915_params.c | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d48c02a7933f..a17dde86dc8c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1214,11 +1214,15 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) */ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) { + const struct intel_device_info *match_info = + (struct intel_device_info *)ent->driver_data; struct drm_i915_private *dev_priv; int ret; - if (i915.nuclear_pageflip) - driver.driver_features |= DRIVER_ATOMIC; + /* Enable nuclear pageflip on ILK+, except vlv/chv */ + if (!i915.nuclear_pageflip && + (match_info->gen < 5 || match_info->has_gmch_display)) + driver.driver_features &= ~DRIVER_ATOMIC; ret = -ENOMEM; dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL); @@ -2624,7 +2628,7 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME | - DRIVER_RENDER | DRIVER_MODESET, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC, .open = i915_driver_open, .lastclose = i915_driver_lastclose, .preclose = i915_driver_preclose, diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index c2679fa7ed11..2e9645e6555a 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -205,9 +205,9 @@ module_param_named(verbose_state_checks, i915.verbose_state_checks, bool, 0600); MODULE_PARM_DESC(verbose_state_checks, "Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions."); -module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0600); +module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0400); MODULE_PARM_DESC(nuclear_pageflip, - "Force atomic modeset functionality; asynchronous mode is not yet supported. (default: false)."); + "Force enable atomic functionality on platforms that don't have full support yet."); /* WA to get away with the default setting in VBT for early platforms.Will be removed */ module_param_named_unsafe(edp_vswing, i915.edp_vswing, int, 0400); From 949e8ab3a94befd514eb79f2535f6017d8b4488f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Feb 2017 14:40:36 +0000 Subject: [PATCH 0136/1299] drm/i915: Use the size/type of address space to make decisions Once the address space has been created (using 3 or 4 levels of page tables), we should use that to program the appropriate type into the contexts. This gives us the flexibility to handle different types of address spaces at runtime. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Matthew Auld Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170209144036.23664-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_context.c | 20 +++++++++++++------- drivers/gpu/drm/i915/i915_gem_gtt.h | 6 ++++++ drivers/gpu/drm/i915/i915_reg.h | 3 --- drivers/gpu/drm/i915/intel_lrc.c | 6 +++--- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 0c9acd29c4d1..fb11b674f319 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -236,16 +236,20 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) return 0; } -static u32 default_desc_template(const struct drm_i915_private *dev_priv) +static u32 default_desc_template(const struct drm_i915_private *i915, + const struct i915_hw_ppgtt *ppgtt) { + u32 address_mode; u32 desc; - desc = GEN8_CTX_VALID | - GEN8_CTX_PRIVILEGE | - GEN8_CTX_ADDRESSING_MODE(dev_priv) << - GEN8_CTX_ADDRESSING_MODE_SHIFT; + desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; - if (IS_GEN8(dev_priv)) + address_mode = INTEL_LEGACY_32B_CONTEXT; + if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) + address_mode = INTEL_LEGACY_64B_CONTEXT; + desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; + + if (IS_GEN8(i915)) desc |= GEN8_CTX_L3LLC_COHERENT; /* TODO: WaDisableLiteRestore when we start using semaphore @@ -329,7 +333,8 @@ __create_hw_context(struct drm_i915_private *dev_priv, i915_gem_context_set_bannable(ctx); ctx->ring_size = 4 * PAGE_SIZE; - ctx->desc_template = default_desc_template(dev_priv); + ctx->desc_template = + default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt); ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier); /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not @@ -387,6 +392,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, } ctx->ppgtt = ppgtt; + ctx->desc_template = default_desc_template(dev_priv, ppgtt); } trace_i915_context_create(ctx); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 3c5ef5358cef..7e678ce5a9c7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -525,6 +525,12 @@ i915_vm_to_ggtt(struct i915_address_space *vm) return container_of(vm, struct i915_ggtt, base); } +static inline bool +i915_vm_is_48bit(const struct i915_address_space *vm) +{ + return (vm->total - 1) >> 32; +} + int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index bd2ff1f87f32..ad666933a88d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3373,9 +3373,6 @@ enum { #define GEN8_CTX_L3LLC_COHERENT (1<<5) #define GEN8_CTX_PRIVILEGE (1<<8) #define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 -#define GEN8_CTX_ADDRESSING_MODE(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\ - INTEL_LEGACY_64B_CONTEXT : \ - INTEL_LEGACY_32B_CONTEXT) #define GEN8_CTX_ID_SHIFT 32 #define GEN8_CTX_ID_WIDTH 21 diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e42990b56fa8..b21dbd44045e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -332,7 +332,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) * PML4 is allocated during ppgtt init, so this is not needed * in 48-bit mode. */ - if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) + if (ppgtt && !i915_vm_is_48bit(&ppgtt->base)) execlists_update_context_pdps(ppgtt, reg_state); return ce->lrc_desc; @@ -1447,7 +1447,7 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, * not needed in 48-bit.*/ if (req->ctx->ppgtt && (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) { - if (!USES_FULL_48BIT_PPGTT(req->i915) && + if (!i915_vm_is_48bit(&req->ctx->ppgtt->base) && !intel_vgpu_active(req->i915)) { ret = intel_logical_ring_emit_pdps(req); if (ret) @@ -2045,7 +2045,7 @@ static void execlists_init_reg_state(u32 *reg_state, ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0); - if (ppgtt && USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { + if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and * other PDP Descriptors are ignored. From d8fc70b7367b86ca14e825f8508f91bbbf237f3c Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 9 Feb 2017 11:31:21 +0200 Subject: [PATCH 0137/1299] drm/i915: Make power domain masks 64 bit long MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are currently 30 power domains, which puts us pretty close to the limit with 32 bit masks. Prepare for the future and increase the limit to 64 bit. v2: Rebase v3: s/unsigned long long/u64/ (Joonas) Allow the 64th bit of the mask to be used. (Joonas) Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Ville Syrjälä Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170209093121.24410-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/intel_display.c | 34 +-- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 378 ++++++++++++------------ 4 files changed, 209 insertions(+), 209 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 604b20ff0390..03573f084749 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -494,7 +494,7 @@ struct i915_hotplug { #define for_each_power_domain(domain, mask) \ for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ - for_each_if ((1 << (domain)) & (mask)) + for_each_if (BIT_ULL(domain) & (mask)) struct drm_i915_private; struct i915_mm_struct; @@ -1405,7 +1405,7 @@ struct i915_power_well { int count; /* cached hw enabled state */ bool hw_enabled; - unsigned long domains; + u64 domains; /* unique identifier for this power well */ unsigned long id; /* diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f6259c949da2..ab78df9191f9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5666,15 +5666,15 @@ intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder) } } -static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, - struct intel_crtc_state *crtc_state) +static u64 get_crtc_power_domains(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_encoder *encoder; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; - unsigned long mask; + u64 mask; enum transcoder transcoder = crtc_state->cpu_transcoder; if (!crtc_state->base.active) @@ -5684,19 +5684,19 @@ static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, mask |= BIT(POWER_DOMAIN_TRANSCODER(transcoder)); if (crtc_state->pch_pfit.enabled || crtc_state->pch_pfit.force_thru) - mask |= BIT(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); + mask |= BIT_ULL(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); drm_for_each_encoder_mask(encoder, dev, crtc_state->base.encoder_mask) { struct intel_encoder *intel_encoder = to_intel_encoder(encoder); - mask |= BIT(intel_display_port_power_domain(intel_encoder)); + mask |= BIT_ULL(intel_display_port_power_domain(intel_encoder)); } if (HAS_DDI(dev_priv) && crtc_state->has_audio) mask |= BIT(POWER_DOMAIN_AUDIO); if (crtc_state->shared_dpll) - mask |= BIT(POWER_DOMAIN_PLLS); + mask |= BIT_ULL(POWER_DOMAIN_PLLS); return mask; } @@ -5708,7 +5708,7 @@ modeset_get_crtc_power_domains(struct drm_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum intel_display_power_domain domain; - unsigned long domains, new_domains, old_domains; + u64 domains, new_domains, old_domains; old_domains = intel_crtc->enabled_power_domains; intel_crtc->enabled_power_domains = new_domains = @@ -5723,7 +5723,7 @@ modeset_get_crtc_power_domains(struct drm_crtc *crtc, } static void modeset_put_power_domains(struct drm_i915_private *dev_priv, - unsigned long domains) + u64 domains) { enum intel_display_power_domain domain; @@ -8992,7 +8992,7 @@ static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, static bool hsw_get_transcoder_state(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, - unsigned long *power_domain_mask) + u64 *power_domain_mask) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -9034,7 +9034,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) return false; - *power_domain_mask |= BIT(power_domain); + *power_domain_mask |= BIT_ULL(power_domain); tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder)); @@ -9043,7 +9043,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, - unsigned long *power_domain_mask) + u64 *power_domain_mask) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -9061,7 +9061,7 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) continue; - *power_domain_mask |= BIT(power_domain); + *power_domain_mask |= BIT_ULL(power_domain); /* * The PLL needs to be enabled with a valid divider @@ -9136,13 +9136,13 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum intel_display_power_domain power_domain; - unsigned long power_domain_mask; + u64 power_domain_mask; bool active; power_domain = POWER_DOMAIN_PIPE(crtc->pipe); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) return false; - power_domain_mask = BIT(power_domain); + power_domain_mask = BIT_ULL(power_domain); pipe_config->shared_dpll = NULL; @@ -9176,7 +9176,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe); if (intel_display_power_get_if_enabled(dev_priv, power_domain)) { - power_domain_mask |= BIT(power_domain); + power_domain_mask |= BIT_ULL(power_domain); if (INTEL_GEN(dev_priv) >= 9) skylake_get_pfit_config(crtc, pipe_config); else @@ -12841,7 +12841,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) struct drm_crtc *crtc; struct intel_crtc_state *intel_cstate; bool hw_check = intel_state->modeset; - unsigned long put_domains[I915_MAX_PIPES] = {}; + u64 put_domains[I915_MAX_PIPES] = {}; unsigned crtc_vblank_mask = 0; int i; @@ -15565,7 +15565,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev) ilk_wm_get_hw_state(dev); for_each_intel_crtc(dev, crtc) { - unsigned long put_domains; + u64 put_domains; put_domains = modeset_get_crtc_power_domains(&crtc->base, crtc->config); if (WARN_ON(put_domains)) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 321ba45967aa..1298aad7d832 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -715,7 +715,7 @@ struct intel_crtc { bool active; bool lowfreq_avail; u8 plane_ids_mask; - unsigned long enabled_power_domains; + unsigned long long enabled_power_domains; struct intel_overlay *overlay; struct intel_flip_work *flip_work; diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 0f00a5aab69c..879567987201 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -210,7 +210,7 @@ bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, is_enabled = true; - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { + for_each_power_well_rev(i, power_well, BIT_ULL(domain), power_domains) { if (power_well->always_on) continue; @@ -385,124 +385,124 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, } #define SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_E_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DDI_A_E_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_E_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DDI_D_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \ BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DPIO_CMN_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DDI_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DPIO_CMN_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DPIO_CMN_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DPIO_CMN_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DC_OFF_POWER_DOMAINS ( \ GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) static void assert_can_enable_dc9(struct drm_i915_private *dev_priv) { @@ -1251,7 +1251,7 @@ static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, vlv_set_power_well(dev_priv, power_well, false); } -#define POWER_DOMAIN_MASK (GENMASK(POWER_DOMAIN_NUM - 1, 0)) +#define POWER_DOMAIN_MASK (GENMASK_ULL(POWER_DOMAIN_NUM - 1, 0)) static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, int power_well_id) @@ -1697,7 +1697,7 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv, struct i915_power_well *power_well; int i; - for_each_power_well(i, power_well, BIT(domain), power_domains) + for_each_power_well(i, power_well, BIT_ULL(domain), power_domains) intel_power_well_get(dev_priv, power_well); power_domains->domain_use_count[domain]++; @@ -1792,7 +1792,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, intel_display_power_domain_str(domain)); power_domains->domain_use_count[domain]--; - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) + for_each_power_well_rev(i, power_well, BIT_ULL(domain), power_domains) intel_power_well_put(dev_priv, power_well); mutex_unlock(&power_domains->lock); @@ -1801,117 +1801,117 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, } #define HSW_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BDW_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DSI) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_DSI) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DPIO_CMN_D_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_INIT)) static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_always_on_power_well_noop, @@ -2388,7 +2388,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) dev_priv->csr.allowed_dc_mask = get_allowed_dc_mask(dev_priv, i915.enable_dc); - BUILD_BUG_ON(POWER_DOMAIN_NUM > 31); + BUILD_BUG_ON(POWER_DOMAIN_NUM > 64); mutex_init(&power_domains->lock); From 370a81fb89cbb1a7f3ef119f84b3bd6d0ffebcf6 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 13 Jan 2017 14:20:32 +0200 Subject: [PATCH 0138/1299] drm/i915: Remove unused function intel_ddi_get_link_dpll() The function intel_ddi_get_link_dpll() was added in f169660ed4e5 ("drm/i915/dp: Add a standalone function to obtain shared dpll for HSW/BDW/SKL/BXT") to "allow for the implementation of a platform neutral upfront link training function", but such implementation never landed. So remove that function and clean up the exported shared DPLL interface. Fixes: f169660ed4e5 ("drm/i915/dp: Add a standalone function to obtain shared dpll for HSW/BDW/SKL/BXT") Cc: Durgadoss R Cc: Manasi Navare Cc: Jim Bride Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Ander Conselvan de Oliveira Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484310032-1863-1-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 39 -------------------- drivers/gpu/drm/i915/intel_dpll_mgr.c | 52 +++++---------------------- drivers/gpu/drm/i915/intel_dpll_mgr.h | 16 --------- drivers/gpu/drm/i915/intel_drv.h | 2 -- 4 files changed, 8 insertions(+), 101 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index b5b3065cb60c..cd6fedd229a0 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2116,45 +2116,6 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) return connector; } -struct intel_shared_dpll * -intel_ddi_get_link_dpll(struct intel_dp *intel_dp, int clock) -{ - struct intel_connector *connector = intel_dp->attached_connector; - struct intel_encoder *encoder = connector->encoder; - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct intel_shared_dpll *pll = NULL; - struct intel_shared_dpll_state tmp_pll_state; - enum intel_dpll_id dpll_id; - - if (IS_GEN9_LP(dev_priv)) { - dpll_id = (enum intel_dpll_id)dig_port->port; - /* - * Select the required PLL. This works for platforms where - * there is no shared DPLL. - */ - pll = &dev_priv->shared_dplls[dpll_id]; - if (WARN_ON(pll->active_mask)) { - - DRM_ERROR("Shared DPLL in use. active_mask:%x\n", - pll->active_mask); - return NULL; - } - tmp_pll_state = pll->state; - if (!bxt_ddi_dp_set_dpll_hw_state(clock, - &pll->state.hw_state)) { - DRM_ERROR("Could not setup DPLL\n"); - pll->state = tmp_pll_state; - return NULL; - } - } else if (IS_GEN9_BC(dev_priv)) { - pll = skl_find_link_pll(dev_priv, clock); - } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - pll = hsw_ddi_dp_get_dpll(encoder, clock); - } - return pll; -} - void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) { struct intel_digital_port *intel_dig_port; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 79f656ca593d..b4de632f1158 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -42,44 +42,6 @@ * commit phase. */ -struct intel_shared_dpll * -skl_find_link_pll(struct drm_i915_private *dev_priv, int clock) -{ - struct intel_shared_dpll *pll = NULL; - struct intel_dpll_hw_state dpll_hw_state; - enum intel_dpll_id i; - bool found = false; - - if (!skl_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state)) - return pll; - - for (i = DPLL_ID_SKL_DPLL1; i <= DPLL_ID_SKL_DPLL3; i++) { - pll = &dev_priv->shared_dplls[i]; - - /* Only want to check enabled timings first */ - if (pll->state.crtc_mask == 0) - continue; - - if (memcmp(&dpll_hw_state, &pll->state.hw_state, - sizeof(pll->state.hw_state)) == 0) { - found = true; - break; - } - } - - /* Ok no matching timings, maybe there's a free one? */ - for (i = DPLL_ID_SKL_DPLL1; - ((found == false) && (i <= DPLL_ID_SKL_DPLL3)); i++) { - pll = &dev_priv->shared_dplls[i]; - if (pll->state.crtc_mask == 0) { - pll->state.hw_state = dpll_hw_state; - break; - } - } - - return pll; -} - static void intel_atomic_duplicate_dpll_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll_state *shared_dpll) @@ -811,8 +773,8 @@ static struct intel_shared_dpll *hsw_ddi_hdmi_get_dpll(int clock, return pll; } -struct intel_shared_dpll *hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, - int clock) +static struct intel_shared_dpll * +hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, int clock) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_shared_dpll *pll; @@ -1360,8 +1322,9 @@ static bool skl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, } -bool skl_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state) +static bool +skl_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state) { uint32_t ctrl1; @@ -1816,8 +1779,9 @@ static bool bxt_ddi_set_dpll_hw_state(int clock, return true; } -bool bxt_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state) +static bool +bxt_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state) { struct bxt_clk_div clk_div = {0}; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index af1497eb4f9c..f8d13a947c13 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -282,20 +282,4 @@ void intel_shared_dpll_init(struct drm_device *dev); void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state); -/* BXT dpll related functions */ -bool bxt_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state); - - -/* SKL dpll related functions */ -bool skl_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state); -struct intel_shared_dpll *skl_find_link_pll(struct drm_i915_private *dev_priv, - int clock); - - -/* HSW dpll related functions */ -struct intel_shared_dpll *hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, - int clock); - #endif /* _INTEL_DPLL_MGR_H_ */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 1298aad7d832..7845d40f203e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1232,8 +1232,6 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); -struct intel_shared_dpll *intel_ddi_get_link_dpll(struct intel_dp *intel_dp, - int clock); unsigned int intel_fb_align_height(struct drm_device *dev, unsigned int height, uint32_t pixel_format, From 72b72ae47324a0b9851e254bb525d1dc47995e4e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 10:14:22 +0000 Subject: [PATCH 0139/1299] drm/i915: Always pin contexts into the high GGTT Now that we have fast top-down insertion into the drm_mm, we can use it for frequent runtime operations like insertion of the context object, whereas before we limited it to the one-off insertion of the pinned kernel context. Keeping the active context objects out of the mappable region of the global GTT (except under memory pressure) improves our ability to allocate mappable aperture region without triggering a GPU stall. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170210101422.1598-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 4 +--- drivers/gpu/drm/i915/intel_ringbuffer.c | 12 +++--------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b21dbd44045e..697776d427b9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -773,11 +773,9 @@ static int execlists_context_pin(struct intel_engine_cs *engine, } GEM_BUG_ON(!ce->state); - flags = PIN_GLOBAL; + flags = PIN_GLOBAL | PIN_HIGH; if (ctx->ggtt_offset_bias) flags |= PIN_OFFSET_BIAS | ctx->ggtt_offset_bias; - if (i915_gem_context_is_kernel(ctx)) - flags |= PIN_HIGH; ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, flags); if (ret) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d3d1e64f2498..8ae78b79178f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2004,7 +2004,7 @@ intel_ring_free(struct intel_ring *ring) kfree(ring); } -static int context_pin(struct i915_gem_context *ctx, unsigned int flags) +static int context_pin(struct i915_gem_context *ctx) { struct i915_vma *vma = ctx->engine[RCS].state; int ret; @@ -2019,7 +2019,7 @@ static int context_pin(struct i915_gem_context *ctx, unsigned int flags) return ret; } - return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags); + return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | PIN_HIGH); } static int intel_ring_context_pin(struct intel_engine_cs *engine, @@ -2034,13 +2034,7 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, return 0; if (ce->state) { - unsigned int flags; - - flags = 0; - if (i915_gem_context_is_kernel(ctx)) - flags = PIN_HIGH; - - ret = context_pin(ctx, flags); + ret = context_pin(ctx); if (ret) goto error; } From 4f4631af8faf791a60264cf028a16d61804414c8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 13:36:32 +0000 Subject: [PATCH 0140/1299] drm/i915/byt: Take powerwell for reading PIPESTAT in debugfs [12493.693827] WARNING: CPU: 1 PID: 14860 at drivers/gpu/drm/i915/intel_uncore.c:795 __unclaimed_reg_debug+0x5d/0x80 [i915] [12493.693868] Unclaimed read from register 0x1f0024 [12493.693905] Modules linked in: vgem i915 drm_kms_helper drm intel_gtt i2c_algo_bit syscopyarea sysfillrect sysimgblt fb_sys_fops prime_numbers intel_powerclamp crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel cryptd lpc_ich i2c_i801 mfd_core video i2c_designware_platform i2c_designware_core i2c_core button autofs4 sd_mod ahci libahci libata scsi_mod [last unloaded: i915] [12493.694039] CPU: 1 PID: 14860 Comm: intel-gpu-overl Tainted: G U 4.10.0-rc7+ #11 [12493.694079] Hardware name: GIGABYTE GB-BXBT-1900/MZBAYAB-00, BIOS F8 03/02/2016 [12493.694121] Call Trace: [12493.694169] dump_stack+0x67/0x9d [12493.694235] __warn+0x117/0x140 [12493.694288] warn_slowpath_fmt+0x4f/0x60 [12493.694344] ? do_raw_spin_lock+0x116/0x180 [12493.694533] ? check_for_unclaimed_mmio+0x98/0xe0 [i915] [12493.694727] __unclaimed_reg_debug+0x5d/0x80 [i915] [12493.694923] fwtable_read32+0x2c5/0x330 [i915] [12493.695108] i915_interrupt_info+0xd52/0xf80 [i915] [12493.695302] ? gen6_write16+0x310/0x310 [i915] [12493.695357] seq_read+0x187/0x710 [12493.695412] full_proxy_read+0x75/0xc0 [12493.695472] __vfs_read+0x5a/0x220 [12493.695524] ? kmem_cache_free+0x6c/0x260 [12493.695577] ? putname+0x97/0xa0 [12493.695629] ? putname+0x97/0xa0 [12493.695682] ? rcu_read_lock_sched_held+0xb8/0xd0 [12493.695735] ? rw_verify_area+0x65/0x140 [12493.695787] vfs_read+0xd1/0x1f0 [12493.695840] SyS_read+0x62/0xc0 [12493.695893] entry_SYSCALL_64_fastpath+0x1c/0xb1 [12493.695943] RIP: 0033:0x7f82dca99ba0 [12493.695985] RSP: 002b:00007ffc0bdfd4f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [12493.696031] RAX: ffffffffffffffda RBX: 00007ffc0be005a0 RCX: 00007f82dca99ba0 [12493.696073] RDX: 0000000000001fff RSI: 00007ffc0bdfd500 RDI: 000000000000001a [12493.696115] RBP: ffffffff810fb639 R08: 302f6972642f6775 R09: 00007f82dca0999a [12493.696157] R10: 00007f82dcd62760 R11: 0000000000000246 R12: ffff880069a17f98 [12493.696199] R13: 00007ffc0bdfd428 R14: 0000000000000003 R15: 00007ffc0bdfd428 [12493.696250] ? trace_hardirqs_off_caller+0xd9/0x130 [12493.696300] ---[ end trace 52ccf4d39793cc59 ]--- Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99761 Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170210133632.16946-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_debugfs.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1ccc2978a21a..641527701123 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -875,10 +875,22 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(VLV_IIR_RW)); seq_printf(m, "Display IMR:\t%08x\n", I915_READ(VLV_IMR)); - for_each_pipe(dev_priv, pipe) + for_each_pipe(dev_priv, pipe) { + enum intel_display_power_domain power_domain; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, + power_domain)) { + seq_printf(m, "Pipe %c power disabled\n", + pipe_name(pipe)); + continue; + } + seq_printf(m, "Pipe %c stat:\t%08x\n", pipe_name(pipe), I915_READ(PIPESTAT(pipe))); + intel_display_power_put(dev_priv, power_domain); + } seq_printf(m, "Master IER:\t%08x\n", I915_READ(VLV_MASTER_IER)); From d158694f452252d0fef335a775aeb3eb74fe7af0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 8 Feb 2017 19:52:54 +0200 Subject: [PATCH 0141/1299] drm/i915: Avoid spurious WARNs about the wrong pipe in the PPS code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until recently vlv_steal_power_sequencer() wasn't being called for normal DP ports, and hence it could assert that it should only be called for pipe A and B (since pipe C doesn't support eDP). However that changed when we started to consider normal DP ports as well when choosing a PPS. So we will now get spurious warnings when vlv_steal_power_sequencer() does get called for pipe C. Avoid this by moving the WARN down into vlv_detach_power_sequencer() where this assertion should still hold. Cc: Imre Deak Cc: stable@vger.kernel.org Fixes: 9f2bdb006a7e ("drm/i915: Prevent PPS stealing from a normal DP port on VLV/CHV") References: https://bugs.freedesktop.org/show_bug.cgi?id=95287 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170208175254.10958-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_dp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index fa77e96c49c7..6ef4f3810082 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2905,6 +2905,9 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) WARN_ON(intel_dp->active_pipe != INVALID_PIPE); + if (WARN_ON(pipe != PIPE_A && pipe != PIPE_B)) + return; + edp_panel_vdd_off_sync(intel_dp); /* @@ -2932,9 +2935,6 @@ static void vlv_steal_power_sequencer(struct drm_device *dev, lockdep_assert_held(&dev_priv->pps_mutex); - if (WARN_ON(pipe != PIPE_A && pipe != PIPE_B)) - return; - for_each_intel_encoder(dev, encoder) { struct intel_dp *intel_dp; enum port port; From 6401c37dbd6264b045961868c1438bf03efdd104 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 8 Feb 2017 19:53:28 +0200 Subject: [PATCH 0142/1299] drm/i915: Simplify platform checks in intel_fb_pitch_limit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the VLV/CHV check with a HAS_GMCH_DISPLAY check in intel_fb_pitch_limit(), because it's shorter. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170208175328.11064-1-ville.syrjala@linux.intel.com Reviewed-by: Ander Conselvan de Oliveira --- drivers/gpu/drm/i915/intel_display.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ab78df9191f9..88b7d96c46a4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14346,8 +14346,7 @@ u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, * pixels and 32K bytes." */ return min(8192 * cpp, 32768); - } else if (gen >= 5 && !IS_VALLEYVIEW(dev_priv) && - !IS_CHERRYVIEW(dev_priv)) { + } else if (gen >= 5 && !HAS_GMCH_DISPLAY(dev_priv)) { return 32*1024; } else if (gen >= 4) { if (fb_modifier == I915_FORMAT_MOD_X_TILED) From 317eaa95081bfa081a5bf147e175b4e007e5a105 Mon Sep 17 00:00:00 2001 From: Lyude Date: Fri, 3 Feb 2017 21:18:25 -0500 Subject: [PATCH 0143/1299] drm/i915/debugfs: Add i915_hpd_storm_ctl This adds a file in i915's debugfs directory that allows userspace to manually control HPD storm detection. This is mainly for hotplugging tests, where we might want to test HPD storm functionality or disable storm detection to speed up hotplugging tests without breaking anything. Changes since v1: - Make HPD storm interval configurable - Misc code cleanup Signed-off-by: Lyude Acked-by: Jani Nikula Cc: Tomeu Vizoso --- drivers/gpu/drm/i915/i915_debugfs.c | 78 +++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 4 ++ drivers/gpu/drm/i915/i915_irq.c | 2 + drivers/gpu/drm/i915/intel_hotplug.c | 15 ++++-- 5 files changed, 94 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 641527701123..cd023fda1a3b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4653,6 +4653,81 @@ static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor) return drm_add_fake_info_node(minor, ent, &i915_forcewake_fops); } +static int i915_hpd_storm_ctl_show(struct seq_file *m, void *data) +{ + struct drm_i915_private *dev_priv = m->private; + struct i915_hotplug *hotplug = &dev_priv->hotplug; + + seq_printf(m, "Threshold: %d\n", hotplug->hpd_storm_threshold); + seq_printf(m, "Detected: %s\n", + yesno(delayed_work_pending(&hotplug->reenable_work))); + + return 0; +} + +static ssize_t i915_hpd_storm_ctl_write(struct file *file, + const char __user *ubuf, size_t len, + loff_t *offp) +{ + struct seq_file *m = file->private_data; + struct drm_i915_private *dev_priv = m->private; + struct i915_hotplug *hotplug = &dev_priv->hotplug; + unsigned int new_threshold; + int i; + char *newline; + char tmp[16]; + + if (len >= sizeof(tmp)) + return -EINVAL; + + if (copy_from_user(tmp, ubuf, len)) + return -EFAULT; + + tmp[len] = '\0'; + + /* Strip newline, if any */ + newline = strchr(tmp, '\n'); + if (newline) + *newline = '\0'; + + if (strcmp(tmp, "reset") == 0) + new_threshold = HPD_STORM_DEFAULT_THRESHOLD; + else if (kstrtouint(tmp, 10, &new_threshold) != 0) + return -EINVAL; + + if (new_threshold > 0) + DRM_DEBUG_KMS("Setting HPD storm detection threshold to %d\n", + new_threshold); + else + DRM_DEBUG_KMS("Disabling HPD storm detection\n"); + + spin_lock_irq(&dev_priv->irq_lock); + hotplug->hpd_storm_threshold = new_threshold; + /* Reset the HPD storm stats so we don't accidentally trigger a storm */ + for_each_hpd_pin(i) + hotplug->stats[i].count = 0; + spin_unlock_irq(&dev_priv->irq_lock); + + /* Re-enable hpd immediately if we were in an irq storm */ + flush_delayed_work(&dev_priv->hotplug.reenable_work); + + return len; +} + +static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file) +{ + return single_open(file, i915_hpd_storm_ctl_show, inode->i_private); +} + +static const struct file_operations i915_hpd_storm_ctl_fops = { + .owner = THIS_MODULE, + .open = i915_hpd_storm_ctl_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = i915_hpd_storm_ctl_write +}; + static int i915_debugfs_create(struct dentry *root, struct drm_minor *minor, const char *name, @@ -4746,7 +4821,8 @@ static const struct i915_debugfs_files { {"i915_dp_test_data", &i915_displayport_test_data_fops}, {"i915_dp_test_type", &i915_displayport_test_type_fops}, {"i915_dp_test_active", &i915_displayport_test_active_fops}, - {"i915_guc_log_control", &i915_guc_log_control_fops} + {"i915_guc_log_control", &i915_guc_log_control_fops}, + {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops} }; int i915_debugfs_register(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a17dde86dc8c..2394de1b19df 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -827,6 +827,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, spin_lock_init(&dev_priv->gpu_error.lock); mutex_init(&dev_priv->backlight_lock); spin_lock_init(&dev_priv->uncore.lock); + spin_lock_init(&dev_priv->mm.object_stat_lock); spin_lock_init(&dev_priv->mmio_flip_lock); spin_lock_init(&dev_priv->wm.dsparb_lock); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 03573f084749..f7ff2df0c0c5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -384,6 +384,8 @@ enum hpd_pin { #define for_each_hpd_pin(__pin) \ for ((__pin) = (HPD_NONE + 1); (__pin) < HPD_NUM_PINS; (__pin)++) +#define HPD_STORM_DEFAULT_THRESHOLD 5 + struct i915_hotplug { struct work_struct hotplug_work; @@ -407,6 +409,8 @@ struct i915_hotplug { struct work_struct poll_init_work; bool poll_enabled; + unsigned int hpd_storm_threshold; + /* * if we get a HPD irq from DP and a HPD irq from non-DP * the non-DP HPD could block the workqueue on a mode config diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 9eb4c9c8a10b..27c13193f7a2 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4277,6 +4277,8 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (!IS_GEN2(dev_priv)) dev->vblank_disable_immediate = true; + dev_priv->hotplug.hpd_storm_threshold = HPD_STORM_DEFAULT_THRESHOLD; + dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index b62e3f8ad415..6a9c16508ab5 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -100,7 +100,6 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port) } #define HPD_STORM_DETECT_PERIOD 1000 -#define HPD_STORM_THRESHOLD 5 #define HPD_STORM_REENABLE_DELAY (2 * 60 * 1000) /** @@ -112,9 +111,13 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port) * storms. Only the pin specific stats and state are changed, the caller is * responsible for further action. * - * @HPD_STORM_THRESHOLD irqs are allowed within @HPD_STORM_DETECT_PERIOD ms, - * otherwise it's considered an irq storm, and the irq state is set to - * @HPD_MARK_DISABLED. + * The number of irqs that are allowed within @HPD_STORM_DETECT_PERIOD is + * stored in @dev_priv->hotplug.hpd_storm_threshold which defaults to + * @HPD_STORM_DEFAULT_THRESHOLD. If this threshold is exceeded, it's + * considered an irq storm and the irq state is set to @HPD_MARK_DISABLED. + * + * The HPD threshold can be controlled through i915_hpd_storm_ctl in debugfs, + * and should only be adjusted for automated hotplug testing. * * Return true if an irq storm was detected on @pin. */ @@ -123,13 +126,15 @@ static bool intel_hpd_irq_storm_detect(struct drm_i915_private *dev_priv, { unsigned long start = dev_priv->hotplug.stats[pin].last_jiffies; unsigned long end = start + msecs_to_jiffies(HPD_STORM_DETECT_PERIOD); + const int threshold = dev_priv->hotplug.hpd_storm_threshold; bool storm = false; if (!time_in_range(jiffies, start, end)) { dev_priv->hotplug.stats[pin].last_jiffies = jiffies; dev_priv->hotplug.stats[pin].count = 0; DRM_DEBUG_KMS("Received HPD interrupt on PIN %d - cnt: 0\n", pin); - } else if (dev_priv->hotplug.stats[pin].count > HPD_STORM_THRESHOLD) { + } else if (dev_priv->hotplug.stats[pin].count > threshold && + threshold) { dev_priv->hotplug.stats[pin].state = HPD_MARK_DISABLED; DRM_DEBUG_KMS("HPD interrupt storm detected on PIN %d\n", pin); storm = true; From cad3688ff00656face6c78a1028fd02288b4a960 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 16:35:21 +0000 Subject: [PATCH 0144/1299] drm/i915: Split device release from unload We may need to keep our memory management alive after we have unloaded the physical pci device. For example, if we have exported an object via dmabuf, that will keep the device around but the pci device may be removed before the dmabuf itself is released, use of the pci hardware will be revoked, but the memory and object management needs to persist for the dmabuf. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170210163523.17533-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2394de1b19df..221f1c56fd9c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1231,8 +1231,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) ret = drm_dev_init(&dev_priv->drm, &driver, &pdev->dev); if (ret) { DRM_DEV_ERROR(&pdev->dev, "allocation failed\n"); - kfree(dev_priv); - return ret; + goto out_free; } dev_priv->drm.pdev = pdev; @@ -1240,7 +1239,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) ret = pci_enable_device(pdev); if (ret) - goto out_free_priv; + goto out_fini; pci_set_drvdata(pdev, &dev_priv->drm); @@ -1304,9 +1303,11 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) i915_driver_cleanup_early(dev_priv); out_pci_disable: pci_disable_device(pdev); -out_free_priv: +out_fini: i915_load_error(dev_priv, "Device initialization failed (%d)\n", ret); - drm_dev_unref(&dev_priv->drm); + drm_dev_fini(&dev_priv->drm); +out_free: + kfree(dev_priv); return ret; } @@ -1385,8 +1386,16 @@ void i915_driver_unload(struct drm_device *dev) i915_driver_cleanup_mmio(dev_priv); intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); +} + +static void i915_driver_release(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); i915_driver_cleanup_early(dev_priv); + drm_dev_fini(&dev_priv->drm); + + kfree(dev_priv); } static int i915_driver_open(struct drm_device *dev, struct drm_file *file) @@ -2630,6 +2639,7 @@ static struct drm_driver driver = { .driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC, + .release = i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, .preclose = i915_driver_preclose, From 94d4a2a9aeec18d9a67f3353bfb9861402719fbd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 16:35:22 +0000 Subject: [PATCH 0145/1299] drm/i915: Unbind any residual objects/vma from the Global GTT on shutdown We may unload the PCI device before all users (such as dma-buf) are completely shutdown. This may leave VMA in the global GTT which we want to revoke, whilst keeping the objects themselves around to service the dma-buf. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170210163523.17533-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3c83f2b91af0..d580e2b4081b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2818,6 +2818,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_vma *vma, *vn; + + ggtt->base.closed = true; + + mutex_lock(&dev_priv->drm.struct_mutex); + WARN_ON(!list_empty(&ggtt->base.active_list)); + list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link) + WARN_ON(i915_vma_unbind(vma)); + mutex_unlock(&dev_priv->drm.struct_mutex); if (dev_priv->mm.aliasing_ppgtt) { struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; From c4d4c1c66be58e1e91a2eeada02c0cbdeb5fb350 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 16:35:23 +0000 Subject: [PATCH 0146/1299] drm/i915: Flush the freed object queue on device release As dmabufs may live beyond the PCI device removal, we need to flush the freed object worker on device release, and include a warning in case there is a leak. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170210163523.17533-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 306a2fb362a2..303c079b75e8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4659,7 +4659,9 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) { + i915_gem_drain_freed_objects(dev_priv); WARN_ON(!llist_empty(&dev_priv->mm.free_list)); + WARN_ON(dev_priv->mm.object_count); mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_timeline_fini(&dev_priv->gt.global_timeline); From 8c12d121590ebe5a43bf9a0aedbbeb192f257846 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 18:52:14 +0000 Subject: [PATCH 0147/1299] drm/i915: Move the irq_barrier for reset earlier into reset_prepare When updating the bookkeeping following the reset, we need the seqno to be coherent on the CPU prior to trusting its result for deciding whether any request is completed. We need the irq_barrier before we start making these decisions, i.e. in reset_prepare. References: https://bugs.freedesktop.org/show_bug.cgi?id=99733 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170210185214.23463-1-chris@chris-wilson.co.uk Reviewed-by: Michel Thierry --- drivers/gpu/drm/i915/i915_gem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 303c079b75e8..b8d869d7937d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2652,6 +2652,9 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) tasklet_disable(&engine->irq_tasklet); tasklet_kill(&engine->irq_tasklet); + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + if (engine_stalled(engine)) { request = i915_gem_find_active_request(engine); if (request && request->fence.error == -EIO) @@ -2748,9 +2751,6 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - request = i915_gem_find_active_request(engine); if (request && i915_gem_reset_request(request)) { DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", From ae9a043b0c5b0d90dcd9fb8b3a2d27f34f0725fc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Feb 2017 10:23:19 +0000 Subject: [PATCH 0148/1299] drm/i915: Rename conditional GEM execution macros After a brief discussion, we settled on a naming convention for the conditional GEM debugging data that should be clearer to the casual user: GEM_DEBUG Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170207102319.10910-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.h | 12 ++++++------ drivers/gpu/drm/i915/intel_lrc.c | 8 ++++---- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +++--- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 412fa2794cbb..5a49487368ca 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -29,17 +29,17 @@ #define GEM_BUG_ON(expr) BUG_ON(expr) #define GEM_WARN_ON(expr) WARN_ON(expr) -#define GEM_BUG_ONLY(expr) expr -#define GEM_BUG_ONLY_DECLARE(var) var -#define GEM_BUG_ONLY_ON(expr) GEM_BUG_ON(expr) +#define GEM_DEBUG_DECL(var) var +#define GEM_DEBUG_EXEC(expr) expr +#define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr) #else #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0) -#define GEM_BUG_ONLY(expr) do { } while (0) -#define GEM_BUG_ONLY_DECLARE(var) -#define GEM_BUG_ONLY_ON(expr) +#define GEM_DEBUG_DECL(var) +#define GEM_DEBUG_EXEC(expr) do { } while (0) +#define GEM_DEBUG_BUG_ON(expr) #endif #define I915_NUM_ENGINES 5 diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 697776d427b9..af717e1356a9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -351,7 +351,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_IN); desc[0] = execlists_update_context(port[0].request); - GEM_BUG_ONLY(port[0].context_id = upper_32_bits(desc[0])); + GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0])); port[0].count++; if (port[1].request) { @@ -359,7 +359,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_context_status_change(port[1].request, INTEL_CONTEXT_SCHEDULE_IN); desc[1] = execlists_update_context(port[1].request); - GEM_BUG_ONLY(port[1].context_id = upper_32_bits(desc[1])); + GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1])); port[1].count = 1; } else { desc[1] = 0; @@ -583,8 +583,8 @@ static void intel_lrc_irq_handler(unsigned long data) continue; /* Check the context/desc id for this event matches */ - GEM_BUG_ONLY_ON(readl(buf + 2 * idx + 1) != - port[0].context_id); + GEM_DEBUG_BUG_ON(readl(buf + 2 * idx + 1) != + port[0].context_id); GEM_BUG_ON(port[0].count == 0); if (--port[0].count == 0) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8ae78b79178f..9fba5664376e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2284,7 +2284,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) ring->space -= bytes; GEM_BUG_ON(ring->space < 0); - GEM_BUG_ONLY(ring->advance = ring->tail + bytes); + GEM_DEBUG_EXEC(ring->advance = ring->tail + bytes); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 896838ca502c..b122c3cc6dbb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -144,7 +144,7 @@ struct intel_ring { u32 head; u32 tail; - GEM_BUG_ONLY_DECLARE(u32 advance); + GEM_DEBUG_DECL(u32 advance); int space; int size; @@ -381,7 +381,7 @@ struct intel_engine_cs { struct execlist_port { struct drm_i915_gem_request *request; unsigned int count; - GEM_BUG_ONLY_DECLARE(u32 context_id); + GEM_DEBUG_DECL(u32 context_id); } execlist_port[2]; struct rb_root execlist_queue; struct rb_node *execlist_first; @@ -519,7 +519,7 @@ static inline void intel_ring_advance(struct intel_ring *ring) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ - GEM_BUG_ONLY_ON(ring->tail != ring->advance); + GEM_DEBUG_BUG_ON(ring->tail != ring->advance); } static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) From c00122f33fe65c00103330e9f85dd2d3a63a3d2c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 17:19:58 +0000 Subject: [PATCH 0149/1299] drm/i915: Assert that the active request hasn't been signaled As the request is not complete, it should not be signaled. Assert that this is true before we process the request for a reset. References: https://bugs.freedesktop.org/show_bug.cgi?id=99671 Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170212172002.23072-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b8d869d7937d..48922ff454e6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2611,6 +2611,8 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) continue; GEM_BUG_ON(request->engine != engine); + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &request->fence.flags)); return request; } From 8d613c539c74fa9055f88f4116196d7c820bd98f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 17:19:59 +0000 Subject: [PATCH 0150/1299] drm/i915: Always call i915_gem_reset_finish() following i915_gem_reset_prepare() As i915_gem_reset_finish() undoes the steps from i915_gem_reset_prepare() to leave the system in a fully-working state, e.g. to be able to free the breadcrumb signal threads, make sure that we always call it even on the error path. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170212172002.23072-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 221f1c56fd9c..ac1b3582ca55 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1873,10 +1873,10 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } - i915_gem_reset_finish(dev_priv); i915_queue_hangcheck(dev_priv); wakeup: + i915_gem_reset_finish(dev_priv); enable_irq(dev_priv->drm.irq); wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS); return; From 1d309634bcf4e3ce1c2b112bb04fcdec82ac70e1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 17:20:00 +0000 Subject: [PATCH 0151/1299] drm/i915: Kill the tasklet then disable Disabling the tasklet leaves it if scheduled on the ready to run list until it is re-enabled. This will leave the ksoftird thread spinning until satisfied. To prevent this situation on starting the GPU reset, we want to kill the tasklet first and then disable. The same problem will arise when a tasklet is scheduled from another device, so a better solution is required for the general case. Reported-by: Tvrtko Ursulin Fixes: 1f7b847d72c3 ("drm/i915: Disable engine->irq_tasklet around resets") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170212172002.23072-3-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 48922ff454e6..3c2c2296c1dc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2651,8 +2651,8 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) * Turning off the engine->irq_tasklet until the reset is over * prevents the race. */ - tasklet_disable(&engine->irq_tasklet); tasklet_kill(&engine->irq_tasklet); + tasklet_disable(&engine->irq_tasklet); if (engine->irq_seqno_barrier) engine->irq_seqno_barrier(engine); From fe3288b5da2c1286a7aac1fb1b2234caa752a81b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 17:20:01 +0000 Subject: [PATCH 0152/1299] drm/i915: Park the breadcrumbs signaler across a GPU reset The signal threads may be running concurrently with the GPU reset. The completion from the GPU run asynchronous with the reset and two threads may see different snapshots of the state, and the signaler may mark a request as complete as we try to reset it. We don't tolerate 2 different views of the same state and complain if we try to mark a request as failed if it is already complete. Disable the signal threads during reset to prevent this conflict (even though the conflict implies that the state we resetting to is invalid, we have already made our decision!). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99733 References: https://bugs.freedesktop.org/show_bug.cgi?id=99671 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170212172002.23072-4-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 16 +++++++++++++++- drivers/gpu/drm/i915/intel_breadcrumbs.c | 3 +++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3c2c2296c1dc..730804ce9610 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -35,6 +35,7 @@ #include "intel_frontbuffer.h" #include "intel_mocs.h" #include +#include #include #include #include @@ -2643,6 +2644,17 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *request; + /* Prevent the signaler thread from updating the request + * state (by calling dma_fence_signal) as we are processing + * the reset. The write from the GPU of the seqno is + * asynchronous and the signaler thread may see a different + * value to us and declare the request complete, even though + * the reset routine have picked that request as the active + * (incomplete) request. This conflict is not handled + * gracefully! + */ + kthread_park(engine->breadcrumbs.signaler); + /* Prevent request submission to the hardware until we have * completed the reset in i915_gem_reset_finish(). If a request * is completed by one engine, it may then queue a request @@ -2796,8 +2808,10 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { tasklet_enable(&engine->irq_tasklet); + kthread_unpark(engine->breadcrumbs.signaler); + } } static void nop_submit_request(struct drm_i915_gem_request *request) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 9fd002bcebb6..f5e05343110a 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -490,6 +490,9 @@ static int intel_breadcrumbs_signaler(void *arg) break; schedule(); + + if (kthread_should_park()) + kthread_parkme(); } } while (1); __set_current_state(TASK_RUNNING); From 2ae557388d9feb7049ee311ab1b36bb4c6f6feb6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 17:20:02 +0000 Subject: [PATCH 0153/1299] drm/i915: Clear the last_retired_context following a hang/reset Following a hang and reset, we know that the engine is idle and all context state has been saved or lost. Consequently, we know that the engine is no longer referencing the last context and we can relinquish our tracking. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170212172002.23072-5-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 730804ce9610..638b335e72b6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2788,8 +2788,14 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) i915_gem_retire_requests(dev_priv); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + struct i915_gem_context *ctx; + i915_gem_reset_engine(engine); + ctx = fetch_and_zero(&engine->last_retired_context); + if (ctx) + engine->context_unpin(engine, ctx); + } i915_gem_restore_fences(dev_priv); From 7e91c7df29b5e196de3dc6f086c8937973bd0b88 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 7 Feb 2017 19:58:02 +0200 Subject: [PATCH 0154/1299] swiotlb-xen: implement xen_swiotlb_dma_mmap callback This function creates userspace mapping for the DMA-coherent memory. Signed-off-by: Stefano Stabellini Signed-off-by: Oleksandr Dmytryshyn Signed-off-by: Andrii Anisov Signed-off-by: Konrad Rzeszutek Wilk --- arch/arm/xen/mm.c | 1 + drivers/xen/swiotlb-xen.c | 19 +++++++++++++++++++ include/xen/swiotlb-xen.h | 5 +++++ 3 files changed, 25 insertions(+) diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index bd62d94f8ac5..cd1684e4e864 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -198,6 +198,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { .unmap_page = xen_swiotlb_unmap_page, .dma_supported = xen_swiotlb_dma_supported, .set_dma_mask = xen_swiotlb_set_dma_mask, + .mmap = xen_swiotlb_dma_mmap, }; int __init xen_mm_init(void) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index f905d6eeb048..c7f61fc0572a 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -680,3 +680,22 @@ xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask) return 0; } EXPORT_SYMBOL_GPL(xen_swiotlb_set_dma_mask); + +/* + * Create userspace mapping for the DMA-coherent memory. + * This function should be called with the pages from the current domain only, + * passing pages mapped from other domains would lead to memory corruption. + */ +int +xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) + if (__generic_dma_ops(dev)->mmap) + return __generic_dma_ops(dev)->mmap(dev, vma, cpu_addr, + dma_addr, size, attrs); +#endif + return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mmap); diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index a0083be5d529..a315c876aaa9 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -55,4 +55,9 @@ xen_swiotlb_dma_supported(struct device *hwdev, u64 mask); extern int xen_swiotlb_set_dma_mask(struct device *dev, u64 dma_mask); + +extern int +xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); #endif /* __LINUX_SWIOTLB_XEN_H */ From 69369f52d28a34c84acb6f2a8a585e743441566a Mon Sep 17 00:00:00 2001 From: Andrii Anisov Date: Tue, 7 Feb 2017 19:58:03 +0200 Subject: [PATCH 0155/1299] swiotlb-xen: implement xen_swiotlb_get_sgtable callback Signed-off-by: Andrii Anisov Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/arm/xen/mm.c | 1 + drivers/xen/swiotlb-xen.c | 28 ++++++++++++++++++++++++++++ include/xen/swiotlb-xen.h | 6 ++++++ 3 files changed, 35 insertions(+) diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index cd1684e4e864..76ea48a614e1 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -199,6 +199,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = { .dma_supported = xen_swiotlb_dma_supported, .set_dma_mask = xen_swiotlb_set_dma_mask, .mmap = xen_swiotlb_dma_mmap, + .get_sgtable = xen_swiotlb_get_sgtable, }; int __init xen_mm_init(void) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index c7f61fc0572a..23e30b4e1fb6 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -699,3 +699,31 @@ xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); } EXPORT_SYMBOL_GPL(xen_swiotlb_dma_mmap); + +/* + * This function should be called with the pages from the current domain only, + * passing pages mapped from other domains would lead to memory corruption. + */ +int +xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + unsigned long attrs) +{ +#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) + if (__generic_dma_ops(dev)->get_sgtable) { +#if 0 + /* + * This check verifies that the page belongs to the current domain and + * is not one mapped from another domain. + * This check is for debug only, and should not go to production build + */ + unsigned long bfn = PHYS_PFN(dma_to_phys(dev, handle)); + BUG_ON (!page_is_ram(bfn)); +#endif + return __generic_dma_ops(dev)->get_sgtable(dev, sgt, cpu_addr, + handle, size, attrs); + } +#endif + return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size); +} +EXPORT_SYMBOL_GPL(xen_swiotlb_get_sgtable); diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h index a315c876aaa9..1f6d78f044b6 100644 --- a/include/xen/swiotlb-xen.h +++ b/include/xen/swiotlb-xen.h @@ -2,6 +2,7 @@ #define __LINUX_SWIOTLB_XEN_H #include +#include #include extern int xen_swiotlb_init(int verbose, bool early); @@ -60,4 +61,9 @@ extern int xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); + +extern int +xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + unsigned long attrs); #endif /* __LINUX_SWIOTLB_XEN_H */ From 953c7f82eb890085c60dbe22578e883d6837c674 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:12 +0000 Subject: [PATCH 0156/1299] drm/i915: Provide a hook for selftests Some pieces of code are independent of hardware but are very tricky to exercise through the normal userspace ABI or via debugfs hooks. Being able to create mock unit tests and execute them through CI is vital. Start by adding a central point where we can execute unit tests and a parameter to enable them. This is disabled by default as the expectation is that these tests will occasionally explode. To facilitate integration with igt, any parameter beginning with i915.igt__ is interpreted as a subtest executable independently via igt/drv_selftest. Two classes of selftests are recognised: mock unit tests and integration tests. Mock unit tests are run as soon as the module is loaded, before the device is probed. At that point there is no driver instantiated and all hw interactions must be "mocked". This is very useful for writing universal tests to exercise code not typically run on a broad range of architectures. Alternatively, you can hook into the live selftests and run when the device has been instantiated - hw interactions are real. v2: Add a macro for compiling conditional code for mock objects inside real objects. v3: Differentiate between mock unit tests and late integration test. v4: List the tests in natural order, use igt to sort after modparam. v5: s/late/live/ v6: s/unsigned long/unsigned int/ v7: Use igt_ prefixes for long helpers. v8: Deobfuscate macros overriding functions, stop using -I$(src) Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.debug | 16 ++ drivers/gpu/drm/i915/Makefile | 3 + drivers/gpu/drm/i915/i915_pci.c | 31 ++- drivers/gpu/drm/i915/i915_selftest.h | 102 +++++++ .../drm/i915/selftests/i915_live_selftests.h | 11 + .../drm/i915/selftests/i915_mock_selftests.h | 11 + drivers/gpu/drm/i915/selftests/i915_random.c | 63 +++++ drivers/gpu/drm/i915/selftests/i915_random.h | 50 ++++ .../gpu/drm/i915/selftests/i915_selftest.c | 250 ++++++++++++++++++ tools/testing/selftests/drivers/gpu/i915.sh | 1 + 10 files changed, 531 insertions(+), 7 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_selftest.h create mode 100644 drivers/gpu/drm/i915/selftests/i915_live_selftests.h create mode 100644 drivers/gpu/drm/i915/selftests/i915_mock_selftests.h create mode 100644 drivers/gpu/drm/i915/selftests/i915_random.c create mode 100644 drivers/gpu/drm/i915/selftests/i915_random.h create mode 100644 drivers/gpu/drm/i915/selftests/i915_selftest.c diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 598551dbf62c..a4d8cfd77c3c 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -26,6 +26,7 @@ config DRM_I915_DEBUG select DRM_DEBUG_MM if DRM=y select DRM_DEBUG_MM_SELFTEST select DRM_I915_SW_FENCE_DEBUG_OBJECTS + select DRM_I915_SELFTEST default n help Choose this option to turn on extra driver debugging that may affect @@ -59,3 +60,18 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS Recommended for driver developers only. If in doubt, say "N". + +config DRM_I915_SELFTEST + bool "Enable selftests upon driver load" + depends on DRM_I915 + default n + select PRIME_NUMBERS + help + Choose this option to allow the driver to perform selftests upon + loading; also requires the i915.selftest=1 module parameter. To + exit the module after running the selftests (i.e. to prevent normal + module initialisation afterwards) use i915.selftest=-1. + + Recommended for driver developers only. + + If in doubt, say "N". diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e58fc8f1b83b..d0c9577c7533 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -117,6 +117,9 @@ i915-y += dvo_ch7017.o \ # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o +i915-$(CONFIG_DRM_I915_SELFTEST) += \ + selftests/i915_random.o \ + selftests/i915_selftest.o # virtual gpu code i915-y += i915_vgpu.o diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index df2051b41fa1..732101ed57fb 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -27,6 +27,7 @@ #include #include "i915_drv.h" +#include "i915_selftest.h" #define GEN_DEFAULT_PIPEOFFSETS \ .pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \ @@ -473,10 +474,19 @@ static const struct pci_device_id pciidlist[] = { }; MODULE_DEVICE_TABLE(pci, pciidlist); +static void i915_pci_remove(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + + i915_driver_unload(dev); + drm_dev_unref(dev); +} + static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct intel_device_info *intel_info = (struct intel_device_info *) ent->driver_data; + int err; if (IS_ALPHA_SUPPORT(intel_info) && !i915.alpha_support) { DRM_INFO("The driver support for your hardware in this kernel version is alpha quality\n" @@ -500,15 +510,17 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (vga_switcheroo_client_probe_defer(pdev)) return -EPROBE_DEFER; - return i915_driver_load(pdev, ent); -} + err = i915_driver_load(pdev, ent); + if (err) + return err; -static void i915_pci_remove(struct pci_dev *pdev) -{ - struct drm_device *dev = pci_get_drvdata(pdev); + err = i915_live_selftests(pdev); + if (err) { + i915_pci_remove(pdev); + return err > 0 ? -ENOTTY : err; + } - i915_driver_unload(dev); - drm_dev_unref(dev); + return 0; } static struct pci_driver i915_pci_driver = { @@ -522,6 +534,11 @@ static struct pci_driver i915_pci_driver = { static int __init i915_init(void) { bool use_kms = true; + int err; + + err = i915_mock_selftests(); + if (err) + return err > 0 ? 0 : err; /* * Enable KMS by default, unless explicitly overriden by diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h new file mode 100644 index 000000000000..8b5994caa301 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -0,0 +1,102 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __I915_SELFTEST_H__ +#define __I915_SELFTEST_H__ + +struct pci_dev; +struct drm_i915_private; + +struct i915_selftest { + unsigned long timeout_jiffies; + unsigned int timeout_ms; + unsigned int random_seed; + int mock; + int live; +}; + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +extern struct i915_selftest i915_selftest; + +int i915_mock_selftests(void); +int i915_live_selftests(struct pci_dev *pdev); + +/* We extract the function declarations from i915_mock_selftests.h and + * i915_live_selftests.h Add your unit test declarations there! + * + * Mock unit tests are run very early upon module load, before the driver + * is probed. All hardware interactions, as well as other subsystems, must + * be "mocked". + * + * Live unit tests are run after the driver is loaded - all hardware + * interactions are real. + */ +#define selftest(name, func) int func(void); +#include "selftests/i915_mock_selftests.h" +#undef selftest +#define selftest(name, func) int func(struct drm_i915_private *i915); +#include "selftests/i915_live_selftests.h" +#undef selftest + +struct i915_subtest { + int (*func)(void *data); + const char *name; +}; + +int __i915_subtests(const char *caller, + const struct i915_subtest *st, + unsigned int count, + void *data); +#define i915_subtests(T, data) \ + __i915_subtests(__func__, T, ARRAY_SIZE(T), data) + +#define SUBTEST(x) { x, #x } + +#define I915_SELFTEST_DECLARE(x) x +#define I915_SELFTEST_ONLY(x) unlikely(x) + +#else /* !IS_ENABLED(CONFIG_DRM_I915_SELFTEST) */ + +static inline int i915_mock_selftests(void) { return 0; } +static inline int i915_live_selftests(struct pci_dev *pdev) { return 0; } + +#define I915_SELFTEST_DECLARE(x) +#define I915_SELFTEST_ONLY(x) 0 + +#endif + +/* Using the i915_selftest_ prefix becomes a little unwieldy with the helpers. + * Instead we use the igt_ shorthand, in reference to the intel-gpu-tools + * suite of uabi test cases (which includes a test runner for our selftests). + */ + +#define IGT_TIMEOUT(name__) \ + unsigned long name__ = jiffies + i915_selftest.timeout_jiffies + +__printf(2, 3) +bool __igt_timeout(unsigned long timeout, const char *fmt, ...); + +#define igt_timeout(t, fmt, ...) \ + __igt_timeout((t), KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) + +#endif /* !__I915_SELFTEST_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h new file mode 100644 index 000000000000..f3e17cb10e05 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -0,0 +1,11 @@ +/* List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as subtest__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * The function should be of type int function(void). It may be conditionally + * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). + * + * Tests are executed in order by igt/drv_selftest + */ +selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h new file mode 100644 index 000000000000..69e97a2ba4a6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -0,0 +1,11 @@ +/* List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as subtest__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * The function should be of type int function(void). It may be conditionally + * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). + * + * Tests are executed in order by igt/drv_selftest + */ +selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c new file mode 100644 index 000000000000..c17c83c30637 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -0,0 +1,63 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include +#include +#include +#include +#include + +#include "i915_random.h" + +static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) +{ + return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); +} + +void i915_random_reorder(unsigned int *order, unsigned int count, + struct rnd_state *state) +{ + unsigned int i, j; + + for (i = 0; i < count; i++) { + BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); + j = i915_prandom_u32_max_state(count, state); + swap(order[i], order[j]); + } +} + +unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) +{ + unsigned int *order, i; + + order = kmalloc_array(count, sizeof(*order), GFP_TEMPORARY); + if (!order) + return order; + + for (i = 0; i < count; i++) + order[i] = i; + + i915_random_reorder(order, count, state); + return order; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h new file mode 100644 index 000000000000..b9c334ce6cd9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -0,0 +1,50 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_SELFTESTS_RANDOM_H__ +#define __I915_SELFTESTS_RANDOM_H__ + +#include + +#include "../i915_selftest.h" + +#define I915_RND_STATE_INITIALIZER(x) ({ \ + struct rnd_state state__; \ + prandom_seed_state(&state__, (x)); \ + state__; \ +}) + +#define I915_RND_STATE(name__) \ + struct rnd_state name__ = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed) + +#define I915_RND_SUBSTATE(name__, parent__) \ + struct rnd_state name__ = I915_RND_STATE_INITIALIZER(prandom_u32_state(&(parent__))) + +unsigned int *i915_random_order(unsigned int count, + struct rnd_state *state); +void i915_random_reorder(unsigned int *order, + unsigned int count, + struct rnd_state *state); + +#endif /* !__I915_SELFTESTS_RANDOM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c new file mode 100644 index 000000000000..6ba3abb10c6f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -0,0 +1,250 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "../i915_drv.h" +#include "../i915_selftest.h" + +struct i915_selftest i915_selftest __read_mostly = { + .timeout_ms = 1000, +}; + +int i915_mock_sanitycheck(void) +{ + pr_info(DRIVER_NAME ": %s() - ok!\n", __func__); + return 0; +} + +int i915_live_sanitycheck(struct drm_i915_private *i915) +{ + pr_info("%s: %s() - ok!\n", i915->drm.driver->name, __func__); + return 0; +} + +enum { +#define selftest(name, func) mock_##name, +#include "i915_mock_selftests.h" +#undef selftest +}; + +enum { +#define selftest(name, func) live_##name, +#include "i915_live_selftests.h" +#undef selftest +}; + +struct selftest { + bool enabled; + const char *name; + union { + int (*mock)(void); + int (*live)(struct drm_i915_private *); + }; +}; + +#define selftest(n, f) [mock_##n] = { .name = #n, .mock = f }, +static struct selftest mock_selftests[] = { +#include "i915_mock_selftests.h" +}; +#undef selftest + +#define selftest(n, f) [live_##n] = { .name = #n, .live = f }, +static struct selftest live_selftests[] = { +#include "i915_live_selftests.h" +}; +#undef selftest + +/* Embed the line number into the parameter name so that we can order tests */ +#define selftest(n, func) selftest_0(n, func, param(n)) +#define param(n) __PASTE(igt__, __PASTE(__LINE__, __mock_##n)) +#define selftest_0(n, func, id) \ +module_param_named(id, mock_selftests[mock_##n].enabled, bool, 0400); +#include "i915_mock_selftests.h" +#undef selftest_0 +#undef param + +#define param(n) __PASTE(igt__, __PASTE(__LINE__, __live_##n)) +#define selftest_0(n, func, id) \ +module_param_named(id, live_selftests[live_##n].enabled, bool, 0400); +#include "i915_live_selftests.h" +#undef selftest_0 +#undef param +#undef selftest + +static void set_default_test_all(struct selftest *st, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) + if (st[i].enabled) + return; + + for (i = 0; i < count; i++) + st[i].enabled = true; +} + +static int __run_selftests(const char *name, + struct selftest *st, + unsigned int count, + void *data) +{ + int err = 0; + + while (!i915_selftest.random_seed) + i915_selftest.random_seed = get_random_int(); + + i915_selftest.timeout_jiffies = + i915_selftest.timeout_ms ? + msecs_to_jiffies_timeout(i915_selftest.timeout_ms) : + MAX_SCHEDULE_TIMEOUT; + + set_default_test_all(st, count); + + pr_info(DRIVER_NAME ": Performing %s selftests with st_random_seed=0x%x st_timeout=%u\n", + name, i915_selftest.random_seed, i915_selftest.timeout_ms); + + /* Tests are listed in order in i915_*_selftests.h */ + for (; count--; st++) { + if (!st->enabled) + continue; + + cond_resched(); + if (signal_pending(current)) + return -EINTR; + + pr_debug(DRIVER_NAME ": Running %s\n", st->name); + if (data) + err = st->live(data); + else + err = st->mock(); + if (err == -EINTR && !signal_pending(current)) + err = 0; + if (err) + break; + } + + if (WARN(err > 0 || err == -ENOTTY, + "%s returned %d, conflicting with selftest's magic values!\n", + st->name, err)) + err = -1; + + return err; +} + +#define run_selftests(x, data) \ + __run_selftests(#x, x##_selftests, ARRAY_SIZE(x##_selftests), data) + +int i915_mock_selftests(void) +{ + int err; + + if (!i915_selftest.mock) + return 0; + + err = run_selftests(mock, NULL); + if (err) { + i915_selftest.mock = err; + return err; + } + + if (i915_selftest.mock < 0) { + i915_selftest.mock = -ENOTTY; + return 1; + } + + return 0; +} + +int i915_live_selftests(struct pci_dev *pdev) +{ + int err; + + if (!i915_selftest.live) + return 0; + + err = run_selftests(live, to_i915(pci_get_drvdata(pdev))); + if (err) { + i915_selftest.live = err; + return err; + } + + if (i915_selftest.live < 0) { + i915_selftest.live = -ENOTTY; + return 1; + } + + return 0; +} + +int __i915_subtests(const char *caller, + const struct i915_subtest *st, + unsigned int count, + void *data) +{ + int err; + + for (; count--; st++) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + + pr_debug(DRIVER_NAME ": Running %s/%s\n", caller, st->name); + err = st->func(data); + if (err && err != -EINTR) { + pr_err(DRIVER_NAME "/%s: %s failed with error %d\n", + caller, st->name, err); + return err; + } + } + + return 0; +} + +bool __igt_timeout(unsigned long timeout, const char *fmt, ...) +{ + va_list va; + + if (!signal_pending(current)) { + cond_resched(); + if (time_before(jiffies, timeout)) + return false; + } + + if (fmt) { + va_start(va, fmt); + vprintk(fmt, va); + va_end(va); + } + + return true; +} + +module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400); +module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); + +module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400); +MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); + +module_param_named_unsafe(live_selftests, i915_selftest.live, int, 0400); +MODULE_PARM_DESC(live_selftests, "Run selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)"); diff --git a/tools/testing/selftests/drivers/gpu/i915.sh b/tools/testing/selftests/drivers/gpu/i915.sh index d407f0fa1e3a..c06d6e8a8dcc 100755 --- a/tools/testing/selftests/drivers/gpu/i915.sh +++ b/tools/testing/selftests/drivers/gpu/i915.sh @@ -7,6 +7,7 @@ if ! /sbin/modprobe -q -r i915; then fi if /sbin/modprobe -q i915 mock_selftests=-1; then + /sbin/modprobe -q -r i915 echo "drivers/gpu/i915: ok" else echo "drivers/gpu/i915: [FAIL]" From 935a2f776aa523cd6ad20c9b7c7d019ba9418759 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:13 +0000 Subject: [PATCH 0157/1299] drm/i915: Add some selftests for sg_table manipulation Start exercising the scattergather lists, especially looking at iteration after coalescing. v2: Comment on the peculiarity of table construction (i.e. why this sg_table might be interesting). v3: Added one __func__ to identify expect_pfn_sg() v4: Loop until we have crossed the chain boundary (forcing sg_table to do multiple allocations) before squelching a potential ENOMEM from oom. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 11 +- .../drm/i915/selftests/i915_mock_selftests.h | 1 + drivers/gpu/drm/i915/selftests/scatterlist.c | 355 ++++++++++++++++++ 3 files changed, 364 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/scatterlist.c diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 638b335e72b6..c91e5d10cc58 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2224,17 +2224,17 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, mutex_unlock(&obj->mm.lock); } -static void i915_sg_trim(struct sg_table *orig_st) +static bool i915_sg_trim(struct sg_table *orig_st) { struct sg_table new_st; struct scatterlist *sg, *new_sg; unsigned int i; if (orig_st->nents == orig_st->orig_nents) - return; + return false; if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) - return; + return false; new_sg = new_st.sgl; for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { @@ -2247,6 +2247,7 @@ static void i915_sg_trim(struct sg_table *orig_st) sg_free_table(orig_st); *orig_st = new_st; + return true; } static struct sg_table * @@ -5019,3 +5020,7 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, sg = i915_gem_object_get_sg(obj, n, &offset); return sg_dma_address(sg) + (offset << PAGE_SHIFT); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/scatterlist.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 69e97a2ba4a6..5f0bdda42ed8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -9,3 +9,4 @@ * Tests are executed in order by igt/drv_selftest */ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ +selftest(scatterlist, scatterlist_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c new file mode 100644 index 000000000000..eb2cda8e2b9f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/scatterlist.c @@ -0,0 +1,355 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include + +#include "../i915_selftest.h" + +#define PFN_BIAS (1 << 10) + +struct pfn_table { + struct sg_table st; + unsigned long start, end; +}; + +typedef unsigned int (*npages_fn_t)(unsigned long n, + unsigned long count, + struct rnd_state *rnd); + +static noinline int expect_pfn_sg(struct pfn_table *pt, + npages_fn_t npages_fn, + struct rnd_state *rnd, + const char *who, + unsigned long timeout) +{ + struct scatterlist *sg; + unsigned long pfn, n; + + pfn = pt->start; + for_each_sg(pt->st.sgl, sg, pt->st.nents, n) { + struct page *page = sg_page(sg); + unsigned int npages = npages_fn(n, pt->st.nents, rnd); + + if (page_to_pfn(page) != pfn) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sg)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (sg->length != npages * PAGE_SIZE) { + pr_err("%s: %s copied wrong sg length, expected size %lu, found %u (using for_each_sg)\n", + __func__, who, npages * PAGE_SIZE, sg->length); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn += npages; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +static noinline int expect_pfn_sg_page_iter(struct pfn_table *pt, + const char *who, + unsigned long timeout) +{ + struct sg_page_iter sgiter; + unsigned long pfn; + + pfn = pt->start; + for_each_sg_page(pt->st.sgl, &sgiter, pt->st.nents, 0) { + struct page *page = sg_page_iter_page(&sgiter); + + if (page != pfn_to_page(pfn)) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sg_page)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn++; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +static noinline int expect_pfn_sgtiter(struct pfn_table *pt, + const char *who, + unsigned long timeout) +{ + struct sgt_iter sgt; + struct page *page; + unsigned long pfn; + + pfn = pt->start; + for_each_sgt_page(page, sgt, &pt->st) { + if (page != pfn_to_page(pfn)) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sgt_page)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn++; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +static int expect_pfn_sgtable(struct pfn_table *pt, + npages_fn_t npages_fn, + struct rnd_state *rnd, + const char *who, + unsigned long timeout) +{ + int err; + + err = expect_pfn_sg(pt, npages_fn, rnd, who, timeout); + if (err) + return err; + + err = expect_pfn_sg_page_iter(pt, who, timeout); + if (err) + return err; + + err = expect_pfn_sgtiter(pt, who, timeout); + if (err) + return err; + + return 0; +} + +static unsigned int one(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return 1; +} + +static unsigned int grow(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return n + 1; +} + +static unsigned int shrink(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return count - n; +} + +static unsigned int random(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return 1 + (prandom_u32_state(rnd) % 1024); +} + +static int alloc_table(struct pfn_table *pt, + unsigned long count, unsigned long max, + npages_fn_t npages_fn, + struct rnd_state *rnd, + int alloc_error) +{ + struct scatterlist *sg; + unsigned long n, pfn; + + if (sg_alloc_table(&pt->st, max, + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN)) + return alloc_error; + + /* count should be less than 20 to prevent overflowing sg->length */ + GEM_BUG_ON(overflows_type(count * PAGE_SIZE, sg->length)); + + /* Construct a table where each scatterlist contains different number + * of entries. The idea is to check that we can iterate the individual + * pages from inside the coalesced lists. + */ + pt->start = PFN_BIAS; + pfn = pt->start; + sg = pt->st.sgl; + for (n = 0; n < count; n++) { + unsigned long npages = npages_fn(n, count, rnd); + + /* Nobody expects the Sparse Memmap! */ + if (pfn_to_page(pfn + npages) != pfn_to_page(pfn) + npages) { + sg_free_table(&pt->st); + return -ENOSPC; + } + + if (n) + sg = sg_next(sg); + sg_set_page(sg, pfn_to_page(pfn), npages * PAGE_SIZE, 0); + + GEM_BUG_ON(page_to_pfn(sg_page(sg)) != pfn); + GEM_BUG_ON(sg->length != npages * PAGE_SIZE); + GEM_BUG_ON(sg->offset != 0); + + pfn += npages; + } + sg_mark_end(sg); + pt->st.nents = n; + pt->end = pfn; + + return 0; +} + +static const npages_fn_t npages_funcs[] = { + one, + grow, + shrink, + random, + NULL, +}; + +static int igt_sg_alloc(void *ignored) +{ + IGT_TIMEOUT(end_time); + const unsigned long max_order = 20; /* approximating a 4GiB object */ + struct rnd_state prng; + unsigned long prime; + int alloc_error = -ENOMEM; + + for_each_prime_number(prime, max_order) { + unsigned long size = BIT(prime); + int offset; + + for (offset = -1; offset <= 1; offset++) { + unsigned long sz = size + offset; + const npages_fn_t *npages; + struct pfn_table pt; + int err; + + for (npages = npages_funcs; *npages; npages++) { + prandom_seed_state(&prng, + i915_selftest.random_seed); + err = alloc_table(&pt, sz, sz, *npages, &prng, + alloc_error); + if (err == -ENOSPC) + break; + if (err) + return err; + + prandom_seed_state(&prng, + i915_selftest.random_seed); + err = expect_pfn_sgtable(&pt, *npages, &prng, + "sg_alloc_table", + end_time); + sg_free_table(&pt.st); + if (err) + return err; + } + } + + /* Test at least one continuation before accepting oom */ + if (size > SG_MAX_SINGLE_ALLOC) + alloc_error = -ENOSPC; + } + + return 0; +} + +static int igt_sg_trim(void *ignored) +{ + IGT_TIMEOUT(end_time); + const unsigned long max = PAGE_SIZE; /* not prime! */ + struct pfn_table pt; + unsigned long prime; + int alloc_error = -ENOMEM; + + for_each_prime_number(prime, max) { + const npages_fn_t *npages; + int err; + + for (npages = npages_funcs; *npages; npages++) { + struct rnd_state prng; + + prandom_seed_state(&prng, i915_selftest.random_seed); + err = alloc_table(&pt, prime, max, *npages, &prng, + alloc_error); + if (err == -ENOSPC) + break; + if (err) + return err; + + if (i915_sg_trim(&pt.st)) { + if (pt.st.orig_nents != prime || + pt.st.nents != prime) { + pr_err("i915_sg_trim failed (nents %u, orig_nents %u), expected %lu\n", + pt.st.nents, pt.st.orig_nents, prime); + err = -EINVAL; + } else { + prandom_seed_state(&prng, + i915_selftest.random_seed); + err = expect_pfn_sgtable(&pt, + *npages, &prng, + "i915_sg_trim", + end_time); + } + } + sg_free_table(&pt.st); + if (err) + return err; + } + + /* Test at least one continuation before accepting oom */ + if (prime > SG_MAX_SINGLE_ALLOC) + alloc_error = -ENOSPC; + } + + return 0; +} + +int scatterlist_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_sg_alloc), + SUBTEST(igt_sg_trim), + }; + + return i915_subtests(tests, NULL); +} From f97fbf9606a1a96d61d171e1ed1606b2b812fb01 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:14 +0000 Subject: [PATCH 0158/1299] drm/i915: Add unit tests for the breadcrumb rbtree, insert/remove First retroactive test, make sure that the waiters are in global seqno order after random inserts and removals. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 21 +++ drivers/gpu/drm/i915/intel_engine_cs.c | 4 + drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + .../drm/i915/selftests/i915_mock_selftests.h | 1 + .../drm/i915/selftests/intel_breadcrumbs.c | 172 ++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_engine.c | 55 ++++++ drivers/gpu/drm/i915/selftests/mock_engine.h | 32 ++++ 7 files changed, 287 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_engine.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_engine.h diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index f5e05343110a..74cb7b91b5db 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -107,6 +107,18 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) if (b->rpm_wakelock) return; + if (I915_SELFTEST_ONLY(b->mock)) { + /* For our mock objects we want to avoid interaction + * with the real hardware (which is not set up). So + * we simply pretend we have enabled the powerwell + * and the irq, and leave it up to the mock + * implementation to call intel_engine_wakeup() + * itself when it wants to simulate a user interrupt, + */ + b->rpm_wakelock = true; + return; + } + /* Since we are waiting on a request, the GPU should be busy * and should have its own rpm reference. For completeness, * record an rpm reference for ourselves to cover the @@ -142,6 +154,11 @@ static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) if (!b->rpm_wakelock) return; + if (I915_SELFTEST_ONLY(b->mock)) { + b->rpm_wakelock = false; + return; + } + if (b->irq_enabled) { irq_disable(engine); b->irq_enabled = false; @@ -664,3 +681,7 @@ unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915) return mask; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_breadcrumbs.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 69a6416d1223..538d845d7251 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -524,3 +524,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, break; } } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_engine.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index b122c3cc6dbb..cc62e89010d3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,7 @@ #include "i915_gem_batch_pool.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_selftest.h" #define I915_CMD_HASH_ORDER 9 @@ -249,6 +250,7 @@ struct intel_engine_cs { bool irq_enabled : 1; bool rpm_wakelock : 1; + I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; /* diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 5f0bdda42ed8..80458e2a2b04 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -10,3 +10,4 @@ */ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ selftest(scatterlist, scatterlist_mock_selftests) +selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c new file mode 100644 index 000000000000..6b5acf9de65b --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -0,0 +1,172 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" +#include "i915_random.h" + +#include "mock_engine.h" + +static int check_rbtree(struct intel_engine_cs *engine, + const unsigned long *bitmap, + const struct intel_wait *waiters, + const int count) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct rb_node *rb; + int n; + + if (&b->first_wait->node != rb_first(&b->waiters)) { + pr_err("First waiter does not match first element of wait-tree\n"); + return -EINVAL; + } + + n = find_first_bit(bitmap, count); + for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { + struct intel_wait *w = container_of(rb, typeof(*w), node); + int idx = w - waiters; + + if (!test_bit(idx, bitmap)) { + pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n", + idx, w->seqno); + return -EINVAL; + } + + if (n != idx) { + pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n", + idx, w->seqno, n); + return -EINVAL; + } + + n = find_next_bit(bitmap, count, n + 1); + } + + return 0; +} + +static int check_rbtree_empty(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + if (b->first_wait) { + pr_err("Empty breadcrumbs still has a waiter\n"); + return -EINVAL; + } + + if (!RB_EMPTY_ROOT(&b->waiters)) { + pr_err("Empty breadcrumbs, but wait-tree not empty\n"); + return -EINVAL; + } + + return 0; +} + +static int igt_random_insert_remove(void *arg) +{ + const u32 seqno_bias = 0x1000; + I915_RND_STATE(prng); + struct intel_engine_cs *engine = arg; + struct intel_wait *waiters; + const int count = 4096; + unsigned int *order; + unsigned long *bitmap; + int err = -ENOMEM; + int n; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), + GFP_TEMPORARY); + if (!bitmap) + goto out_waiters; + + order = i915_random_order(count, &prng); + if (!order) + goto out_bitmap; + + for (n = 0; n < count; n++) + intel_wait_init(&waiters[n], seqno_bias + n); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + + /* Add and remove waiters into the rbtree in random order. At each + * step, we verify that the rbtree is correctly ordered. + */ + for (n = 0; n < count; n++) { + int i = order[n]; + + intel_engine_add_wait(engine, &waiters[i]); + __set_bit(i, bitmap); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + } + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + int i = order[n]; + + intel_engine_remove_wait(engine, &waiters[i]); + __clear_bit(i, bitmap); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + } + + err = check_rbtree_empty(engine); +out_order: + kfree(order); +out_bitmap: + kfree(bitmap); +out_waiters: + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + +int intel_breadcrumbs_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_random_insert_remove), + }; + struct intel_engine_cs *engine; + int err; + + engine = mock_engine("mock"); + if (!engine) + return -ENOMEM; + + err = i915_subtests(tests, engine); + kfree(engine); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c new file mode 100644 index 000000000000..4a090bbe807b --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -0,0 +1,55 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_engine.h" + +struct intel_engine_cs *mock_engine(const char *name) +{ + struct intel_engine_cs *engine; + static int id; + + engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); + if (!engine) + return NULL; + + /* minimal engine setup for seqno */ + engine->name = name; + engine->id = id++; + engine->status_page.page_addr = (void *)(engine + 1); + + /* minimal breadcrumbs init */ + spin_lock_init(&engine->breadcrumbs.lock); + engine->breadcrumbs.mock = true; + + return engine; +} + +void mock_engine_flush(struct intel_engine_cs *engine) +{ +} + +void mock_engine_reset(struct intel_engine_cs *engine) +{ + intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h new file mode 100644 index 000000000000..0ae9a94aaa1e --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -0,0 +1,32 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_ENGINE_H__ +#define __MOCK_ENGINE_H__ + +struct intel_engine_cs *mock_engine(const char *name); +void mock_engine_flush(struct intel_engine_cs *engine); +void mock_engine_reset(struct intel_engine_cs *engine); + +#endif /* !__MOCK_ENGINE_H__ */ From ae1f8090b1a7f163a16d72ddc4214a327ce989e0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:15 +0000 Subject: [PATCH 0159/1299] drm/i915: Add unit tests for the breadcrumb rbtree, completion Second retroactive test, make sure that the waiters are removed from the global wait-tree when their seqno completes. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-4-chris@chris-wilson.co.uk --- .../drm/i915/selftests/intel_breadcrumbs.c | 107 ++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_engine.h | 6 + 2 files changed, 113 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 6b5acf9de65b..32a27e56c353 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -64,6 +64,27 @@ static int check_rbtree(struct intel_engine_cs *engine, return 0; } +static int check_completion(struct intel_engine_cs *engine, + const unsigned long *bitmap, + const struct intel_wait *waiters, + const int count) +{ + int n; + + for (n = 0; n < count; n++) { + if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap)) + continue; + + pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n", + n, waiters[n].seqno, + intel_wait_complete(&waiters[n]) ? "complete" : "active", + test_bit(n, bitmap) ? "active" : "complete"); + return -EINVAL; + } + + return 0; +} + static int check_rbtree_empty(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -153,10 +174,96 @@ static int igt_random_insert_remove(void *arg) return err; } +static int igt_insert_complete(void *arg) +{ + const u32 seqno_bias = 0x1000; + struct intel_engine_cs *engine = arg; + struct intel_wait *waiters; + const int count = 4096; + unsigned long *bitmap; + int err = -ENOMEM; + int n, m; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), + GFP_TEMPORARY); + if (!bitmap) + goto out_waiters; + + for (n = 0; n < count; n++) { + intel_wait_init(&waiters[n], n + seqno_bias); + intel_engine_add_wait(engine, &waiters[n]); + __set_bit(n, bitmap); + } + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_bitmap; + + /* On each step, we advance the seqno so that several waiters are then + * complete (we increase the seqno by increasingly larger values to + * retire more and more waiters at once). All retired waiters should + * be woken and removed from the rbtree, and so that we check. + */ + for (n = 0; n < count; n = m) { + int seqno = 2 * n; + + GEM_BUG_ON(find_first_bit(bitmap, count) != n); + + if (intel_wait_complete(&waiters[n])) { + pr_err("waiter[%d, seqno=%d] completed too early\n", + n, waiters[n].seqno); + err = -EINVAL; + goto out_bitmap; + } + + /* complete the following waiters */ + mock_seqno_advance(engine, seqno + seqno_bias); + for (m = n; m <= seqno; m++) { + if (m == count) + break; + + GEM_BUG_ON(!test_bit(m, bitmap)); + __clear_bit(m, bitmap); + } + + intel_engine_remove_wait(engine, &waiters[n]); + RB_CLEAR_NODE(&waiters[n].node); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) { + pr_err("rbtree corrupt after seqno advance to %d\n", + seqno + seqno_bias); + goto out_bitmap; + } + + err = check_completion(engine, bitmap, waiters, count); + if (err) { + pr_err("completions after seqno advance to %d failed\n", + seqno + seqno_bias); + goto out_bitmap; + } + } + + err = check_rbtree_empty(engine); +out_bitmap: + kfree(bitmap); +out_waiters: + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + int intel_breadcrumbs_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_random_insert_remove), + SUBTEST(igt_insert_complete), }; struct intel_engine_cs *engine; int err; diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h index 0ae9a94aaa1e..9cfe9671f860 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.h +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -29,4 +29,10 @@ struct intel_engine_cs *mock_engine(const char *name); void mock_engine_flush(struct intel_engine_cs *engine); void mock_engine_reset(struct intel_engine_cs *engine); +static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno) +{ + intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); + intel_engine_wakeup(engine); +} + #endif /* !__MOCK_ENGINE_H__ */ From e62e8ad1ba5701e0e0c1371041cbb677f75ba3fb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:16 +0000 Subject: [PATCH 0160/1299] drm/i915: Add unit tests for the breadcrumb rbtree, wakeups Third retroactive test, make sure that the seqno waiters are woken. v2: Smattering of comments, rearrange code v3: Fix IDLE assert to avoid startup/sleep races Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-5-chris@chris-wilson.co.uk --- .../drm/i915/selftests/intel_breadcrumbs.c | 201 ++++++++++++++++++ 1 file changed, 201 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 32a27e56c353..d1b99b565500 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -259,11 +259,212 @@ static int igt_insert_complete(void *arg) return err; } +struct igt_wakeup { + struct task_struct *tsk; + atomic_t *ready, *set, *done; + struct intel_engine_cs *engine; + unsigned long flags; +#define STOP 0 +#define IDLE 1 + wait_queue_head_t *wq; + u32 seqno; +}; + +static int wait_atomic(atomic_t *p) +{ + schedule(); + return 0; +} + +static int wait_atomic_timeout(atomic_t *p) +{ + return schedule_timeout(10 * HZ) ? 0 : -ETIMEDOUT; +} + +static bool wait_for_ready(struct igt_wakeup *w) +{ + DEFINE_WAIT(ready); + + set_bit(IDLE, &w->flags); + if (atomic_dec_and_test(w->done)) + wake_up_atomic_t(w->done); + + if (test_bit(STOP, &w->flags)) + goto out; + + for (;;) { + prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE); + if (atomic_read(w->ready) == 0) + break; + + schedule(); + } + finish_wait(w->wq, &ready); + +out: + clear_bit(IDLE, &w->flags); + if (atomic_dec_and_test(w->set)) + wake_up_atomic_t(w->set); + + return !test_bit(STOP, &w->flags); +} + +static int igt_wakeup_thread(void *arg) +{ + struct igt_wakeup *w = arg; + struct intel_wait wait; + + while (wait_for_ready(w)) { + GEM_BUG_ON(kthread_should_stop()); + + intel_wait_init(&wait, w->seqno); + intel_engine_add_wait(w->engine, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (i915_seqno_passed(intel_engine_get_seqno(w->engine), + w->seqno)) + break; + + if (test_bit(STOP, &w->flags)) /* emergency escape */ + break; + + schedule(); + } + intel_engine_remove_wait(w->engine, &wait); + __set_current_state(TASK_RUNNING); + } + + return 0; +} + +static void igt_wake_all_sync(atomic_t *ready, + atomic_t *set, + atomic_t *done, + wait_queue_head_t *wq, + int count) +{ + atomic_set(set, count); + atomic_set(ready, 0); + wake_up_all(wq); + + wait_on_atomic_t(set, wait_atomic, TASK_UNINTERRUPTIBLE); + atomic_set(ready, count); + atomic_set(done, count); +} + +static int igt_wakeup(void *arg) +{ + I915_RND_STATE(prng); + const int state = TASK_UNINTERRUPTIBLE; + struct intel_engine_cs *engine = arg; + struct igt_wakeup *waiters; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + const int count = 4096; + const u32 max_seqno = count / 4; + atomic_t ready, set, done; + int err = -ENOMEM; + int n, step; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + /* Create a large number of threads, each waiting on a random seqno. + * Multiple waiters will be waiting for the same seqno. + */ + atomic_set(&ready, count); + for (n = 0; n < count; n++) { + waiters[n].wq = &wq; + waiters[n].ready = &ready; + waiters[n].set = &set; + waiters[n].done = &done; + waiters[n].engine = engine; + waiters[n].flags = BIT(IDLE); + + waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n], + "i915/igt:%d", n); + if (IS_ERR(waiters[n].tsk)) + goto out_waiters; + + get_task_struct(waiters[n].tsk); + } + + for (step = 1; step <= max_seqno; step <<= 1) { + u32 seqno; + + /* The waiter threads start paused as we assign them a random + * seqno and reset the engine. Once the engine is reset, + * we signal that the threads may begin their wait upon their + * seqno. + */ + for (n = 0; n < count; n++) { + GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags)); + waiters[n].seqno = + 1 + prandom_u32_state(&prng) % max_seqno; + } + mock_seqno_advance(engine, 0); + igt_wake_all_sync(&ready, &set, &done, &wq, count); + + /* Simulate the GPU doing chunks of work, with one or more + * seqno appearing to finish at the same time. A random number + * of threads will be waiting upon the update and hopefully be + * woken. + */ + for (seqno = 1; seqno <= max_seqno + step; seqno += step) { + usleep_range(50, 500); + mock_seqno_advance(engine, seqno); + } + GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno); + + /* With the seqno now beyond any of the waiting threads, they + * should all be woken, see that they are complete and signal + * that they are ready for the next test. We wait until all + * threads are complete and waiting for us (i.e. not a seqno). + */ + err = wait_on_atomic_t(&done, wait_atomic_timeout, state); + if (err) { + pr_err("Timed out waiting for %d remaining waiters\n", + atomic_read(&done)); + break; + } + + err = check_rbtree_empty(engine); + if (err) + break; + } + +out_waiters: + for (n = 0; n < count; n++) { + if (IS_ERR(waiters[n].tsk)) + break; + + set_bit(STOP, &waiters[n].flags); + } + mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */ + igt_wake_all_sync(&ready, &set, &done, &wq, n); + + for (n = 0; n < count; n++) { + if (IS_ERR(waiters[n].tsk)) + break; + + kthread_stop(waiters[n].tsk); + put_task_struct(waiters[n].tsk); + } + + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + int intel_breadcrumbs_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_random_insert_remove), SUBTEST(igt_insert_complete), + SUBTEST(igt_wakeup), }; struct intel_engine_cs *engine; int err; From 66d9cb5d805af70229ffe6f961bf06adc511f469 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:17 +0000 Subject: [PATCH 0161/1299] drm/i915: Mock the GEM device for self-testing A simulacrum of drm_i915_private to let us pretend interactions with the device. v2: Tidy init error paths Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 4 + drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/selftests/mock_drm.c | 54 +++++++++ drivers/gpu/drm/i915/selftests/mock_drm.h | 31 ++++++ .../gpu/drm/i915/selftests/mock_gem_device.c | 104 ++++++++++++++++++ .../gpu/drm/i915/selftests/mock_gem_device.h | 8 ++ .../gpu/drm/i915/selftests/mock_gem_object.h | 8 ++ 7 files changed, 210 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/mock_drm.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_drm.h create mode 100644 drivers/gpu/drm/i915/selftests/mock_gem_device.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_gem_device.h create mode 100644 drivers/gpu/drm/i915/selftests/mock_gem_object.h diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ac1b3582ca55..efb1f64f0e41 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2668,3 +2668,7 @@ static struct drm_driver driver = { .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, }; + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_drm.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c91e5d10cc58..db240a8f6b82 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5023,4 +5023,5 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/scatterlist.c" +#include "selftests/mock_gem_device.c" #endif diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.c b/drivers/gpu/drm/i915/selftests/mock_drm.c new file mode 100644 index 000000000000..113dec05c7dc --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_drm.c @@ -0,0 +1,54 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_drm.h" + +static inline struct inode fake_inode(struct drm_i915_private *i915) +{ + return (struct inode){ .i_rdev = i915->drm.primary->index }; +} + +struct drm_file *mock_file(struct drm_i915_private *i915) +{ + struct inode inode = fake_inode(i915); + struct file filp = {}; + struct drm_file *file; + int err; + + err = drm_open(&inode, &filp); + if (unlikely(err)) + return ERR_PTR(err); + + file = filp.private_data; + file->authenticated = true; + return file; +} + +void mock_file_free(struct drm_i915_private *i915, struct drm_file *file) +{ + struct inode inode = fake_inode(i915); + struct file filp = { .private_data = file }; + + drm_release(&inode, &filp); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.h b/drivers/gpu/drm/i915/selftests/mock_drm.h new file mode 100644 index 000000000000..b39beee9f8f6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_drm.h @@ -0,0 +1,31 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_DRM_H +#define __MOCK_DRM_H + +struct drm_file *mock_file(struct drm_i915_private *i915); +void mock_file_free(struct drm_i915_private *i915, struct drm_file *file); + +#endif /* !__MOCK_DRM_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c new file mode 100644 index 000000000000..15d0a7ccc9d1 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -0,0 +1,104 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "mock_gem_device.h" +#include "mock_gem_object.h" + +static void mock_device_release(struct drm_device *dev) +{ + struct drm_i915_private *i915 = to_i915(dev); + + i915_gem_drain_freed_objects(i915); + + kmem_cache_destroy(i915->objects); + + drm_dev_fini(&i915->drm); + put_device(&i915->drm.pdev->dev); +} + +static struct drm_driver mock_driver = { + .name = "mock", + .driver_features = DRIVER_GEM, + .release = mock_device_release, + + .gem_close_object = i915_gem_close_object, + .gem_free_object_unlocked = i915_gem_free_object, +}; + +static void release_dev(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + kfree(pdev); +} + +struct drm_i915_private *mock_gem_device(void) +{ + struct drm_i915_private *i915; + struct pci_dev *pdev; + int err; + + pdev = kzalloc(sizeof(*pdev) + sizeof(*i915), GFP_KERNEL); + if (!pdev) + goto err; + + device_initialize(&pdev->dev); + pdev->dev.release = release_dev; + dev_set_name(&pdev->dev, "mock"); + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + + i915 = (struct drm_i915_private *)(pdev + 1); + pci_set_drvdata(pdev, i915); + + err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev); + if (err) { + pr_err("Failed to initialise mock GEM device: err=%d\n", err); + goto put_device; + } + i915->drm.pdev = pdev; + i915->drm.dev_private = i915; + + mkwrite_device_info(i915)->gen = -1; + + spin_lock_init(&i915->mm.object_stat_lock); + + INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); + init_llist_head(&i915->mm.free_list); + + i915->objects = KMEM_CACHE(mock_object, SLAB_HWCACHE_ALIGN); + if (!i915->objects) + goto put_device; + + return i915; + +put_device: + put_device(&pdev->dev); +err: + return NULL; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.h b/drivers/gpu/drm/i915/selftests/mock_gem_device.h new file mode 100644 index 000000000000..c557e33c3953 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.h @@ -0,0 +1,8 @@ +#ifndef __MOCK_GEM_DEVICE_H__ +#define __MOCK_GEM_DEVICE_H__ + +struct drm_i915_private; + +struct drm_i915_private *mock_gem_device(void); + +#endif /* !__MOCK_GEM_DEVICE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/selftests/mock_gem_object.h new file mode 100644 index 000000000000..9fbf67321662 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_object.h @@ -0,0 +1,8 @@ +#ifndef __MOCK_GEM_OBJECT_H__ +#define __MOCK_GEM_OBJECT_H__ + +struct mock_object { + struct drm_i915_gem_object base; +}; + +#endif /* !__MOCK_GEM_OBJECT_H__ */ From 3b5bb0a37665ce1efaf3b9a551c945c09a726504 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:18 +0000 Subject: [PATCH 0162/1299] drm/i915: Mock a GGTT for self-testing A very simple mockery, just a random manager and timeline. Useful for inserting objects and ordering retirement; and not much else. v2: mock_fini_ggtt() to complement mock_init_ggtt(). Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 + .../gpu/drm/i915/selftests/mock_gem_device.c | 31 ++++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 138 ++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_gtt.h | 35 +++++ 4 files changed, 208 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/mock_gtt.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_gtt.h diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d580e2b4081b..520bcf63d271 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3757,3 +3757,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, size, alignment, color, start, end, DRM_MM_INSERT_EVICT); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_gtt.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 15d0a7ccc9d1..dbd32b125d15 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -26,6 +26,7 @@ #include "mock_gem_device.h" #include "mock_gem_object.h" +#include "mock_gtt.h" static void mock_device_release(struct drm_device *dev) { @@ -33,6 +34,12 @@ static void mock_device_release(struct drm_device *dev) i915_gem_drain_freed_objects(i915); + mutex_lock(&i915->drm.struct_mutex); + mock_fini_ggtt(i915); + i915_gem_timeline_fini(&i915->gt.global_timeline); + mutex_unlock(&i915->drm.struct_mutex); + + kmem_cache_destroy(i915->vmas); kmem_cache_destroy(i915->objects); drm_dev_fini(&i915->drm); @@ -84,19 +91,43 @@ struct drm_i915_private *mock_gem_device(void) i915->drm.pdev = pdev; i915->drm.dev_private = i915; + /* Using the global GTT may ask questions about KMS users, so prepare */ + drm_mode_config_init(&i915->drm); + mkwrite_device_info(i915)->gen = -1; spin_lock_init(&i915->mm.object_stat_lock); INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); init_llist_head(&i915->mm.free_list); + INIT_LIST_HEAD(&i915->mm.unbound_list); + INIT_LIST_HEAD(&i915->mm.bound_list); i915->objects = KMEM_CACHE(mock_object, SLAB_HWCACHE_ALIGN); if (!i915->objects) goto put_device; + i915->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); + if (!i915->vmas) + goto err_objects; + + mutex_lock(&i915->drm.struct_mutex); + INIT_LIST_HEAD(&i915->gt.timelines); + err = i915_gem_timeline_init__global(i915); + if (err) { + mutex_unlock(&i915->drm.struct_mutex); + goto err_vmas; + } + + mock_init_ggtt(i915); + mutex_unlock(&i915->drm.struct_mutex); + return i915; +err_vmas: + kmem_cache_destroy(i915->vmas); +err_objects: + kmem_cache_destroy(i915->objects); put_device: put_device(&pdev->dev); err: diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c new file mode 100644 index 000000000000..a61309c7cb3e --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -0,0 +1,138 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_gtt.h" + +static void mock_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ +} + +static void mock_insert_entries(struct i915_address_space *vm, + struct sg_table *st, + u64 start, + enum i915_cache_level level, u32 flags) +{ +} + +static int mock_bind_ppgtt(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); + vma->pages = vma->obj->mm.pages; + vma->flags |= I915_VMA_LOCAL_BIND; + return 0; +} + +static void mock_unbind_ppgtt(struct i915_vma *vma) +{ +} + +static void mock_cleanup(struct i915_address_space *vm) +{ +} + +struct i915_hw_ppgtt * +mock_ppgtt(struct drm_i915_private *i915, + const char *name) +{ + struct i915_hw_ppgtt *ppgtt; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return NULL; + + kref_init(&ppgtt->ref); + ppgtt->base.i915 = i915; + ppgtt->base.total = round_down(U64_MAX, PAGE_SIZE); + ppgtt->base.file = ERR_PTR(-ENODEV); + + INIT_LIST_HEAD(&ppgtt->base.active_list); + INIT_LIST_HEAD(&ppgtt->base.inactive_list); + INIT_LIST_HEAD(&ppgtt->base.unbound_list); + + INIT_LIST_HEAD(&ppgtt->base.global_link); + drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total); + i915_gem_timeline_init(i915, &ppgtt->base.timeline, name); + + ppgtt->base.clear_range = nop_clear_range; + ppgtt->base.insert_page = mock_insert_page; + ppgtt->base.insert_entries = mock_insert_entries; + ppgtt->base.bind_vma = mock_bind_ppgtt; + ppgtt->base.unbind_vma = mock_unbind_ppgtt; + ppgtt->base.cleanup = mock_cleanup; + + return ppgtt; +} + +static int mock_bind_ggtt(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + int err; + + err = i915_get_ggtt_vma_pages(vma); + if (err) + return err; + + vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; + return 0; +} + +static void mock_unbind_ggtt(struct i915_vma *vma) +{ +} + +void mock_init_ggtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + INIT_LIST_HEAD(&i915->vm_list); + + ggtt->base.i915 = i915; + + ggtt->mappable_base = 0; + ggtt->mappable_end = 2048 * PAGE_SIZE; + ggtt->base.total = 4096 * PAGE_SIZE; + + ggtt->base.clear_range = nop_clear_range; + ggtt->base.insert_page = mock_insert_page; + ggtt->base.insert_entries = mock_insert_entries; + ggtt->base.bind_vma = mock_bind_ggtt; + ggtt->base.unbind_vma = mock_unbind_ggtt; + ggtt->base.cleanup = mock_cleanup; + + i915_address_space_init(&ggtt->base, i915, "global"); +} + +void mock_fini_ggtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + i915_address_space_fini(&ggtt->base); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.h b/drivers/gpu/drm/i915/selftests/mock_gtt.h new file mode 100644 index 000000000000..9a0a833bb545 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.h @@ -0,0 +1,35 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_GTT_H +#define __MOCK_GTT_H + +void mock_init_ggtt(struct drm_i915_private *i915); +void mock_fini_ggtt(struct drm_i915_private *i915); + +struct i915_hw_ppgtt * +mock_ppgtt(struct drm_i915_private *i915, + const char *name); + +#endif /* !__MOCK_GTT_H */ From 0daf0113cff6884ed947ffe0870a926e73d52f79 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:19 +0000 Subject: [PATCH 0163/1299] drm/i915: Mock infrastructure for request emission Create a fake engine that runs requests using a timer to simulate hw. v2: Prevent leaks of ctx->name along error paths Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 4 + .../drm/i915/selftests/intel_breadcrumbs.c | 11 +- drivers/gpu/drm/i915/selftests/mock_context.c | 78 ++++++++ drivers/gpu/drm/i915/selftests/mock_context.h | 34 ++++ drivers/gpu/drm/i915/selftests/mock_engine.c | 172 +++++++++++++++++- drivers/gpu/drm/i915/selftests/mock_engine.h | 18 +- .../gpu/drm/i915/selftests/mock_gem_device.c | 95 +++++++++- .../gpu/drm/i915/selftests/mock_gem_device.h | 1 + drivers/gpu/drm/i915/selftests/mock_request.c | 44 +++++ drivers/gpu/drm/i915/selftests/mock_request.h | 44 +++++ 10 files changed, 483 insertions(+), 18 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/mock_context.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_context.h create mode 100644 drivers/gpu/drm/i915/selftests/mock_request.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_request.h diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index fb11b674f319..d27c4050b4c5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -1199,3 +1199,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_context.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index d1b99b565500..6426acc9fdca 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -25,6 +25,7 @@ #include "../i915_selftest.h" #include "i915_random.h" +#include "mock_gem_device.h" #include "mock_engine.h" static int check_rbtree(struct intel_engine_cs *engine, @@ -466,15 +467,15 @@ int intel_breadcrumbs_mock_selftests(void) SUBTEST(igt_insert_complete), SUBTEST(igt_wakeup), }; - struct intel_engine_cs *engine; + struct drm_i915_private *i915; int err; - engine = mock_engine("mock"); - if (!engine) + i915 = mock_gem_device(); + if (!i915) return -ENOMEM; - err = i915_subtests(tests, engine); - kfree(engine); + err = i915_subtests(tests, i915->engine[RCS]); + drm_dev_unref(&i915->drm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c new file mode 100644 index 000000000000..8d3a90c3f8ac --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -0,0 +1,78 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_context.h" +#include "mock_gtt.h" + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name) +{ + struct i915_gem_context *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + kref_init(&ctx->ref); + INIT_LIST_HEAD(&ctx->link); + ctx->i915 = i915; + + ret = ida_simple_get(&i915->context_hw_ida, + 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + if (ret < 0) + goto err_free; + ctx->hw_id = ret; + + if (name) { + ctx->name = kstrdup(name, GFP_KERNEL); + if (!ctx->name) + goto err_put; + + ctx->ppgtt = mock_ppgtt(i915, name); + if (!ctx->ppgtt) + goto err_put; + } + + return ctx; + +err_free: + kfree(ctx); + return NULL; + +err_put: + i915_gem_context_set_closed(ctx); + i915_gem_context_put(ctx); + return NULL; +} + +void mock_context_close(struct i915_gem_context *ctx) +{ + i915_gem_context_set_closed(ctx); + + i915_ppgtt_close(&ctx->ppgtt->base); + + i915_gem_context_put(ctx); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h new file mode 100644 index 000000000000..2427e5c0916a --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_context.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_CONTEXT_H +#define __MOCK_CONTEXT_H + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name); + +void mock_context_close(struct i915_gem_context *ctx); + +#endif /* !__MOCK_CONTEXT_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 4a090bbe807b..8d5ba037064c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -23,33 +23,185 @@ */ #include "mock_engine.h" +#include "mock_request.h" -struct intel_engine_cs *mock_engine(const char *name) +static struct mock_request *first_request(struct mock_engine *engine) { - struct intel_engine_cs *engine; + return list_first_entry_or_null(&engine->hw_queue, + struct mock_request, + link); +} + +static void hw_delay_complete(unsigned long data) +{ + struct mock_engine *engine = (typeof(engine))data; + struct mock_request *request; + + spin_lock(&engine->hw_lock); + + request = first_request(engine); + if (request) { + list_del_init(&request->link); + mock_seqno_advance(&engine->base, request->base.global_seqno); + } + + request = first_request(engine); + if (request) + mod_timer(&engine->hw_delay, jiffies + request->delay); + + spin_unlock(&engine->hw_lock); +} + +static int mock_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + i915_gem_context_get(ctx); + return 0; +} + +static void mock_context_unpin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + i915_gem_context_put(ctx); +} + +static int mock_request_alloc(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + + INIT_LIST_HEAD(&mock->link); + mock->delay = 0; + + request->ring = request->engine->buffer; + return 0; +} + +static int mock_emit_flush(struct drm_i915_gem_request *request, + unsigned int flags) +{ + return 0; +} + +static void mock_emit_breadcrumb(struct drm_i915_gem_request *request, + u32 *flags) +{ +} + +static void mock_submit_request(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + struct mock_engine *engine = + container_of(request->engine, typeof(*engine), base); + + i915_gem_request_submit(request); + GEM_BUG_ON(!request->global_seqno); + + spin_lock_irq(&engine->hw_lock); + list_add_tail(&mock->link, &engine->hw_queue); + if (mock->link.prev == &engine->hw_queue) + mod_timer(&engine->hw_delay, jiffies + mock->delay); + spin_unlock_irq(&engine->hw_lock); +} + +static struct intel_ring *mock_ring(struct intel_engine_cs *engine) +{ + const unsigned long sz = roundup_pow_of_two(sizeof(struct intel_ring)); + struct intel_ring *ring; + + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); + if (!ring) + return NULL; + + ring->engine = engine; + ring->size = sz; + ring->effective_size = sz; + ring->vaddr = (void *)(ring + 1); + + INIT_LIST_HEAD(&ring->request_list); + ring->last_retired_head = -1; + intel_ring_update_space(ring); + + return ring; +} + +struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, + const char *name) +{ + struct mock_engine *engine; static int id; engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); if (!engine) return NULL; - /* minimal engine setup for seqno */ - engine->name = name; - engine->id = id++; - engine->status_page.page_addr = (void *)(engine + 1); + engine->base.buffer = mock_ring(&engine->base); + if (!engine->base.buffer) { + kfree(engine); + return NULL; + } - /* minimal breadcrumbs init */ - spin_lock_init(&engine->breadcrumbs.lock); - engine->breadcrumbs.mock = true; + /* minimal engine setup for requests */ + engine->base.i915 = i915; + engine->base.name = name; + engine->base.id = id++; + engine->base.status_page.page_addr = (void *)(engine + 1); - return engine; + engine->base.context_pin = mock_context_pin; + engine->base.context_unpin = mock_context_unpin; + engine->base.request_alloc = mock_request_alloc; + engine->base.emit_flush = mock_emit_flush; + engine->base.emit_breadcrumb = mock_emit_breadcrumb; + engine->base.submit_request = mock_submit_request; + + engine->base.timeline = + &i915->gt.global_timeline.engine[engine->base.id]; + + intel_engine_init_breadcrumbs(&engine->base); + engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ + + /* fake hw queue */ + spin_lock_init(&engine->hw_lock); + setup_timer(&engine->hw_delay, + hw_delay_complete, + (unsigned long)engine); + INIT_LIST_HEAD(&engine->hw_queue); + + return &engine->base; } void mock_engine_flush(struct intel_engine_cs *engine) { + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + struct mock_request *request, *rn; + + del_timer_sync(&mock->hw_delay); + + spin_lock_irq(&mock->hw_lock); + list_for_each_entry_safe(request, rn, &mock->hw_queue, link) { + list_del_init(&request->link); + mock_seqno_advance(&mock->base, request->base.global_seqno); + } + spin_unlock_irq(&mock->hw_lock); } void mock_engine_reset(struct intel_engine_cs *engine) { intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); } + +void mock_engine_free(struct intel_engine_cs *engine) +{ + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + + GEM_BUG_ON(timer_pending(&mock->hw_delay)); + + if (engine->last_retired_context) + engine->context_unpin(engine, engine->last_retired_context); + + intel_engine_fini_breadcrumbs(engine); + + kfree(engine->buffer); + kfree(engine); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h index 9cfe9671f860..e5e240216ba3 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.h +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -25,9 +25,25 @@ #ifndef __MOCK_ENGINE_H__ #define __MOCK_ENGINE_H__ -struct intel_engine_cs *mock_engine(const char *name); +#include +#include +#include + +#include "../intel_ringbuffer.h" + +struct mock_engine { + struct intel_engine_cs base; + + spinlock_t hw_lock; + struct list_head hw_queue; + struct timer_list hw_delay; +}; + +struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, + const char *name); void mock_engine_flush(struct intel_engine_cs *engine); void mock_engine_reset(struct intel_engine_cs *engine); +void mock_engine_free(struct intel_engine_cs *engine); static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno) { diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index dbd32b125d15..6a8258eacdcb 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -24,14 +24,46 @@ #include +#include "mock_engine.h" +#include "mock_context.h" +#include "mock_request.h" #include "mock_gem_device.h" #include "mock_gem_object.h" #include "mock_gtt.h" +void mock_device_flush(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) + mock_engine_flush(engine); + + i915_gem_retire_requests(i915); +} + static void mock_device_release(struct drm_device *dev) { struct drm_i915_private *i915 = to_i915(dev); + struct intel_engine_cs *engine; + enum intel_engine_id id; + mutex_lock(&i915->drm.struct_mutex); + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + + cancel_delayed_work_sync(&i915->gt.retire_work); + cancel_delayed_work_sync(&i915->gt.idle_work); + + mutex_lock(&i915->drm.struct_mutex); + for_each_engine(engine, i915, id) + mock_engine_free(engine); + i915_gem_context_fini(i915); + mutex_unlock(&i915->drm.struct_mutex); + + drain_workqueue(i915->wq); i915_gem_drain_freed_objects(i915); mutex_lock(&i915->drm.struct_mutex); @@ -39,6 +71,10 @@ static void mock_device_release(struct drm_device *dev) i915_gem_timeline_fini(&i915->gt.global_timeline); mutex_unlock(&i915->drm.struct_mutex); + destroy_workqueue(i915->wq); + + kmem_cache_destroy(i915->dependencies); + kmem_cache_destroy(i915->requests); kmem_cache_destroy(i915->vmas); kmem_cache_destroy(i915->objects); @@ -62,9 +98,19 @@ static void release_dev(struct device *dev) kfree(pdev); } +static void mock_retire_work_handler(struct work_struct *work) +{ +} + +static void mock_idle_work_handler(struct work_struct *work) +{ +} + struct drm_i915_private *mock_gem_device(void) { struct drm_i915_private *i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; struct pci_dev *pdev; int err; @@ -98,36 +144,81 @@ struct drm_i915_private *mock_gem_device(void) spin_lock_init(&i915->mm.object_stat_lock); + init_waitqueue_head(&i915->gpu_error.wait_queue); + init_waitqueue_head(&i915->gpu_error.reset_queue); + + i915->wq = alloc_ordered_workqueue("mock", 0); + if (!i915->wq) + goto put_device; + INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); init_llist_head(&i915->mm.free_list); INIT_LIST_HEAD(&i915->mm.unbound_list); INIT_LIST_HEAD(&i915->mm.bound_list); + ida_init(&i915->context_hw_ida); + + INIT_DELAYED_WORK(&i915->gt.retire_work, mock_retire_work_handler); + INIT_DELAYED_WORK(&i915->gt.idle_work, mock_idle_work_handler); + + i915->gt.awake = true; + i915->objects = KMEM_CACHE(mock_object, SLAB_HWCACHE_ALIGN); if (!i915->objects) - goto put_device; + goto err_wq; i915->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); if (!i915->vmas) goto err_objects; + i915->requests = KMEM_CACHE(mock_request, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU); + if (!i915->requests) + goto err_vmas; + + i915->dependencies = KMEM_CACHE(i915_dependency, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT); + if (!i915->dependencies) + goto err_requests; + mutex_lock(&i915->drm.struct_mutex); INIT_LIST_HEAD(&i915->gt.timelines); err = i915_gem_timeline_init__global(i915); if (err) { mutex_unlock(&i915->drm.struct_mutex); - goto err_vmas; + goto err_dependencies; } mock_init_ggtt(i915); mutex_unlock(&i915->drm.struct_mutex); + mkwrite_device_info(i915)->ring_mask = BIT(0); + i915->engine[RCS] = mock_engine(i915, "mock"); + if (!i915->engine[RCS]) + goto err_dependencies; + + i915->kernel_context = mock_context(i915, NULL); + if (!i915->kernel_context) + goto err_engine; + return i915; +err_engine: + for_each_engine(engine, i915, id) + mock_engine_free(engine); +err_dependencies: + kmem_cache_destroy(i915->dependencies); +err_requests: + kmem_cache_destroy(i915->requests); err_vmas: kmem_cache_destroy(i915->vmas); err_objects: kmem_cache_destroy(i915->objects); +err_wq: + destroy_workqueue(i915->wq); put_device: put_device(&pdev->dev); err: diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.h b/drivers/gpu/drm/i915/selftests/mock_gem_device.h index c557e33c3953..4cca4d57f52c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.h +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.h @@ -4,5 +4,6 @@ struct drm_i915_private; struct drm_i915_private *mock_gem_device(void); +void mock_device_flush(struct drm_i915_private *i915); #endif /* !__MOCK_GEM_DEVICE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c new file mode 100644 index 000000000000..e23242d1b88a --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -0,0 +1,44 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_request.h" + +struct drm_i915_gem_request * +mock_request(struct intel_engine_cs *engine, + struct i915_gem_context *context, + unsigned long delay) +{ + struct drm_i915_gem_request *request; + struct mock_request *mock; + + /* NB the i915->requests slab cache is enlarged to fit mock_request */ + request = i915_gem_request_alloc(engine, context); + if (!request) + return NULL; + + mock = container_of(request, typeof(*mock), base); + mock->delay = delay; + + return &mock->base; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h new file mode 100644 index 000000000000..cc76d4f4eb4e --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_request.h @@ -0,0 +1,44 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_REQUEST__ +#define __MOCK_REQUEST__ + +#include + +#include "../i915_gem_request.h" + +struct mock_request { + struct drm_i915_gem_request base; + + struct list_head link; + unsigned long delay; +}; + +struct drm_i915_gem_request * +mock_request(struct intel_engine_cs *engine, + struct i915_gem_context *context, + unsigned long delay); + +#endif /* !__MOCK_REQUEST__ */ From 44653988ef7c70bf7c55a5e642e22ce5446fb6b1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:20 +0000 Subject: [PATCH 0164/1299] drm/i915: Create a fake object for testing huge allocations We would like to be able to exercise huge allocations even on memory constrained devices. To do this we create an object that allocates only a few pages and remaps them across its whole range - each page is reused multiple times. We can therefore pretend we are rendering into a much larger object. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_gem_object.h | 18 ++- .../gpu/drm/i915/selftests/huge_gem_object.c | 135 ++++++++++++++++++ .../gpu/drm/i915/selftests/huge_gem_object.h | 45 ++++++ 4 files changed, 192 insertions(+), 7 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/huge_gem_object.c create mode 100644 drivers/gpu/drm/i915/selftests/huge_gem_object.h diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index db240a8f6b82..59e10b364ad0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5024,4 +5024,5 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/scatterlist.c" #include "selftests/mock_gem_device.c" +#include "selftests/huge_gem_object.c" #endif diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 290eaa7fc9eb..4114cc8a0b9b 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -167,14 +167,18 @@ struct drm_i915_gem_object { /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; - struct i915_gem_userptr { - uintptr_t ptr; - unsigned read_only :1; + union { + struct i915_gem_userptr { + uintptr_t ptr; + unsigned read_only :1; - struct i915_mm_struct *mm; - struct i915_mmu_object *mmu_object; - struct work_struct *work; - } userptr; + struct i915_mm_struct *mm; + struct i915_mmu_object *mmu_object; + struct work_struct *work; + } userptr; + + unsigned long scratch; + }; /** for phys allocated objects */ struct drm_dma_handle *phys_handle; diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c new file mode 100644 index 000000000000..4e681fc13be4 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -0,0 +1,135 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "huge_gem_object.h" + +static void huge_free_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + unsigned long nreal = obj->scratch / PAGE_SIZE; + struct scatterlist *sg; + + for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg)) + __free_page(sg_page(sg)); + + sg_free_table(pages); + kfree(pages); +} + +static struct sg_table * +huge_get_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) + const unsigned long nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct scatterlist *sg, *src, *end; + struct sg_table *pages; + unsigned long n; + + pages = kmalloc(sizeof(*pages), GFP); + if (!pages) + return ERR_PTR(-ENOMEM); + + if (sg_alloc_table(pages, npages, GFP)) { + kfree(pages); + return ERR_PTR(-ENOMEM); + } + + sg = pages->sgl; + for (n = 0; n < nreal; n++) { + struct page *page; + + page = alloc_page(GFP | __GFP_HIGHMEM); + if (!page) { + sg_mark_end(sg); + goto err; + } + + sg_set_page(sg, page, PAGE_SIZE, 0); + sg = __sg_next(sg); + } + if (nreal < npages) { + for (end = sg, src = pages->sgl; sg; sg = __sg_next(sg)) { + sg_set_page(sg, sg_page(src), PAGE_SIZE, 0); + src = __sg_next(src); + if (src == end) + src = pages->sgl; + } + } + + if (i915_gem_gtt_prepare_pages(obj, pages)) + goto err; + + return pages; + +err: + huge_free_pages(obj, pages); + return ERR_PTR(-ENOMEM); +#undef GFP +} + +static void huge_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + huge_free_pages(obj, pages); + + obj->mm.dirty = false; +} + +static const struct drm_i915_gem_object_ops huge_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = huge_get_pages, + .put_pages = huge_put_pages, +}; + +struct drm_i915_gem_object * +huge_gem_object(struct drm_i915_private *i915, + phys_addr_t phys_size, + dma_addr_t dma_size) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!phys_size || phys_size > dma_size); + GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(dma_size, I915_GTT_PAGE_SIZE)); + + if (overflows_type(dma_size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); + i915_gem_object_init(obj, &huge_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + obj->scratch = phys_size; + + return obj; +} diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/selftests/huge_gem_object.h new file mode 100644 index 000000000000..a6133a9e8029 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __HUGE_GEM_OBJECT_H +#define __HUGE_GEM_OBJECT_H + +struct drm_i915_gem_object * +huge_gem_object(struct drm_i915_private *i915, + phys_addr_t phys_size, + dma_addr_t dma_size); + +static inline phys_addr_t +huge_gem_object_phys_size(struct drm_i915_gem_object *obj) +{ + return obj->scratch; +} + +static inline dma_addr_t +huge_gem_object_dma_size(struct drm_i915_gem_object *obj) +{ + return obj->base.size; +} + +#endif /* !__HUGE_GEM_OBJECT_H */ From c835c5508358311d7922ea7b18bcaf611d1f8bb9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:21 +0000 Subject: [PATCH 0165/1299] drm/i915: Add selftests for i915_gem_request Simple starting point for adding seltests for i915_gem_request, first mock a device (with engines and contexts) that allows us to construct and execute a request, along with waiting for the request to complete. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-10-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 5 ++ .../gpu/drm/i915/selftests/i915_gem_request.c | 68 +++++++++++++++++++ .../drm/i915/selftests/i915_mock_selftests.h | 1 + 3 files changed, 74 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_request.c diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index f31deeb72703..24571a5289a4 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1198,3 +1198,8 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) engine_retire_requests(engine); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_request.c" +#include "selftests/i915_gem_request.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c new file mode 100644 index 000000000000..9921d1c317c0 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -0,0 +1,68 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" + +static int igt_add_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -ENOMEM; + + /* Basic preliminary test to create a request and let it loose! */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], + i915->kernel_context, + HZ / 10); + if (!request) + goto out_unlock; + + i915_add_request(request); + + err = 0; +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_gem_request_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_add_request), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + drm_dev_unref(&i915->drm); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 80458e2a2b04..bda982404ad3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -11,3 +11,4 @@ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ selftest(scatterlist, scatterlist_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) +selftest(requests, i915_gem_request_mock_selftests) From f1ae924d1871391f5a8b21d8220531830475a902 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:22 +0000 Subject: [PATCH 0166/1299] drm/i915: Add a simple request selftest for waiting A trivial kselftest to submit a request and wait upon it. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-11-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_request.c | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 9921d1c317c0..3cbf4735fe2f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -49,10 +49,81 @@ static int igt_add_request(void *arg) return err; } +static int igt_wait_request(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -EINVAL; + + /* Submit a request, then wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) != -ETIME) { + pr_err("request wait succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_gem_request_completed(request)) { + pr_err("request completed before submit!!\n"); + goto out_unlock; + } + + i915_add_request(request); + + if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); + goto out_unlock; + } + + if (i915_gem_request_completed(request)) { + pr_err("request completed immediately!\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T / 2) != -ETIME) { + pr_err("request wait succeeded (expected timeout!)\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { + pr_err("request wait timed out!\n"); + goto out_unlock; + } + + if (!i915_gem_request_completed(request)) { + pr_err("request not complete after waiting!\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { + pr_err("request wait timed out when already complete!\n"); + goto out_unlock; + } + + err = 0; +out_unlock: + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + int i915_gem_request_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_add_request), + SUBTEST(igt_wait_request), }; struct drm_i915_private *i915; int err; From 5fd4d112d26cae71c0f92f9d83c0d29a22510025 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:23 +0000 Subject: [PATCH 0167/1299] drm/i915: Add a simple fence selftest to i915_gem_request Do a quick selftest on in the interoperability of dma_fence_wait on a i915_gem_request. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-12-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_request.c | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 3cbf4735fe2f..6fa148b4ff30 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -119,11 +119,72 @@ static int igt_wait_request(void *arg) return err; } +static int igt_fence_wait(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -EINVAL; + + /* Submit a request, treat it as a fence and wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_locked; + } + mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */ + + if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { + pr_err("fence wait success before submit (expected timeout)!\n"); + goto out_device; + } + + mutex_lock(&i915->drm.struct_mutex); + i915_add_request(request); + mutex_unlock(&i915->drm.struct_mutex); + + if (dma_fence_is_signaled(&request->fence)) { + pr_err("fence signaled immediately!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { + pr_err("fence wait success after submit (expected timeout)!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out (expected success)!\n"); + goto out_device; + } + + if (!dma_fence_is_signaled(&request->fence)) { + pr_err("fence unsignaled after waiting!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out when complete (expected success)!\n"); + goto out_device; + } + + err = 0; +out_device: + mutex_lock(&i915->drm.struct_mutex); +out_locked: + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + int i915_gem_request_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_add_request), SUBTEST(igt_wait_request), + SUBTEST(igt_fence_wait), }; struct drm_i915_private *i915; int err; From b348090d6758cc391dc91f8a8233b18f0acc8458 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:24 +0000 Subject: [PATCH 0168/1299] drm/i915: Simple selftest to exercise live requests Just create several batches of requests and expect it to not fall over! Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-13-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_request.c | 147 ++++++++++++++++++ .../drm/i915/selftests/i915_live_selftests.h | 1 + 2 files changed, 148 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 6fa148b4ff30..610f89e3696c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -22,6 +22,8 @@ * */ +#include + #include "../i915_selftest.h" #include "mock_gem_device.h" @@ -198,3 +200,148 @@ int i915_gem_request_mock_selftests(void) return err; } + +struct live_test { + struct drm_i915_private *i915; + const char *func; + const char *name; + + unsigned int reset_count; +}; + +static int begin_live_test(struct live_test *t, + struct drm_i915_private *i915, + const char *func, + const char *name) +{ + int err; + + t->i915 = i915; + t->func = func; + t->name = name; + + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + if (err) { + pr_err("%s(%s): failed to idle before, with err=%d!", + func, name, err); + return err; + } + + i915_gem_retire_requests(i915); + + i915->gpu_error.missed_irq_rings = 0; + t->reset_count = i915_reset_count(&i915->gpu_error); + + return 0; +} + +static int end_live_test(struct live_test *t) +{ + struct drm_i915_private *i915 = t->i915; + + if (wait_for(intel_execlists_idle(i915), 1)) { + pr_err("%s(%s): GPU not idle\n", t->func, t->name); + return -EIO; + } + + if (t->reset_count != i915_reset_count(&i915->gpu_error)) { + pr_err("%s(%s): GPU was reset %d times!\n", + t->func, t->name, + i915_reset_count(&i915->gpu_error) - t->reset_count); + return -EIO; + } + + if (i915->gpu_error.missed_irq_rings) { + pr_err("%s(%s): Missed interrupts on engines %lx\n", + t->func, t->name, i915->gpu_error.missed_irq_rings); + return -EIO; + } + + return 0; +} + +static int live_nop_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + int err; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. + */ + + mutex_lock(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct drm_i915_gem_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_unlock; + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_unlock; + } + + /* This space is left intentionally blank. + * + * We do not actually want to perform any + * action with this request, we just want + * to measure the latency in allocation + * and submission of our breadcrumbs - + * ensuring that the bare request is sufficient + * for the system to work (i.e. proper HEAD + * tracking of the rings, interrupt handling, + * etc). It also gives us the lowest bounds + * for latency. + */ + + i915_add_request(request); + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_unlock; + + pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_gem_request_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_nop_request), + }; + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index f3e17cb10e05..09bf538826df 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -9,3 +9,4 @@ * Tests are executed in order by igt/drv_selftest */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ +selftest(requests, i915_gem_request_live_selftests) From cf8be13df2ce232ccf0a7a7220613b476f7fedfa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:25 +0000 Subject: [PATCH 0169/1299] drm/i915: Test simultaneously submitting requests to all engines Use a recursive-batch to busy spin on each to ensure that each is being run simultaneously. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-14-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_request.c | 184 ++++++++++++++++++ 1 file changed, 184 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 610f89e3696c..880a1e82393a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -338,10 +338,194 @@ static int live_nop_request(void *arg) return err; } +static struct i915_vma *recursive_batch(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx = i915->kernel_context; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(i915); + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + goto err; + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + if (gen >= 8) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *cmd++ = lower_32_bits(vma->node.start); + *cmd++ = upper_32_bits(vma->node.start); + } else if (gen >= 6) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; + *cmd++ = lower_32_bits(vma->node.start); + } else if (gen >= 4) { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cmd++ = lower_32_bits(vma->node.start); + } else { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | 1; + *cmd++ = lower_32_bits(vma->node.start); + } + *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ + + wmb(); + i915_gem_object_unpin_map(obj); + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int recursive_batch_resolve(struct i915_vma *batch) +{ + u32 *cmd; + + cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); + + *cmd = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_object_unpin_map(batch->obj); + + return 0; +} + +static int live_all_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct drm_i915_gem_request *request[I915_NUM_ENGINES]; + struct i915_vma *batch; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines simultaneously. We + * send a recursive batch to each engine - checking that we don't + * block doing so, and that they don't complete too soon. + */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch, err=%d\n", __func__, err); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + request[id] = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + pr_err("%s: Request allocation failed with err=%d\n", + __func__, err); + goto out_request; + } + + err = engine->emit_flush(request[id], EMIT_INVALIDATE); + GEM_BUG_ON(err); + + err = i915_switch_context(request[id]); + GEM_BUG_ON(err); + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[id]->batch = batch; + + if (!i915_gem_object_has_active_reference(batch->obj)) { + i915_gem_object_get(batch->obj); + i915_gem_object_set_active_reference(batch->obj); + } + + i915_vma_move_to_active(batch, request[id], 0); + i915_gem_request_get(request[id]); + i915_add_request(request[id]); + } + + for_each_engine(engine, i915, id) { + if (i915_gem_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + } + + err = recursive_batch_resolve(batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); + goto out_request; + } + + for_each_engine(engine, i915, id) { + long timeout; + + timeout = i915_wait_request(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_gem_request_completed(request[id])); + i915_gem_request_put(request[id]); + request[id] = NULL; + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) + if (request[id]) + i915_gem_request_put(request[id]); + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + int i915_gem_request_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_request), + SUBTEST(live_all_engines), }; return i915_subtests(tests, i915); } From 97b592b11a262356079aaa66eeb97c077515212c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:26 +0000 Subject: [PATCH 0170/1299] drm/i915: Test request ordering between engines A request on one engine with a dependency on a request on another engine must wait for completion of the first request before starting. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-15-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_request.c | 132 ++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 880a1e82393a..e946488d6af2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -521,11 +521,143 @@ static int live_all_engines(void *arg) return err; } +static int live_sequential_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request[I915_NUM_ENGINES] = {}; + struct drm_i915_gem_request *prev = NULL; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines sequentially, such + * that each successive request waits for the earlier ones. This + * tests that we don't execute requests out of order, even though + * they are running on independent engines. + */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + for_each_engine(engine, i915, id) { + struct i915_vma *batch; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch for %s, err=%d\n", + __func__, engine->name, err); + goto out_unlock; + } + + request[id] = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + pr_err("%s: Request allocation failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + if (prev) { + err = i915_gem_request_await_dma_fence(request[id], + &prev->fence); + if (err) { + i915_add_request(request[id]); + pr_err("%s: Request await failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + } + + err = engine->emit_flush(request[id], EMIT_INVALIDATE); + GEM_BUG_ON(err); + + err = i915_switch_context(request[id]); + GEM_BUG_ON(err); + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[id]->batch = batch; + + i915_vma_move_to_active(batch, request[id], 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_get(batch); + + i915_gem_request_get(request[id]); + i915_add_request(request[id]); + + prev = request[id]; + } + + for_each_engine(engine, i915, id) { + long timeout; + + if (i915_gem_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + + err = recursive_batch_resolve(request[id]->batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", + __func__, err); + goto out_request; + } + + timeout = i915_wait_request(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_gem_request_completed(request[id])); + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) { + u32 *cmd; + + if (!request[id]) + break; + + cmd = i915_gem_object_pin_map(request[id]->batch->obj, + I915_MAP_WC); + if (!IS_ERR(cmd)) { + *cmd = MI_BATCH_BUFFER_END; + wmb(); + i915_gem_object_unpin_map(request[id]->batch->obj); + } + + i915_vma_put(request[id]->batch); + i915_gem_request_put(request[id]); + } +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + int i915_gem_request_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_request), SUBTEST(live_all_engines), + SUBTEST(live_sequential_engines), }; return i915_subtests(tests, i915); } From cd3862dc6e89817dbeefa02881400da926ffceeb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:27 +0000 Subject: [PATCH 0171/1299] drm/i915: Live testing of empty requests Primarily to emphasize the difference between just advancing the breadcrumb using a bare request and the overhead of dispatching an execbuffer. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-16-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_request.c | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index e946488d6af2..9d056a86723d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -338,6 +338,160 @@ static int live_nop_request(void *arg) return err; } +static struct i915_vma *empty_batch(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static struct drm_i915_gem_request * +empty_request(struct intel_engine_cs *engine, + struct i915_vma *batch) +{ + struct drm_i915_gem_request *request; + int err; + + request = i915_gem_request_alloc(engine, + engine->i915->kernel_context); + if (IS_ERR(request)) + return request; + + err = engine->emit_flush(request, EMIT_INVALIDATE); + if (err) + goto out_request; + + err = i915_switch_context(request); + if (err) + goto out_request; + + err = engine->emit_bb_start(request, + batch->node.start, + batch->node.size, + I915_DISPATCH_SECURE); + if (err) + goto out_request; + +out_request: + __i915_add_request(request, err == 0); + return err ? ERR_PTR(err) : request; +} + +static int live_empty_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + struct i915_vma *batch; + unsigned int id; + int err = 0; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. + */ + + mutex_lock(&i915->drm.struct_mutex); + + batch = empty_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct drm_i915_gem_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_batch; + + /* Warmup / preload */ + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_batch; + + pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_batch: + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + static struct i915_vma *recursive_batch(struct drm_i915_private *i915) { struct i915_gem_context *ctx = i915->kernel_context; @@ -658,6 +812,7 @@ int i915_gem_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_nop_request), SUBTEST(live_all_engines), SUBTEST(live_sequential_engines), + SUBTEST(live_empty_request), }; return i915_subtests(tests, i915); } From 8335fd65ce6c880876ea0f94f427aa2499bcd05a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:28 +0000 Subject: [PATCH 0172/1299] drm/i915: Add selftests for object allocation, phys The phys object is a rarely used device (only very old machines require a chunk of physically contiguous pages for a few hardware interactions). As such, it is not exercised by CI and to combat that we want to add a test that exercises the phys object on all platforms. v2: Always set err on error paths and not rely on inheriting the err. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-17-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 1 + .../gpu/drm/i915/selftests/i915_gem_object.c | 120 ++++++++++++++++++ .../drm/i915/selftests/i915_mock_selftests.h | 1 + 3 files changed, 122 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_object.c diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 59e10b364ad0..ec455f47958a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5025,4 +5025,5 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, #include "selftests/scatterlist.c" #include "selftests/mock_gem_device.c" #include "selftests/huge_gem_object.c" +#include "selftests/i915_gem_object.c" #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c new file mode 100644 index 000000000000..0d76c5e0e2ff --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -0,0 +1,120 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" + +static int igt_gem_object(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err = -ENOMEM; + + /* Basic test to ensure we can create an object */ + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_err("i915_gem_object_create failed, err=%d\n", err); + goto out; + } + + err = 0; + i915_gem_object_put(obj); +out: + return err; +} + +static int igt_phys_object(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err; + + /* Create an object and bind it to a contiguous set of physical pages, + * i.e. exercise the i915_gem_object_phys API. + */ + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_err("i915_gem_object_create failed, err=%d\n", err); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_object_attach_phys(obj, PAGE_SIZE); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); + goto out_obj; + } + + if (obj->ops != &i915_gem_phys_ops) { + pr_err("i915_gem_object_attach_phys did not create a phys object\n"); + err = -EINVAL; + goto out_obj; + } + + if (!atomic_read(&obj->mm.pages_pin_count)) { + pr_err("i915_gem_object_attach_phys did not pin its phys pages\n"); + err = -EINVAL; + goto out_obj; + } + + /* Make the object dirty so that put_pages must do copy back the data */ + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_object_set_to_gtt_domain(obj, true); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n", + err); + goto out_obj; + } + +out_obj: + i915_gem_object_put(obj); +out: + return err; +} + +int i915_gem_object_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_object), + SUBTEST(igt_phys_object), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_unref(&i915->drm); + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index bda982404ad3..2ed94e3a71b7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -12,3 +12,4 @@ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ selftest(scatterlist, scatterlist_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(requests, i915_gem_request_mock_selftests) +selftest(objects, i915_gem_object_mock_selftests) From 12d30d879398acf14cb2d5300a1c4b2ec777ee02 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:29 +0000 Subject: [PATCH 0173/1299] drm/i915: Add a live seftest for GEM objects Starting with a placeholder test just to reassure that we can create a test object, Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-18-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_object.c | 49 +++++++++++++++++++ .../drm/i915/selftests/i915_live_selftests.h | 1 + 2 files changed, 50 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 0d76c5e0e2ff..895e1e85653f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -100,6 +100,46 @@ static int igt_phys_object(void *arg) return err; } +static int igt_gem_huge(void *arg) +{ + const unsigned int nreal = 509; /* just to be awkward */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + unsigned int n; + int err; + + /* Basic sanitycheck of our huge fake object allocation */ + + obj = huge_gem_object(i915, + nreal * PAGE_SIZE, + i915->ggtt.base.total + PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + for (n = 0; n < obj->base.size / PAGE_SIZE; n++) { + if (i915_gem_object_get_page(obj, n) != + i915_gem_object_get_page(obj, n % nreal)) { + pr_err("Page lookup mismatch at index %u [%u]\n", + n, n % nreal); + err = -EINVAL; + goto out_unpin; + } + } + +out_unpin: + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + int i915_gem_object_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -118,3 +158,12 @@ int i915_gem_object_mock_selftests(void) drm_dev_unref(&i915->drm); return err; } + +int i915_gem_object_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_huge), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 09bf538826df..d720b03e2c01 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -10,3 +10,4 @@ */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(requests, i915_gem_request_live_selftests) +selftest(objects, i915_gem_object_live_selftests) From 48d898172075e55cb6fdea52410795b4aeff1352 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:30 +0000 Subject: [PATCH 0174/1299] drm/i915: Test partial mappings Create partial mappings to cover a large object, investigating tiling (fenced regions) and VMA reuse. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-19-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_object.c | 293 ++++++++++++++++++ 1 file changed, 293 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 895e1e85653f..99648f91194f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -140,6 +140,298 @@ static int igt_gem_huge(void *arg) return err; } +struct tile { + unsigned int width; + unsigned int height; + unsigned int stride; + unsigned int size; + unsigned int tiling; + unsigned int swizzle; +}; + +static u64 swizzle_bit(unsigned int bit, u64 offset) +{ + return (offset & BIT_ULL(bit)) >> (bit - 6); +} + +static u64 tiled_offset(const struct tile *tile, u64 v) +{ + u64 x, y; + + if (tile->tiling == I915_TILING_NONE) + return v; + + y = div64_u64_rem(v, tile->stride, &x); + v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height; + + if (tile->tiling == I915_TILING_X) { + v += y * tile->width; + v += div64_u64_rem(x, tile->width, &x) << tile->size; + v += x; + } else { + const unsigned int ytile_span = 16; + const unsigned int ytile_height = 32 * ytile_span; + + v += y * ytile_span; + v += div64_u64_rem(x, ytile_span, &x) * ytile_height; + v += x; + } + + switch (tile->swizzle) { + case I915_BIT_6_SWIZZLE_9: + v ^= swizzle_bit(9, v); + break; + case I915_BIT_6_SWIZZLE_9_10: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v); + break; + case I915_BIT_6_SWIZZLE_9_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v); + break; + case I915_BIT_6_SWIZZLE_9_10_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v); + break; + } + + return v; +} + +static int check_partial_mapping(struct drm_i915_gem_object *obj, + const struct tile *tile, + unsigned long end_time) +{ + const unsigned int nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_vma *vma; + unsigned long page; + int err; + + if (igt_timeout(end_time, + "%s: timed out before tiling=%d stride=%d\n", + __func__, tile->tiling, tile->stride)) + return -EINTR; + + err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); + if (err) + return err; + + GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); + GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); + + for_each_prime_number_from(page, 1, npages) { + struct i915_ggtt_view view = + compute_partial_view(obj, page, MIN_CHUNK_PAGES); + u32 __iomem *io; + struct page *p; + unsigned int n; + u64 offset; + u32 *cpu; + + GEM_BUG_ON(view.partial.size > nreal); + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) { + pr_err("Failed to pin partial view: offset=%lu\n", + page); + return PTR_ERR(vma); + } + + n = page - view.partial.offset; + GEM_BUG_ON(n >= view.partial.size); + + io = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(io)) { + pr_err("Failed to iomap partial view: offset=%lu\n", + page); + return PTR_ERR(io); + } + + err = i915_vma_get_fence(vma); + if (err) { + pr_err("Failed to get fence for partial view: offset=%lu\n", + page); + i915_vma_unpin_iomap(vma); + return err; + } + + iowrite32(page, io + n * PAGE_SIZE/sizeof(*io)); + i915_vma_unpin_iomap(vma); + + offset = tiled_offset(tile, page << PAGE_SHIFT); + if (offset >= obj->base.size) + continue; + + i915_gem_object_flush_gtt_write_domain(obj); + + p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + cpu = kmap(p) + offset_in_page(offset); + drm_clflush_virt_range(cpu, sizeof(*cpu)); + if (*cpu != (u32)page) { + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + page, n, + view.partial.offset, + view.partial.size, + vma->size >> PAGE_SHIFT, + tile_row_pages(obj), + vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride, + offset >> PAGE_SHIFT, + (unsigned int)offset_in_page(offset), + offset, + (u32)page, *cpu); + err = -EINVAL; + } + *cpu = 0; + drm_clflush_virt_range(cpu, sizeof(*cpu)); + kunmap(p); + if (err) + return err; + } + + return 0; +} + +static int igt_partial_tiling(void *arg) +{ + const unsigned int nreal = 1 << 12; /* largest tile row x2 */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int tiling; + int err; + + /* We want to check the page mapping and fencing of a large object + * mmapped through the GTT. The object we create is larger than can + * possibly be mmaped as a whole, and so we must use partial GGTT vma. + * We then check that a write through each partial GGTT vma ends up + * in the right set of pages within the object, and with the expected + * tiling, which we verify by manual swizzling. + */ + + obj = huge_gem_object(i915, + nreal << PAGE_SHIFT, + (1 + next_prime_number(i915->ggtt.base.total >> PAGE_SHIFT)) << PAGE_SHIFT); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + + if (1) { + IGT_TIMEOUT(end); + struct tile tile; + + tile.height = 1; + tile.width = 1; + tile.size = 0; + tile.stride = 0; + tile.swizzle = I915_BIT_6_SWIZZLE_NONE; + tile.tiling = I915_TILING_NONE; + + err = check_partial_mapping(obj, &tile, end); + if (err && err != -EINTR) + goto out_unlock; + } + + for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) { + IGT_TIMEOUT(end); + unsigned int max_pitch; + unsigned int pitch; + struct tile tile; + + tile.tiling = tiling; + switch (tiling) { + case I915_TILING_X: + tile.swizzle = i915->mm.bit_6_swizzle_x; + break; + case I915_TILING_Y: + tile.swizzle = i915->mm.bit_6_swizzle_y; + break; + } + + if (tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN || + tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) + continue; + + if (INTEL_GEN(i915) <= 2) { + tile.height = 16; + tile.width = 128; + tile.size = 11; + } else if (tile.tiling == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(i915)) { + tile.height = 32; + tile.width = 128; + tile.size = 12; + } else { + tile.height = 8; + tile.width = 512; + tile.size = 12; + } + + if (INTEL_GEN(i915) < 4) + max_pitch = 8192 / tile.width; + else if (INTEL_GEN(i915) < 7) + max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; + else + max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; + + for (pitch = max_pitch; pitch; pitch >>= 1) { + tile.stride = tile.width * pitch; + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + + if (pitch > 2 && INTEL_GEN(i915) >= 4) { + tile.stride = tile.width * (pitch - 1); + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + + if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { + tile.stride = tile.width * (pitch + 1); + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + } + + if (INTEL_GEN(i915) >= 4) { + for_each_prime_number(pitch, max_pitch) { + tile.stride = tile.width * pitch; + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + } + +next_tiling: ; + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + int i915_gem_object_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -163,6 +455,7 @@ int i915_gem_object_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_gem_huge), + SUBTEST(igt_partial_tiling), }; return i915_subtests(tests, i915); From 3d81d589d6e3c89b687771074f65cb8f3b59ccf3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:31 +0000 Subject: [PATCH 0175/1299] drm/i915: Test exhaustion of the mmap space An unlikely error condition that we can simulate by stealing most of the range before trying to insert new objects. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-20-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_object.c | 138 ++++++++++++++++++ 1 file changed, 138 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 99648f91194f..67d82bf1407f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -25,6 +25,7 @@ #include "../i915_selftest.h" #include "mock_gem_device.h" +#include "huge_gem_object.h" static int igt_gem_object(void *arg) { @@ -432,6 +433,142 @@ next_tiling: ; return err; } +static int make_obj_busy(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + int err; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + i915_vma_move_to_active(vma, rq, 0); + i915_add_request(rq); + + i915_gem_object_set_active_reference(obj); + i915_vma_unpin(vma); + return 0; +} + +static bool assert_mmap_offset(struct drm_i915_private *i915, + unsigned long size, + int expected) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_create_mmap_offset(obj); + i915_gem_object_put(obj); + + return err == expected; +} + +static int igt_mmap_offset_exhaustion(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; + struct drm_i915_gem_object *obj; + struct drm_mm_node resv, *hole; + u64 hole_start, hole_end; + int loop, err; + + /* Trim the device mmap space to only a page */ + memset(&resv, 0, sizeof(resv)); + drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { + resv.start = hole_start; + resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ + err = drm_mm_reserve_node(mm, &resv); + if (err) { + pr_err("Failed to trim VMA manager, err=%d\n", err); + return err; + } + break; + } + + /* Just fits! */ + if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) { + pr_err("Unable to insert object into single page hole\n"); + err = -EINVAL; + goto out; + } + + /* Too large */ + if (!assert_mmap_offset(i915, 2*PAGE_SIZE, -ENOSPC)) { + pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n"); + err = -EINVAL; + goto out; + } + + /* Fill the hole, further allocation attempts should then fail */ + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_create_mmap_offset(obj); + if (err) { + pr_err("Unable to insert object into reclaimed hole\n"); + goto err_obj; + } + + if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) { + pr_err("Unexpectedly succeeded in inserting object into no holes!\n"); + err = -EINVAL; + goto err_obj; + } + + i915_gem_object_put(obj); + + /* Now fill with busy dead objects that we expect to reap */ + for (loop = 0; loop < 3; loop++) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + err = make_obj_busy(obj); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("[loop %d] Failed to busy the object\n", loop); + goto err_obj; + } + + GEM_BUG_ON(!i915_gem_object_is_active(obj)); + err = i915_gem_object_create_mmap_offset(obj); + if (err) { + pr_err("[loop %d] i915_gem_object_create_mmap_offset failed with err=%d\n", + loop, err); + goto out; + } + } + +out: + drm_mm_remove_node(&resv); + return err; +err_obj: + i915_gem_object_put(obj); + goto out; +} + int i915_gem_object_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -456,6 +593,7 @@ int i915_gem_object_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(igt_gem_huge), SUBTEST(igt_partial_tiling), + SUBTEST(igt_mmap_offset_exhaustion), }; return i915_subtests(tests, i915); From 170594502cf591fd0789d7e5239937b1a87af4c6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:32 +0000 Subject: [PATCH 0176/1299] drm/i915: Test coherency of and barriers between cache domains Write into an object using WB, WC, GTT, and GPU paths and make sure that our internal API is sufficient to ensure coherent reads and writes. v2: Avoid invalid free upon allocation error Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-21-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_selftest.h | 2 + .../drm/i915/selftests/i915_gem_coherency.c | 384 ++++++++++++++++++ .../drm/i915/selftests/i915_live_selftests.h | 1 + 4 files changed, 388 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_coherency.c diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ec455f47958a..82489ed10373 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5026,4 +5026,5 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, #include "selftests/mock_gem_device.c" #include "selftests/huge_gem_object.c" #include "selftests/i915_gem_object.c" +#include "selftests/i915_gem_coherency.c" #endif diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index 8b5994caa301..e43777b72211 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -99,4 +99,6 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...); #define igt_timeout(t, fmt, ...) \ __igt_timeout((t), KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +#define igt_can_mi_store_dword_imm(D) (INTEL_GEN(D) > 2) + #endif /* !__I915_SELFTEST_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c new file mode 100644 index 000000000000..483cbe02d983 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -0,0 +1,384 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "../i915_selftest.h" +#include "i915_random.h" + +static int cpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + unsigned int needs_clflush; + struct page *page; + typeof(v) *map; + int err; + + err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + if (needs_clflush & CLFLUSH_BEFORE) + clflush(map+offset_in_page(offset) / sizeof(*map)); + map[offset_in_page(offset) / sizeof(*map)] = v; + if (needs_clflush & CLFLUSH_AFTER) + clflush(map+offset_in_page(offset) / sizeof(*map)); + kunmap_atomic(map); + + i915_gem_obj_finish_shmem_access(obj); + return 0; +} + +static int cpu_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + unsigned int needs_clflush; + struct page *page; + typeof(v) map; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + if (needs_clflush & CLFLUSH_BEFORE) + clflush(map+offset_in_page(offset) / sizeof(*map)); + *v = map[offset_in_page(offset) / sizeof(*map)]; + kunmap_atomic(map); + + i915_gem_obj_finish_shmem_access(obj); + return 0; +} + +static int gtt_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct i915_vma *vma; + typeof(v) *map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + map[offset / sizeof(*map)] = v; + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int gtt_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + struct i915_vma *vma; + typeof(v) map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = map[offset / sizeof(*map)]; + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int wc_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + typeof(v) *map; + int err; + + /* XXX GTT write followed by WC write go missing */ + i915_gem_object_flush_gtt_write_domain(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + map[offset / sizeof(*map)] = v; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int wc_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + typeof(v) map; + int err; + + /* XXX WC write followed by GTT write go missing */ + i915_gem_object_flush_gtt_write_domain(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = map[offset / sizeof(*map)]; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int gpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + err = intel_ring_begin(rq, 4); + if (err) { + __i915_add_request(rq, false); + i915_vma_unpin(vma); + return err; + } + + if (INTEL_GEN(i915) >= 8) { + intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM_GEN4 | 1 << 22); + intel_ring_emit(rq->ring, lower_32_bits(i915_ggtt_offset(vma) + offset)); + intel_ring_emit(rq->ring, upper_32_bits(i915_ggtt_offset(vma) + offset)); + intel_ring_emit(rq->ring, v); + } else if (INTEL_GEN(i915) >= 4) { + intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM_GEN4 | 1 << 22); + intel_ring_emit(rq->ring, 0); + intel_ring_emit(rq->ring, i915_ggtt_offset(vma) + offset); + intel_ring_emit(rq->ring, v); + } else { + intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM | 1 << 22); + intel_ring_emit(rq->ring, i915_ggtt_offset(vma) + offset); + intel_ring_emit(rq->ring, v); + intel_ring_emit(rq->ring, MI_NOOP); + } + intel_ring_advance(rq->ring); + + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unpin(vma); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &rq->fence); + reservation_object_unlock(obj->resv); + + __i915_add_request(rq, true); + + return 0; +} + +static bool always_valid(struct drm_i915_private *i915) +{ + return true; +} + +static bool needs_mi_store_dword(struct drm_i915_private *i915) +{ + return igt_can_mi_store_dword_imm(i915); +} + +static const struct igt_coherency_mode { + const char *name; + int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); + int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); + bool (*valid)(struct drm_i915_private *i915); +} igt_coherency_mode[] = { + { "cpu", cpu_set, cpu_get, always_valid }, + { "gtt", gtt_set, gtt_get, always_valid }, + { "wc", wc_set, wc_get, always_valid }, + { "gpu", gpu_set, NULL, needs_mi_store_dword }, + { }, +}; + +static int igt_gem_coherency(void *arg) +{ + const unsigned int ncachelines = PAGE_SIZE/64; + I915_RND_STATE(prng); + struct drm_i915_private *i915 = arg; + const struct igt_coherency_mode *read, *write, *over; + struct drm_i915_gem_object *obj; + unsigned long count, n; + u32 *offsets, *values; + int err; + + /* We repeatedly write, overwrite and read from a sequence of + * cachelines in order to try and detect incoherency (unflushed writes + * from either the CPU or GPU). Each setter/getter uses our cache + * domain API which should prevent incoherency. + */ + + offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL); + if (!offsets) + return -ENOMEM; + for (count = 0; count < ncachelines; count++) + offsets[count] = count * 64 + 4 * (count % 16); + + values = offsets + ncachelines; + + mutex_lock(&i915->drm.struct_mutex); + for (over = igt_coherency_mode; over->name; over++) { + if (!over->set) + continue; + + if (!over->valid(i915)) + continue; + + for (write = igt_coherency_mode; write->name; write++) { + if (!write->set) + continue; + + if (!write->valid(i915)) + continue; + + for (read = igt_coherency_mode; read->name; read++) { + if (!read->get) + continue; + + if (!read->valid(i915)) + continue; + + for_each_prime_number_from(count, 1, ncachelines) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto unlock; + } + + i915_random_reorder(offsets, ncachelines, &prng); + for (n = 0; n < count; n++) + values[n] = prandom_u32_state(&prng); + + for (n = 0; n < count; n++) { + err = over->set(obj, offsets[n], ~values[n]); + if (err) { + pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", + n, count, over->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + err = write->set(obj, offsets[n], values[n]); + if (err) { + pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", + n, count, write->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + u32 found; + + err = read->get(obj, offsets[n], &found); + if (err) { + pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", + n, count, read->name, err); + goto put_object; + } + + if (found != values[n]) { + pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n", + n, count, over->name, + write->name, values[n], + read->name, found, + ~values[n], offsets[n]); + err = -EINVAL; + goto put_object; + } + } + + __i915_gem_object_release_unless_active(obj); + } + } + } + } +unlock: + mutex_unlock(&i915->drm.struct_mutex); + kfree(offsets); + return err; + +put_object: + __i915_gem_object_release_unless_active(obj); + goto unlock; +} + +int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_coherency), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index d720b03e2c01..8f0c207dbe22 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -11,3 +11,4 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(requests, i915_gem_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) +selftest(coherency, i915_gem_coherency_live_selftests) From 26e7a2a17971400fee4fcacc0132439c043fd0e6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:33 +0000 Subject: [PATCH 0177/1299] drm/i915: Move uncore selfchecks to live selftest infrastructure Now that the kselftest infrastructure exists, put it to use and add to it the existing consistency checks on the fw register lookup tables. v2: s/tabke/table/ Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-22-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_uncore.c | 52 +-------- .../drm/i915/selftests/i915_live_selftests.h | 1 + drivers/gpu/drm/i915/selftests/intel_uncore.c | 100 ++++++++++++++++++ 3 files changed, 105 insertions(+), 48 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/intel_uncore.c diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 1ff8fd9911d7..441c51fd9746 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -642,33 +642,6 @@ find_fw_domain(struct drm_i915_private *dev_priv, u32 offset) return entry->domains; } -static void -intel_fw_table_check(struct drm_i915_private *dev_priv) -{ - const struct intel_forcewake_range *ranges; - unsigned int num_ranges; - s32 prev; - unsigned int i; - - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG)) - return; - - ranges = dev_priv->uncore.fw_domains_table; - if (!ranges) - return; - - num_ranges = dev_priv->uncore.fw_domains_table_entries; - - for (i = 0, prev = -1; i < num_ranges; i++, ranges++) { - WARN_ON_ONCE(IS_GEN9(dev_priv) && - (prev + 1) != (s32)ranges->start); - WARN_ON_ONCE(prev >= (s32)ranges->start); - prev = ranges->start; - WARN_ON_ONCE(prev >= (s32)ranges->end); - prev = ranges->end; - } -} - #define GEN_FW_RANGE(s, e, d) \ { .start = (s), .end = (e), .domains = (d) } @@ -707,23 +680,6 @@ static const i915_reg_t gen8_shadowed_regs[] = { /* TODO: Other registers are not yet used */ }; -static void intel_shadow_table_check(void) -{ - const i915_reg_t *reg = gen8_shadowed_regs; - s32 prev; - u32 offset; - unsigned int i; - - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG)) - return; - - for (i = 0, prev = -1; i < ARRAY_SIZE(gen8_shadowed_regs); i++, reg++) { - offset = i915_mmio_reg_offset(*reg); - WARN_ON_ONCE(prev >= (s32)offset); - prev = offset; - } -} - static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) { u32 offset = i915_mmio_reg_offset(*reg); @@ -1384,10 +1340,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) break; } - intel_fw_table_check(dev_priv); - if (INTEL_GEN(dev_priv) >= 8) - intel_shadow_table_check(); - i915_check_and_clear_faults(dev_priv); } #undef ASSIGN_WRITE_MMIO_VFUNCS @@ -1905,3 +1857,7 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, return fw_domains; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_uncore.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 8f0c207dbe22..be7892d05290 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -9,6 +9,7 @@ * Tests are executed in order by igt/drv_selftest */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ +selftest(uncore, intel_uncore_live_selftests) selftest(requests, i915_gem_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c new file mode 100644 index 000000000000..5f7bd5c9428f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -0,0 +1,100 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +static int intel_fw_table_check(struct drm_i915_private *i915) +{ + const struct intel_forcewake_range *ranges; + unsigned int num_ranges, i; + s32 prev; + + ranges = i915->uncore.fw_domains_table; + if (!ranges) + return 0; + + num_ranges = i915->uncore.fw_domains_table_entries; + for (i = 0, prev = -1; i < num_ranges; i++, ranges++) { + /* Check that the table is watertight */ + if (IS_GEN9(i915) && (prev + 1) != (s32)ranges->start) { + pr_err("%s: entry[%d]:(%x, %x) is not watertight to previous (%x)\n", + __func__, i, ranges->start, ranges->end, prev); + return -EINVAL; + } + + /* Check that the table never goes backwards */ + if (prev >= (s32)ranges->start) { + pr_err("%s: entry[%d]:(%x, %x) is less than the previous (%x)\n", + __func__, i, ranges->start, ranges->end, prev); + return -EINVAL; + } + + /* Check that the entry is valid */ + if (ranges->start >= ranges->end) { + pr_err("%s: entry[%d]:(%x, %x) has negative length\n", + __func__, i, ranges->start, ranges->end); + return -EINVAL; + } + + prev = ranges->end; + } + + return 0; +} + +static int intel_shadow_table_check(void) +{ + const i915_reg_t *reg = gen8_shadowed_regs; + unsigned int i; + s32 prev; + + for (i = 0, prev = -1; i < ARRAY_SIZE(gen8_shadowed_regs); i++, reg++) { + u32 offset = i915_mmio_reg_offset(*reg); + + if (prev >= (s32)offset) { + pr_err("%s: entry[%d]:(%x) is before previous (%x)\n", + __func__, i, offset, prev); + return -EINVAL; + } + + prev = offset; + } + + return 0; +} + +int intel_uncore_live_selftests(struct drm_i915_private *i915) +{ + int err; + + err = intel_fw_table_check(i915); + if (err) + return err; + + err = intel_shadow_table_check(); + if (err) + return err; + + return 0; +} From 9852d543a85f3aaeb408259f5e462c4ce7f1dc99 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:34 +0000 Subject: [PATCH 0178/1299] drm/i915: Test all fw tables during mock selftests In addition to just testing the fw table we load, during the initial mock testing we can test that all tables are valid (so the testing is not limited to just the platforms that load that particular table). Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-23-chris@chris-wilson.co.uk --- .../drm/i915/selftests/i915_mock_selftests.h | 1 + drivers/gpu/drm/i915/selftests/intel_uncore.c | 51 ++++++++++++++----- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 2ed94e3a71b7..c61e08de7913 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -10,6 +10,7 @@ */ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ selftest(scatterlist, scatterlist_mock_selftests) +selftest(uncore, intel_uncore_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(requests, i915_gem_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 5f7bd5c9428f..2c2a879857da 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -24,20 +24,16 @@ #include "../i915_selftest.h" -static int intel_fw_table_check(struct drm_i915_private *i915) +static int intel_fw_table_check(const struct intel_forcewake_range *ranges, + unsigned int num_ranges, + bool is_watertight) { - const struct intel_forcewake_range *ranges; - unsigned int num_ranges, i; + unsigned int i; s32 prev; - ranges = i915->uncore.fw_domains_table; - if (!ranges) - return 0; - - num_ranges = i915->uncore.fw_domains_table_entries; for (i = 0, prev = -1; i < num_ranges; i++, ranges++) { /* Check that the table is watertight */ - if (IS_GEN9(i915) && (prev + 1) != (s32)ranges->start) { + if (is_watertight && (prev + 1) != (s32)ranges->start) { pr_err("%s: entry[%d]:(%x, %x) is not watertight to previous (%x)\n", __func__, i, ranges->start, ranges->end, prev); return -EINVAL; @@ -84,13 +80,26 @@ static int intel_shadow_table_check(void) return 0; } -int intel_uncore_live_selftests(struct drm_i915_private *i915) +int intel_uncore_mock_selftests(void) { - int err; + struct { + const struct intel_forcewake_range *ranges; + unsigned int num_ranges; + bool is_watertight; + } fw[] = { + { __vlv_fw_ranges, ARRAY_SIZE(__vlv_fw_ranges), false }, + { __chv_fw_ranges, ARRAY_SIZE(__chv_fw_ranges), false }, + { __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true }, + }; + int err, i; - err = intel_fw_table_check(i915); - if (err) - return err; + for (i = 0; i < ARRAY_SIZE(fw); i++) { + err = intel_fw_table_check(fw[i].ranges, + fw[i].num_ranges, + fw[i].is_watertight); + if (err) + return err; + } err = intel_shadow_table_check(); if (err) @@ -98,3 +107,17 @@ int intel_uncore_live_selftests(struct drm_i915_private *i915) return 0; } + +int intel_uncore_live_selftests(struct drm_i915_private *i915) +{ + int err; + + /* Confirm the table we load is still valid */ + err = intel_fw_table_check(i915->uncore.fw_domains_table, + i915->uncore.fw_domains_table_entries, + INTEL_GEN(i915) >= 9); + if (err) + return err; + + return 0; +} From a8fb2bad82b64047d9c6090b859467cacc69e9d2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:35 +0000 Subject: [PATCH 0179/1299] drm/i915: Sanity check all registers for matching fw domains Add a late selftest that walks over all forcewake registers (those below 0x40000) and uses the mmio debug register to check to see if any are unclaimed. This is possible if we fail to wake the appropriate powerwells for the register. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-24-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_uncore.c | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 2c2a879857da..2d0fef2cfca6 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -108,6 +108,61 @@ int intel_uncore_mock_selftests(void) return 0; } +static int intel_uncore_check_forcewake_domains(struct drm_i915_private *dev_priv) +{ +#define FW_RANGE 0x40000 + unsigned long *valid; + u32 offset; + int err; + + if (!HAS_FPGA_DBG_UNCLAIMED(dev_priv) && + !IS_VALLEYVIEW(dev_priv) && + !IS_CHERRYVIEW(dev_priv)) + return 0; + + if (IS_VALLEYVIEW(dev_priv)) /* XXX system lockup! */ + return 0; + + if (IS_BROADWELL(dev_priv)) /* XXX random GPU hang afterwards! */ + return 0; + + valid = kzalloc(BITS_TO_LONGS(FW_RANGE) * sizeof(*valid), + GFP_TEMPORARY); + if (!valid) + return -ENOMEM; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + check_for_unclaimed_mmio(dev_priv); + for (offset = 0; offset < FW_RANGE; offset += 4) { + i915_reg_t reg = { offset }; + + (void)I915_READ_FW(reg); + if (!check_for_unclaimed_mmio(dev_priv)) + set_bit(offset, valid); + } + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + err = 0; + for_each_set_bit(offset, valid, FW_RANGE) { + i915_reg_t reg = { offset }; + + intel_uncore_forcewake_reset(dev_priv, false); + check_for_unclaimed_mmio(dev_priv); + + (void)I915_READ(reg); + if (check_for_unclaimed_mmio(dev_priv)) { + pr_err("Unclaimed mmio read to register 0x%04x\n", + offset); + err = -EINVAL; + } + } + + kfree(valid); + return err; +} + int intel_uncore_live_selftests(struct drm_i915_private *i915) { int err; @@ -119,5 +174,9 @@ int intel_uncore_live_selftests(struct drm_i915_private *i915) if (err) return err; + err = intel_uncore_check_forcewake_domains(i915); + if (err) + return err; + return 0; } From 6cca22ede8a448d341b3e4565943f54bd8b8dcd0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:36 +0000 Subject: [PATCH 0180/1299] drm/i915: Add some mock tests for dmabuf interop Check that we can create both dmabuf and objects from dmabuf. v2: Cleanups, correct include, fix unpin on dead path and prevent explosion on dmabuf init failure Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-25-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 5 + .../gpu/drm/i915/selftests/i915_gem_dmabuf.c | 294 ++++++++++++++++++ .../drm/i915/selftests/i915_mock_selftests.h | 1 + drivers/gpu/drm/i915/selftests/mock_dmabuf.c | 176 +++++++++++ drivers/gpu/drm/i915/selftests/mock_dmabuf.h | 41 +++ 5 files changed, 517 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_dmabuf.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_dmabuf.h diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index d037adcda6f2..3e276eee0450 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -307,3 +307,8 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, return ERR_PTR(ret); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_dmabuf.c" +#include "selftests/i915_gem_dmabuf.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c new file mode 100644 index 000000000000..a2393fcf9fa8 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c @@ -0,0 +1,294 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" +#include "mock_dmabuf.h" + +static int igt_dmabuf_export(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + i915_gem_object_put(obj); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + return PTR_ERR(dmabuf); + } + + dma_buf_put(dmabuf); + return 0; +} + +static int igt_dmabuf_import_self(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct drm_gem_object *import; + struct dma_buf *dmabuf; + int err; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (IS_ERR(import)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(import)); + err = PTR_ERR(import); + goto out_dmabuf; + } + + if (import != &obj->base) { + pr_err("i915_gem_prime_import created a new object!\n"); + err = -EINVAL; + goto out_import; + } + + err = 0; +out_import: + i915_gem_object_put(to_intel_bo(import)); +out_dmabuf: + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); + return err; +} + +static int igt_dmabuf_import(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *obj_map, *dma_map; + u32 pattern[] = { 0, 0xaa, 0xcc, 0x55, 0xff }; + int err, i; + + dmabuf = mock_dmabuf(1); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); + if (IS_ERR(obj)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(obj)); + err = PTR_ERR(obj); + goto out_dmabuf; + } + + if (obj->base.dev != &i915->drm) { + pr_err("i915_gem_prime_import created a non-i915 object!\n"); + err = -EINVAL; + goto out_obj; + } + + if (obj->base.size != PAGE_SIZE) { + pr_err("i915_gem_prime_import is wrong size found %lld, expected %ld\n", + (long long)obj->base.size, PAGE_SIZE); + err = -EINVAL; + goto out_obj; + } + + dma_map = dma_buf_vmap(dmabuf); + if (!dma_map) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto out_obj; + } + + if (0) { /* Can not yet map dmabuf */ + obj_map = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(obj_map)) { + err = PTR_ERR(obj_map); + pr_err("i915_gem_object_pin_map failed with err=%d\n", err); + goto out_dma_map; + } + + for (i = 0; i < ARRAY_SIZE(pattern); i++) { + memset(dma_map, pattern[i], PAGE_SIZE); + if (memchr_inv(obj_map, pattern[i], PAGE_SIZE)) { + err = -EINVAL; + pr_err("imported vmap not all set to %x!\n", pattern[i]); + i915_gem_object_unpin_map(obj); + goto out_dma_map; + } + } + + for (i = 0; i < ARRAY_SIZE(pattern); i++) { + memset(obj_map, pattern[i], PAGE_SIZE); + if (memchr_inv(dma_map, pattern[i], PAGE_SIZE)) { + err = -EINVAL; + pr_err("exported vmap not all set to %x!\n", pattern[i]); + i915_gem_object_unpin_map(obj); + goto out_dma_map; + } + } + + i915_gem_object_unpin_map(obj); + } + + err = 0; +out_dma_map: + dma_buf_vunmap(dmabuf, dma_map); +out_obj: + i915_gem_object_put(obj); +out_dmabuf: + dma_buf_put(dmabuf); + return err; +} + +static int igt_dmabuf_import_ownership(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + dmabuf = mock_dmabuf(1); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + ptr = dma_buf_vmap(dmabuf); + if (!ptr) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto err_dmabuf; + } + + memset(ptr, 0xc5, PAGE_SIZE); + dma_buf_vunmap(dmabuf, ptr); + + obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); + if (IS_ERR(obj)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(obj)); + err = PTR_ERR(obj); + goto err_dmabuf; + } + + dma_buf_put(dmabuf); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("i915_gem_object_pin_pages failed with err=%d\n", err); + goto out_obj; + } + + err = 0; + i915_gem_object_unpin_pages(obj); +out_obj: + i915_gem_object_put(obj); + return err; + +err_dmabuf: + dma_buf_put(dmabuf); + return err; +} + +static int igt_dmabuf_export_vmap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto err_obj; + } + i915_gem_object_put(obj); + + ptr = dma_buf_vmap(dmabuf); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + pr_err("dma_buf_vmap failed with err=%d\n", err); + goto out; + } + + if (memchr_inv(ptr, 0, dmabuf->size)) { + pr_err("Exported object not initialiased to zero!\n"); + err = -EINVAL; + goto out; + } + + memset(ptr, 0xc5, dmabuf->size); + + err = 0; + dma_buf_vunmap(dmabuf, ptr); +out: + dma_buf_put(dmabuf); + return err; + +err_obj: + i915_gem_object_put(obj); + return err; +} + +int i915_gem_dmabuf_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + SUBTEST(igt_dmabuf_import_self), + SUBTEST(igt_dmabuf_import), + SUBTEST(igt_dmabuf_import_ownership), + SUBTEST(igt_dmabuf_export_vmap), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_unref(&i915->drm); + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index c61e08de7913..955a4d6ccdaf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -14,3 +14,4 @@ selftest(uncore, intel_uncore_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(requests, i915_gem_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) +selftest(dmabuf, i915_gem_dmabuf_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c new file mode 100644 index 000000000000..99da8f4ef497 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c @@ -0,0 +1,176 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_dmabuf.h" + +static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment, + enum dma_data_direction dir) +{ + struct mock_dmabuf *mock = to_mock(attachment->dmabuf); + struct sg_table *st; + struct scatterlist *sg; + int i, err; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + err = sg_alloc_table(st, mock->npages, GFP_KERNEL); + if (err) + goto err_free; + + sg = st->sgl; + for (i = 0; i < mock->npages; i++) { + sg_set_page(sg, mock->pages[i], PAGE_SIZE, 0); + sg = sg_next(sg); + } + + if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { + err = -ENOMEM; + goto err_st; + } + + return st; + +err_st: + sg_free_table(st); +err_free: + kfree(st); + return ERR_PTR(err); +} + +static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment, + struct sg_table *st, + enum dma_data_direction dir) +{ + dma_unmap_sg(attachment->dev, st->sgl, st->nents, dir); + sg_free_table(st); + kfree(st); +} + +static void mock_dmabuf_release(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + int i; + + for (i = 0; i < mock->npages; i++) + put_page(mock->pages[i]); + + kfree(mock); +} + +static void *mock_dmabuf_vmap(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return vm_map_ram(mock->pages, mock->npages, 0, PAGE_KERNEL); +} + +static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + vm_unmap_ram(vaddr, mock->npages); +} + +static void *mock_dmabuf_kmap_atomic(struct dma_buf *dma_buf, unsigned long page_num) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kmap_atomic(mock->pages[page_num]); +} + +static void mock_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, unsigned long page_num, void *addr) +{ + kunmap_atomic(addr); +} + +static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kmap(mock->pages[page_num]); +} + +static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kunmap(mock->pages[page_num]); +} + +static int mock_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) +{ + return -ENODEV; +} + +static const struct dma_buf_ops mock_dmabuf_ops = { + .map_dma_buf = mock_map_dma_buf, + .unmap_dma_buf = mock_unmap_dma_buf, + .release = mock_dmabuf_release, + .kmap = mock_dmabuf_kmap, + .kmap_atomic = mock_dmabuf_kmap_atomic, + .kunmap = mock_dmabuf_kunmap, + .kunmap_atomic = mock_dmabuf_kunmap_atomic, + .mmap = mock_dmabuf_mmap, + .vmap = mock_dmabuf_vmap, + .vunmap = mock_dmabuf_vunmap, +}; + +static struct dma_buf *mock_dmabuf(int npages) +{ + struct mock_dmabuf *mock; + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct dma_buf *dmabuf; + int i; + + mock = kmalloc(sizeof(*mock) + npages * sizeof(struct page *), + GFP_KERNEL); + if (!mock) + return ERR_PTR(-ENOMEM); + + mock->npages = npages; + for (i = 0; i < npages; i++) { + mock->pages[i] = alloc_page(GFP_KERNEL); + if (!mock->pages[i]) + goto err; + } + + exp_info.ops = &mock_dmabuf_ops; + exp_info.size = npages * PAGE_SIZE; + exp_info.flags = O_CLOEXEC; + exp_info.priv = mock; + + dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(dmabuf)) + goto err; + + return dmabuf; + +err: + while (i--) + put_page(mock->pages[i]); + kfree(mock); + return ERR_PTR(-ENOMEM); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h new file mode 100644 index 000000000000..ec80613159b9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h @@ -0,0 +1,41 @@ + +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_DMABUF_H__ +#define __MOCK_DMABUF_H__ + +#include + +struct mock_dmabuf { + int npages; + struct page *pages[]; +}; + +static struct mock_dmabuf *to_mock(struct dma_buf *buf) +{ + return buf->priv; +} + +#endif /* !__MOCK_DMABUF_H__ */ From ced01afdf672d6ceab71f14e0aefb79b914ddd34 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:37 +0000 Subject: [PATCH 0181/1299] drm/i915: Add a live dmabuf selftest Though we have good coverage of our dmabuf interface through the mock tests, we also want to check the heavy module unload paths of the live i915 driver. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-26-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c | 9 +++++++++ drivers/gpu/drm/i915/selftests/i915_live_selftests.h | 1 + 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c index a2393fcf9fa8..817bef74bbcb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c @@ -292,3 +292,12 @@ int i915_gem_dmabuf_mock_selftests(void) drm_dev_unref(&i915->drm); return err; } + +int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index be7892d05290..fc605695d118 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -12,4 +12,5 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) selftest(requests, i915_gem_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) +selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) From 1c42819a14a0868edd5d4b2db4594c1f7fc324d4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:38 +0000 Subject: [PATCH 0182/1299] drm/i915: Add initial selftests for i915_gem_gtt Simple starting point for adding selftests for i915_gem_gtt, first try creating a ppGTT and filling it. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-27-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 1 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 99 +++++++++++++++++++ .../drm/i915/selftests/i915_live_selftests.h | 1 + 3 files changed, 101 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 520bcf63d271..9daea3cd9cdc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3760,4 +3760,5 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_gtt.c" +#include "selftests/i915_gem_gtt.c" #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c new file mode 100644 index 000000000000..f3359be62515 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -0,0 +1,99 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +static int igt_ppgtt_alloc(void *arg) +{ + struct drm_i915_private *dev_priv = arg; + struct i915_hw_ppgtt *ppgtt; + u64 size, last; + int err; + + /* Allocate a ppggt and try to fill the entire range */ + + if (!USES_PPGTT(dev_priv)) + return 0; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return -ENOMEM; + + mutex_lock(&dev_priv->drm.struct_mutex); + err = __hw_ppgtt_init(ppgtt, dev_priv); + if (err) + goto err_ppgtt; + + if (!ppgtt->base.allocate_va_range) + goto err_ppgtt_cleanup; + + /* Check we can allocate the entire range */ + for (size = 4096; + size <= ppgtt->base.total; + size <<= 2) { + err = ppgtt->base.allocate_va_range(&ppgtt->base, 0, size); + if (err) { + if (err == -ENOMEM) { + pr_info("[1] Ran out of memory for va_range [0 + %llx] [bit %d]\n", + size, ilog2(size)); + err = 0; /* virtual space too large! */ + } + goto err_ppgtt_cleanup; + } + + ppgtt->base.clear_range(&ppgtt->base, 0, size); + } + + /* Check we can incrementally allocate the entire range */ + for (last = 0, size = 4096; + size <= ppgtt->base.total; + last = size, size <<= 2) { + err = ppgtt->base.allocate_va_range(&ppgtt->base, + last, size - last); + if (err) { + if (err == -ENOMEM) { + pr_info("[2] Ran out of memory for va_range [%llx + %llx] [bit %d]\n", + last, size - last, ilog2(size)); + err = 0; /* virtual space too large! */ + } + goto err_ppgtt_cleanup; + } + } + +err_ppgtt_cleanup: + ppgtt->base.cleanup(&ppgtt->base); +err_ppgtt: + mutex_unlock(&dev_priv->drm.struct_mutex); + kfree(ppgtt); + return err; +} + +int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_ppgtt_alloc), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index fc605695d118..d6c869c5b54f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -14,3 +14,4 @@ selftest(requests, i915_gem_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) +selftest(gtt, i915_gem_gtt_live_selftests) From 8d28ba4568f4973b7fc5b12100a720979c16b4f0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:39 +0000 Subject: [PATCH 0183/1299] drm/i915: Exercise filling the top/bottom portions of the ppgtt Allocate objects with varying number of pages (which should hopefully consist of a mixture of contiguous page chunks and so coalesced sg lists) and check that the sg walkers in insert_pages cope. v2: Check both small <-> large Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-28-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_object.h | 3 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 362 ++++++++++++++++++ 2 files changed, 365 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 4114cc8a0b9b..02365a5f7c80 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -33,6 +33,8 @@ #include +#include "i915_selftest.h" + struct drm_i915_gem_object_ops { unsigned int flags; #define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 @@ -84,6 +86,7 @@ struct drm_i915_gem_object { struct list_head obj_exec_link; struct list_head batch_pool_link; + I915_SELFTEST_DECLARE(struct list_head st_link); unsigned long flags; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index f3359be62515..f97580cbd7f7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -22,8 +22,98 @@ * */ +#include + #include "../i915_selftest.h" +#include "mock_drm.h" + +static void fake_free_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static struct sg_table * +fake_get_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) +#define PFN_BIAS 0x1000 + struct sg_table *pages; + struct scatterlist *sg; + typeof(obj->base.size) rem; + + pages = kmalloc(sizeof(*pages), GFP); + if (!pages) + return ERR_PTR(-ENOMEM); + + rem = round_up(obj->base.size, BIT(31)) >> 31; + if (sg_alloc_table(pages, rem, GFP)) { + kfree(pages); + return ERR_PTR(-ENOMEM); + } + + rem = obj->base.size; + for (sg = pages->sgl; sg; sg = sg_next(sg)) { + unsigned long len = min_t(typeof(rem), rem, BIT(31)); + + sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); + sg_dma_address(sg) = page_to_phys(sg_page(sg)); + sg_dma_len(sg) = len; + + rem -= len; + } + + obj->mm.madv = I915_MADV_DONTNEED; + return pages; +#undef GFP +} + +static void fake_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + fake_free_pages(obj, pages); + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops fake_ops = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_pages, + .put_pages = fake_put_pages, +}; + +static struct drm_i915_gem_object * +fake_dma_object(struct drm_i915_private *i915, u64 size) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &fake_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + /* Preallocate the "backing storage" */ + if (i915_gem_object_pin_pages(obj)) + return ERR_PTR(-ENOMEM); + + i915_gem_object_unpin_pages(obj); + return obj; +} + static int igt_ppgtt_alloc(void *arg) { struct drm_i915_private *dev_priv = arg; @@ -89,10 +179,282 @@ static int igt_ppgtt_alloc(void *arg) return err; } +static void close_object_list(struct list_head *objects, + struct i915_address_space *vm) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, vm, NULL); + if (!IS_ERR(vma)) + i915_vma_close(vma); + + list_del(&obj->st_link); + i915_gem_object_put(obj); + } +} + +static int fill_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + const u64 hole_size = hole_end - hole_start; + struct drm_i915_gem_object *obj; + const unsigned long max_pages = + min_t(u64, ULONG_MAX - 1, hole_size/2 >> PAGE_SHIFT); + const unsigned long max_step = max(int_sqrt(max_pages), 2UL); + unsigned long npages, prime, flags; + struct i915_vma *vma; + LIST_HEAD(objects); + int err; + + /* Try binding many VMA working inwards from either edge */ + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + for_each_prime_number_from(prime, 2, max_step) { + for (npages = 1; npages <= max_pages; npages *= prime) { + const u64 full_size = npages << PAGE_SHIFT; + const struct { + const char *name; + u64 offset; + int step; + } phases[] = { + { "top-down", hole_end, -1, }, + { "bottom-up", hole_start, 1, }, + { } + }, *p; + + obj = fake_dma_object(i915, full_size); + if (IS_ERR(obj)) + break; + + list_add(&obj->st_link, &objects); + + /* Align differing sized objects against the edges, and + * check we don't walk off into the void when binding + * them into the GTT. + */ + for (p = phases; p->name; p++) { + u64 offset; + + offset = p->offset; + list_for_each_entry(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + err = i915_vma_pin(vma, 0, 0, offset | flags); + if (err) { + pr_err("%s(%s) pin (forward) failed with err=%d on size=%lu pages (prime=%lu), offset=%llx\n", + __func__, p->name, err, npages, prime, offset); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (forward) insert failed: vma.node=%llx + %llx [allocated? %d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (forward) moved vma.node=%llx + %llx, expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, + offset); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s(%s) (forward) unbind of vma.node=%llx + %llx failed with err=%d\n", + __func__, p->name, vma->node.start, vma->node.size, + err); + goto err; + } + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry_reverse(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + err = i915_vma_pin(vma, 0, 0, offset | flags); + if (err) { + pr_err("%s(%s) pin (backward) failed with err=%d on size=%lu pages (prime=%lu), offset=%llx\n", + __func__, p->name, err, npages, prime, offset); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (backward) insert failed: vma.node=%llx + %llx [allocated? %d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry_reverse(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (backward) moved vma.node=%llx + %llx [allocated? %d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s(%s) (backward) unbind of vma.node=%llx + %llx failed with err=%d\n", + __func__, p->name, vma->node.start, vma->node.size, + err); + goto err; + } + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + } + + if (igt_timeout(end_time, "%s timed out (npages=%lu, prime=%lu)\n", + __func__, npages, prime)) { + err = -EINTR; + goto err; + } + } + + close_object_list(&objects, vm); + } + + return 0; + +err: + close_object_list(&objects, vm); + return err; +} + +static int exercise_ppgtt(struct drm_i915_private *dev_priv, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct drm_file *file; + struct i915_hw_ppgtt *ppgtt; + IGT_TIMEOUT(end_time); + int err; + + if (!USES_FULL_PPGTT(dev_priv)) + return 0; + + file = mock_file(dev_priv); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&dev_priv->drm.struct_mutex); + ppgtt = i915_ppgtt_create(dev_priv, file->driver_priv, "mock"); + if (IS_ERR(ppgtt)) { + err = PTR_ERR(ppgtt); + goto out_unlock; + } + GEM_BUG_ON(offset_in_page(ppgtt->base.total)); + GEM_BUG_ON(ppgtt->base.closed); + + err = func(dev_priv, &ppgtt->base, 0, ppgtt->base.total, end_time); + + i915_ppgtt_close(&ppgtt->base); + i915_ppgtt_put(ppgtt); +out_unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + + mock_file_free(dev_priv, file); + return err; +} + +static int igt_ppgtt_fill(void *arg) +{ + return exercise_ppgtt(arg, fill_hole); +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_ppgtt_alloc), + SUBTEST(igt_ppgtt_fill), }; return i915_subtests(tests, i915); From 62c981cfe78513aace9a156b0abf0246ba7c79f2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:40 +0000 Subject: [PATCH 0184/1299] drm/i915: Exercise filling the top/bottom portions of the global GTT Same test as previously for the per-process GTT instead applied to the global GTT. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-29-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 61 ++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index f97580cbd7f7..86b2e09d9f85 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -22,6 +22,7 @@ * */ +#include #include #include "../i915_selftest.h" @@ -188,7 +189,8 @@ static void close_object_list(struct list_head *objects, struct i915_vma *vma; vma = i915_vma_instance(obj, vm, NULL); - if (!IS_ERR(vma)) + /* Only ppgtt vma may be closed before the object is freed */ + if (!IS_ERR(vma) && !i915_vma_is_ggtt(vma)) i915_vma_close(vma); list_del(&obj->st_link); @@ -450,12 +452,69 @@ static int igt_ppgtt_fill(void *arg) return exercise_ppgtt(arg, fill_hole); } +static int sort_holes(void *priv, struct list_head *A, struct list_head *B) +{ + struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); + struct drm_mm_node *b = list_entry(B, typeof(*b), hole_stack); + + if (a->start < b->start) + return -1; + else + return 1; +} + +static int exercise_ggtt(struct drm_i915_private *i915, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + u64 hole_start, hole_end, last = 0; + struct drm_mm_node *node; + IGT_TIMEOUT(end_time); + int err; + + mutex_lock(&i915->drm.struct_mutex); +restart: + list_sort(NULL, &ggtt->base.mm.hole_stack, sort_holes); + drm_mm_for_each_hole(node, &ggtt->base.mm, hole_start, hole_end) { + if (hole_start < last) + continue; + + if (ggtt->base.mm.color_adjust) + ggtt->base.mm.color_adjust(node, 0, + &hole_start, &hole_end); + if (hole_start >= hole_end) + continue; + + err = func(i915, &ggtt->base, hole_start, hole_end, end_time); + if (err) + break; + + /* As we have manipulated the drm_mm, the list may be corrupt */ + last = hole_end; + goto restart; + } + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +static int igt_ggtt_fill(void *arg) +{ + return exercise_ggtt(arg, fill_hole); +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_ppgtt_alloc), SUBTEST(igt_ppgtt_fill), + SUBTEST(igt_ggtt_fill), }; + GEM_BUG_ON(offset_in_page(i915->ggtt.base.total)); + return i915_subtests(tests, i915); } From 6e32ab3d47776247526ca83e434d6d0a746cfd64 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:41 +0000 Subject: [PATCH 0185/1299] drm/i915: Fill different pages of the GTT Exercise filling different pages of the GTT v2: Walk all holes until we timeout Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-30-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 86b2e09d9f85..73845404f664 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -409,6 +409,87 @@ static int fill_hole(struct drm_i915_private *i915, return err; } +static int walk_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + const u64 hole_size = hole_end - hole_start; + const unsigned long max_pages = + min_t(u64, ULONG_MAX - 1, hole_size >> PAGE_SHIFT); + unsigned long flags; + u64 size; + + /* Try binding a single VMA in different positions within the hole */ + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + for_each_prime_number_from(size, 1, max_pages) { + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u64 addr; + int err = 0; + + obj = fake_dma_object(i915, size << PAGE_SHIFT); + if (IS_ERR(obj)) + break; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + for (addr = hole_start; + addr + obj->base.size < hole_end; + addr += obj->base.size) { + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s bind failed at %llx + %llx [hole %llx- %llx] with err=%d\n", + __func__, addr, vma->size, + hole_start, hole_end, err); + goto err; + } + i915_vma_unpin(vma); + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, vma->size); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s unbind failed at %llx + %llx with err=%d\n", + __func__, addr, vma->size, err); + goto err; + } + + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + + if (igt_timeout(end_time, + "%s timed out at %llx\n", + __func__, addr)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + static int exercise_ppgtt(struct drm_i915_private *dev_priv, int (*func)(struct drm_i915_private *i915, struct i915_address_space *vm, @@ -452,6 +533,11 @@ static int igt_ppgtt_fill(void *arg) return exercise_ppgtt(arg, fill_hole); } +static int igt_ppgtt_walk(void *arg) +{ + return exercise_ppgtt(arg, walk_hole); +} + static int sort_holes(void *priv, struct list_head *A, struct list_head *B) { struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); @@ -506,11 +592,18 @@ static int igt_ggtt_fill(void *arg) return exercise_ggtt(arg, fill_hole); } +static int igt_ggtt_walk(void *arg) +{ + return exercise_ggtt(arg, walk_hole); +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_ppgtt_alloc), + SUBTEST(igt_ppgtt_walk), SUBTEST(igt_ppgtt_fill), + SUBTEST(igt_ggtt_walk), SUBTEST(igt_ggtt_fill), }; From 5c3bff482ad19657d6bb07659b8a29554a909b58 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:42 +0000 Subject: [PATCH 0186/1299] drm/i915: Exercise filling and removing random ranges from the live GTT Test the low-level i915_address_space interfaces to sanity check the live insertion/removal of address ranges. v2: Split out the low-level operations to a new test Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-31-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 73845404f664..c4ed0fd9f160 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -26,6 +26,7 @@ #include #include "../i915_selftest.h" +#include "i915_random.h" #include "mock_drm.h" @@ -490,6 +491,106 @@ static int walk_hole(struct drm_i915_private *i915, return 0; } +static int drunk_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + I915_RND_STATE(prng); + unsigned int size; + unsigned long flags; + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (size = 12; (hole_end - hole_start) >> size; size++) { + struct drm_i915_gem_object *obj; + unsigned int *order, count, n; + struct i915_vma *vma; + u64 hole_size; + int err; + + hole_size = (hole_end - hole_start) >> size; + if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) + hole_size = KMALLOC_MAX_SIZE / sizeof(u32); + count = hole_size; + do { + count >>= 1; + order = i915_random_order(count, &prng); + } while (!order && count); + if (!order) + break; + + /* Ignore allocation failures (i.e. don't report them as + * a test failure) as we are purposefully allocating very + * large objects without checking that we have sufficient + * memory. We expect to hit -ENOMEM. + */ + + obj = fake_dma_object(i915, BIT_ULL(size)); + if (IS_ERR(obj)) { + kfree(order); + break; + } + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + GEM_BUG_ON(vma->size != BIT_ULL(size)); + + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx + %llx in hole [%llx - %llx], with err=%d\n", + __func__, + addr, BIT_ULL(size), + hole_start, hole_end, + err); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, BIT_ULL(size)); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + GEM_BUG_ON(err); + + if (igt_timeout(end_time, + "%s timed out after %d/%d\n", + __func__, n, count)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); +err_obj: + i915_gem_object_put(obj); + kfree(order); + if (err) + return err; + } + + return 0; +} + static int exercise_ppgtt(struct drm_i915_private *dev_priv, int (*func)(struct drm_i915_private *i915, struct i915_address_space *vm, @@ -538,6 +639,11 @@ static int igt_ppgtt_walk(void *arg) return exercise_ppgtt(arg, walk_hole); } +static int igt_ppgtt_drunk(void *arg) +{ + return exercise_ppgtt(arg, drunk_hole); +} + static int sort_holes(void *priv, struct list_head *A, struct list_head *B) { struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); @@ -597,12 +703,19 @@ static int igt_ggtt_walk(void *arg) return exercise_ggtt(arg, walk_hole); } +static int igt_ggtt_drunk(void *arg) +{ + return exercise_ggtt(arg, drunk_hole); +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_ppgtt_alloc), + SUBTEST(igt_ppgtt_drunk), SUBTEST(igt_ppgtt_walk), SUBTEST(igt_ppgtt_fill), + SUBTEST(igt_ggtt_drunk), SUBTEST(igt_ggtt_walk), SUBTEST(igt_ggtt_fill), }; From 4a6f13fce1331df68e1fc7dd4eb0c0b461d5f845 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:43 +0000 Subject: [PATCH 0187/1299] drm/i915: Live testing of lowlevel GTT operations Directly test allocating the va range and clearing it, this bypasses the use of i915_vma_bind() and inserting the pages to focus on testing of the pagetables. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-32-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index c4ed0fd9f160..d0ec74671ef8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -181,6 +181,92 @@ static int igt_ppgtt_alloc(void *arg) return err; } +static int lowlevel_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + I915_RND_STATE(seed_prng); + unsigned int size; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (size = 12; (hole_end - hole_start) >> size; size++) { + I915_RND_SUBSTATE(prng, seed_prng); + struct drm_i915_gem_object *obj; + unsigned int *order, count, n; + u64 hole_size; + + hole_size = (hole_end - hole_start) >> size; + if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) + hole_size = KMALLOC_MAX_SIZE / sizeof(u32); + count = hole_size; + do { + count >>= 1; + order = i915_random_order(count, &prng); + } while (!order && count); + if (!order) + break; + + GEM_BUG_ON(count * BIT_ULL(size) > vm->total); + GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end); + + /* Ignore allocation failures (i.e. don't report them as + * a test failure) as we are purposefully allocating very + * large objects without checking that we have sufficient + * memory. We expect to hit -ENOMEM. + */ + + obj = fake_dma_object(i915, BIT_ULL(size)); + if (IS_ERR(obj)) { + kfree(order); + break; + } + + GEM_BUG_ON(obj->base.size != BIT_ULL(size)); + + if (i915_gem_object_pin_pages(obj)) { + i915_gem_object_put(obj); + kfree(order); + break; + } + + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + + if (vm->allocate_va_range && + vm->allocate_va_range(vm, addr, BIT_ULL(size))) + break; + + vm->insert_entries(vm, obj->mm.pages, addr, + I915_CACHE_NONE, 0); + if (igt_timeout(end_time, + "%s timed out after %d/%d\n", + __func__, n, count)) { + hole_end = hole_start; /* quit */ + break; + } + } + count = n; + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + vm->clear_range(vm, addr, BIT_ULL(size)); + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + + kfree(order); + } + + return 0; +} + static void close_object_list(struct list_head *objects, struct i915_address_space *vm) { @@ -644,6 +730,11 @@ static int igt_ppgtt_drunk(void *arg) return exercise_ppgtt(arg, drunk_hole); } +static int igt_ppgtt_lowlevel(void *arg) +{ + return exercise_ppgtt(arg, lowlevel_hole); +} + static int sort_holes(void *priv, struct list_head *A, struct list_head *B) { struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); @@ -708,13 +799,20 @@ static int igt_ggtt_drunk(void *arg) return exercise_ggtt(arg, drunk_hole); } +static int igt_ggtt_lowlevel(void *arg) +{ + return exercise_ggtt(arg, lowlevel_hole); +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_ppgtt_alloc), + SUBTEST(igt_ppgtt_lowlevel), SUBTEST(igt_ppgtt_drunk), SUBTEST(igt_ppgtt_walk), SUBTEST(igt_ppgtt_fill), + SUBTEST(igt_ggtt_lowlevel), SUBTEST(igt_ggtt_drunk), SUBTEST(igt_ggtt_walk), SUBTEST(igt_ggtt_fill), From aae4a3d811753d3f79c43c932b8f7d1bcbf42fb0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:44 +0000 Subject: [PATCH 0188/1299] drm/i915: Use fault-injection to force the shrinker to run in live GTT tests It is possible whilst allocating the page-directory tree for a ppgtt bind that the shrinker may run and reap unused parts of the tree. If the shrinker happens to remove a chunk of the tree that the allocate_va_range has already processed, we may then try to insert into the dangling tree. This test uses the fault-injection framework to force the shrinker to be invoked before we allocate new pages, i.e. new chunks of the PD tree. References: https://bugs.freedesktop.org/show_bug.cgi?id=99295 Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/Kconfig.debug | 1 + drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++ drivers/gpu/drm/i915/i915_selftest.h | 2 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 98 +++++++++++++++++++ 5 files changed, 109 insertions(+) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index a4d8cfd77c3c..68ff072f8b76 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -65,6 +65,7 @@ config DRM_I915_SELFTEST bool "Enable selftests upon driver load" depends on DRM_I915 default n + select FAULT_INJECTION select PRIME_NUMBERS help Choose this option to allow the driver to perform selftests upon diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f7ff2df0c0c5..80b653fb3722 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2485,6 +2485,8 @@ struct drm_i915_private { /* Used to save the pipe-to-encoder mapping for audio */ struct intel_encoder *av_enc_map[I915_MAX_PIPES]; + I915_SELFTEST_DECLARE(struct fault_attr vm_fault); + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9daea3cd9cdc..f8ac69380a0f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -23,6 +23,9 @@ * */ +#include /* fault-inject.h is not standalone! */ + +#include #include #include #include @@ -345,6 +348,9 @@ static int __setup_page_dma(struct drm_i915_private *dev_priv, { struct device *kdev = &dev_priv->drm.pdev->dev; + if (I915_SELFTEST_ONLY(should_fail(&dev_priv->vm_fault, 1))) + i915_gem_shrink_all(dev_priv); + p->page = alloc_page(flags); if (!p->page) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index e43777b72211..9d7d86f1733d 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -36,6 +36,8 @@ struct i915_selftest { }; #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include + extern struct i915_selftest i915_selftest; int i915_mock_selftests(void); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index d0ec74671ef8..2e571bbe98ae 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -271,11 +271,14 @@ static void close_object_list(struct list_head *objects, struct i915_address_space *vm) { struct drm_i915_gem_object *obj, *on; + int ignored; list_for_each_entry_safe(obj, on, objects, st_link) { struct i915_vma *vma; vma = i915_vma_instance(obj, vm, NULL); + if (!IS_ERR(vma)) + ignored = i915_vma_unbind(vma); /* Only ppgtt vma may be closed before the object is freed */ if (!IS_ERR(vma) && !i915_vma_is_ggtt(vma)) i915_vma_close(vma); @@ -677,6 +680,95 @@ static int drunk_hole(struct drm_i915_private *i915, return 0; } +static int __shrink_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + struct drm_i915_gem_object *obj; + unsigned long flags = PIN_OFFSET_FIXED | PIN_USER; + unsigned int order = 12; + LIST_HEAD(objects); + int err = 0; + u64 addr; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (addr = hole_start; addr < hole_end; ) { + struct i915_vma *vma; + u64 size = BIT_ULL(order++); + + size = min(size, hole_end - addr); + obj = fake_dma_object(i915, size); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + break; + } + + GEM_BUG_ON(vma->size != size); + + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx + %llx in hole [%llx - %llx], with err=%d\n", + __func__, addr, size, hole_start, hole_end, err); + break; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, size); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + break; + } + + i915_vma_unpin(vma); + addr += size; + + if (igt_timeout(end_time, + "%s timed out at ofset %llx [%llx - %llx]\n", + __func__, addr, hole_start, hole_end)) { + err = -EINTR; + break; + } + } + + close_object_list(&objects, vm); + return err; +} + +static int shrink_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + unsigned long prime; + int err; + + i915->vm_fault.probability = 999; + atomic_set(&i915->vm_fault.times, -1); + + for_each_prime_number_from(prime, 0, ULONG_MAX - 1) { + i915->vm_fault.interval = prime; + err = __shrink_hole(i915, vm, hole_start, hole_end, end_time); + if (err) + break; + } + + memset(&i915->vm_fault, 0, sizeof(i915->vm_fault)); + + return err; +} + static int exercise_ppgtt(struct drm_i915_private *dev_priv, int (*func)(struct drm_i915_private *i915, struct i915_address_space *vm, @@ -735,6 +827,11 @@ static int igt_ppgtt_lowlevel(void *arg) return exercise_ppgtt(arg, lowlevel_hole); } +static int igt_ppgtt_shrink(void *arg) +{ + return exercise_ppgtt(arg, shrink_hole); +} + static int sort_holes(void *priv, struct list_head *A, struct list_head *B) { struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); @@ -812,6 +909,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_drunk), SUBTEST(igt_ppgtt_walk), SUBTEST(igt_ppgtt_fill), + SUBTEST(igt_ppgtt_shrink), SUBTEST(igt_ggtt_lowlevel), SUBTEST(igt_ggtt_drunk), SUBTEST(igt_ggtt_walk), From e3c7a1c5377e54f99689a18cb85ae7f3b633b58a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:45 +0000 Subject: [PATCH 0189/1299] drm/i915: Test creation of VMA Simple test to exercise creation and lookup of VMA within an object. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-34-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 3 + .../drm/i915/selftests/i915_mock_selftests.h | 1 + drivers/gpu/drm/i915/selftests/i915_vma.c | 224 ++++++++++++++++++ 3 files changed, 228 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_vma.c diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e75494c1ef52..e6c565fd6e61 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -690,3 +690,6 @@ int i915_vma_unbind(struct i915_vma *vma) return 0; } +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_vma.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 955a4d6ccdaf..b450eab7e6e1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -15,3 +15,4 @@ selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(requests, i915_gem_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) +selftest(vma, i915_vma_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c new file mode 100644 index 000000000000..eb794fcc90ca --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -0,0 +1,224 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" +#include "mock_context.h" + +static bool assert_vma(struct i915_vma *vma, + struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx) +{ + bool ok = true; + + if (vma->vm != &ctx->ppgtt->base) { + pr_err("VMA created with wrong VM\n"); + ok = false; + } + + if (vma->size != obj->base.size) { + pr_err("VMA created with wrong size, found %llu, expected %zu\n", + vma->size, obj->base.size); + ok = false; + } + + if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) { + pr_err("VMA created with wrong type [%d]\n", + vma->ggtt_view.type); + ok = false; + } + + return ok; +} + +static struct i915_vma * +checked_vma_instance(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + struct i915_ggtt_view *view) +{ + struct i915_vma *vma; + bool ok = true; + + vma = i915_vma_instance(obj, vm, view); + if (IS_ERR(vma)) + return vma; + + /* Manual checks, will be reinforced by i915_vma_compare! */ + if (vma->vm != vm) { + pr_err("VMA's vm [%p] does not match request [%p]\n", + vma->vm, vm); + ok = false; + } + + if (i915_is_ggtt(vm) != i915_vma_is_ggtt(vma)) { + pr_err("VMA ggtt status [%d] does not match parent [%d]\n", + i915_vma_is_ggtt(vma), i915_is_ggtt(vm)); + ok = false; + } + + if (i915_vma_compare(vma, vm, view)) { + pr_err("i915_vma_compare failed with create parmaters!\n"); + return ERR_PTR(-EINVAL); + } + + if (i915_vma_compare(vma, vma->vm, + i915_vma_is_ggtt(vma) ? &vma->ggtt_view : NULL)) { + pr_err("i915_vma_compare failed with itself\n"); + return ERR_PTR(-EINVAL); + } + + if (!ok) { + pr_err("i915_vma_compare failed to detect the difference!\n"); + return ERR_PTR(-EINVAL); + } + + return vma; +} + +static int create_vmas(struct drm_i915_private *i915, + struct list_head *objects, + struct list_head *contexts) +{ + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + int pinned; + + list_for_each_entry(obj, objects, st_link) { + for (pinned = 0; pinned <= 1; pinned++) { + list_for_each_entry(ctx, contexts, link) { + struct i915_address_space *vm = + &ctx->ppgtt->base; + struct i915_vma *vma; + int err; + + vma = checked_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + if (!assert_vma(vma, obj, ctx)) { + pr_err("VMA lookup/create failed\n"); + return -EINVAL; + } + + if (!pinned) { + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) { + pr_err("Failed to pin VMA\n"); + return err; + } + } else { + i915_vma_unpin(vma); + } + } + } + } + + return 0; +} + +static int igt_vma_create(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + struct i915_gem_context *ctx, *cn; + unsigned long num_obj, num_ctx; + unsigned long no, nc; + IGT_TIMEOUT(end_time); + LIST_HEAD(contexts); + LIST_HEAD(objects); + int err; + + /* Exercise creating many vma amonst many objections, checking the + * vma creation and lookup routines. + */ + + no = 0; + for_each_prime_number(num_obj, ULONG_MAX - 1) { + for (; no < num_obj; no++) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + list_add(&obj->st_link, &objects); + } + + nc = 0; + for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) { + for (; nc < num_ctx; nc++) { + ctx = mock_context(i915, "mock"); + if (!ctx) + goto out; + + list_move(&ctx->link, &contexts); + } + + err = create_vmas(i915, &objects, &contexts); + if (err) + goto out; + + if (igt_timeout(end_time, + "%s timed out: after %lu objects in %lu contexts\n", + __func__, no, nc)) + goto end; + } + + list_for_each_entry_safe(ctx, cn, &contexts, link) + mock_context_close(ctx); + } + +end: + /* Final pass to lookup all created contexts */ + err = create_vmas(i915, &objects, &contexts); +out: + list_for_each_entry_safe(ctx, cn, &contexts, link) + mock_context_close(ctx); + + list_for_each_entry_safe(obj, on, &objects, st_link) + i915_gem_object_put(obj); + return err; +} + +int i915_vma_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_vma_create), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} + From 782a3e9ef2051f2bec7c4fb255aa6a8f7c530da8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:46 +0000 Subject: [PATCH 0190/1299] drm/i915: Exercise i915_vma_pin/i915_vma_insert High-level testing of the struct drm_mm by verifying our handling of weird requests to i915_vma_pin. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-35-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 4 +- drivers/gpu/drm/i915/i915_vma.h | 4 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 151 ++++++++++++++++++++++ 3 files changed, 155 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e6c565fd6e61..0dc994b76924 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -327,8 +327,8 @@ void i915_vma_unpin_and_release(struct i915_vma **p_vma) __i915_gem_object_release_unless_active(obj); } -bool -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +bool i915_vma_misplaced(const struct i915_vma *vma, + u64 size, u64 alignment, u64 flags) { if (!drm_mm_node_allocated(&vma->node)) return false; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e39d922cfb6f..2e03f81dddbe 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -228,8 +228,8 @@ i915_vma_compare(struct i915_vma *vma, int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); -bool -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); +bool i915_vma_misplaced(const struct i915_vma *vma, + u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index eb794fcc90ca..111158f7daeb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -202,10 +202,161 @@ static int igt_vma_create(void *arg) return err; } +struct pin_mode { + u64 size; + u64 flags; + bool (*assert)(const struct i915_vma *, + const struct pin_mode *mode, + int result); + const char *string; +}; + +static bool assert_pin_valid(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + if (result) + return false; + + if (i915_vma_misplaced(vma, mode->size, 0, mode->flags)) + return false; + + return true; +} + +__maybe_unused +static bool assert_pin_e2big(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -E2BIG; +} + +__maybe_unused +static bool assert_pin_enospc(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -ENOSPC; +} + +__maybe_unused +static bool assert_pin_einval(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -EINVAL; +} + +static int igt_vma_pin1(void *arg) +{ + struct drm_i915_private *i915 = arg; + const struct pin_mode modes[] = { +#define VALID(sz, fl) { .size = (sz), .flags = (fl), .assert = assert_pin_valid, .string = #sz ", " #fl ", (valid) " } +#define __INVALID(sz, fl, check, eval) { .size = (sz), .flags = (fl), .assert = (check), .string = #sz ", " #fl ", (invalid " #eval ")" } +#define INVALID(sz, fl) __INVALID(sz, fl, assert_pin_einval, EINVAL) +#define TOOBIG(sz, fl) __INVALID(sz, fl, assert_pin_e2big, E2BIG) +#define NOSPACE(sz, fl) __INVALID(sz, fl, assert_pin_enospc, ENOSPC) + VALID(0, PIN_GLOBAL), + VALID(0, PIN_GLOBAL | PIN_MAPPABLE), + + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 4096), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 8192), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.base.total - 4096)), + + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), + INVALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | i915->ggtt.mappable_end), + VALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)), + INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | i915->ggtt.base.total), + INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | round_down(U64_MAX, PAGE_SIZE)), + + VALID(4096, PIN_GLOBAL), + VALID(8192, PIN_GLOBAL), + VALID(i915->ggtt.mappable_end - 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(i915->ggtt.mappable_end, PIN_GLOBAL | PIN_MAPPABLE), + TOOBIG(i915->ggtt.mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(i915->ggtt.base.total - 4096, PIN_GLOBAL), + VALID(i915->ggtt.base.total, PIN_GLOBAL), + TOOBIG(i915->ggtt.base.total + 4096, PIN_GLOBAL), + TOOBIG(round_down(U64_MAX, PAGE_SIZE), PIN_GLOBAL), + INVALID(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (round_down(U64_MAX, PAGE_SIZE) - 4096)), + + VALID(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + +#if !IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) + /* Misusing BIAS is a programming error (it is not controllable + * from userspace) so when debugging is enabled, it explodes. + * However, the tests are still quite interesting for checking + * variable start, end and size. + */ + NOSPACE(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | i915->ggtt.mappable_end), + NOSPACE(0, PIN_GLOBAL | PIN_OFFSET_BIAS | i915->ggtt.base.total), + NOSPACE(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + NOSPACE(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.base.total - 4096)), +#endif + { }, +#undef NOSPACE +#undef TOOBIG +#undef INVALID +#undef __INVALID +#undef VALID + }, *m; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err = -EINVAL; + + /* Exercise all the weird and wonderful i915_vma_pin requests, + * focusing on error handling of boundary conditions. + */ + + GEM_BUG_ON(!drm_mm_clean(&i915->ggtt.base.mm)); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = checked_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + goto out; + + for (m = modes; m->assert; m++) { + err = i915_vma_pin(vma, m->size, 0, m->flags); + if (!m->assert(vma, m, err)) { + pr_err("%s to pin single page into GGTT with mode[%d:%s]: size=%llx flags=%llx, err=%d\n", + m->assert == assert_pin_valid ? "Failed" : "Unexpectedly succeeded", + (int)(m - modes), m->string, m->size, m->flags, + err); + if (!err) + i915_vma_unpin(vma); + err = -EINVAL; + goto out; + } + + if (!err) { + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + if (err) { + pr_err("Failed to unbind single page from GGTT, err=%d\n", err); + goto out; + } + } + } + + err = 0; +out: + i915_gem_object_put(obj); + return err; +} + int i915_vma_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_vma_create), + SUBTEST(igt_vma_pin1), }; struct drm_i915_private *i915; int err; From a231bf643dcc5a221afbc52093e73f23d50d96ff Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:47 +0000 Subject: [PATCH 0191/1299] drm/i915: Verify page layout for rotated VMA Exercise creating rotated VMA and checking the page order within. v2..v3: Be more creative in rotated params Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-36-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_vma.c | 179 ++++++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 111158f7daeb..c8869fc217fd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -352,11 +352,190 @@ static int igt_vma_pin1(void *arg) return err; } +static unsigned long rotated_index(const struct intel_rotation_info *r, + unsigned int n, + unsigned int x, + unsigned int y) +{ + return (r->plane[n].stride * (r->plane[n].height - y - 1) + + r->plane[n].offset + x); +} + +static struct scatterlist * +assert_rotated(struct drm_i915_gem_object *obj, + const struct intel_rotation_info *r, unsigned int n, + struct scatterlist *sg) +{ + unsigned int x, y; + + for (x = 0; x < r->plane[n].width; x++) { + for (y = 0; y < r->plane[n].height; y++) { + unsigned long src_idx; + dma_addr_t src; + + if (!sg) { + pr_err("Invalid sg table: too short at plane %d, (%d, %d)!\n", + n, x, y); + return ERR_PTR(-EINVAL); + } + + src_idx = rotated_index(r, n, x, y); + src = i915_gem_object_get_dma_address(obj, src_idx); + + if (sg_dma_len(sg) != PAGE_SIZE) { + pr_err("Invalid sg.length, found %d, expected %lu for rotated page (%d, %d) [src index %lu]\n", + sg_dma_len(sg), PAGE_SIZE, + x, y, src_idx); + return ERR_PTR(-EINVAL); + } + + if (sg_dma_address(sg) != src) { + pr_err("Invalid address for rotated page (%d, %d) [src index %lu]\n", + x, y, src_idx); + return ERR_PTR(-EINVAL); + } + + sg = sg_next(sg); + } + } + + return sg; +} + +static unsigned int rotated_size(const struct intel_rotation_plane_info *a, + const struct intel_rotation_plane_info *b) +{ + return a->width * a->height + b->width * b->height; +} + +static int igt_vma_rotate(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm = &i915->ggtt.base; + struct drm_i915_gem_object *obj; + const struct intel_rotation_plane_info planes[] = { + { .width = 1, .height = 1, .stride = 1 }, + { .width = 2, .height = 2, .stride = 2 }, + { .width = 4, .height = 4, .stride = 4 }, + { .width = 8, .height = 8, .stride = 8 }, + + { .width = 3, .height = 5, .stride = 3 }, + { .width = 3, .height = 5, .stride = 4 }, + { .width = 3, .height = 5, .stride = 5 }, + + { .width = 5, .height = 3, .stride = 5 }, + { .width = 5, .height = 3, .stride = 7 }, + { .width = 5, .height = 3, .stride = 9 }, + + { .width = 4, .height = 6, .stride = 6 }, + { .width = 6, .height = 4, .stride = 6 }, + { } + }, *a, *b; + const unsigned int max_pages = 64; + int err = -ENOMEM; + + /* Create VMA for many different combinations of planes and check + * that the page layout within the rotated VMA match our expectations. + */ + + obj = i915_gem_object_create_internal(i915, max_pages * PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + for (a = planes; a->width; a++) { + for (b = planes + ARRAY_SIZE(planes); b-- != planes; ) { + struct i915_ggtt_view view; + unsigned int n, max_offset; + + max_offset = max(a->stride * a->height, + b->stride * b->height); + GEM_BUG_ON(max_offset > max_pages); + max_offset = max_pages - max_offset; + + view.type = I915_GGTT_VIEW_ROTATED; + view.rotated.plane[0] = *a; + view.rotated.plane[1] = *b; + + for_each_prime_number_from(view.rotated.plane[0].offset, 0, max_offset) { + for_each_prime_number_from(view.rotated.plane[1].offset, 0, max_offset) { + struct scatterlist *sg; + struct i915_vma *vma; + + vma = checked_vma_instance(obj, vm, &view); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) { + pr_err("Failed to pin VMA, err=%d\n", err); + goto out_object; + } + + if (vma->size != rotated_size(a, b) * PAGE_SIZE) { + pr_err("VMA is wrong size, expected %lu, found %llu\n", + PAGE_SIZE * rotated_size(a, b), vma->size); + err = -EINVAL; + goto out_object; + } + + if (vma->pages->nents != rotated_size(a, b)) { + pr_err("sg table is wrong sizeo, expected %u, found %u nents\n", + rotated_size(a, b), vma->pages->nents); + err = -EINVAL; + goto out_object; + } + + if (vma->node.size < vma->size) { + pr_err("VMA binding too small, expected %llu, found %llu\n", + vma->size, vma->node.size); + err = -EINVAL; + goto out_object; + } + + if (vma->pages == obj->mm.pages) { + pr_err("VMA using unrotated object pages!\n"); + err = -EINVAL; + goto out_object; + } + + sg = vma->pages->sgl; + for (n = 0; n < ARRAY_SIZE(view.rotated.plane); n++) { + sg = assert_rotated(obj, &view.rotated, n, sg); + if (IS_ERR(sg)) { + pr_err("Inconsistent VMA pages for plane %d: [(%d, %d, %d, %d), (%d, %d, %d, %d)]\n", n, + view.rotated.plane[0].width, + view.rotated.plane[0].height, + view.rotated.plane[0].stride, + view.rotated.plane[0].offset, + view.rotated.plane[1].width, + view.rotated.plane[1].height, + view.rotated.plane[1].stride, + view.rotated.plane[1].offset); + err = -EINVAL; + goto out_object; + } + } + + i915_vma_unpin(vma); + } + } + } + } + +out_object: + i915_gem_object_put(obj); +out: + return err; +} + int i915_vma_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_vma_create), SUBTEST(igt_vma_pin1), + SUBTEST(igt_vma_rotate), }; struct drm_i915_private *i915; int err; From af1f83a152b5cf6a9a780b7204805cbe0ff905da Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:48 +0000 Subject: [PATCH 0192/1299] drm/i915: Test creation of partial VMA Mock testing to ensure we can create and lookup partial VMA. v2: Named phases Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-37-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_vma.c | 192 ++++++++++++++++++++++ 1 file changed, 192 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index c8869fc217fd..ad56566e24db 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -530,12 +530,204 @@ static int igt_vma_rotate(void *arg) return err; } +static bool assert_partial(struct drm_i915_gem_object *obj, + struct i915_vma *vma, + unsigned long offset, + unsigned long size) +{ + struct sgt_iter sgt; + dma_addr_t dma; + + for_each_sgt_dma(dma, sgt, vma->pages) { + dma_addr_t src; + + if (!size) { + pr_err("Partial scattergather list too long\n"); + return false; + } + + src = i915_gem_object_get_dma_address(obj, offset); + if (src != dma) { + pr_err("DMA mismatch for partial page offset %lu\n", + offset); + return false; + } + + offset++; + size--; + } + + return true; +} + +static bool assert_pin(struct i915_vma *vma, + struct i915_ggtt_view *view, + u64 size, + const char *name) +{ + bool ok = true; + + if (vma->size != size) { + pr_err("(%s) VMA is wrong size, expected %llu, found %llu\n", + name, size, vma->size); + ok = false; + } + + if (vma->node.size < vma->size) { + pr_err("(%s) VMA binding too small, expected %llu, found %llu\n", + name, vma->size, vma->node.size); + ok = false; + } + + if (view && view->type != I915_GGTT_VIEW_NORMAL) { + if (memcmp(&vma->ggtt_view, view, sizeof(*view))) { + pr_err("(%s) VMA mismatch upon creation!\n", + name); + ok = false; + } + + if (vma->pages == vma->obj->mm.pages) { + pr_err("(%s) VMA using original object pages!\n", + name); + ok = false; + } + } else { + if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) { + pr_err("Not the normal ggtt view! Found %d\n", + vma->ggtt_view.type); + ok = false; + } + + if (vma->pages != vma->obj->mm.pages) { + pr_err("VMA not using object pages!\n"); + ok = false; + } + } + + return ok; +} + +static int igt_vma_partial(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm = &i915->ggtt.base; + const unsigned int npages = 1021; /* prime! */ + struct drm_i915_gem_object *obj; + const struct phase { + const char *name; + } phases[] = { + { "create" }, + { "lookup" }, + { }, + }, *p; + unsigned int sz, offset; + struct i915_vma *vma; + int err = -ENOMEM; + + /* Create lots of different VMA for the object and check that + * we are returned the same VMA when we later request the same range. + */ + + obj = i915_gem_object_create_internal(i915, npages*PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + for (p = phases; p->name; p++) { /* exercise both create/lookup */ + unsigned int count, nvma; + + nvma = 0; + for_each_prime_number_from(sz, 1, npages) { + for_each_prime_number_from(offset, 0, npages - sz) { + struct i915_ggtt_view view; + + view.type = I915_GGTT_VIEW_PARTIAL; + view.partial.offset = offset; + view.partial.size = sz; + + if (sz == npages) + view.type = I915_GGTT_VIEW_NORMAL; + + vma = checked_vma_instance(obj, vm, &view); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto out_object; + + if (!assert_pin(vma, &view, sz*PAGE_SIZE, p->name)) { + pr_err("(%s) Inconsistent partial pinning for (offset=%d, size=%d)\n", + p->name, offset, sz); + err = -EINVAL; + goto out_object; + } + + if (!assert_partial(obj, vma, offset, sz)) { + pr_err("(%s) Inconsistent partial pages for (offset=%d, size=%d)\n", + p->name, offset, sz); + err = -EINVAL; + goto out_object; + } + + i915_vma_unpin(vma); + nvma++; + } + } + + count = 0; + list_for_each_entry(vma, &obj->vma_list, obj_link) + count++; + if (count != nvma) { + pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n", + p->name, count, nvma); + err = -EINVAL; + goto out_object; + } + + /* Check that we did create the whole object mapping */ + vma = checked_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto out_object; + + if (!assert_pin(vma, NULL, obj->base.size, p->name)) { + pr_err("(%s) inconsistent full pin\n", p->name); + err = -EINVAL; + goto out_object; + } + + i915_vma_unpin(vma); + + count = 0; + list_for_each_entry(vma, &obj->vma_list, obj_link) + count++; + if (count != nvma) { + pr_err("(%s) allocated an extra full vma!\n", p->name); + err = -EINVAL; + goto out_object; + } + } + +out_object: + i915_gem_object_put(obj); +out: + return err; +} + int i915_vma_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_vma_create), SUBTEST(igt_vma_pin1), SUBTEST(igt_vma_rotate), + SUBTEST(igt_vma_partial), }; struct drm_i915_private *i915; int err; From 791ff39ae32a34816cdca40891f142f4122ad409 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:49 +0000 Subject: [PATCH 0193/1299] drm/i915: Live testing for context execution Check we can create and execution within a context. v2: Write one set of dwords through each context/engine to exercise more contexts within the same time period. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-38-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 1 + .../gpu/drm/i915/selftests/i915_gem_context.c | 404 ++++++++++++++++++ .../drm/i915/selftests/i915_live_selftests.h | 1 + 3 files changed, 406 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_context.c diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d27c4050b4c5..c73bf02870d5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -1202,4 +1202,5 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_context.c" +#include "selftests/i915_gem_context.c" #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c new file mode 100644 index 000000000000..4cfdc2f0eda7 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -0,0 +1,404 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_drm.h" +#include "huge_gem_object.h" + +#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) + +static struct i915_vma * +gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) +{ + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(vma->vm->i915); + unsigned long n, size; + u32 *cmd; + int err; + + GEM_BUG_ON(!igt_can_mi_store_dword_imm(vma->vm->i915)); + + size = (4 * count + 1) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(vma->vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = value; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 1 << 22 : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = value; + } else { + *cmd++ = MI_STORE_DWORD_IMM | 1 << 22; + *cmd++ = offset; + *cmd++ = value; + } + offset += PAGE_SIZE; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static unsigned long real_page_count(struct drm_i915_gem_object *obj) +{ + return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; +} + +static unsigned long fake_page_count(struct drm_i915_gem_object *obj) +{ + return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; +} + +static int gpu_fill(struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + unsigned int dw) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + struct i915_vma *batch; + unsigned int flags; + int err; + + GEM_BUG_ON(obj->base.size > vm->total); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); + if (err) + return err; + + /* Within the GTT the huge objects maps every page onto + * its 1024 real pages (using phys_pfn = dma_pfn % 1024). + * We set the nth dword within the page using the nth + * mapping via the GTT - this should exercise the GTT mapping + * whilst checking that each context provides a unique view + * into the object. + */ + batch = gpu_fill_dw(vma, + (dw * real_page_count(obj)) << PAGE_SHIFT | + (dw * sizeof(u32)), + real_page_count(obj), + dw); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_vma; + } + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto err_request; + + err = i915_switch_context(rq); + if (err) + goto err_request; + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + i915_vma_move_to_active(batch, rq, 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + i915_vma_move_to_active(vma, rq, 0); + i915_vma_unpin(vma); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &rq->fence); + reservation_object_unlock(obj->resv); + + __i915_add_request(rq, true); + + return 0; + +err_request: + __i915_add_request(rq, false); +err_batch: + i915_vma_unpin(batch); +err_vma: + i915_vma_unpin(vma); + return err; +} + +static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) +{ + const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); + unsigned int n, m, need_flush; + int err; + + err = i915_gem_obj_prepare_shmem_write(obj, &need_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + for (m = 0; m < DW_PER_PAGE; m++) + map[m] = value; + if (!has_llc) + drm_clflush_virt_range(map, PAGE_SIZE); + kunmap_atomic(map); + } + + i915_gem_obj_finish_shmem_access(obj); + obj->base.read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; + obj->base.write_domain = 0; + return 0; +} + +static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max) +{ + unsigned int n, m, needs_flush; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(map, PAGE_SIZE); + + for (m = 0; m < max; m++) { + if (map[m] != m) { + pr_err("Invalid value at page %d, offset %d: found %x expected %x\n", + n, m, map[m], m); + err = -EINVAL; + goto out_unmap; + } + } + + for (; m < DW_PER_PAGE; m++) { + if (map[m] != 0xdeadbeef) { + pr_err("Invalid value at page %d, offset %d: found %x expected %x\n", + n, m, map[m], 0xdeadbeef); + err = -EINVAL; + goto out_unmap; + } + } + +out_unmap: + kunmap_atomic(map); + if (err) + break; + } + + i915_gem_obj_finish_shmem_access(obj); + return err; +} + +static struct drm_i915_gem_object * +create_test_object(struct i915_gem_context *ctx, + struct drm_file *file, + struct list_head *objects) +{ + struct drm_i915_gem_object *obj; + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base; + u64 size; + u32 handle; + int err; + + size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); + size = round_down(size, DW_PER_PAGE * PAGE_SIZE); + + obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); + if (IS_ERR(obj)) + return obj; + + /* tie the handle to the drm_file for easy reaping */ + err = drm_gem_handle_create(file, &obj->base, &handle); + i915_gem_object_put(obj); + if (err) + return ERR_PTR(err); + + err = cpu_fill(obj, 0xdeadbeef); + if (err) { + pr_err("Failed to fill object with cpu, err=%d\n", + err); + return ERR_PTR(err); + } + + list_add_tail(&obj->st_link, objects); + return obj; +} + +static unsigned long max_dwords(struct drm_i915_gem_object *obj) +{ + unsigned long npages = fake_page_count(obj); + + GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); + return npages / DW_PER_PAGE; +} + +static int igt_ctx_exec(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_file *file = mock_file(i915); + struct drm_i915_gem_object *obj; + IGT_TIMEOUT(end_time); + LIST_HEAD(objects); + unsigned long ncontexts, ndwords, dw; + int err; + + /* Create a few different contexts (with different mm) and write + * through each ctx/mm using the GPU making sure those writes end + * up in the expected pages of our obj. + */ + + mutex_lock(&i915->drm.struct_mutex); + + ncontexts = 0; + ndwords = 0; + dw = 0; + while (!time_after(jiffies, end_time)) { + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + unsigned int id; + + ctx = i915_gem_create_context(i915, file->driver_priv); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + if (dw == 0) { + obj = create_test_object(ctx, file, &objects); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + } + + err = gpu_fill(obj, ctx, engine, dw); + if (err) { + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + ndwords, dw, max_dwords(obj), + engine->name, ctx->hw_id, + yesno(!!ctx->ppgtt), err); + goto out_unlock; + } + + if (++dw == max_dwords(obj)) + dw = 0; + ndwords++; + } + ncontexts++; + } + pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n", + ncontexts, INTEL_INFO(i915)->num_rings, ndwords); + + dw = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int rem = + min_t(unsigned int, ndwords - dw, max_dwords(obj)); + + err = cpu_check(obj, rem); + if (err) + break; + + dw += rem; + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + return err; +} + +int i915_gem_context_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_ctx_exec), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index d6c869c5b54f..eedd7901ce61 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -15,3 +15,4 @@ selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) selftest(gtt, i915_gem_gtt_live_selftests) +selftest(contexts, i915_gem_context_live_selftests) From 6cde9a02e001532744a56052b2aecbda8c51bf16 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:50 +0000 Subject: [PATCH 0194/1299] drm/i915: Extract aliasing ppgtt setup In order to force testing of the aliasing ppgtt, extract its initialisation function. v2: Also extract the cleanup function for symmetry. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-39-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 90 ++++++++++++++++++----------- drivers/gpu/drm/i915/i915_gem_gtt.h | 3 + 2 files changed, 59 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f8ac69380a0f..eebbffdb9a0b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2738,6 +2738,59 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node, *end -= I915_GTT_PAGE_SIZE; } +int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_hw_ppgtt *ppgtt; + int err; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return -ENOMEM; + + err = __hw_ppgtt_init(ppgtt, i915); + if (err) + goto err_ppgtt; + + if (ppgtt->base.allocate_va_range) { + err = ppgtt->base.allocate_va_range(&ppgtt->base, + 0, ppgtt->base.total); + if (err) + goto err_ppgtt_cleanup; + } + + ppgtt->base.clear_range(&ppgtt->base, + ppgtt->base.start, + ppgtt->base.total); + + i915->mm.aliasing_ppgtt = ppgtt; + WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); + ggtt->base.bind_vma = aliasing_gtt_bind_vma; + + return 0; + +err_ppgtt_cleanup: + ppgtt->base.cleanup(&ppgtt->base); +err_ppgtt: + kfree(ppgtt); + return err; +} + +void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_hw_ppgtt *ppgtt; + + ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt); + if (!ppgtt) + return; + + ppgtt->base.cleanup(&ppgtt->base); + kfree(ppgtt); + + ggtt->base.bind_vma = ggtt_bind_vma; +} + int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) { /* Let GEM Manage all of the aperture. @@ -2751,7 +2804,6 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) */ struct i915_ggtt *ggtt = &dev_priv->ggtt; unsigned long hole_start, hole_end; - struct i915_hw_ppgtt *ppgtt; struct drm_mm_node *entry; int ret; @@ -2780,38 +2832,13 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) ggtt->base.total - PAGE_SIZE, PAGE_SIZE); if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) { - ret = -ENOMEM; - goto err; - } - - ret = __hw_ppgtt_init(ppgtt, dev_priv); + ret = i915_gem_init_aliasing_ppgtt(dev_priv); if (ret) - goto err_ppgtt; - - if (ppgtt->base.allocate_va_range) { - ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, - ppgtt->base.total); - if (ret) - goto err_ppgtt_cleanup; - } - - ppgtt->base.clear_range(&ppgtt->base, - ppgtt->base.start, - ppgtt->base.total); - - dev_priv->mm.aliasing_ppgtt = ppgtt; - WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); - ggtt->base.bind_vma = aliasing_gtt_bind_vma; + goto err; } return 0; -err_ppgtt_cleanup: - ppgtt->base.cleanup(&ppgtt->base); -err_ppgtt: - kfree(ppgtt); err: drm_mm_remove_node(&ggtt->error_capture); return ret; @@ -2834,12 +2861,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) WARN_ON(i915_vma_unbind(vma)); mutex_unlock(&dev_priv->drm.struct_mutex); - if (dev_priv->mm.aliasing_ppgtt) { - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; - ppgtt->base.cleanup(&ppgtt->base); - kfree(ppgtt); - } - + i915_gem_fini_aliasing_ppgtt(dev_priv); i915_gem_cleanup_stolen(&dev_priv->drm); if (drm_mm_node_allocated(&ggtt->error_capture)) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 7e678ce5a9c7..fe922059a412 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -531,6 +531,9 @@ i915_vm_is_48bit(const struct i915_address_space *vm) return (vm->total - 1) >> 32; } +int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915); +void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915); + int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); From 92fdf8d4a39a9545c27b7ec2d5b138ff8cde5f91 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:51 +0000 Subject: [PATCH 0195/1299] drm/i915: Force an aliasing_ppgtt test for context execution Ensure that we minimally exercise the aliasing_ppgtt, even on a full-ppgtt, by allocating one and similarly creating a context to use it. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-40-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_gem_context.c | 61 ++++++++++++++++++- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 4cfdc2f0eda7..3813a19a6179 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -325,6 +325,7 @@ static int igt_ctx_exec(void *arg) IGT_TIMEOUT(end_time); LIST_HEAD(objects); unsigned long ncontexts, ndwords, dw; + bool first_shared_gtt = true; int err; /* Create a few different contexts (with different mm) and write @@ -342,7 +343,12 @@ static int igt_ctx_exec(void *arg) struct i915_gem_context *ctx; unsigned int id; - ctx = i915_gem_create_context(i915, file->driver_priv); + if (first_shared_gtt) { + ctx = __create_hw_context(i915, file->driver_priv); + first_shared_gtt = false; + } else { + ctx = i915_gem_create_context(i915, file->driver_priv); + } if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_unlock; @@ -394,11 +400,60 @@ static int igt_ctx_exec(void *arg) return err; } -int i915_gem_context_live_selftests(struct drm_i915_private *i915) +static int fake_aliasing_ppgtt_enable(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + int err; + + err = i915_gem_init_aliasing_ppgtt(i915); + if (err) + return err; + + list_for_each_entry(obj, &i915->mm.bound_list, global_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + continue; + + vma->flags &= ~I915_VMA_LOCAL_BIND; + } + + return 0; +} + +static void fake_aliasing_ppgtt_disable(struct drm_i915_private *i915) +{ + i915_gem_fini_aliasing_ppgtt(i915); +} + +int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) { static const struct i915_subtest tests[] = { SUBTEST(igt_ctx_exec), }; + bool fake_alias = false; + int err; - return i915_subtests(tests, i915); + /* Install a fake aliasing gtt for exercise */ + if (USES_PPGTT(dev_priv) && !dev_priv->mm.aliasing_ppgtt) { + mutex_lock(&dev_priv->drm.struct_mutex); + err = fake_aliasing_ppgtt_enable(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + if (err) + return err; + + GEM_BUG_ON(!dev_priv->mm.aliasing_ppgtt); + fake_alias = true; + } + + err = i915_subtests(tests, dev_priv); + + if (fake_alias) { + mutex_lock(&dev_priv->drm.struct_mutex); + fake_aliasing_ppgtt_disable(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + } + + return err; } From f40a7b7558ef95bbd869b3c8268c0b2a239087c9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:52 +0000 Subject: [PATCH 0196/1299] drm/i915: Initial selftests for exercising eviction Very simple tests to just ask eviction to find some free space in a full GTT and one with some available space. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-41-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_evict.c | 4 + .../gpu/drm/i915/selftests/i915_gem_evict.c | 260 ++++++++++++++++++ .../drm/i915/selftests/i915_mock_selftests.h | 1 + 3 files changed, 265 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_evict.c diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 776c508cb9ae..ca25b1f7f6d1 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -393,3 +393,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_gem_evict.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c new file mode 100644 index 000000000000..97af353db218 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -0,0 +1,260 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" + +static int populate_ggtt(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + u64 size; + + for (size = 0; + size + I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + size += I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + } + + if (!list_empty(&i915->mm.unbound_list)) { + size = 0; + list_for_each_entry(obj, &i915->mm.unbound_list, global_link) + size++; + + pr_err("Found %lld objects unbound!\n", size); + return -EINVAL; + } + + if (list_empty(&i915->ggtt.base.inactive_list)) { + pr_err("No objects on the GGTT inactive list!\n"); + return -EINVAL; + } + + return 0; +} + +static void unpin_ggtt(struct drm_i915_private *i915) +{ + struct i915_vma *vma; + + list_for_each_entry(vma, &i915->ggtt.base.inactive_list, vm_link) + i915_vma_unpin(vma); +} + +static void cleanup_objects(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, global_link) + i915_gem_object_put(obj); + + list_for_each_entry_safe(obj, on, &i915->mm.bound_list, global_link) + i915_gem_object_put(obj); + + mutex_unlock(&i915->drm.struct_mutex); + + i915_gem_drain_freed_objects(i915); + + mutex_lock(&i915->drm.struct_mutex); +} + +static int igt_evict_something(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + int err; + + /* Fill the GGTT with pinned objects and try to evict one. */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_something(&ggtt->base, + I915_GTT_PAGE_SIZE, 0, 0, + 0, U64_MAX, + 0); + if (err != -ENOSPC) { + pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + /* Everything is unpinned, we should be able to evict something */ + err = i915_gem_evict_something(&ggtt->base, + I915_GTT_PAGE_SIZE, 0, 0, + 0, U64_MAX, + 0); + if (err) { + pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_overcommit(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + /* Fill the GGTT with pinned objects and then try to pin one more. + * We expect it to fail. + */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto cleanup; + } + + list_move(&obj->global_link, &i915->mm.unbound_list); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) { + pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma)); + err = -EINVAL; + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_evict_for_vma(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node target = { + .start = 0, + .size = 4096, + }; + int err; + + /* Fill the GGTT with pinned objects and try to evict a range. */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + if (err != -ENOSPC) { + pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + /* Everything is unpinned, we should be able to evict the node */ + err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + if (err) { + pr_err("i915_gem_evict_for_node returned err=%d\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_evict_vm(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + int err; + + /* Fill the GGTT with pinned objects and try to evict everything. */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_vm(&ggtt->base, false); + if (err) { + pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + err = i915_gem_evict_vm(&ggtt->base, false); + if (err) { + pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +int i915_gem_evict_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_evict_something), + SUBTEST(igt_evict_for_vma), + SUBTEST(igt_evict_vm), + SUBTEST(igt_overcommit), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index b450eab7e6e1..cfbd3f5486ae 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -16,3 +16,4 @@ selftest(requests, i915_gem_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) selftest(vma, i915_vma_mock_selftests) +selftest(evict, i915_gem_evict_mock_selftests) From e619cd0d223d55b69732d0d61d1c497e506670ac Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:53 +0000 Subject: [PATCH 0197/1299] drm/i915: Add mock exercise for i915_gem_gtt_reserve i915_gem_gtt_reserve should put the node exactly as requested in the GTT, evicting as required. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-42-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 195 ++++++++++++++++++ .../drm/i915/selftests/i915_mock_selftests.h | 1 + 2 files changed, 196 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 2e571bbe98ae..4f87e437bf1f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -29,6 +29,7 @@ #include "i915_random.h" #include "mock_drm.h" +#include "mock_gem_device.h" static void fake_free_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) @@ -901,6 +902,200 @@ static int igt_ggtt_lowlevel(void *arg) return exercise_ggtt(arg, lowlevel_hole); } +static void track_vma_bind(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + + obj->bind_count++; /* track for eviction later */ + __i915_gem_object_pin_pages(obj); + + vma->pages = obj->mm.pages; + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); +} + +static int igt_gtt_reserve(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + LIST_HEAD(objects); + u64 total; + int err; + + /* i915_gem_gtt_reserve() tries to reserve the precise range + * for the node, and evicts if it has to. So our test checks that + * it can give us the requsted space and prevent overlaps. + */ + + /* Start by filling the GGTT */ + for (total = 0; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + total, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != total || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 1) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + total, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + + /* Now we start forcing evictions */ + for (total = I915_GTT_PAGE_SIZE; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + total, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != total || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 2) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + total, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + + /* And then try at random */ + list_for_each_entry_safe(obj, on, &objects, st_link) { + struct i915_vma *vma; + u64 offset; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("i915_vma_unbind failed with err=%d!\n", err); + goto out; + } + + offset = random_offset(0, i915->ggtt.base.total, + 2*I915_GTT_PAGE_SIZE, + I915_GTT_MIN_ALIGNMENT); + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + offset, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != offset || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 3) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + offset, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + +out: + list_for_each_entry_safe(obj, on, &objects, st_link) { + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + return err; +} + +int i915_gem_gtt_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gtt_reserve), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index cfbd3f5486ae..be9a9ebf5692 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -17,3 +17,4 @@ selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) selftest(vma, i915_vma_mock_selftests) selftest(evict, i915_gem_evict_mock_selftests) +selftest(gtt, i915_gem_gtt_mock_selftests) From 5f32616edb8d717f5ef99ab9391388a6185afc58 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:54 +0000 Subject: [PATCH 0198/1299] drm/i915: Add mock exercise for i915_gem_gtt_insert i915_gem_gtt_insert should allocate from the available free space in the GTT, evicting as necessary to create space. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-43-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 208 ++++++++++++++++++ 1 file changed, 208 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 4f87e437bf1f..349b0de1c47e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1076,10 +1076,218 @@ static int igt_gtt_reserve(void *arg) return err; } +static int igt_gtt_insert(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + struct drm_mm_node tmp = {}; + const struct invalid_insert { + u64 size; + u64 alignment; + u64 start, end; + } invalid_insert[] = { + { + i915->ggtt.base.total + I915_GTT_PAGE_SIZE, 0, + 0, i915->ggtt.base.total, + }, + { + 2*I915_GTT_PAGE_SIZE, 0, + 0, I915_GTT_PAGE_SIZE, + }, + { + -(u64)I915_GTT_PAGE_SIZE, 0, + 0, 4*I915_GTT_PAGE_SIZE, + }, + { + -(u64)2*I915_GTT_PAGE_SIZE, 2*I915_GTT_PAGE_SIZE, + 0, 4*I915_GTT_PAGE_SIZE, + }, + { + I915_GTT_PAGE_SIZE, I915_GTT_MIN_ALIGNMENT << 1, + I915_GTT_MIN_ALIGNMENT, I915_GTT_MIN_ALIGNMENT << 1, + }, + {} + }, *ii; + LIST_HEAD(objects); + u64 total; + int err; + + /* i915_gem_gtt_insert() tries to allocate some free space in the GTT + * to the node, evicting if required. + */ + + /* Check a couple of obviously invalid requests */ + for (ii = invalid_insert; ii->size; ii++) { + err = i915_gem_gtt_insert(&i915->ggtt.base, &tmp, + ii->size, ii->alignment, + I915_COLOR_UNEVICTABLE, + ii->start, ii->end, + 0); + if (err != -ENOSPC) { + pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n", + ii->size, ii->alignment, ii->start, ii->end, + err); + return -EINVAL; + } + } + + /* Start by filling the GGTT */ + for (total = 0; + total + I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err == -ENOSPC) { + /* maxed out the GGTT space */ + i915_gem_object_put(obj); + break; + } + if (err) { + pr_err("i915_gem_gtt_insert (pass 1) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + __i915_vma_pin(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + } + + list_for_each_entry(obj, &objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + if (!drm_mm_node_allocated(&vma->node)) { + pr_err("VMA was unexpectedly evicted!\n"); + err = -EINVAL; + goto out; + } + + __i915_vma_unpin(vma); + } + + /* If we then reinsert, we should find the same hole */ + list_for_each_entry_safe(obj, on, &objects, st_link) { + struct i915_vma *vma; + u64 offset; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + offset = vma->node.start; + + err = i915_vma_unbind(vma); + if (err) { + pr_err("i915_vma_unbind failed with err=%d!\n", err); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err) { + pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != offset) { + pr_err("i915_gem_gtt_insert did not return node to its previous location (the only hole), expected address %llx, found %llx\n", + offset, vma->node.start); + err = -EINVAL; + goto out; + } + } + + /* And then force evictions */ + for (total = 0; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err) { + pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + } + +out: + list_for_each_entry_safe(obj, on, &objects, st_link) { + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + return err; +} + int i915_gem_gtt_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_gtt_reserve), + SUBTEST(igt_gtt_insert), }; struct drm_i915_private *i915; int err; From 210e8ac48f26c965d07db16cff7a5b3d02f8e337 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:55 +0000 Subject: [PATCH 0199/1299] drm/i915: Add mock tests for GTT/VMA handling Use the live tests against the mock ppgtt for quick testing on all platforms of the VMA layer. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-44-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 349b0de1c47e..5caa5153a394 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -28,6 +28,7 @@ #include "../i915_selftest.h" #include "i915_random.h" +#include "mock_context.h" #include "mock_drm.h" #include "mock_gem_device.h" @@ -913,6 +914,45 @@ static void track_vma_bind(struct i915_vma *vma) list_move_tail(&vma->vm_link, &vma->vm->inactive_list); } +static int exercise_mock(struct drm_i915_private *i915, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct i915_gem_context *ctx; + struct i915_hw_ppgtt *ppgtt; + IGT_TIMEOUT(end_time); + int err; + + ctx = mock_context(i915, "mock"); + if (!ctx) + return -ENOMEM; + + ppgtt = ctx->ppgtt; + GEM_BUG_ON(!ppgtt); + + err = func(i915, &ppgtt->base, 0, ppgtt->base.total, end_time); + + mock_context_close(ctx); + return err; +} + +static int igt_mock_fill(void *arg) +{ + return exercise_mock(arg, fill_hole); +} + +static int igt_mock_walk(void *arg) +{ + return exercise_mock(arg, walk_hole); +} + +static int igt_mock_drunk(void *arg) +{ + return exercise_mock(arg, drunk_hole); +} + static int igt_gtt_reserve(void *arg) { struct drm_i915_private *i915 = arg; @@ -1286,6 +1326,9 @@ static int igt_gtt_insert(void *arg) int i915_gem_gtt_mock_selftests(void) { static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_drunk), + SUBTEST(igt_mock_walk), + SUBTEST(igt_mock_fill), SUBTEST(igt_gtt_reserve), SUBTEST(igt_gtt_insert), }; From af85f50d1890e4ff29e083ad9ba2f93dbc684276 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:56 +0000 Subject: [PATCH 0200/1299] drm/i915: Exercise manipulate of single pages in the GGTT Move a single page of an object around within the GGTT and check coherency of writes and reads. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-45-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 5caa5153a394..36e5cb582af4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -903,6 +903,96 @@ static int igt_ggtt_lowlevel(void *arg) return exercise_ggtt(arg, lowlevel_hole); } +static int igt_ggtt_page(void *arg) +{ + const unsigned int count = PAGE_SIZE/sizeof(u32); + I915_RND_STATE(prng); + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_i915_gem_object *obj; + struct drm_mm_node tmp; + unsigned int *order, n; + int err; + + mutex_lock(&i915->drm.struct_mutex); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_free; + + memset(&tmp, 0, sizeof(tmp)); + err = drm_mm_insert_node_in_range(&ggtt->base.mm, &tmp, + 1024 * PAGE_SIZE, 0, + I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + if (err) + goto out_unpin; + + order = i915_random_order(count, &prng); + if (!order) { + err = -ENOMEM; + goto out_remove; + } + + for (n = 0; n < count; n++) { + u64 offset = tmp.start + order[n] * PAGE_SIZE; + u32 __iomem *vaddr; + + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, 0), + offset, I915_CACHE_NONE, 0); + + vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + iowrite32(n, vaddr + n); + io_mapping_unmap_atomic(vaddr); + + wmb(); + ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); + } + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + u64 offset = tmp.start + order[n] * PAGE_SIZE; + u32 __iomem *vaddr; + u32 val; + + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, 0), + offset, I915_CACHE_NONE, 0); + + vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + val = ioread32(vaddr + n); + io_mapping_unmap_atomic(vaddr); + + ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); + + if (val != n) { + pr_err("insert page failed: found %d, expected %d\n", + val, n); + err = -EINVAL; + break; + } + } + + kfree(order); +out_remove: + drm_mm_remove_node(&tmp); +out_unpin: + i915_gem_object_unpin_pages(obj); +out_free: + i915_gem_object_put(obj); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + static void track_vma_bind(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; @@ -1360,6 +1450,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ggtt_drunk), SUBTEST(igt_ggtt_walk), SUBTEST(igt_ggtt_fill), + SUBTEST(igt_ggtt_page), }; GEM_BUG_ON(offset_in_page(i915->ggtt.base.total)); From 7db4dceafa443f0b6c7df90f01b16ec8b9f8b25d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:57 +0000 Subject: [PATCH 0201/1299] drm/i915: Exercise crossing pot boundaries in the GTT As the page-table trees within the GTT are naturally aligned to power-of-two boundaries, by inserting an object that crosses a power-of-two (and the power-of-two intervals) we can quickly check the code for errors in switching between levels in the tree. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-46-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 94 +++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 36e5cb582af4..5af9339087b7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -582,6 +582,82 @@ static int walk_hole(struct drm_i915_private *i915, return 0; } +static int pot_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned long flags; + unsigned int pot; + int err; + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + obj = i915_gem_object_create_internal(i915, 2 * I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + /* Insert a pair of pages across every pot boundary within the hole */ + for (pot = fls64(hole_end - 1) - 1; + pot > ilog2(2 * I915_GTT_PAGE_SIZE); + pot--) { + u64 step = BIT_ULL(pot); + u64 addr; + + for (addr = round_up(hole_start + I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; + addr + vma->size <= hole_end; + addr += step) { + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx in hole [%llx - %llx], with err=%d\n", + __func__, + addr, + hole_start, hole_end, + err); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, vma->size); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + GEM_BUG_ON(err); + } + + if (igt_timeout(end_time, + "%s timed out after %d/%d\n", + __func__, pot, fls64(hole_end - 1) - 1)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); +err_obj: + i915_gem_object_put(obj); + return err; +} + static int drunk_hole(struct drm_i915_private *i915, struct i915_address_space *vm, u64 hole_start, u64 hole_end, @@ -819,6 +895,11 @@ static int igt_ppgtt_walk(void *arg) return exercise_ppgtt(arg, walk_hole); } +static int igt_ppgtt_pot(void *arg) +{ + return exercise_ppgtt(arg, pot_hole); +} + static int igt_ppgtt_drunk(void *arg) { return exercise_ppgtt(arg, drunk_hole); @@ -893,6 +974,11 @@ static int igt_ggtt_walk(void *arg) return exercise_ggtt(arg, walk_hole); } +static int igt_ggtt_pot(void *arg) +{ + return exercise_ggtt(arg, pot_hole); +} + static int igt_ggtt_drunk(void *arg) { return exercise_ggtt(arg, drunk_hole); @@ -1038,6 +1124,11 @@ static int igt_mock_walk(void *arg) return exercise_mock(arg, walk_hole); } +static int igt_mock_pot(void *arg) +{ + return exercise_mock(arg, pot_hole); +} + static int igt_mock_drunk(void *arg) { return exercise_mock(arg, drunk_hole); @@ -1418,6 +1509,7 @@ int i915_gem_gtt_mock_selftests(void) static const struct i915_subtest tests[] = { SUBTEST(igt_mock_drunk), SUBTEST(igt_mock_walk), + SUBTEST(igt_mock_pot), SUBTEST(igt_mock_fill), SUBTEST(igt_gtt_reserve), SUBTEST(igt_gtt_insert), @@ -1444,11 +1536,13 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_lowlevel), SUBTEST(igt_ppgtt_drunk), SUBTEST(igt_ppgtt_walk), + SUBTEST(igt_ppgtt_pot), SUBTEST(igt_ppgtt_fill), SUBTEST(igt_ppgtt_shrink), SUBTEST(igt_ggtt_lowlevel), SUBTEST(igt_ggtt_drunk), SUBTEST(igt_ggtt_walk), + SUBTEST(igt_ggtt_pot), SUBTEST(igt_ggtt_fill), SUBTEST(igt_ggtt_page), }; From 496b575e3ccbf6fbe57a674c721af43dc8826361 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:58 +0000 Subject: [PATCH 0202/1299] drm/i915: Add initial selftests for hang detection and resets Check that we can reset the GPU and continue executing from the next request. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-47-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/intel_hangcheck.c | 4 + .../drm/i915/selftests/i915_live_selftests.h | 1 + .../gpu/drm/i915/selftests/intel_hangcheck.c | 537 ++++++++++++++++++ 4 files changed, 544 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/intel_hangcheck.c diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 80b653fb3722..6cfdb64926d7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3377,8 +3377,8 @@ int __must_check i915_gem_init(struct drm_i915_private *dev_priv); int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); void i915_gem_init_swizzling(struct drm_i915_private *dev_priv); void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv); -int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, - unsigned int flags); +int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, + unsigned int flags); int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index f05971f5586f..dce742243ba6 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -480,3 +480,7 @@ void intel_hangcheck_init(struct drm_i915_private *i915) INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work, i915_hangcheck_elapsed); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_hangcheck.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index eedd7901ce61..18b174d855ca 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -16,3 +16,4 @@ selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) selftest(gtt, i915_gem_gtt_live_selftests) selftest(contexts, i915_gem_context_live_selftests) +selftest(hangcheck, intel_hangcheck_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c new file mode 100644 index 000000000000..d4acee6730e9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -0,0 +1,537 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +struct hang { + struct drm_i915_private *i915; + struct drm_i915_gem_object *hws; + struct drm_i915_gem_object *obj; + u32 *seqno; + u32 *batch; +}; + +static int hang_init(struct hang *h, struct drm_i915_private *i915) +{ + void *vaddr; + int err; + + memset(h, 0, sizeof(*h)); + h->i915 = i915; + + h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(h->hws)) + return PTR_ERR(h->hws); + + h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(h->obj)) { + err = PTR_ERR(h->obj); + goto err_hws; + } + + i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC); + vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + h->seqno = memset(vaddr, 0xff, PAGE_SIZE); + + vaddr = i915_gem_object_pin_map(h->obj, + HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_unpin_hws; + } + h->batch = vaddr; + + return 0; + +err_unpin_hws: + i915_gem_object_unpin_map(h->hws); +err_obj: + i915_gem_object_put(h->obj); +err_hws: + i915_gem_object_put(h->hws); + return err; +} + +static u64 hws_address(const struct i915_vma *hws, + const struct drm_i915_gem_request *rq) +{ + return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); +} + +static int emit_recurse_batch(struct hang *h, + struct drm_i915_gem_request *rq) +{ + struct drm_i915_private *i915 = h->i915; + struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base; + struct i915_vma *hws, *vma; + unsigned int flags; + u32 *batch; + int err; + + vma = i915_vma_instance(h->obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + hws = i915_vma_instance(h->hws, vm, NULL); + if (IS_ERR(hws)) + return PTR_ERR(hws); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + err = i915_vma_pin(hws, 0, 0, PIN_USER); + if (err) + goto unpin_vma; + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto unpin_hws; + + err = i915_switch_context(rq); + if (err) + goto unpin_hws; + + i915_vma_move_to_active(vma, rq, 0); + if (!i915_gem_object_has_active_reference(vma->obj)) { + i915_gem_object_get(vma->obj); + i915_gem_object_set_active_reference(vma->obj); + } + + i915_vma_move_to_active(hws, rq, 0); + if (!i915_gem_object_has_active_reference(hws->obj)) { + i915_gem_object_get(hws->obj); + i915_gem_object_set_active_reference(hws->obj); + } + + batch = h->batch; + if (INTEL_GEN(i915) >= 8) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = upper_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *batch++ = lower_32_bits(vma->node.start); + *batch++ = upper_32_bits(vma->node.start); + } else if (INTEL_GEN(i915) >= 6) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 1 << 8; + *batch++ = lower_32_bits(vma->node.start); + } else if (INTEL_GEN(i915) >= 4) { + *batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *batch++ = 0; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 2 << 6; + *batch++ = lower_32_bits(vma->node.start); + } else { + *batch++ = MI_STORE_DWORD_IMM; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1; + *batch++ = lower_32_bits(vma->node.start); + } + *batch++ = MI_BATCH_BUFFER_END; /* not reached */ + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); + +unpin_hws: + i915_vma_unpin(hws); +unpin_vma: + i915_vma_unpin(vma); + return err; +} + +static struct drm_i915_gem_request * +hang_create_request(struct hang *h, + struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + struct drm_i915_gem_request *rq; + int err; + + if (i915_gem_object_is_active(h->obj)) { + struct drm_i915_gem_object *obj; + void *vaddr; + + obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vaddr = i915_gem_object_pin_map(obj, + HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC); + if (IS_ERR(vaddr)) { + i915_gem_object_put(obj); + return ERR_CAST(vaddr); + } + + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); + + h->obj = obj; + h->batch = vaddr; + } + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return rq; + + err = emit_recurse_batch(h, rq); + if (err) { + __i915_add_request(rq, false); + return ERR_PTR(err); + } + + return rq; +} + +static u32 hws_seqno(const struct hang *h, + const struct drm_i915_gem_request *rq) +{ + return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); +} + +static void hang_fini(struct hang *h) +{ + *h->batch = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); + + i915_gem_object_unpin_map(h->hws); + i915_gem_object_put(h->hws); + + i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED); + i915_gem_retire_requests(h->i915); +} + +static int igt_hang_sanitycheck(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *rq; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct hang h; + int err; + + /* Basic check that we can execute our hanging batch */ + + if (!igt_can_mi_store_dword_imm(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + for_each_engine(engine, i915, id) { + long timeout; + + rq = hang_create_request(&h, engine, i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + pr_err("Failed to create request for %s, err=%d\n", + engine->name, err); + goto fini; + } + + i915_gem_request_get(rq); + + *h.batch = MI_BATCH_BUFFER_END; + __i915_add_request(rq, true); + + timeout = i915_wait_request(rq, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + i915_gem_request_put(rq); + + if (timeout < 0) { + err = timeout; + pr_err("Wait for request failed on %s, err=%d\n", + engine->name, err); + goto fini; + } + } + +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_global_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + unsigned int reset_count; + int err = 0; + + /* Check that we can issue a global GPU reset */ + + set_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags); + + mutex_lock(&i915->drm.struct_mutex); + reset_count = i915_reset_count(&i915->gpu_error); + + i915_reset(i915); + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + err = -EINVAL; + } + mutex_unlock(&i915->drm.struct_mutex); + + GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags)); + if (i915_terminally_wedged(&i915->gpu_error)) + err = -EIO; + + return err; +} + +static u32 fake_hangcheck(struct drm_i915_gem_request *rq) +{ + u32 reset_count; + + rq->engine->hangcheck.stalled = true; + rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine); + + reset_count = i915_reset_count(&rq->i915->gpu_error); + + set_bit(I915_RESET_IN_PROGRESS, &rq->i915->gpu_error.flags); + wake_up_all(&rq->i915->gpu_error.wait_queue); + + return reset_count; +} + +static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) +{ + return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 10) && + wait_for(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 1000)); +} + +static int igt_wait_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *rq; + unsigned int reset_count; + struct hang h; + long timeout; + int err; + + /* Check that we detect a stuck waiter and issue a reset */ + + set_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags); + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto fini; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + + if (!wait_for_hang(&h, rq)) { + pr_err("Failed to start request %x\n", rq->fence.seqno); + err = -EIO; + goto out_rq; + } + + reset_count = fake_hangcheck(rq); + + timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10); + if (timeout < 0) { + pr_err("i915_wait_request failed on a stuck request: err=%ld\n", + timeout); + err = timeout; + goto out_rq; + } + GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags)); + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + err = -EINVAL; + goto out_rq; + } + +out_rq: + i915_gem_request_put(rq); +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_terminally_wedged(&i915->gpu_error)) + return -EIO; + + return err; +} + +static int igt_reset_queue(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct hang h; + int err; + + /* Check that we replay pending requests following a hang */ + + if (!igt_can_mi_store_dword_imm(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + for_each_engine(engine, i915, id) { + struct drm_i915_gem_request *prev; + IGT_TIMEOUT(end_time); + unsigned int count; + + prev = hang_create_request(&h, engine, i915->kernel_context); + if (IS_ERR(prev)) { + err = PTR_ERR(prev); + goto fini; + } + + i915_gem_request_get(prev); + __i915_add_request(prev, true); + + count = 0; + do { + struct drm_i915_gem_request *rq; + unsigned int reset_count; + + rq = hang_create_request(&h, + engine, + i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto fini; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + + if (!wait_for_hang(&h, prev)) { + pr_err("Failed to start request %x\n", + prev->fence.seqno); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EIO; + goto fini; + } + + reset_count = fake_hangcheck(prev); + + i915_reset(i915); + + GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, + &i915->gpu_error.flags)); + if (prev->fence.error != -EIO) { + pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", + prev->fence.error); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + if (rq->fence.error) { + pr_err("Fence error status not zero [%d] after unrelated reset\n", + rq->fence.error); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + i915_gem_request_put(prev); + prev = rq; + count++; + } while (time_before(jiffies, end_time)); + pr_info("%s: Completed %d resets\n", engine->name, count); + + *h.batch = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_request_put(prev); + } + +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_terminally_wedged(&i915->gpu_error)) + return -EIO; + + return err; +} + +int intel_hangcheck_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_hang_sanitycheck), + SUBTEST(igt_global_reset), + SUBTEST(igt_wait_reset), + SUBTEST(igt_reset_queue), + }; + + if (!intel_has_gpu_reset(i915)) + return 0; + + return i915_subtests(tests, i915); +} From 04a68a35ce6d7b54749989f943993020f48fed62 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 9 Nov 2016 10:39:05 +0000 Subject: [PATCH 0203/1299] drm/i915/gvt: Disable access to stolen memory as a guest Explicitly disable stolen memory when running as a guest in a virtual machine, since the memory is not mediated between clients and reserved entirely for the host. The actual size should be reported as zero, but like every other quirk we want to tell the user what is happening. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99028 Signed-off-by: Chris Wilson Cc: Zhenyu Wang Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161109103905.17860-1-chris@chris-wilson.co.uk Reviewed-by: Zhenyu Wang Cc: stable@vger.kernel.org --- drivers/gpu/drm/i915/i915_gem_stolen.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 82ca8f49fec1..f3abdc27c5dd 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -409,6 +409,11 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->mm.stolen_lock); + if (intel_vgpu_active(dev_priv)) { + DRM_INFO("iGVT-g active, disabling use of stolen memory\n"); + return 0; + } + #ifdef CONFIG_INTEL_IOMMU if (intel_iommu_gfx_mapped && INTEL_GEN(dev_priv) < 8) { DRM_INFO("DMAR active, disabling use of stolen memory\n"); From d892e9398ecf6defc7972a62227b77dad6be20bd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 21:53:43 +0000 Subject: [PATCH 0204/1299] drm/i915: Pass timeout==0 on to i915_gem_object_wait_fence() The i915_gem_object_wait_fence() uses an incoming timeout=0 to query whether the current fence is busy or idle, without waiting. This can be used by the wait-ioctl to implement a busy query. Fixes: e95433c73a11 ("drm/i915: Rearrange i915_wait_request() accounting with callers") Testcase: igt/gem_wait/basic-busy-write-all Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: # v4.10-rc1+ Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20170212215344.16600-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 82489ed10373..71297920fdf4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -441,7 +441,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, timeout = i915_gem_object_wait_fence(shared[i], flags, timeout, rps); - if (timeout <= 0) + if (timeout < 0) break; dma_fence_put(shared[i]); @@ -454,7 +454,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, excl = reservation_object_get_excl_rcu(resv); } - if (excl && timeout > 0) + if (excl && timeout >= 0) timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); dma_fence_put(excl); From 4c86d77743a54fb2d8a4d18a037a074c892bb3be Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 14 Feb 2017 07:43:56 +0100 Subject: [PATCH 0205/1299] xfrm: Don't use sk_family for socket policy lookups On IPv4-mapped IPv6 addresses sk_family is AF_INET6, but the flow informations are created based on AF_INET. So the routing set up 'struct flowi4' but we try to access 'struct flowi6' what leads to an out of bounds access. Fix this by using the family we get with the dst_entry, like we do it for the standard policy lookup. Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_policy.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3c8f5b70abf8..a9af17f0fce6 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1248,7 +1248,7 @@ static inline int policy_to_flow_dir(int dir) } static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, - const struct flowi *fl) + const struct flowi *fl, u16 family) { struct xfrm_policy *pol; @@ -1256,8 +1256,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { - bool match = xfrm_selector_match(&pol->selector, fl, - sk->sk_family); + bool match = xfrm_selector_match(&pol->selector, fl, family); int err = 0; if (match) { @@ -2253,7 +2252,7 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, sk = sk_const_to_full_sk(sk); if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { num_pols = 1; - pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) @@ -2532,7 +2531,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; sk = sk_to_full_sk(sk); if (sk && sk->sk_policy[dir]) { - pol = xfrm_sk_policy_lookup(sk, dir, &fl); + pol = xfrm_sk_policy_lookup(sk, dir, &fl, family); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; From d2d1501625e96170958f38646a9fcc9b69bbc2df Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Mon, 13 Feb 2017 16:57:33 +0200 Subject: [PATCH 0206/1299] drm/i915: Convert remaining users of 32bit power domain masks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I screwed up the rebase of commit d8fc70b7367b ("drm/i915: Make power domain masks 64 bit long") before sending v2, causing a couple of conversions from 32 to 64 bit masks to be lost. Fixes: d8fc70b7367b ("drm/i915: Make power domain masks 64 bit long") Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Ville Syrjälä Cc: Ander Conselvan de Oliveira Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213145733.8779-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 88b7d96c46a4..34c000859c0d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5701,7 +5701,7 @@ static u64 get_crtc_power_domains(struct drm_crtc *crtc, return mask; } -static unsigned long +static u64 modeset_get_crtc_power_domains(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state) { @@ -5903,7 +5903,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum intel_display_power_domain domain; - unsigned long domains; + u64 domains; struct drm_atomic_state *state; struct intel_crtc_state *crtc_state; int ret; From 73dec95e6ba37d8138bb111be5c9b8a1f3a622ae Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 14 Feb 2017 11:32:42 +0000 Subject: [PATCH 0207/1299] drm/i915: Emit to ringbuffer directly This removes the usage of intel_ring_emit in favour of directly writing to the ring buffer. intel_ring_emit was preventing the compiler for optimising fetch and increment of the current ring buffer pointer and therefore generating very verbose code for every write. It had no useful purpose since all ringbuffer operations are started and ended with intel_ring_begin and intel_ring_advance respectively, with no bail out in the middle possible, so it is fine to increment the tail in intel_ring_begin and let the code manage the pointer itself. Useless instruction removal amounts to approximately two and half kilobytes of saved text on my build. Not sure if this has any measurable performance implications but executing a ton of useless instructions on fast paths cannot be good. v2: * Change return from intel_ring_begin to error pointer by popular demand. * Move tail increment to intel_ring_advance to enable some error checking. v3: * Move tail advance back into intel_ring_begin. * Rebase and tidy. v4: * Complete rebase after a few months since v3. v5: * Remove unecessary cast and fix !debug compile. (Chris Wilson) v6: * Make intel_ring_offset take request as well. * Fix recording of request postfix plus a sprinkle of asserts. (Chris Wilson) v7: * Use intel_ring_offset to get the postfix. (Chris Wilson) * Convert GVT code as well. v8: * Rename *out++ to *cs++. v9: * Fix GVT out to cs conversion in GVT. v10: * Rebase for new intel_ring_begin in selftests. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Zhi Wang Reviewed-by: Chris Wilson Acked-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170214113242.29241-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 36 +- drivers/gpu/drm/i915/i915_gem_context.c | 76 +-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 38 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 67 +- drivers/gpu/drm/i915/i915_gem_request.c | 8 +- drivers/gpu/drm/i915/intel_display.c | 131 ++-- drivers/gpu/drm/i915/intel_lrc.c | 207 +++---- drivers/gpu/drm/i915/intel_mocs.c | 51 +- drivers/gpu/drm/i915/intel_overlay.c | 79 +-- drivers/gpu/drm/i915/intel_ringbuffer.c | 580 ++++++++---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 30 +- .../drm/i915/selftests/i915_gem_coherency.c | 33 +- 12 files changed, 617 insertions(+), 719 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 9a4b23c3ee97..c3d44c03157c 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1513,7 +1513,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm, len += copy_len; gma += copy_len; } - return 0; + return len; } @@ -1630,7 +1630,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, gma, gma + bb_size, dst); - if (ret) { + if (ret < 0) { gvt_err("fail to copy guest ring buffer\n"); goto unmap_src; } @@ -2594,11 +2594,8 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx; - struct intel_ring *ring = shadow_ctx->engine[ring_id].ring; unsigned long gma_head, gma_tail, gma_top, guest_rb_size; - unsigned int copy_len = 0; + u32 *cs; int ret; guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl); @@ -2612,36 +2609,33 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_top = workload->rb_start + guest_rb_size; /* allocate shadow ring buffer */ - ret = intel_ring_begin(workload->req, workload->rb_len / 4); - if (ret) - return ret; + cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* get shadow ring buffer va */ - workload->shadow_ring_buffer_va = ring->vaddr + ring->tail; + workload->shadow_ring_buffer_va = cs; /* head > tail --> copy head <-> top */ if (gma_head > gma_tail) { ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, - gma_head, gma_top, - workload->shadow_ring_buffer_va); - if (ret) { + gma_head, gma_top, cs); + if (ret < 0) { gvt_err("fail to copy guest ring buffer\n"); return ret; } - copy_len = gma_top - gma_head; + cs += ret / sizeof(u32); gma_head = workload->rb_start; } /* copy head or start <-> tail */ - ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, - gma_head, gma_tail, - workload->shadow_ring_buffer_va + copy_len); - if (ret) { + ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs); + if (ret < 0) { gvt_err("fail to copy guest ring buffer\n"); return ret; } - ring->tail += workload->rb_len; - intel_ring_advance(ring); + cs += ret / sizeof(u32); + intel_ring_advance(workload->req, cs); return 0; } @@ -2695,7 +2689,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx->workload->vgpu->gtt.ggtt_mm, guest_gma, guest_gma + ctx_size, map); - if (ret) { + if (ret < 0) { gvt_err("fail to copy guest indirect ctx\n"); goto unmap_src; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index c73bf02870d5..9037356536dc 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -596,10 +596,9 @@ static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { struct drm_i915_private *dev_priv = req->i915; - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; enum intel_engine_id id; - u32 flags = hw_flags | MI_MM_SPACE_GTT; + u32 *cs, flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ i915.semaphores ? @@ -629,99 +628,92 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) if (INTEL_GEN(dev_priv) >= 7) len += 2 + (num_rings ? 4*num_rings + 6 : 0); - ret = intel_ring_begin(req, len); - if (ret) - return ret; + cs = intel_ring_begin(req, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ if (INTEL_GEN(dev_priv) >= 7) { - intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; if (num_rings) { struct intel_engine_cs *signaller; - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(num_rings)); + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); for_each_engine(signaller, dev_priv, id) { if (signaller == engine) continue; - intel_ring_emit_reg(ring, - RING_PSMI_CTL(signaller->mmio_base)); - intel_ring_emit(ring, - _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + *cs++ = i915_mmio_reg_offset( + RING_PSMI_CTL(signaller->mmio_base)); + *cs++ = _MASKED_BIT_ENABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); } } } - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_SET_CONTEXT); - intel_ring_emit(ring, - i915_ggtt_offset(req->ctx->engine[RCS].state) | flags); + *cs++ = MI_NOOP; + *cs++ = MI_SET_CONTEXT; + *cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv */ - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; if (INTEL_GEN(dev_priv) >= 7) { if (num_rings) { struct intel_engine_cs *signaller; i915_reg_t last_reg = {}; /* keep gcc quiet */ - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(num_rings)); + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); for_each_engine(signaller, dev_priv, id) { if (signaller == engine) continue; last_reg = RING_PSMI_CTL(signaller->mmio_base); - intel_ring_emit_reg(ring, last_reg); - intel_ring_emit(ring, - _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = _MASKED_BIT_DISABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); } /* Insert a delay before the next switch! */ - intel_ring_emit(ring, - MI_STORE_REGISTER_MEM | - MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(ring, last_reg); - intel_ring_emit(ring, - i915_ggtt_offset(engine->scratch)); - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = i915_ggtt_offset(engine->scratch); + *cs++ = MI_NOOP; } - intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return ret; } static int remap_l3(struct drm_i915_gem_request *req, int slice) { - u32 *remap_info = req->i915->l3_parity.remap_info[slice]; - struct intel_ring *ring = req->ring; - int i, ret; + u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice]; + int i; if (!remap_info) return 0; - ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* * Note: We do not worry about the concurrent register cacheline hang * here because no other code should access these registers other than * at initialization time. */ - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { - intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i)); - intel_ring_emit(ring, remap_info[i]); + *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); + *cs++ = remap_info[i]; } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 111b7f8c617a..da0846fe2ad6 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1336,25 +1336,25 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; - int ret, i; + u32 *cs; + int i; if (!IS_GEN7(req->i915) || req->engine->id != RCS) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } - ret = intel_ring_begin(req, 4 * 3); - if (ret) - return ret; + cs = intel_ring_begin(req, 4 * 3); + if (IS_ERR(cs)) + return PTR_ERR(cs); for (i = 0; i < 4; i++) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i)); - intel_ring_emit(ring, 0); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); + *cs++ = 0; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -1415,7 +1415,7 @@ execbuf_submit(struct i915_execbuffer_params *params, struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; int instp_mode; - u32 instp_mask; + u32 instp_mask, *cs; int ret; ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); @@ -1461,17 +1461,15 @@ execbuf_submit(struct i915_execbuffer_params *params, if (params->engine->id == RCS && instp_mode != dev_priv->relative_constants_mode) { - struct intel_ring *ring = params->request->ring; + cs = intel_ring_begin(params->request, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - ret = intel_ring_begin(params->request, 4); - if (ret) - return ret; - - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, INSTPM); - intel_ring_emit(ring, instp_mask << 16 | instp_mode); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(INSTPM); + *cs++ = instp_mask << 16 | instp_mode; + intel_ring_advance(params->request, cs); dev_priv->relative_constants_mode = instp_mode; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index eebbffdb9a0b..cb5ea5d38f11 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -686,23 +686,22 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, dma_addr_t addr) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - int ret; + u32 *cs; BUG_ON(entry >= 4); - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); - intel_ring_emit(ring, upper_32_bits(addr)); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); - intel_ring_emit(ring, lower_32_bits(addr)); - intel_ring_advance(ring); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry)); + *cs++ = upper_32_bits(addr); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry)); + *cs++ = lower_32_bits(addr); + intel_ring_advance(req, cs); return 0; } @@ -1730,8 +1729,8 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; + u32 *cs; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ @@ -1739,17 +1738,17 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, if (ret) return ret; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(ring, PP_DIR_DCLV_2G); - intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); - intel_ring_emit(ring, get_pd_offset(ppgtt)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); + *cs++ = PP_DIR_DCLV_2G; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = get_pd_offset(ppgtt); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1757,8 +1756,8 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; + u32 *cs; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ @@ -1766,17 +1765,17 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, if (ret) return ret; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(ring, PP_DIR_DCLV_2G); - intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); - intel_ring_emit(ring, get_pd_offset(ppgtt)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); + *cs++ = PP_DIR_DCLV_2G; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = get_pd_offset(ppgtt); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); /* XXX: RCS is the only one to auto invalidate the TLBs? */ if (engine->id != RCS) { diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 24571a5289a4..2f6cfa47dc61 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -824,6 +824,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) struct intel_ring *ring = request->ring; struct intel_timeline *timeline = request->timeline; struct drm_i915_gem_request *prev; + u32 *cs; int err; lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -862,10 +863,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ - err = intel_ring_begin(request, engine->emit_breadcrumb_sz); - GEM_BUG_ON(err); - request->postfix = ring->tail; - ring->tail += engine->emit_breadcrumb_sz * sizeof(u32); + cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); + GEM_BUG_ON(IS_ERR(cs)); + request->postfix = intel_ring_offset(request, cs); /* Seal the request and mark it as pending execution. Note that * we may inspect this state, without holding any locks, during diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 34c000859c0d..02e6bb2d614c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10158,14 +10158,12 @@ static int intel_gen2_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 flip_mask; - int ret; + u32 flip_mask, *cs; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Can't queue multiple flips, so wait for the previous * one to finish before executing the next. @@ -10174,13 +10172,12 @@ static int intel_gen2_queue_flip(struct drm_device *dev, flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, 0); /* aux display base address, unused */ + *cs++ = MI_WAIT_FOR_EVENT | flip_mask; + *cs++ = MI_NOOP; + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = 0; /* aux display base address, unused */ return 0; } @@ -10192,26 +10189,23 @@ static int intel_gen3_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 flip_mask; - int ret; + u32 flip_mask, *cs; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); if (intel_crtc->plane) flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_WAIT_FOR_EVENT | flip_mask; + *cs++ = MI_NOOP; + *cs++ = MI_DISPLAY_FLIP_I915 | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = MI_NOOP; return 0; } @@ -10223,25 +10217,22 @@ static int intel_gen4_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t pf, pipesrc; - int ret; + u32 pf, pipesrc, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* i965+ uses the linear or tiled offsets from the * Display Registers (which do not change across a page-flip) * so we need only reprogram the base address. */ - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset | - intel_fb_modifier_to_tiling(fb->modifier)); + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset | + intel_fb_modifier_to_tiling(fb->modifier); /* XXX Enabling the panel-fitter across page-flip is so far * untested on non-native modes, so ignore it for now. @@ -10249,7 +10240,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(ring, pf | pipesrc); + *cs++ = pf | pipesrc; return 0; } @@ -10261,21 +10252,17 @@ static int intel_gen6_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t pf, pipesrc; - int ret; + u32 pf, pipesrc, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0] | - intel_fb_modifier_to_tiling(fb->modifier)); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier); + *cs++ = intel_crtc->flip_work->gtt_offset; /* Contrary to the suggestions in the documentation, * "Enable Panel Fitter" does not seem to be required when page @@ -10285,7 +10272,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(ring, pf | pipesrc); + *cs++ = pf | pipesrc; return 0; } @@ -10298,9 +10285,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev, uint32_t flags) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t plane_bit = 0; + u32 *cs, plane_bit = 0; int len, ret; switch (intel_crtc->plane) { @@ -10344,9 +10330,9 @@ static int intel_gen7_queue_flip(struct drm_device *dev, if (ret) return ret; - ret = intel_ring_begin(req, len); - if (ret) - return ret; + cs = intel_ring_begin(req, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Unmask the flip-done completion message. Note that the bspec says that * we should do this for both the BCS and RCS, and that we must not unmask @@ -10358,31 +10344,28 @@ static int intel_gen7_queue_flip(struct drm_device *dev, * to zero does lead to lockups within MI_DISPLAY_FLIP. */ if (req->engine->id == RCS) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, DERRMR); - intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE | - DERRMR_PIPEB_PRI_FLIP_DONE | - DERRMR_PIPEC_PRI_FLIP_DONE)); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(DERRMR); + *cs++ = ~(DERRMR_PIPEA_PRI_FLIP_DONE | + DERRMR_PIPEB_PRI_FLIP_DONE | + DERRMR_PIPEC_PRI_FLIP_DONE); if (IS_GEN8(dev_priv)) - intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT); + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT; else - intel_ring_emit(ring, MI_STORE_REGISTER_MEM | - MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(ring, DERRMR); - intel_ring_emit(ring, - i915_ggtt_offset(req->engine->scratch) + 256); + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(DERRMR); + *cs++ = i915_ggtt_offset(req->engine->scratch) + 256; if (IS_GEN8(dev_priv)) { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } } - intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); - intel_ring_emit(ring, fb->pitches[0] | - intel_fb_modifier_to_tiling(fb->modifier)); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, (MI_NOOP)); + *cs++ = MI_DISPLAY_FLIP_I915 | plane_bit; + *cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier); + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = MI_NOOP; return 0; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index af717e1356a9..f567d4b08863 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -834,6 +834,7 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; + u32 *cs; int ret; GEM_BUG_ON(!ce->pin_count); @@ -858,9 +859,11 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) goto err; } - ret = intel_ring_begin(request, 0); - if (ret) + cs = intel_ring_begin(request, 0); + if (IS_ERR(cs)) { + ret = PTR_ERR(cs); goto err_unreserve; + } if (!ce->initialised) { ret = engine->init_context(request); @@ -889,9 +892,9 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) { - int ret, i; - struct intel_ring *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; + u32 *cs; + int ret, i; if (w->count == 0) return 0; @@ -900,18 +903,18 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - ret = intel_ring_begin(req, w->count * 2 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, w->count * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); + *cs++ = MI_LOAD_REGISTER_IMM(w->count); for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(ring, w->reg[i].addr); - intel_ring_emit(ring, w->reg[i].value); + *cs++ = i915_mmio_reg_offset(w->reg[i].addr); + *cs++ = w->reg[i].value; } - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) @@ -1404,27 +1407,27 @@ static void reset_common_ring(struct intel_engine_cs *engine, static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; - int i, ret; + u32 *cs; + int i; - ret = intel_ring_begin(req, num_lri_cmds * 2 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, num_lri_cmds * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_lri_cmds)); + *cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds); for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, i)); - intel_ring_emit(ring, upper_32_bits(pd_daddr)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, i)); - intel_ring_emit(ring, lower_32_bits(pd_daddr)); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i)); + *cs++ = lower_32_bits(pd_daddr); } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1433,8 +1436,8 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); + u32 *cs; int ret; /* Don't rely in hw updating PDPs, specially in lite-restore. @@ -1455,19 +1458,17 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine); } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | - (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags & + I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1488,13 +1489,11 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) { - struct intel_ring *ring = request->ring; - u32 cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(request, 4); - if (ret) - return ret; + cs = intel_ring_begin(request, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW + 1; @@ -1511,13 +1510,11 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) cmd |= MI_INVALIDATE_BSD; } - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, - I915_GEM_HWS_SCRATCH_ADDR | - MI_FLUSH_DW_USE_GTT); - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ + intel_ring_advance(request, cs); return 0; } @@ -1525,13 +1522,11 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) static int gen8_emit_flush_render(struct drm_i915_gem_request *request, u32 mode) { - struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; u32 scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; - u32 flags = 0; - int ret; + u32 *cs, flags = 0; int len; flags |= PIPE_CONTROL_CS_STALL; @@ -1573,45 +1568,45 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, if (dc_flush_wa) len += 12; - ret = intel_ring_begin(request, len); - if (ret) - return ret; + cs = intel_ring_begin(request, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); if (vf_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; } if (dc_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, PIPE_CONTROL_DC_FLUSH_ENABLE); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_DC_FLUSH_ENABLE; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; } - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = flags; + *cs++ = scratch_addr; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; if (dc_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, PIPE_CONTROL_CS_STALL); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_CS_STALL; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; } - intel_ring_advance(ring); + intel_ring_advance(request, cs); return 0; } @@ -1621,34 +1616,33 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *out) +static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) { - *out++ = MI_NOOP; - *out++ = MI_NOOP; - request->wa_tail = intel_ring_offset(request->ring, out); + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + request->wa_tail = intel_ring_offset(request, cs); } -static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, - u32 *out) +static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *out++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; - *out++ = 0; - *out++ = request->global_seqno; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; - request->tail = intel_ring_offset(request->ring, out); + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = request->global_seqno; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + request->tail = intel_ring_offset(request, cs); - gen8_emit_wa_tail(request, out); + gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, - u32 *out) + u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); @@ -1657,20 +1651,19 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, * need a prior CS_STALL, which is emitted by the flush * following the batch. */ - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE); - *out++ = intel_hws_seqno_address(request->engine); - *out++ = 0; - *out++ = request->global_seqno; + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = intel_hws_seqno_address(request->engine); + *cs++ = 0; + *cs++ = request->global_seqno; /* We're thrashing one dword of HWS. */ - *out++ = 0; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; - request->tail = intel_ring_offset(request->ring, out); + *cs++ = 0; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + request->tail = intel_ring_offset(request, cs); - gen8_emit_wa_tail(request, out); + gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 773e36253e7c..92e461c68385 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -276,23 +276,22 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) static int emit_mocs_control_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ring *ring = req->ring; enum intel_engine_id engine = req->engine->id; unsigned int index; - int ret; + u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - ret = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); - if (ret) - return ret; + cs = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); for (index = 0; index < table->size; index++) { - intel_ring_emit_reg(ring, mocs_register(engine, index)); - intel_ring_emit(ring, table->table[index].control_value); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = table->table[index].control_value; } /* @@ -304,12 +303,12 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, * that value to all the used entries. */ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { - intel_ring_emit_reg(ring, mocs_register(engine, index)); - intel_ring_emit(ring, table->table[0].control_value); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = table->table[0].control_value; } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -336,29 +335,27 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ring *ring = req->ring; unsigned int i; - int ret; + u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - ret = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); - if (ret) - return ret; + cs = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); for (i = 0; i < table->size/2; i++) { - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 2*i, 2*i+1)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); } if (table->size & 0x01) { /* Odd table size - 1 left over */ - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 2*i, 0)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 2 * i, 0); i++; } @@ -368,12 +365,12 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, * they are reserved by the hardware. */ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 0, 0)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 0, 0); } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 0608fad7f593..5ef9f5bfb92c 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -267,8 +267,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_gem_request *req; - struct intel_ring *ring; - int ret; + u32 *cs; WARN_ON(overlay->active); WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); @@ -277,10 +276,10 @@ static int intel_overlay_on(struct intel_overlay *overlay) if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 4); - if (ret) { + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } overlay->active = true; @@ -288,12 +287,11 @@ static int intel_overlay_on(struct intel_overlay *overlay) if (IS_I830(dev_priv)) i830_overlay_clock_gating(dev_priv, false); - ring = req->ring; - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); - intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_ON; + *cs++ = overlay->flip_addr | OFC_UPDATE; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return intel_overlay_do_wait_request(overlay, req, NULL); } @@ -326,10 +324,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay, { struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_gem_request *req; - struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; - u32 tmp; - int ret; + u32 tmp, *cs; WARN_ON(!overlay->active); @@ -345,16 +341,15 @@ static int intel_overlay_continue(struct intel_overlay *overlay, if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 2); - if (ret) { + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(ring, flip_addr); - intel_ring_advance(ring); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; + *cs++ = flip_addr; + intel_ring_advance(req, cs); intel_overlay_flip_prepare(overlay, vma); @@ -408,9 +403,7 @@ static void intel_overlay_off_tail(struct i915_gem_active *active, static int intel_overlay_off(struct intel_overlay *overlay) { struct drm_i915_gem_request *req; - struct intel_ring *ring; - u32 flip_addr = overlay->flip_addr; - int ret; + u32 *cs, flip_addr = overlay->flip_addr; WARN_ON(!overlay->active); @@ -424,25 +417,23 @@ static int intel_overlay_off(struct intel_overlay *overlay) if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 6); - if (ret) { + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - /* wait for overlay to go idle */ - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(ring, flip_addr); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; + *cs++ = flip_addr; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; /* turn overlay off */ - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF); - intel_ring_emit(ring, flip_addr); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_OFF; + *cs++ = flip_addr; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); intel_overlay_flip_prepare(overlay, NULL); @@ -465,6 +456,7 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) static int intel_overlay_release_old_vid(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; + u32 *cs; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -478,23 +470,20 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ struct drm_i915_gem_request *req; - struct intel_ring *ring; req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 2); - if (ret) { + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - intel_ring_emit(ring, - MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); ret = intel_overlay_do_wait_request(overlay, req, intel_overlay_release_old_vid_tail); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9fba5664376e..0b9030c36625 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -61,22 +61,20 @@ void intel_ring_update_space(struct intel_ring *ring) static int gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - u32 cmd; - int ret; + u32 cmd, *cs; cmd = MI_FLUSH; if (mode & EMIT_INVALIDATE) cmd |= MI_READ_FLUSH; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -84,9 +82,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - u32 cmd; - int ret; + u32 cmd, *cs; /* * read/write caches: @@ -123,13 +119,13 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) cmd |= MI_INVALIDATE_ISP; } - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -174,35 +170,33 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); /* low dword */ - intel_ring_emit(ring, 0); /* high dword */ - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; /* low dword */ + *cs++ = 0; /* high dword */ + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -210,10 +204,9 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) static int gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; + u32 *cs, flags = 0; int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ @@ -247,15 +240,15 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -263,20 +256,17 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = 0; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -284,11 +274,9 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) static int gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; - int ret; + u32 *cs, flags = 0; /* * Ensure that any following seqno writes only happen when the render @@ -332,15 +320,15 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) gen7_render_ring_cs_stall_wa(req); } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -349,20 +337,19 @@ static int gen8_emit_pipe_control(struct drm_i915_gem_request *req, u32 flags, u32 scratch_addr) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = flags; + *cs++ = scratch_addr; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -659,8 +646,8 @@ static void reset_ring_common(struct intel_engine_cs *engine, static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; + u32 *cs; int ret, i; if (w->count == 0) @@ -670,18 +657,18 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - ret = intel_ring_begin(req, (w->count * 2 + 2)); - if (ret) - return ret; + cs = intel_ring_begin(req, (w->count * 2 + 2)); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); + *cs++ = MI_LOAD_REGISTER_IMM(w->count); for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(ring, w->reg[i].addr); - intel_ring_emit(ring, w->reg[i].value); + *cs++ = i915_mmio_reg_offset(w->reg[i].addr); + *cs++ = w->reg[i].value; } - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) @@ -1276,7 +1263,7 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) i915_vma_unpin_and_release(&dev_priv->semaphore); } -static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; @@ -1287,23 +1274,22 @@ static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out) if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL); - *out++ = lower_32_bits(gtt_offset); - *out++ = upper_32_bits(gtt_offset); - *out++ = req->global_seqno; - *out++ = 0; - *out++ = (MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - *out++ = 0; + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL; + *cs++ = lower_32_bits(gtt_offset); + *cs++ = upper_32_bits(gtt_offset); + *cs++ = req->global_seqno; + *cs++ = 0; + *cs++ = MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id); + *cs++ = 0; } - return out; + return cs; } -static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; @@ -1314,19 +1300,19 @@ static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out) if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *out++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; - *out++ = upper_32_bits(gtt_offset); - *out++ = req->global_seqno; - *out++ = (MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - *out++ = 0; + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; + *cs++ = upper_32_bits(gtt_offset); + *cs++ = req->global_seqno; + *cs++ = MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id); + *cs++ = 0; } - return out; + return cs; } -static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *engine; @@ -1341,16 +1327,16 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out) mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; if (i915_mmio_reg_valid(mbox_reg)) { - *out++ = MI_LOAD_REGISTER_IMM(1); - *out++ = i915_mmio_reg_offset(mbox_reg); - *out++ = req->global_seqno; + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(mbox_reg); + *cs++ = req->global_seqno; num_rings++; } } if (num_rings & 1) - *out++ = MI_NOOP; + *cs++ = MI_NOOP; - return out; + return cs; } static void i9xx_submit_request(struct drm_i915_gem_request *request) @@ -1362,15 +1348,14 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) I915_WRITE_TAIL(request->engine, request->tail); } -static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) +static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) { - *out++ = MI_STORE_DWORD_INDEX; - *out++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; - *out++ = req->global_seqno; - *out++ = MI_USER_INTERRUPT; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; + *cs++ = req->global_seqno; + *cs++ = MI_USER_INTERRUPT; - req->tail = intel_ring_offset(req->ring, out); + req->tail = intel_ring_offset(req, cs); } static const int i9xx_emit_breadcrumb_sz = 4; @@ -1383,34 +1368,32 @@ static const int i9xx_emit_breadcrumb_sz = 4; * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. */ -static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) +static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) { return i9xx_emit_breadcrumb(req, - req->engine->semaphore.signal(req, out)); + req->engine->semaphore.signal(req, cs)); } static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) + u32 *cs) { struct intel_engine_cs *engine = req->engine; if (engine->semaphore.signal) - out = engine->semaphore.signal(req, out); + cs = engine->semaphore.signal(req, cs); - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE); - *out++ = intel_hws_seqno_address(engine); - *out++ = 0; - *out++ = req->global_seqno; + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = intel_hws_seqno_address(engine); + *cs++ = 0; + *cs++ = req->global_seqno; /* We're thrashing one dword of HWS. */ - *out++ = 0; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; + *cs++ = 0; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; - req->tail = intel_ring_offset(req->ring, out); + req->tail = intel_ring_offset(req, cs); } static const int gen8_render_emit_breadcrumb_sz = 8; @@ -1427,24 +1410,21 @@ static int gen8_ring_sync_to(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = req->i915; u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id); struct i915_hw_ppgtt *ppgtt; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_SAD_GTE_SDD); - intel_ring_emit(ring, signal->global_seqno); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_advance(ring); + *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = signal->global_seqno; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + intel_ring_advance(req, cs); /* When the !RCS engines idle waiting upon a semaphore, they lose their * pagetables and we must reload them before executing the batch. @@ -1461,28 +1441,27 @@ static int gen6_ring_sync_to(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal) { - struct intel_ring *ring = req->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id]; - int ret; + u32 *cs; WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, dw1 | wait_mbox); + *cs++ = dw1 | wait_mbox; /* Throughout all of the GEM code, seqno passed implies our current * seqno is >= the last seqno executed. However for hardware the * comparison is strictly greater than. */ - intel_ring_emit(ring, signal->global_seqno - 1); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = signal->global_seqno - 1; + *cs++ = 0; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1583,16 +1562,15 @@ i8xx_irq_disable(struct intel_engine_cs *engine) static int bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_FLUSH); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1658,20 +1636,16 @@ i965_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 length, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - MI_BATCH_GTT | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE_I965)); - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & + I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -1685,59 +1659,56 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - u32 cs_offset = i915_ggtt_offset(req->engine->scratch); - int ret; + u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch); - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Evict the invalid PTE TLBs */ - intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); - intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */ - intel_ring_emit(ring, cs_offset); - intel_ring_emit(ring, 0xdeadbeef); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; + *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ + *cs++ = cs_offset; + *cs++ = 0xdeadbeef; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { if (len > I830_BATCH_LIMIT) return -ENOSPC; - ret = intel_ring_begin(req, 6 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 6 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Blit the batch (which has now all relocs applied) to the * stable batch scratch bo area (so that the CS never * stumbles over its tlb invalidation bug) ... */ - intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(ring, - BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096); - intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096); - intel_ring_emit(ring, cs_offset); - intel_ring_emit(ring, 4096); - intel_ring_emit(ring, offset); + *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; + *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; + *cs++ = cs_offset; + *cs++ = 4096; + *cs++ = offset; - intel_ring_emit(ring, MI_FLUSH); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); /* ... and execute it. */ offset = cs_offset; } - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : + MI_BATCH_NON_SECURE); + intel_ring_advance(req, cs); return 0; } @@ -1747,17 +1718,16 @@ i915_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : + MI_BATCH_NON_SECURE); + intel_ring_advance(req, cs); return 0; } @@ -2165,7 +2135,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) static int ring_request_alloc(struct drm_i915_gem_request *request) { - int ret; + u32 *cs; GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); @@ -2178,9 +2148,9 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) GEM_BUG_ON(!request->engine->buffer); request->ring = request->engine->buffer; - ret = intel_ring_begin(request, 0); - if (ret) - return ret; + cs = intel_ring_begin(request, 0); + if (IS_ERR(cs)) + return PTR_ERR(cs); request->reserved_space -= LEGACY_REQUEST_SIZE; return 0; @@ -2235,7 +2205,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) return 0; } -int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) +u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { struct intel_ring *ring = req->ring; int remain_actual = ring->size - ring->tail; @@ -2243,6 +2213,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) int bytes = num_dwords * sizeof(u32); int total_bytes, wait_bytes; bool need_wrap = false; + u32 *cs; total_bytes = bytes + req->reserved_space; @@ -2269,7 +2240,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) if (wait_bytes > ring->space) { int ret = wait_for_space(req, wait_bytes); if (unlikely(ret)) - return ret; + return ERR_PTR(ret); } if (unlikely(need_wrap)) { @@ -2282,32 +2253,34 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) ring->space -= remain_actual; } + GEM_BUG_ON(ring->tail > ring->size - bytes); + cs = ring->vaddr + ring->tail; + ring->tail += bytes; ring->space -= bytes; GEM_BUG_ON(ring->space < 0); - GEM_DEBUG_EXEC(ring->advance = ring->tail + bytes); - return 0; + + return cs; } /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; int num_dwords = - (ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); - int ret; + (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + u32 *cs; if (num_dwords == 0) return 0; num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; - ret = intel_ring_begin(req, num_dwords); - if (ret) - return ret; + cs = intel_ring_begin(req, num_dwords); + if (IS_ERR(cs)) + return PTR_ERR(cs); while (num_dwords--) - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -2351,13 +2324,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - uint32_t cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW; if (INTEL_GEN(req->i915) >= 8) @@ -2379,16 +2350,16 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ } else { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -2397,23 +2368,21 @@ gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; bool ppgtt = USES_PPGTT(req->i915) && !(dispatch_flags & I915_DISPATCH_SECURE); - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags & + I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -2423,22 +2392,19 @@ hsw_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); + *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | + (dispatch_flags & I915_DISPATCH_RS ? + MI_BATCH_RESOURCE_STREAMER : 0); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -2448,20 +2414,17 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE_I965)); + *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_NON_SECURE_I965); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -2470,13 +2433,11 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req, static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - uint32_t cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW; if (INTEL_GEN(req->i915) >= 8) @@ -2497,17 +2458,16 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) */ if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB; - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, - I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ } else { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index cc62e89010d3..4350713dbc58 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -145,7 +145,6 @@ struct intel_ring { u32 head; u32 tail; - GEM_DEBUG_DECL(u32 advance); int space; int size; @@ -292,7 +291,7 @@ struct intel_engine_cs { #define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_RS BIT(2) void (*emit_breadcrumb)(struct drm_i915_gem_request *req, - u32 *out); + u32 *cs); int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. directly into @@ -375,7 +374,7 @@ struct intel_engine_cs { /* AKA wait() */ int (*sync_to)(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal); - u32 *(*signal)(struct drm_i915_gem_request *req, u32 *out); + u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs); } semaphore; /* Execlists */ @@ -497,21 +496,12 @@ void intel_engine_cleanup(struct intel_engine_cs *engine); void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); -int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -static inline void intel_ring_emit(struct intel_ring *ring, u32 data) -{ - *(uint32_t *)(ring->vaddr + ring->tail) = data; - ring->tail += 4; -} +u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, int n); -static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg) -{ - intel_ring_emit(ring, i915_mmio_reg_offset(reg)); -} - -static inline void intel_ring_advance(struct intel_ring *ring) +static inline void +intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) { /* Dummy function. * @@ -521,14 +511,16 @@ static inline void intel_ring_advance(struct intel_ring *ring) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ - GEM_DEBUG_BUG_ON(ring->tail != ring->advance); + GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs); } -static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) +static inline u32 +intel_ring_offset(struct drm_i915_gem_request *req, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ - u32 offset = addr - ring->vaddr; - return offset & (ring->size - 1); + u32 offset = addr - req->ring->vaddr; + GEM_BUG_ON(offset > req->ring->size); + return offset & (req->ring->size - 1); } int __intel_ring_space(int head, int tail, int size); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index 483cbe02d983..ad92c60290f1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -186,6 +186,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); struct drm_i915_gem_request *rq; struct i915_vma *vma; + u32 *cs; int err; err = i915_gem_object_set_to_gtt_domain(obj, true); @@ -202,30 +203,30 @@ static int gpu_set(struct drm_i915_gem_object *obj, return PTR_ERR(rq); } - err = intel_ring_begin(rq, 4); - if (err) { + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { __i915_add_request(rq, false); i915_vma_unpin(vma); - return err; + return PTR_ERR(cs); } if (INTEL_GEN(i915) >= 8) { - intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM_GEN4 | 1 << 22); - intel_ring_emit(rq->ring, lower_32_bits(i915_ggtt_offset(vma) + offset)); - intel_ring_emit(rq->ring, upper_32_bits(i915_ggtt_offset(vma) + offset)); - intel_ring_emit(rq->ring, v); + *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = v; } else if (INTEL_GEN(i915) >= 4) { - intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM_GEN4 | 1 << 22); - intel_ring_emit(rq->ring, 0); - intel_ring_emit(rq->ring, i915_ggtt_offset(vma) + offset); - intel_ring_emit(rq->ring, v); + *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = 0; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; } else { - intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM | 1 << 22); - intel_ring_emit(rq->ring, i915_ggtt_offset(vma) + offset); - intel_ring_emit(rq->ring, v); - intel_ring_emit(rq->ring, MI_NOOP); + *cs++ = MI_STORE_DWORD_IMM | 1 << 22; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; + *cs++ = MI_NOOP; } - intel_ring_advance(rq->ring); + intel_ring_advance(rq, cs); i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unpin(vma); From a937eaf8242a44f402f63a8ed135026bdc2ab0e9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 14 Feb 2017 15:29:01 +0000 Subject: [PATCH 0208/1299] drm/i915: Fix uninitialized return from mi_set_context For some reason my compiler (and CI as well) failed to spot the uninitialized ret in mi_set_context. Signed-off-by: Tvrtko Ursulin Fixes: 73dec95e6ba3 ("drm/i915: Emit to ringbuffer directly") Cc: Tvrtko Ursulin Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170214152901.20361-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_context.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9037356536dc..262452055563 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -604,7 +604,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) i915.semaphores ? INTEL_INFO(dev_priv)->num_rings - 1 : 0; - int len, ret; + int len; /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value @@ -612,7 +612,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) * itlb_before_ctx_switch. */ if (IS_GEN6(dev_priv)) { - ret = engine->emit_flush(req, EMIT_INVALIDATE); + int ret = engine->emit_flush(req, EMIT_INVALIDATE); if (ret) return ret; } @@ -687,7 +687,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) intel_ring_advance(req, cs); - return ret; + return 0; } static int remap_l3(struct drm_i915_gem_request *req, int slice) From 9cc19733fd7c83ac0577f2b80121dae3c289351b Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Sun, 12 Feb 2017 13:32:52 +0000 Subject: [PATCH 0209/1299] drm/i915: fix for WaDisableDopClockGating:bdw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This workaround for BDW was incomplete as it also requires EUTC clock gating to be disabled via UCGCTL1. v2: read modify write UCGTL1 in broadwell_init_clock_gating (Ville) Signed-off-by: Robert Bragg Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170212133252.20990-1-robert@sixbynine.org Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_pm.c | 8 ++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c0b0f5a4b9f1..3c13be8985f1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7229,6 +7229,14 @@ static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv) | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); lpt_init_clock_gating(dev_priv); + + /* WaDisableDopClockGating:bdw + * + * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP + * clock gating. + */ + I915_WRITE(GEN6_UCGCTL1, + I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); } static void haswell_init_clock_gating(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0b9030c36625..95906c45289b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -812,7 +812,11 @@ static int bdw_init_workarounds(struct intel_engine_cs *engine) /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - /* WaDisableDopClockGating:bdw */ + /* WaDisableDopClockGating:bdw + * + * Also see the related UCGTCL1 write in broadwell_init_clock_gating() + * to disable EUTC clock gating. + */ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, DOP_CLOCK_GATING_DISABLE); From 64d83e34264bb4eeaf4f0f0fa69b8cf0bc558a7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 21 Dec 2016 16:31:14 +0200 Subject: [PATCH 0210/1299] drm/i915: Dump more configuration information for DSI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dump out more of the DSI configuration details during init. This includes pclk, burst_mode_ratio, lane_count, pixel_overlap, video_mode_format and reset_timer_val. v2: Dump more info (Chris) v3: Use the VIDEO_MODE_ defines for consistency (Chris) Dump dphy_reg too (Chris) Cc: Chris Wilson Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20161221143114.23530-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 8f683b8b1816..84b3683c1f46 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -801,6 +801,19 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) 8); intel_dsi->clk_hs_to_lp_count += extra_byte_count; + DRM_DEBUG_KMS("Pclk %d\n", intel_dsi->pclk); + DRM_DEBUG_KMS("Pixel overlap %d\n", intel_dsi->pixel_overlap); + DRM_DEBUG_KMS("Lane count %d\n", intel_dsi->lane_count); + DRM_DEBUG_KMS("DPHY param reg 0x%x\n", intel_dsi->dphy_reg); + DRM_DEBUG_KMS("Video mode format %s\n", + intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE ? + "non-burst with sync pulse" : + intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS ? + "non-burst with sync events" : + intel_dsi->video_mode_format == VIDEO_MODE_BURST ? + "burst" : ""); + DRM_DEBUG_KMS("Burst mode ratio %d\n", intel_dsi->burst_mode_ratio); + DRM_DEBUG_KMS("Reset timer %d\n", intel_dsi->rst_timer_val); DRM_DEBUG_KMS("Eot %s\n", enableddisabled(intel_dsi->eotp_pkt)); DRM_DEBUG_KMS("Clockstop %s\n", enableddisabled(!intel_dsi->clock_stop)); DRM_DEBUG_KMS("Mode %s\n", intel_dsi->operation_mode ? "command" : "video"); From 96676fe3a18e61ee2d67992da537d6b68f45df66 Mon Sep 17 00:00:00 2001 From: Deepak S Date: Fri, 12 Aug 2016 18:46:41 +0530 Subject: [PATCH 0211/1299] drm/i915/chv: Set min freq to RPn on CHV. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With latest Punit FW, vgg input voltag drop falling to minimum is fixed. So reverting the WA patch & moving to turbo freq opreation range to [RPn -> RP0] This is not a 1:1 revert of the commit 5b7c91b78b1ce6663e0f1f037f6cb4d7c9537d44. You can refer to commit 5b5929cbe3f7 ("drm/i915/chv: remove pre-production hardware workarounds") as the reason for the discrepancy commit 5b7c91b78b1ce6663e0f1f037f6cb4d7c9537d44 Author: Deepak S Date: Sat May 9 18:15:46 2015 +0530 drm/i915/chv: Set min freq to efficient frequency on chv v2: Fix inconsistent return type. (Chris) v3: drop pre-production hw case (Ville) Acked-by: Chris Wilson Signed-off-by: Deepak S Link: http://patchwork.freedesktop.org/patch/msgid/1471007801-86075-1-git-send-email-deepak.s@linux.intel.com Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_pm.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3c13be8985f1..633cb0478b92 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5708,6 +5708,17 @@ static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) return rp1; } +static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpn; + + val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); + rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & + FB_GFX_FREQ_FUSE_MASK); + + return rpn; +} + static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) { u32 val, rp1; @@ -5944,8 +5955,7 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), dev_priv->rps.rp1_freq); - /* PUnit validated range is only [RPe, RP0] */ - dev_priv->rps.min_freq = dev_priv->rps.efficient_freq; + dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), dev_priv->rps.min_freq); From e1c5f754067b594de58d387aa5873dec83b6c9fd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Feb 2017 09:23:44 +0000 Subject: [PATCH 0212/1299] drm/i915: Avoid overflow in computing pot_hole loop termination When using the mock_ppgtt selftest, the GTT is large enough to cause an overflow in pot_hole() when adding 2 pages to the address. Avoid the overflow by computing the final valid address and iterating up to that address. Signed-off-by: Chris Wilson Cc: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170214092344.12330-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 5af9339087b7..468f0992db39 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -615,7 +615,7 @@ static int pot_hole(struct drm_i915_private *i915, u64 addr; for (addr = round_up(hole_start + I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; - addr + vma->size <= hole_end; + addr <= round_down(hole_end - 2*I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; addr += step) { err = i915_vma_pin(vma, 0, 0, addr | flags); if (err) { From 72affdf9729d9e9a81498196ed5ada4d8f1c599e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Feb 2017 11:37:56 +0000 Subject: [PATCH 0213/1299] drm/i915: Silence compiler for GTT selftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc-4.7 spotted that In file included from drivers/gpu/drm/i915/i915_gem_gtt.c:3791:0: drivers/gpu/drm/i915/selftests/i915_gem_gtt.c: In function ‘pot_hole’: drivers/gpu/drm/i915/selftests/i915_gem_gtt.c:594:6: error: ‘err’ may be used uninitialized in this function [-Werror=maybe-uninitialized] So set it to 0 should we ever skip over a hole smaller than a few pages. Signed-off-by: Chris Wilson Cc: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170214113756.27834-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 468f0992db39..320c6879fd83 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -591,7 +591,7 @@ static int pot_hole(struct drm_i915_private *i915, struct i915_vma *vma; unsigned long flags; unsigned int pot; - int err; + int err = 0; flags = PIN_OFFSET_FIXED | PIN_USER; if (i915_is_ggtt(vm)) From b8f2169db98eba2c5009cc67a448dc48a0fd4c48 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Feb 2017 14:35:09 +0000 Subject: [PATCH 0214/1299] drm/i915: Silence compiler warning for seltests/i915_gem_coherency In general, the compiler should not be able to detect if we do any passes through the test loops: In file included from drivers/gpu/drm/i915/i915_gem.c:5029: drivers/gpu/drm/i915/selftests/i915_gem_coherency.c: In function 'igt_gem_coherency': drivers/gpu/drm/i915/selftests/i915_gem_coherency.c:274: error: 'err' may be used uninitialized in this function Reported-by: kbuild test robot Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170214143509.15719-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/selftests/i915_gem_coherency.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index ad92c60290f1..f08d0179b3df 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -272,7 +272,7 @@ static int igt_gem_coherency(void *arg) struct drm_i915_gem_object *obj; unsigned long count, n; u32 *offsets, *values; - int err; + int err = 0; /* We repeatedly write, overwrite and read from a sequence of * cachelines in order to try and detect incoherency (unflushed writes From 65300b1f6e04a905200ac5943cb9bfdadc3651a2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Feb 2017 13:34:20 +0000 Subject: [PATCH 0215/1299] drm/i915/guc: Don't take struct_mutex for object unreference We no longer need to take the struct_mutex for freeing objects, and on the finalisation paths here the mutex is not been used for serialisation of the pointer access, so remove the BKL wart. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170214133420.7977-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_guc_loader.c | 14 ++++++-------- drivers/gpu/drm/i915/intel_huc.c | 9 ++++----- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 8ef33d88d5a0..9885f760f2ef 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -714,12 +714,9 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", err, fw, uc_fw->obj); - mutex_lock(&dev_priv->drm.struct_mutex); - obj = uc_fw->obj; + obj = fetch_and_zero(&uc_fw->obj); if (obj) i915_gem_object_put(obj); - uc_fw->obj = NULL; - mutex_unlock(&dev_priv->drm.struct_mutex); release_firmware(fw); /* OK even if fw is NULL */ uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; @@ -793,16 +790,17 @@ void intel_guc_init(struct drm_i915_private *dev_priv) void intel_guc_fini(struct drm_i915_private *dev_priv) { struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct drm_i915_gem_object *obj; mutex_lock(&dev_priv->drm.struct_mutex); guc_interrupts_release(dev_priv); i915_guc_submission_disable(dev_priv); i915_guc_submission_fini(dev_priv); - - if (guc_fw->obj) - i915_gem_object_put(guc_fw->obj); - guc_fw->obj = NULL; mutex_unlock(&dev_priv->drm.struct_mutex); + obj = fetch_and_zero(&guc_fw->obj); + if (obj) + i915_gem_object_put(obj); + guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; } diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index c144609425f6..c28543d220a2 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -274,12 +274,11 @@ int intel_huc_load(struct drm_i915_private *dev_priv) void intel_huc_fini(struct drm_i915_private *dev_priv) { struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + struct drm_i915_gem_object *obj; - mutex_lock(&dev_priv->drm.struct_mutex); - if (huc_fw->obj) - i915_gem_object_put(huc_fw->obj); - huc_fw->obj = NULL; - mutex_unlock(&dev_priv->drm.struct_mutex); + obj = fetch_and_zero(&huc_fw->obj); + if (obj) + i915_gem_object_put(obj); huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; } From 5a4c6f1b1b2d91c5252cedf1c7b68a894c3f117e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Feb 2017 16:46:11 +0000 Subject: [PATCH 0216/1299] drm/i915: The return of i915_gpu_info to debugfs Once upon a time before we had automated GPU state capture upon hangs, we had intel_gpu_dump. Now we come almost full circle and reinstate that view of the current GPU queues and registers by using the error capture facility to snapshot the GPU state when debugfs/.../i915_gpu_info is opened - which should provided useful debugging to both the error capture routines (without having to cause a hang and avoid the error state being eaten by igt) and generally. v2: Rename drm_i915_error_state to i915_gpu_state to alleviate some name collisions between the error state dump and inspecting the gpu state. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170214164611.11381-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 123 ++++++++--------- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 46 ++++--- drivers/gpu/drm/i915/i915_gpu_error.c | 181 ++++++++++++++------------ drivers/gpu/drm/i915/i915_sysfs.c | 26 ++-- drivers/gpu/drm/i915/intel_display.c | 2 +- 6 files changed, 203 insertions(+), 177 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index cd023fda1a3b..0de0596d41ac 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -988,6 +988,61 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data) } #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) +static ssize_t gpu_state_read(struct file *file, char __user *ubuf, + size_t count, loff_t *pos) +{ + struct i915_gpu_state *error = file->private_data; + struct drm_i915_error_state_buf str; + ssize_t ret; + loff_t tmp; + + if (!error) + return 0; + + ret = i915_error_state_buf_init(&str, error->i915, count, *pos); + if (ret) + return ret; + + ret = i915_error_state_to_str(&str, error); + if (ret) + goto out; + + tmp = 0; + ret = simple_read_from_buffer(ubuf, count, &tmp, str.buf, str.bytes); + if (ret < 0) + goto out; + + *pos = str.start + ret; +out: + i915_error_state_buf_release(&str); + return ret; +} + +static int gpu_state_release(struct inode *inode, struct file *file) +{ + i915_gpu_state_put(file->private_data); + return 0; +} + +static int i915_gpu_info_open(struct inode *inode, struct file *file) +{ + struct i915_gpu_state *gpu; + + gpu = i915_capture_gpu_state(inode->i_private); + if (!gpu) + return -ENOMEM; + + file->private_data = gpu; + return 0; +} + +static const struct file_operations i915_gpu_info_fops = { + .owner = THIS_MODULE, + .open = i915_gpu_info_open, + .read = gpu_state_read, + .llseek = default_llseek, + .release = gpu_state_release, +}; static ssize_t i915_error_state_write(struct file *filp, @@ -995,82 +1050,31 @@ i915_error_state_write(struct file *filp, size_t cnt, loff_t *ppos) { - struct i915_error_state_file_priv *error_priv = filp->private_data; + struct i915_gpu_state *error = filp->private_data; + + if (!error) + return 0; DRM_DEBUG_DRIVER("Resetting error state\n"); - i915_destroy_error_state(error_priv->i915); + i915_reset_error_state(error->i915); return cnt; } static int i915_error_state_open(struct inode *inode, struct file *file) { - struct drm_i915_private *dev_priv = inode->i_private; - struct i915_error_state_file_priv *error_priv; - - error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL); - if (!error_priv) - return -ENOMEM; - - error_priv->i915 = dev_priv; - - i915_error_state_get(&dev_priv->drm, error_priv); - - file->private_data = error_priv; - + file->private_data = i915_first_error_state(inode->i_private); return 0; } -static int i915_error_state_release(struct inode *inode, struct file *file) -{ - struct i915_error_state_file_priv *error_priv = file->private_data; - - i915_error_state_put(error_priv); - kfree(error_priv); - - return 0; -} - -static ssize_t i915_error_state_read(struct file *file, char __user *userbuf, - size_t count, loff_t *pos) -{ - struct i915_error_state_file_priv *error_priv = file->private_data; - struct drm_i915_error_state_buf error_str; - loff_t tmp_pos = 0; - ssize_t ret_count = 0; - int ret; - - ret = i915_error_state_buf_init(&error_str, error_priv->i915, - count, *pos); - if (ret) - return ret; - - ret = i915_error_state_to_str(&error_str, error_priv); - if (ret) - goto out; - - ret_count = simple_read_from_buffer(userbuf, count, &tmp_pos, - error_str.buf, - error_str.bytes); - - if (ret_count < 0) - ret = ret_count; - else - *pos = error_str.start + ret_count; -out: - i915_error_state_buf_release(&error_str); - return ret ?: ret_count; -} - static const struct file_operations i915_error_state_fops = { .owner = THIS_MODULE, .open = i915_error_state_open, - .read = i915_error_state_read, + .read = gpu_state_read, .write = i915_error_state_write, .llseek = default_llseek, - .release = i915_error_state_release, + .release = gpu_state_release, }; - #endif static int @@ -4811,6 +4815,7 @@ static const struct i915_debugfs_files { {"i915_gem_drop_caches", &i915_drop_caches_fops}, #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) {"i915_error_state", &i915_error_state_fops}, + {"i915_gpu_info", &i915_gpu_info_fops}, #endif {"i915_next_seqno", &i915_next_seqno_fops}, {"i915_display_crc_ctl", &i915_display_crc_ctl_fops}, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index efb1f64f0e41..50de00921cc9 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1370,7 +1370,7 @@ void i915_driver_unload(struct drm_device *dev) /* Free error state after interrupts are fully disabled. */ cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); - i915_destroy_error_state(dev_priv); + i915_reset_error_state(dev_priv); /* Flush any outstanding unpin_work. */ drain_workqueue(dev_priv->wq); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6cfdb64926d7..992bda004f6b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -897,7 +897,7 @@ struct intel_device_info { struct intel_display_error_state; -struct drm_i915_error_state { +struct i915_gpu_state { struct kref ref; struct timeval time; struct timeval boottime; @@ -917,7 +917,7 @@ struct drm_i915_error_state { u32 eir; u32 pgtbl_er; u32 ier; - u32 gtier[4]; + u32 gtier[4], ngtier; u32 ccid; u32 derrmr; u32 forcewake; @@ -931,6 +931,7 @@ struct drm_i915_error_state { u32 gab_ctl; u32 gfx_mode; + u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; @@ -1512,11 +1513,6 @@ struct drm_i915_error_state_buf { loff_t pos; }; -struct i915_error_state_file_priv { - struct drm_i915_private *i915; - struct drm_i915_error_state *error; -}; - #define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ #define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */ @@ -1533,7 +1529,7 @@ struct i915_gpu_error { /* For reset and error_state handling. */ spinlock_t lock; /* Protected by the above dev->gpu_error.lock. */ - struct drm_i915_error_state *first_error; + struct i915_gpu_state *first_error; unsigned long missed_irq_rings; @@ -3574,7 +3570,7 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {} __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); int i915_error_state_to_str(struct drm_i915_error_state_buf *estr, - const struct i915_error_state_file_priv *error); + const struct i915_gpu_state *gpu); int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb, struct drm_i915_private *i915, size_t count, loff_t pos); @@ -3583,13 +3579,28 @@ static inline void i915_error_state_buf_release( { kfree(eb->buf); } + +struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); void i915_capture_error_state(struct drm_i915_private *dev_priv, u32 engine_mask, const char *error_msg); -void i915_error_state_get(struct drm_device *dev, - struct i915_error_state_file_priv *error_priv); -void i915_error_state_put(struct i915_error_state_file_priv *error_priv); -void i915_destroy_error_state(struct drm_i915_private *dev_priv); + +static inline struct i915_gpu_state * +i915_gpu_state_get(struct i915_gpu_state *gpu) +{ + kref_get(&gpu->ref); + return gpu; +} + +void __i915_gpu_state_free(struct kref *kref); +static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) +{ + if (gpu) + kref_put(&gpu->ref, __i915_gpu_state_free); +} + +struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); +void i915_reset_error_state(struct drm_i915_private *i915); #else @@ -3599,7 +3610,13 @@ static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, { } -static inline void i915_destroy_error_state(struct drm_i915_private *dev_priv) +static inline struct i915_gpu_state * +i915_first_error_state(struct drm_i915_private *i915) +{ + return NULL; +} + +static inline void i915_reset_error_state(struct drm_i915_private *i915) { } @@ -3747,7 +3764,6 @@ extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e, extern struct intel_display_error_state * intel_display_capture_error_state(struct drm_i915_private *dev_priv); extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, - struct drm_i915_private *dev_priv, struct intel_display_error_state *error); int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index c28ccfef84ab..3a3c7c3c4931 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -342,7 +342,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, } static void error_print_instdone(struct drm_i915_error_state_buf *m, - struct drm_i915_error_engine *ee) + const struct drm_i915_error_engine *ee) { int slice; int subslice; @@ -372,7 +372,7 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, static void error_print_request(struct drm_i915_error_state_buf *m, const char *prefix, - struct drm_i915_error_request *erq) + const struct drm_i915_error_request *erq) { if (!erq->seqno) return; @@ -386,7 +386,7 @@ static void error_print_request(struct drm_i915_error_state_buf *m, static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, - struct drm_i915_error_context *ctx) + const struct drm_i915_error_context *ctx) { err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n", header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, @@ -394,7 +394,7 @@ static void error_print_context(struct drm_i915_error_state_buf *m, } static void error_print_engine(struct drm_i915_error_state_buf *m, - struct drm_i915_error_engine *ee) + const struct drm_i915_error_engine *ee) { err_printf(m, "%s command stream:\n", engine_str(ee->engine_id)); err_printf(m, " START: 0x%08x\n", ee->start); @@ -569,21 +569,32 @@ static void err_print_params(struct drm_i915_error_state_buf *m, #undef PRINT } -int i915_error_state_to_str(struct drm_i915_error_state_buf *m, - const struct i915_error_state_file_priv *error_priv) +static void err_print_pciid(struct drm_i915_error_state_buf *m, + struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = error_priv->i915; - struct pci_dev *pdev = dev_priv->drm.pdev; - struct drm_i915_error_state *error = error_priv->error; + struct pci_dev *pdev = i915->drm.pdev; + + err_printf(m, "PCI ID: 0x%04x\n", pdev->device); + err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision); + err_printf(m, "PCI Subsystem: %04x:%04x\n", + pdev->subsystem_vendor, + pdev->subsystem_device); +} + +int i915_error_state_to_str(struct drm_i915_error_state_buf *m, + const struct i915_gpu_state *error) +{ + struct drm_i915_private *dev_priv = m->i915; struct drm_i915_error_object *obj; int i, j; if (!error) { - err_printf(m, "no error state collected\n"); - goto out; + err_printf(m, "No error state collected\n"); + return 0; } - err_printf(m, "%s\n", error->error_msg); + if (*error->error_msg) + err_printf(m, "%s\n", error->error_msg); err_printf(m, "Kernel: " UTS_RELEASE "\n"); err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, error->time.tv_usec); @@ -605,11 +616,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Suspend count: %u\n", error->suspend_count); err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform)); - err_printf(m, "PCI ID: 0x%04x\n", pdev->device); - err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision); - err_printf(m, "PCI Subsystem: %04x:%04x\n", - pdev->subsystem_vendor, - pdev->subsystem_device); + err_print_pciid(m, error->i915); err_printf(m, "IOMMU enabled?: %d\n", error->iommu); @@ -625,19 +632,15 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, "EIR: 0x%08x\n", error->eir); err_printf(m, "IER: 0x%08x\n", error->ier); - if (INTEL_GEN(dev_priv) >= 8) { - for (i = 0; i < 4; i++) - err_printf(m, "GTIER gt %d: 0x%08x\n", i, - error->gtier[i]); - } else if (HAS_PCH_SPLIT(dev_priv) || IS_VALLEYVIEW(dev_priv)) - err_printf(m, "GTIER: 0x%08x\n", error->gtier[0]); + for (i = 0; i < error->ngtier; i++) + err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]); err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er); err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); err_printf(m, "CCID: 0x%08x\n", error->ccid); err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings); - for (i = 0; i < dev_priv->num_fence_regs; i++) + for (i = 0; i < error->nfence; i++) err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); if (INTEL_GEN(dev_priv) >= 6) { @@ -686,7 +689,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->pinned_bo_count); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - struct drm_i915_error_engine *ee = &error->engine[i]; + const struct drm_i915_error_engine *ee = &error->engine[i]; obj = ee->batchbuffer; if (obj) { @@ -751,12 +754,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, intel_overlay_print_error_state(m, error->overlay); if (error->display) - intel_display_print_error_state(m, dev_priv, error->display); + intel_display_print_error_state(m, error->display); err_print_capabilities(m, &error->device_info); err_print_params(m, &error->params); -out: if (m->bytes == 0 && m->err) return m->err; @@ -808,10 +810,10 @@ static void i915_error_object_free(struct drm_i915_error_object *obj) kfree(obj); } -static void i915_error_state_free(struct kref *error_ref) +void __i915_gpu_state_free(struct kref *error_ref) { - struct drm_i915_error_state *error = container_of(error_ref, - typeof(*error), ref); + struct i915_gpu_state *error = + container_of(error_ref, typeof(*error), ref); int i; for (i = 0; i < ARRAY_SIZE(error->engine); i++) { @@ -976,7 +978,7 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, * It's only a small step better than a random number in its current form. */ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, int *engine_id) { uint32_t error_code = 0; @@ -1001,20 +1003,21 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, } static void i915_gem_record_fences(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int i; - if (IS_GEN3(dev_priv) || IS_GEN2(dev_priv)) { - for (i = 0; i < dev_priv->num_fence_regs; i++) - error->fence[i] = I915_READ(FENCE_REG(i)); - } else if (IS_GEN5(dev_priv) || IS_GEN4(dev_priv)) { - for (i = 0; i < dev_priv->num_fence_regs; i++) - error->fence[i] = I915_READ64(FENCE_REG_965_LO(i)); - } else if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(dev_priv) >= 6) { for (i = 0; i < dev_priv->num_fence_regs; i++) error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i)); + } else if (INTEL_GEN(dev_priv) >= 4) { + for (i = 0; i < dev_priv->num_fence_regs; i++) + error->fence[i] = I915_READ64(FENCE_REG_965_LO(i)); + } else { + for (i = 0; i < dev_priv->num_fence_regs; i++) + error->fence[i] = I915_READ(FENCE_REG(i)); } + error->nfence = i; } static inline u32 @@ -1038,7 +1041,7 @@ gen8_engine_sync_index(struct intel_engine_cs *engine, return idx; } -static void gen8_record_semaphore_state(struct drm_i915_error_state *error, +static void gen8_record_semaphore_state(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1131,7 +1134,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, spin_unlock_irq(&b->lock); } -static void error_record_engine_registers(struct drm_i915_error_state *error, +static void error_record_engine_registers(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1328,7 +1331,7 @@ static void record_context(struct drm_i915_error_context *e, } static void i915_gem_record_rings(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { struct i915_ggtt *ggtt = &dev_priv->ggtt; int i; @@ -1404,7 +1407,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, } static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, struct i915_address_space *vm, int idx) { @@ -1430,7 +1433,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, } static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int cnt = 0, i, j; @@ -1455,7 +1458,7 @@ static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, } static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { struct i915_address_space *vm = &dev_priv->ggtt.base; struct drm_i915_error_buffer *bo; @@ -1486,7 +1489,7 @@ static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, } static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { /* Capturing log buf contents won't be useful if logging was disabled */ if (!dev_priv->guc.log.vma || (i915.guc_log_level < 0)) @@ -1498,7 +1501,7 @@ static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, /* Capture all registers which don't fit into another category. */ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int i; @@ -1555,9 +1558,11 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, error->ier = I915_READ(GEN8_DE_MISC_IER); for (i = 0; i < 4; i++) error->gtier[i] = I915_READ(GEN8_GT_IER(i)); + error->ngtier = 4; } else if (HAS_PCH_SPLIT(dev_priv)) { error->ier = I915_READ(DEIER); error->gtier[0] = I915_READ(GTIER); + error->ngtier = 1; } else if (IS_GEN2(dev_priv)) { error->ier = I915_READ16(IER); } else if (!IS_VALLEYVIEW(dev_priv)) { @@ -1568,7 +1573,7 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, } static void i915_error_capture_msg(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, u32 engine_mask, const char *error_msg) { @@ -1595,7 +1600,7 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, } static void i915_capture_gen_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU @@ -1611,7 +1616,7 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, static int capture(void *data) { - struct drm_i915_error_state *error = data; + struct i915_gpu_state *error = data; do_gettimeofday(&error->time); error->boottime = ktime_to_timeval(ktime_get_boottime()); @@ -1637,6 +1642,23 @@ static int capture(void *data) #define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) +struct i915_gpu_state * +i915_capture_gpu_state(struct drm_i915_private *i915) +{ + struct i915_gpu_state *error; + + error = kzalloc(sizeof(*error), GFP_ATOMIC); + if (!error) + return NULL; + + kref_init(&error->ref); + error->i915 = i915; + + stop_machine(capture, error, NULL); + + return error; +} + /** * i915_capture_error_state - capture an error record for later analysis * @dev: drm device @@ -1651,7 +1673,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, const char *error_msg) { static bool warned; - struct drm_i915_error_state *error; + struct i915_gpu_state *error; unsigned long flags; if (!i915.error_capture) @@ -1660,18 +1682,12 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, if (READ_ONCE(dev_priv->gpu_error.first_error)) return; - /* Account for pipe specific data like PIPE*STAT */ - error = kzalloc(sizeof(*error), GFP_ATOMIC); + error = i915_capture_gpu_state(dev_priv); if (!error) { DRM_DEBUG_DRIVER("out of memory, not capturing error state\n"); return; } - kref_init(&error->ref); - error->i915 = dev_priv; - - stop_machine(capture, error, NULL); - i915_error_capture_msg(dev_priv, error, engine_mask, error_msg); DRM_INFO("%s\n", error->error_msg); @@ -1685,7 +1701,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, } if (error) { - i915_error_state_free(&error->ref); + __i915_gpu_state_free(&error->ref); return; } @@ -1701,33 +1717,28 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, } } -void i915_error_state_get(struct drm_device *dev, - struct i915_error_state_file_priv *error_priv) +struct i915_gpu_state * +i915_first_error_state(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); - - spin_lock_irq(&dev_priv->gpu_error.lock); - error_priv->error = dev_priv->gpu_error.first_error; - if (error_priv->error) - kref_get(&error_priv->error->ref); - spin_unlock_irq(&dev_priv->gpu_error.lock); -} - -void i915_error_state_put(struct i915_error_state_file_priv *error_priv) -{ - if (error_priv->error) - kref_put(&error_priv->error->ref, i915_error_state_free); -} - -void i915_destroy_error_state(struct drm_i915_private *dev_priv) -{ - struct drm_i915_error_state *error; - - spin_lock_irq(&dev_priv->gpu_error.lock); - error = dev_priv->gpu_error.first_error; - dev_priv->gpu_error.first_error = NULL; - spin_unlock_irq(&dev_priv->gpu_error.lock); + struct i915_gpu_state *error; + spin_lock_irq(&i915->gpu_error.lock); + error = i915->gpu_error.first_error; if (error) - kref_put(&error->ref, i915_error_state_free); + i915_gpu_state_get(error); + spin_unlock_irq(&i915->gpu_error.lock); + + return error; +} + +void i915_reset_error_state(struct drm_i915_private *i915) +{ + struct i915_gpu_state *error; + + spin_lock_irq(&i915->gpu_error.lock); + error = i915->gpu_error.first_error; + i915->gpu_error.first_error = NULL; + spin_unlock_irq(&i915->gpu_error.lock); + + i915_gpu_state_put(error); } diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index a721ff116101..af0ac9f261fd 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -522,33 +522,27 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, struct device *kdev = kobj_to_dev(kobj); struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct drm_device *dev = &dev_priv->drm; - struct i915_error_state_file_priv error_priv; struct drm_i915_error_state_buf error_str; - ssize_t ret_count = 0; - int ret; + struct i915_gpu_state *gpu; + ssize_t ret; - memset(&error_priv, 0, sizeof(error_priv)); - - ret = i915_error_state_buf_init(&error_str, to_i915(dev), count, off); + ret = i915_error_state_buf_init(&error_str, dev_priv, count, off); if (ret) return ret; - error_priv.i915 = dev_priv; - i915_error_state_get(dev, &error_priv); - - ret = i915_error_state_to_str(&error_str, &error_priv); + gpu = i915_first_error_state(dev_priv); + ret = i915_error_state_to_str(&error_str, gpu); if (ret) goto out; - ret_count = count < error_str.bytes ? count : error_str.bytes; + ret = count < error_str.bytes ? count : error_str.bytes; + memcpy(buf, error_str.buf, ret); - memcpy(buf, error_str.buf, ret_count); out: - i915_error_state_put(&error_priv); + i915_gpu_state_put(gpu); i915_error_state_buf_release(&error_str); - return ret ?: ret_count; + return ret; } static ssize_t error_state_write(struct file *file, struct kobject *kobj, @@ -559,7 +553,7 @@ static ssize_t error_state_write(struct file *file, struct kobject *kobj, struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); DRM_DEBUG_DRIVER("Resetting error state\n"); - i915_destroy_error_state(dev_priv); + i915_reset_error_state(dev_priv); return count; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 02e6bb2d614c..660581fb329a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15838,9 +15838,9 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv) void intel_display_print_error_state(struct drm_i915_error_state_buf *m, - struct drm_i915_private *dev_priv, struct intel_display_error_state *error) { + struct drm_i915_private *dev_priv = m->i915; int i; if (!error) From db4c5e0b726f021e864e887e6b49ad6ecebabf5a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 15:03:46 +0000 Subject: [PATCH 0217/1299] drm/i915: Enable fine-tuned RPS for cherryview When the RPS tuning was applied to Baytrail, in commit 8fb55197e64d ("drm/i915: Agressive downclocking on Baytrail"), concern was given that it might cause Cherryview excess wakeups of the common power well. However, the static thresholds perform poorly for Kodi, and the GPU is unable to deliver the video frames on time. Enabling the dynamic, finer thresholds used on all other platforms (including Skylake and Broxton that also have the same multiple powerwell concerns) allows the GPU to pick a more appropriate frequency and not drop frames. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170210150348.22146-1-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/intel_pm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 633cb0478b92..e801b5d26b90 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4971,8 +4971,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) if (err) return err; - if (!IS_CHERRYVIEW(dev_priv)) - gen6_set_rps_thresholds(dev_priv, val); + gen6_set_rps_thresholds(dev_priv, val); } dev_priv->rps.cur_freq = val; From 17136d548e1b5ecd947da996f125907a671313d9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 15:03:47 +0000 Subject: [PATCH 0218/1299] drm/i915: Don't accidentally increase the frequency in handling DOWN rps If we receive a DOWN_TIMEOUT rps interrupt, we respond by reducing the GPU clocks significantly. Before we do, double check that the frequency we pick is actually a decrease. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170210150348.22146-2-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/i915_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 27c13193f7a2..86cca37a4e4f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1186,7 +1186,7 @@ static void gen6_pm_rps_work(struct work_struct *work) } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) new_delay = dev_priv->rps.efficient_freq; - else + else if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit) new_delay = dev_priv->rps.min_freq_softlimit; adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { From bd64818dfe3b19ecdcf47a7b0726c4b59ff4fb5e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 15:03:48 +0000 Subject: [PATCH 0219/1299] drm/i915: Only apply the jump to the "efficient RPS" frequency on startup Currently we apply the jump to rpe if we are below it and the GPU needs more power. For some GPUs, the rpe is 75% of the maximum range causing us to dramatically overshoot low power applications *and* unable to reach the low frequency that can most efficiently deliver their workload. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170210150348.22146-3-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/i915_irq.c | 8 -------- drivers/gpu/drm/i915/intel_pm.c | 11 +++++++++-- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 86cca37a4e4f..84fda3fce4b8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1173,14 +1173,6 @@ static void gen6_pm_rps_work(struct work_struct *work) if (new_delay >= dev_priv->rps.max_freq_softlimit) adj = 0; - /* - * For better performance, jump directly - * to RPe if we're below it. - */ - if (new_delay < dev_priv->rps.efficient_freq - adj) { - new_delay = dev_priv->rps.efficient_freq; - adj = 0; - } } else if (client_boost || any_waiters(dev_priv)) { adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e801b5d26b90..3d311e100643 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5019,6 +5019,8 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->rps.hw_lock); if (dev_priv->rps.enabled) { + u8 freq; + if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, @@ -5026,9 +5028,14 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) gen6_enable_rps_interrupts(dev_priv); - /* Ensure we start at the user's desired frequency */ + /* Use the user's desired frequency as a guide, but for better + * performance, jump directly to RPe as our starting frequency. + */ + freq = max(dev_priv->rps.cur_freq, + dev_priv->rps.efficient_freq); + if (intel_set_rps(dev_priv, - clamp(dev_priv->rps.cur_freq, + clamp(freq, dev_priv->rps.min_freq_softlimit, dev_priv->rps.max_freq_softlimit))) DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); From 4ac9659ef9f110a8f651e5a75c24e1ac439b44ff Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 14 Feb 2017 15:00:17 +0000 Subject: [PATCH 0220/1299] drm/i915: Remove duplicate intel_logical_ring_workarounds_emit intel_ring_workarounds_emit is exactly the same code. Signed-off-by: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170214150017.16058-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 35 +------------------------ drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 3 files changed, 3 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index f567d4b08863..ee431d39ce06 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -890,39 +890,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) return ret; } -static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) -{ - struct i915_workarounds *w = &req->i915->workarounds; - u32 *cs; - int ret, i; - - if (w->count == 0) - return 0; - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - cs = intel_ring_begin(req, w->count * 2 + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(w->count); - for (i = 0; i < w->count; i++) { - *cs++ = i915_mmio_reg_offset(w->reg[i].addr); - *cs++ = w->reg[i].value; - } - *cs++ = MI_NOOP; - - intel_ring_advance(req, cs); - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - return 0; -} - #define wa_ctx_emit(batch, index, cmd) \ do { \ int __index = (index)++; \ @@ -1672,7 +1639,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) { int ret; - ret = intel_logical_ring_workarounds_emit(req); + ret = intel_ring_workarounds_emit(req); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 95906c45289b..f7499829ecc2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -644,7 +644,7 @@ static void reset_ring_common(struct intel_engine_cs *engine, } } -static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) +int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) { struct i915_workarounds *w = &req->i915->workarounds; u32 *cs; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4350713dbc58..16714096810d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -560,6 +560,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) } int init_workarounds_ring(struct intel_engine_cs *engine); +int intel_ring_workarounds_emit(struct drm_i915_gem_request *req); void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); From dfb65e71ea2c1d97ac373cc0587dc60b3307581a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Feb 2017 18:12:38 +0200 Subject: [PATCH 0221/1299] drm/i915: Fix not finding the VBT when it overlaps with OPREGION_ASLE_EXT If there is no OPREGION_ASLE_EXT then a VBT stored in mailbox #4 may use the ASLE_EXT parts of the opregion. Adjust the vbt_size calculation for a vbt in mailbox #4 for this. This fixes the driver not finding the VBT on a jumper ezpad mini3 cherrytrail tablet and on a ACER SW5_017 machine. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487088758-30050-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_opregion.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index f4429f67a4e3..4a862a358c70 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -982,7 +982,18 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) opregion->vbt_size = vbt_size; } else { vbt = base + OPREGION_VBT_OFFSET; - vbt_size = OPREGION_ASLE_EXT_OFFSET - OPREGION_VBT_OFFSET; + /* + * The VBT specification says that if the ASLE ext + * mailbox is not used its area is reserved, but + * on some CHT boards the VBT extends into the + * ASLE ext area. Allow this even though it is + * against the spec, so we do not end up rejecting + * the VBT on those boards (and end up not finding the + * LCD panel because of this). + */ + vbt_size = (mboxes & MBOX_ASLE_EXT) ? + OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; + vbt_size -= OPREGION_VBT_OFFSET; if (intel_bios_is_valid_vbt(vbt, vbt_size)) { DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); opregion->vbt = vbt; From ba7a5741b9ee6e9f682f949c8db6a96201ab4ca1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:35 +0000 Subject: [PATCH 0222/1299] drm/i915: Micro-optimise i915_get_ggtt_vma_pages() The predominant VMA class is normal GTT, so allow gcc to emphasize that path and avoid unnecessary stack movement. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 61 +++++++++++++++-------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index cb5ea5d38f11..ae04b6ea1cc3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2621,14 +2621,16 @@ static int ggtt_bind_vma(struct i915_vma *vma, { struct drm_i915_private *i915 = vma->vm->i915; struct drm_i915_gem_object *obj = vma->obj; - u32 pte_flags = 0; - int ret; + u32 pte_flags; - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; + if (unlikely(!vma->pages)) { + int ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + } /* Currently applicable only to VLV */ + pte_flags = 0; if (obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -2653,18 +2655,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, { struct drm_i915_private *i915 = vma->vm->i915; u32 pte_flags; - int ret; - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; + if (unlikely(!vma->pages)) { + int ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + } /* Currently applicable only to VLV */ pte_flags = 0; if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; - if (flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); vma->vm->insert_entries(vma->vm, @@ -3430,9 +3432,9 @@ rotate_pages(const dma_addr_t *in, unsigned int offset, return sg; } -static struct sg_table * -intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, - struct drm_i915_gem_object *obj) +static noinline struct sg_table * +intel_rotate_pages(struct intel_rotation_info *rot_info, + struct drm_i915_gem_object *obj) { const size_t n_pages = obj->base.size / PAGE_SIZE; unsigned int size = intel_rotation_info_size(rot_info); @@ -3493,7 +3495,7 @@ intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, return ERR_PTR(ret); } -static struct sg_table * +static noinline struct sg_table * intel_partial_pages(const struct i915_ggtt_view *view, struct drm_i915_gem_object *obj) { @@ -3547,7 +3549,7 @@ intel_partial_pages(const struct i915_ggtt_view *view, static int i915_get_ggtt_vma_pages(struct i915_vma *vma) { - int ret = 0; + int ret; /* The vma->pages are only valid within the lifespan of the borrowed * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so @@ -3556,32 +3558,33 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) */ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); - if (vma->pages) + switch (vma->ggtt_view.type) { + case I915_GGTT_VIEW_NORMAL: + vma->pages = vma->obj->mm.pages; return 0; - if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) - vma->pages = vma->obj->mm.pages; - else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) + case I915_GGTT_VIEW_ROTATED: vma->pages = - intel_rotate_fb_obj_pages(&vma->ggtt_view.rotated, - vma->obj); - else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) + intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); + break; + + case I915_GGTT_VIEW_PARTIAL: vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); - else + break; + + default: WARN_ONCE(1, "GGTT view %u not implemented!\n", vma->ggtt_view.type); + return -EINVAL; + } - if (!vma->pages) { - DRM_ERROR("Failed to get pages for GGTT view type %u!\n", - vma->ggtt_view.type); - ret = -EINVAL; - } else if (IS_ERR(vma->pages)) { + ret = 0; + if (unlikely(IS_ERR(vma->pages))) { ret = PTR_ERR(vma->pages); vma->pages = NULL; DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", vma->ggtt_view.type, ret); } - return ret; } From b31144c0daa8c570d3fb60ddbd0e26ba1ea62485 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:36 +0000 Subject: [PATCH 0223/1299] drm/i915: Micro-optimise gen6_ppgtt_insert_entries() Inline the address computation to avoid the vfunc call for every page. We still have to pay the high overhead of sg_page_iter_next(), but now at least GCC can optimise the inner most loop, giving a significant boost to some thrashing Unreal Engine workloads. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 68 ++++++++++++++--------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ae04b6ea1cc3..b33ff11c1345 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1887,6 +1887,11 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, } } +struct sgt_dma { + struct scatterlist *sg; + dma_addr_t dma, max; +}; + static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, uint64_t start, @@ -1896,27 +1901,34 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, unsigned first_entry = start >> PAGE_SHIFT; unsigned act_pt = first_entry / GEN6_PTES; unsigned act_pte = first_entry % GEN6_PTES; - gen6_pte_t *pt_vaddr = NULL; - struct sgt_iter sgt_iter; - dma_addr_t addr; + const u32 pte_encode = vm->pte_encode(0, cache_level, flags); + struct sgt_dma iter; + gen6_pte_t *vaddr; - for_each_sgt_dma(addr, sgt_iter, pages) { - if (pt_vaddr == NULL) - pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + iter.sg = pages->sgl; + iter.dma = sg_dma_address(iter.sg); + iter.max = iter.dma + iter.sg->length; + do { + vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); - pt_vaddr[act_pte] = - vm->pte_encode(addr, cache_level, flags); + iter.dma += PAGE_SIZE; + if (iter.dma == iter.max) { + iter.sg = __sg_next(iter.sg); + if (!iter.sg) + break; + + iter.dma = sg_dma_address(iter.sg); + iter.max = iter.dma + iter.sg->length; + } if (++act_pte == GEN6_PTES) { - kunmap_px(ppgtt, pt_vaddr); - pt_vaddr = NULL; - act_pt++; + kunmap_px(ppgtt, vaddr); + vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]); act_pte = 0; } - } - - if (pt_vaddr) - kunmap_px(ppgtt, pt_vaddr); + } while (1); + kunmap_px(ppgtt, vaddr); } static int gen6_alloc_va_range(struct i915_address_space *vm, @@ -2502,27 +2514,13 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, enum i915_cache_level level, u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - struct sgt_iter sgt_iter; - gen6_pte_t __iomem *gtt_entries; - gen6_pte_t gtt_entry; + gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; + unsigned int i = start >> PAGE_SHIFT; + struct sgt_iter iter; dma_addr_t addr; - int i = 0; - - gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); - - for_each_sgt_dma(addr, sgt_iter, st) { - gtt_entry = vm->pte_encode(addr, level, flags); - iowrite32(gtt_entry, >t_entries[i++]); - } - - /* XXX: This serves as a posting read to make sure that the PTE has - * actually been updated. There is some concern that even though - * registers and PTEs are within the same BAR that they are potentially - * of NUMA access patterns. Therefore, even with the way we assume - * hardware should work, we must keep this posting read for paranoia. - */ - if (i != 0) - WARN_ON(readl(>t_entries[i-1]) != gtt_entry); + for_each_sgt_dma(addr, iter, st) + iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); + wmb(); /* This next bit makes the above posting read even more important. We * want to flush the TLBs only after we're certain all the PTE updates From 894ccebee2b0e606ba9638d20dd87b33568482d7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:37 +0000 Subject: [PATCH 0224/1299] drm/i915: Micro-optimise gen8_ppgtt_insert_entries() Improve the sg iteration and in hte process eliminate a bug in miscomputing the pml4 length as orig_nents< Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 172 +++++++++++++++------------- 1 file changed, 93 insertions(+), 79 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b33ff11c1345..f40f9365782f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -750,9 +750,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, unsigned int num_entries = gen8_pte_count(start, length); unsigned int pte = gen8_pte_index(start); unsigned int pte_end = pte + num_entries; - gen8_pte_t *pt_vaddr; - gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_t *vaddr; if (WARN_ON(!px_page(pt))) return false; @@ -765,12 +765,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, return true; } - pt_vaddr = kmap_px(pt); - + vaddr = kmap_px(pt); while (pte < pte_end) - pt_vaddr[pte++] = scratch_pte; - - kunmap_px(ppgtt, pt_vaddr); + vaddr[pte++] = scratch_pte; + kunmap_px(ppgtt, vaddr); return false; } @@ -878,71 +876,100 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length); } -static void -gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, +struct sgt_dma { + struct scatterlist *sg; + dma_addr_t dma, max; +}; + +static __always_inline bool +gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory_pointer *pdp, - struct sg_page_iter *sg_iter, - uint64_t start, + struct sgt_dma *iter, + u64 start, enum i915_cache_level cache_level) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - gen8_pte_t *pt_vaddr; - unsigned pdpe = gen8_pdpe_index(start); - unsigned pde = gen8_pde_index(start); - unsigned pte = gen8_pte_index(start); + unsigned int pdpe = gen8_pdpe_index(start); + unsigned int pde = gen8_pde_index(start); + unsigned int pte = gen8_pte_index(start); + struct i915_page_directory *pd; + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); + gen8_pte_t *vaddr; + bool ret; - pt_vaddr = NULL; + pd = pdp->page_directory[pdpe]; + vaddr = kmap_px(pd->page_table[pde]); + do { + vaddr[pte] = pte_encode | iter->dma; + iter->dma += PAGE_SIZE; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) { + ret = false; + break; + } - while (__sg_page_iter_next(sg_iter)) { - if (pt_vaddr == NULL) { - struct i915_page_directory *pd = pdp->page_directory[pdpe]; - struct i915_page_table *pt = pd->page_table[pde]; - pt_vaddr = kmap_px(pt); + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + iter->sg->length; } - pt_vaddr[pte] = - gen8_pte_encode(sg_page_iter_dma_address(sg_iter), - cache_level); if (++pte == GEN8_PTES) { - kunmap_px(ppgtt, pt_vaddr); - pt_vaddr = NULL; if (++pde == I915_PDES) { - if (++pdpe == I915_PDPES_PER_PDP(vm->i915)) + /* Limited by sg length for 3lvl */ + if (++pdpe == GEN8_PML4ES_PER_PML4) { + ret = true; break; + } + + GEM_BUG_ON(pdpe > GEN8_LEGACY_PDPES); + pd = pdp->page_directory[pdpe]; pde = 0; } + + kunmap_px(ppgtt, vaddr); + vaddr = kmap_px(pd->page_table[pde]); pte = 0; } - } + } while (1); + kunmap_px(ppgtt, vaddr); - if (pt_vaddr) - kunmap_px(ppgtt, pt_vaddr); + return ret; } -static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, - struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, - u32 unused) +static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, + struct sg_table *pages, + u64 start, + enum i915_cache_level cache_level, + u32 unused) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sg_page_iter sg_iter; + struct sgt_dma iter = { + .sg = pages->sgl, + .dma = sg_dma_address(iter.sg), + .max = iter.dma + iter.sg->length, + }; - __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); + gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, + start, cache_level); +} - if (!USES_FULL_48BIT_PPGTT(vm->i915)) { - gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, - cache_level); - } else { - struct i915_page_directory_pointer *pdp; - uint64_t pml4e; - uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; +static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, + struct sg_table *pages, + uint64_t start, + enum i915_cache_level cache_level, + u32 unused) +{ + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct sgt_dma iter = { + .sg = pages->sgl, + .dma = sg_dma_address(iter.sg), + .max = iter.dma + iter.sg->length, + }; + struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; + unsigned int pml4e = gen8_pml4e_index(start); - gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { - gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, - start, cache_level); - } - } + while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[pml4e++], &iter, + start, cache_level)) + ; } static void gen8_free_page_tables(struct drm_i915_private *dev_priv, @@ -1524,8 +1551,8 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) struct i915_address_space *vm = &ppgtt->base; uint64_t start = ppgtt->base.start; uint64_t length = ppgtt->base.total; - gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); if (!USES_FULL_48BIT_PPGTT(vm->i915)) { gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); @@ -1590,7 +1617,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.start = 0; ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.allocate_va_range = gen8_alloc_va_range; - ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; ppgtt->base.clear_range = gen8_ppgtt_clear_range; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; @@ -1605,6 +1631,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.total = 1ULL << 48; ppgtt->switch_mm = gen8_48b_mm_switch; + + ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; } else { ret = __pdp_init(dev_priv, &ppgtt->pdp); if (ret) @@ -1621,6 +1649,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) goto free_scratch; } + + ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; } if (intel_vgpu_active(dev_priv)) @@ -1887,11 +1917,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, } } -struct sgt_dma { - struct scatterlist *sg; - dma_addr_t dma, max; -}; - static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, uint64_t start, @@ -2433,26 +2458,15 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; gen8_pte_t __iomem *gtt_entries; - gen8_pte_t gtt_entry; + const gen8_pte_t pte_encode = gen8_pte_encode(0, level); dma_addr_t addr; - int i = 0; - gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); + gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; + gtt_entries += start >> PAGE_SHIFT; + for_each_sgt_dma(addr, sgt_iter, st) + gen8_set_pte(gtt_entries++, pte_encode | addr); - for_each_sgt_dma(addr, sgt_iter, st) { - gtt_entry = gen8_pte_encode(addr, level); - gen8_set_pte(>t_entries[i++], gtt_entry); - } - - /* - * XXX: This serves as a posting read to make sure that the PTE has - * actually been updated. There is some concern that even though - * registers and PTEs are within the same BAR that they are potentially - * of NUMA access patterns. Therefore, even with the way we assume - * hardware should work, we must keep this posting read for paranoia. - */ - if (i != 0) - WARN_ON(readq(>t_entries[i-1]) != gtt_entry); + wmb(); /* This next bit makes the above posting read even more important. We * want to flush the TLBs only after we're certain all the PTE updates @@ -2540,7 +2554,9 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; unsigned num_entries = length >> PAGE_SHIFT; - gen8_pte_t scratch_pte, __iomem *gtt_base = + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_t __iomem *gtt_base = (gen8_pte_t __iomem *)ggtt->gsm + first_entry; const int max_entries = ggtt_total_entries(ggtt) - first_entry; int i; @@ -2550,8 +2566,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); for (i = 0; i < num_entries; i++) gen8_set_pte(>t_base[i], scratch_pte); readl(gtt_base); From 1188bc66eb33e64ac7452b5acd62ce0395204148 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:38 +0000 Subject: [PATCH 0225/1299] drm/i915: Don't special case teardown of aliasing_ppgtt The aliasing_ppgtt is a regular ppgtt, and we can use the regular i915_ppgtt_put() to properly tear it down. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 50 +++++++++-------------------- 1 file changed, 15 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f40f9365782f..424a690bc17f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2238,23 +2238,6 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); } -static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_private *dev_priv, - struct drm_i915_file_private *file_priv, - const char *name) -{ - int ret; - - ret = __hw_ppgtt_init(ppgtt, dev_priv); - if (ret == 0) { - kref_init(&ppgtt->ref); - i915_address_space_init(&ppgtt->base, dev_priv, name); - ppgtt->base.file = file_priv; - } - - return ret; -} - int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) { gtt_write_workarounds(dev_priv); @@ -2292,12 +2275,16 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name); + ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret) { kfree(ppgtt); return ERR_PTR(ret); } + kref_init(&ppgtt->ref); + i915_address_space_init(&ppgtt->base, dev_priv, name); + ppgtt->base.file = fpriv; + trace_i915_ppgtt_create(&ppgtt->base); return ppgtt; @@ -2757,19 +2744,15 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) struct i915_hw_ppgtt *ppgtt; int err; - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) - return -ENOMEM; - - err = __hw_ppgtt_init(ppgtt, i915); - if (err) - goto err_ppgtt; + ppgtt = i915_ppgtt_create(i915, NULL, "[alias]"); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); if (ppgtt->base.allocate_va_range) { err = ppgtt->base.allocate_va_range(&ppgtt->base, 0, ppgtt->base.total); if (err) - goto err_ppgtt_cleanup; + goto err_ppgtt; } ppgtt->base.clear_range(&ppgtt->base, @@ -2782,10 +2765,8 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) return 0; -err_ppgtt_cleanup: - ppgtt->base.cleanup(&ppgtt->base); err_ppgtt: - kfree(ppgtt); + i915_ppgtt_put(ppgtt); return err; } @@ -2798,8 +2779,7 @@ void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915) if (!ppgtt) return; - ppgtt->base.cleanup(&ppgtt->base); - kfree(ppgtt); + i915_ppgtt_put(ppgtt); ggtt->base.bind_vma = ggtt_bind_vma; } @@ -2874,21 +2854,21 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) WARN_ON(i915_vma_unbind(vma)); mutex_unlock(&dev_priv->drm.struct_mutex); - i915_gem_fini_aliasing_ppgtt(dev_priv); i915_gem_cleanup_stolen(&dev_priv->drm); + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_fini_aliasing_ppgtt(dev_priv); + if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); if (drm_mm_initialized(&ggtt->base.mm)) { intel_vgt_deballoon(dev_priv); - - mutex_lock(&dev_priv->drm.struct_mutex); i915_address_space_fini(&ggtt->base); - mutex_unlock(&dev_priv->drm.struct_mutex); } ggtt->base.cleanup(&ggtt->base); + mutex_unlock(&dev_priv->drm.struct_mutex); arch_phys_wc_del(ggtt->mtrr); io_mapping_fini(&ggtt->mappable); From cbc4e9e6a6d31fcc44921d2be41104425be8ab01 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:39 +0000 Subject: [PATCH 0226/1299] drm/i915: Split ggtt/alasing_gtt unbind_vma Similar to how we already split the bind_vma for ggtt/aliasing_gtt, also split up the unbind for symmetry. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 424a690bc17f..efc7a6b1d6f1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2648,6 +2648,15 @@ static int ggtt_bind_vma(struct i915_vma *vma, return 0; } +static void ggtt_unbind_vma(struct i915_vma *vma) +{ + struct drm_i915_private *i915 = vma->vm->i915; + + intel_runtime_pm_get(i915); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); + intel_runtime_pm_put(i915); +} + static int aliasing_gtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) @@ -2684,22 +2693,21 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, return 0; } -static void ggtt_unbind_vma(struct i915_vma *vma) +static void aliasing_gtt_unbind_vma(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; - struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; - const u64 size = min(vma->size, vma->node.size); if (vma->flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); - vma->vm->clear_range(vma->vm, - vma->node.start, size); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); intel_runtime_pm_put(i915); } - if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) - appgtt->base.clear_range(&appgtt->base, - vma->node.start, size); + if (vma->flags & I915_VMA_LOCAL_BIND) { + struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base; + + vm->clear_range(vm, vma->node.start, vma->size); + } } void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, @@ -2760,9 +2768,13 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) ppgtt->base.total); i915->mm.aliasing_ppgtt = ppgtt; + WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); ggtt->base.bind_vma = aliasing_gtt_bind_vma; + WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma); + ggtt->base.unbind_vma = aliasing_gtt_unbind_vma; + return 0; err_ppgtt: @@ -2782,6 +2794,7 @@ void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915) i915_ppgtt_put(ppgtt); ggtt->base.bind_vma = ggtt_bind_vma; + ggtt->base.unbind_vma = ggtt_unbind_vma; } int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) From 8448661d65f6f5dbcdb9c5cba185b284f2464b65 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:40 +0000 Subject: [PATCH 0227/1299] drm/i915: Convert clflushed pagetables over to WC maps We flush the entire page every time we update a few bytes, making the update of a page table many, many times slower than is required. If we create a WC map of the page for our updates, we can avoid the clflush but incur additional cost for creating the pagetable. We amoritize that cost by reusing page vmappings, and only changing the page protection in batches. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 2 - drivers/gpu/drm/i915/i915_gem_gtt.c | 333 +++++++++--------- drivers/gpu/drm/i915/i915_gem_gtt.h | 8 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 8 +- 4 files changed, 181 insertions(+), 170 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 992bda004f6b..f582f5492e37 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2481,8 +2481,6 @@ struct drm_i915_private { /* Used to save the pipe-to-encoder mapping for audio */ struct intel_encoder *av_enc_map[I915_MAX_PIPES]; - I915_SELFTEST_DECLARE(struct fault_attr vm_fault); - /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index efc7a6b1d6f1..4eb0e114a295 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -343,46 +343,72 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr, return pte; } -static int __setup_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, gfp_t flags) +static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) { - struct device *kdev = &dev_priv->drm.pdev->dev; + struct page *page; - if (I915_SELFTEST_ONLY(should_fail(&dev_priv->vm_fault, 1))) - i915_gem_shrink_all(dev_priv); + if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) + i915_gem_shrink_all(vm->i915); - p->page = alloc_page(flags); - if (!p->page) + if (vm->free_pages.nr) + return vm->free_pages.pages[--vm->free_pages.nr]; + + page = alloc_page(gfp); + if (!page) + return NULL; + + if (vm->pt_kmap_wc) + set_pages_array_wc(&page, 1); + + return page; +} + +static void vm_free_pages_release(struct i915_address_space *vm) +{ + GEM_BUG_ON(!pagevec_count(&vm->free_pages)); + + if (vm->pt_kmap_wc) + set_pages_array_wb(vm->free_pages.pages, + pagevec_count(&vm->free_pages)); + + __pagevec_release(&vm->free_pages); +} + +static void vm_free_page(struct i915_address_space *vm, struct page *page) +{ + if (!pagevec_add(&vm->free_pages, page)) + vm_free_pages_release(vm); +} + +static int __setup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p, + gfp_t gfp) +{ + p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY); + if (unlikely(!p->page)) return -ENOMEM; - p->daddr = dma_map_page(kdev, - p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - - if (dma_mapping_error(kdev, p->daddr)) { - __free_page(p->page); - return -EINVAL; + p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { + vm_free_page(vm, p->page); + return -ENOMEM; } return 0; } -static int setup_page_dma(struct drm_i915_private *dev_priv, +static int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) { - return __setup_page_dma(dev_priv, p, I915_GFP_DMA); + return __setup_page_dma(vm, p, I915_GFP_DMA); } -static void cleanup_page_dma(struct drm_i915_private *dev_priv, +static void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) { - struct pci_dev *pdev = dev_priv->drm.pdev; - - if (WARN_ON(!p->page)) - return; - - dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - __free_page(p->page); - memset(p, 0, sizeof(*p)); + dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + vm_free_page(vm, p->page); } static void *kmap_page_dma(struct i915_page_dma *p) @@ -393,67 +419,54 @@ static void *kmap_page_dma(struct i915_page_dma *p) /* We use the flushing unmap only with ppgtt structures: * page directories, page tables and scratch pages. */ -static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr) +static void kunmap_page_dma(void *vaddr) { - /* There are only few exceptions for gen >=6. chv and bxt. - * And we are not sure about the latter so play safe for now. - */ - if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) - drm_clflush_virt_range(vaddr, PAGE_SIZE); - kunmap_atomic(vaddr); } #define kmap_px(px) kmap_page_dma(px_base(px)) -#define kunmap_px(ppgtt, vaddr) \ - kunmap_page_dma((ppgtt)->base.i915, (vaddr)) +#define kunmap_px(vaddr) kunmap_page_dma((vaddr)) -#define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px)) -#define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px)) -#define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v)) -#define fill32_px(dev_priv, px, v) \ - fill_page_dma_32((dev_priv), px_base(px), (v)) +#define setup_px(vm, px) setup_page_dma((vm), px_base(px)) +#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px)) +#define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v)) +#define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v)) -static void fill_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, const uint64_t val) +static void fill_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p, + const u64 val) { + u64 * const vaddr = kmap_page_dma(p); int i; - uint64_t * const vaddr = kmap_page_dma(p); for (i = 0; i < 512; i++) vaddr[i] = val; - kunmap_page_dma(dev_priv, vaddr); + kunmap_page_dma(vaddr); } -static void fill_page_dma_32(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, const uint32_t val32) +static void fill_page_dma_32(struct i915_address_space *vm, + struct i915_page_dma *p, + const u32 v) { - uint64_t v = val32; - - v = v << 32 | val32; - - fill_page_dma(dev_priv, p, v); + fill_page_dma(vm, p, (u64)v << 32 | v); } static int -setup_scratch_page(struct drm_i915_private *dev_priv, - struct i915_page_dma *scratch, - gfp_t gfp) +setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) { - return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO); + return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO); } -static void cleanup_scratch_page(struct drm_i915_private *dev_priv, - struct i915_page_dma *scratch) +static void cleanup_scratch_page(struct i915_address_space *vm) { - cleanup_page_dma(dev_priv, scratch); + cleanup_page_dma(vm, &vm->scratch_page); } -static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv) +static struct i915_page_table *alloc_pt(struct i915_address_space *vm) { struct i915_page_table *pt; - const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES; + const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES; int ret = -ENOMEM; pt = kzalloc(sizeof(*pt), GFP_KERNEL); @@ -466,7 +479,7 @@ static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv) if (!pt->used_ptes) goto fail_bitmap; - ret = setup_px(dev_priv, pt); + ret = setup_px(vm, pt); if (ret) goto fail_page_m; @@ -480,10 +493,9 @@ static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv) return ERR_PTR(ret); } -static void free_pt(struct drm_i915_private *dev_priv, - struct i915_page_table *pt) +static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - cleanup_px(dev_priv, pt); + cleanup_px(vm, pt); kfree(pt->used_ptes); kfree(pt); } @@ -496,7 +508,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm, scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); - fill_px(vm->i915, pt, scratch_pte); + fill_px(vm, pt, scratch_pte); } static void gen6_initialize_pt(struct i915_address_space *vm, @@ -509,10 +521,10 @@ static void gen6_initialize_pt(struct i915_address_space *vm, scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); - fill32_px(vm->i915, pt, scratch_pte); + fill32_px(vm, pt, scratch_pte); } -static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv) +static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) { struct i915_page_directory *pd; int ret = -ENOMEM; @@ -526,7 +538,7 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv) if (!pd->used_pdes) goto fail_bitmap; - ret = setup_px(dev_priv, pd); + ret = setup_px(vm, pd); if (ret) goto fail_page_m; @@ -540,11 +552,11 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv) return ERR_PTR(ret); } -static void free_pd(struct drm_i915_private *dev_priv, +static void free_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { if (px_page(pd)) { - cleanup_px(dev_priv, pd); + cleanup_px(vm, pd); kfree(pd->used_pdes); kfree(pd); } @@ -557,7 +569,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm, scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); - fill_px(vm->i915, pd, scratch_pde); + fill_px(vm, pd, scratch_pde); } static int __pdp_init(struct drm_i915_private *dev_priv, @@ -591,23 +603,23 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp) pdp->page_directory = NULL; } -static struct -i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv) +static struct i915_page_directory_pointer * +alloc_pdp(struct i915_address_space *vm) { struct i915_page_directory_pointer *pdp; int ret = -ENOMEM; - WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv)); + WARN_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); if (!pdp) return ERR_PTR(-ENOMEM); - ret = __pdp_init(dev_priv, pdp); + ret = __pdp_init(vm->i915, pdp); if (ret) goto fail_bitmap; - ret = setup_px(dev_priv, pdp); + ret = setup_px(vm, pdp); if (ret) goto fail_page_m; @@ -621,12 +633,12 @@ i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv) return ERR_PTR(ret); } -static void free_pdp(struct drm_i915_private *dev_priv, +static void free_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { __pdp_fini(pdp); - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - cleanup_px(dev_priv, pdp); + if (USES_FULL_48BIT_PPGTT(vm->i915)) { + cleanup_px(vm, pdp); kfree(pdp); } } @@ -638,7 +650,7 @@ static void gen8_initialize_pdp(struct i915_address_space *vm, scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); - fill_px(vm->i915, pdp, scratch_pdpe); + fill_px(vm, pdp, scratch_pdpe); } static void gen8_initialize_pml4(struct i915_address_space *vm, @@ -649,7 +661,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm, scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC); - fill_px(vm->i915, pml4, scratch_pml4e); + fill_px(vm, pml4, scratch_pml4e); } static void @@ -665,20 +677,18 @@ gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt, page_directorypo = kmap_px(pdp); page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); - kunmap_px(ppgtt, page_directorypo); + kunmap_px(page_directorypo); } static void -gen8_setup_pml4e(struct i915_hw_ppgtt *ppgtt, - struct i915_pml4 *pml4, +gen8_setup_pml4e(struct i915_pml4 *pml4, struct i915_page_directory_pointer *pdp, int index) { gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); - WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))); pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); - kunmap_px(ppgtt, pagemap); + kunmap_px(pagemap); } /* Broadwell Page Directory Pointer Descriptors */ @@ -746,7 +756,6 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, uint64_t start, uint64_t length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned int num_entries = gen8_pte_count(start, length); unsigned int pte = gen8_pte_index(start); unsigned int pte_end = pte + num_entries; @@ -768,7 +777,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, vaddr = kmap_px(pt); while (pte < pte_end) vaddr[pte++] = scratch_pte; - kunmap_px(ppgtt, vaddr); + kunmap_px(vaddr); return false; } @@ -781,7 +790,6 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, uint64_t start, uint64_t length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_table *pt; uint64_t pde; gen8_pde_t *pde_vaddr; @@ -796,8 +804,8 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, __clear_bit(pde, pd->used_pdes); pde_vaddr = kmap_px(pd); pde_vaddr[pde] = scratch_pde; - kunmap_px(ppgtt, pde_vaddr); - free_pt(vm->i915, pt); + kunmap_px(pde_vaddr); + free_pt(vm, pt); } } @@ -826,7 +834,7 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, if (gen8_ppgtt_clear_pd(vm, pd, start, length)) { __clear_bit(pdpe, pdp->used_pdpes); gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe); - free_pd(vm->i915, pd); + free_pd(vm, pd); } } @@ -847,7 +855,6 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, uint64_t start, uint64_t length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory_pointer *pdp; uint64_t pml4e; @@ -859,8 +866,8 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { __clear_bit(pml4e, pml4->used_pml4es); - gen8_setup_pml4e(ppgtt, pml4, vm->scratch_pdp, pml4e); - free_pdp(vm->i915, pdp); + gen8_setup_pml4e(pml4, vm->scratch_pdp, pml4e); + free_pdp(vm, pdp); } } } @@ -925,12 +932,12 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, pde = 0; } - kunmap_px(ppgtt, vaddr); + kunmap_px(vaddr); vaddr = kmap_px(pd->page_table[pde]); pte = 0; } } while (1); - kunmap_px(ppgtt, vaddr); + kunmap_px(vaddr); return ret; } @@ -972,7 +979,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, ; } -static void gen8_free_page_tables(struct drm_i915_private *dev_priv, +static void gen8_free_page_tables(struct i915_address_space *vm, struct i915_page_directory *pd) { int i; @@ -984,34 +991,33 @@ static void gen8_free_page_tables(struct drm_i915_private *dev_priv, if (WARN_ON(!pd->page_table[i])) continue; - free_pt(dev_priv, pd->page_table[i]); + free_pt(vm, pd->page_table[i]); pd->page_table[i] = NULL; } } static int gen8_init_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; int ret; - ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); + ret = setup_scratch_page(vm, I915_GFP_DMA); if (ret) return ret; - vm->scratch_pt = alloc_pt(dev_priv); + vm->scratch_pt = alloc_pt(vm); if (IS_ERR(vm->scratch_pt)) { ret = PTR_ERR(vm->scratch_pt); goto free_scratch_page; } - vm->scratch_pd = alloc_pd(dev_priv); + vm->scratch_pd = alloc_pd(vm); if (IS_ERR(vm->scratch_pd)) { ret = PTR_ERR(vm->scratch_pd); goto free_pt; } - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - vm->scratch_pdp = alloc_pdp(dev_priv); + if (USES_FULL_48BIT_PPGTT(dev)) { + vm->scratch_pdp = alloc_pdp(vm); if (IS_ERR(vm->scratch_pdp)) { ret = PTR_ERR(vm->scratch_pdp); goto free_pd; @@ -1026,11 +1032,11 @@ static int gen8_init_scratch(struct i915_address_space *vm) return 0; free_pd: - free_pd(dev_priv, vm->scratch_pd); + free_pd(vm, vm->scratch_pd); free_pt: - free_pt(dev_priv, vm->scratch_pt); + free_pt(vm, vm->scratch_pt); free_scratch_page: - cleanup_scratch_page(dev_priv, &vm->scratch_page); + cleanup_scratch_page(vm); return ret; } @@ -1068,44 +1074,41 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) static void gen8_free_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; - - if (USES_FULL_48BIT_PPGTT(dev_priv)) - free_pdp(dev_priv, vm->scratch_pdp); - free_pd(dev_priv, vm->scratch_pd); - free_pt(dev_priv, vm->scratch_pt); - cleanup_scratch_page(dev_priv, &vm->scratch_page); + if (USES_FULL_48BIT_PPGTT(vm->i915)) + free_pdp(vm, vm->scratch_pdp); + free_pd(vm, vm->scratch_pd); + free_pt(vm, vm->scratch_pt); + cleanup_scratch_page(vm); } -static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv, +static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { int i; - for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) { + for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) { if (WARN_ON(!pdp->page_directory[i])) continue; - gen8_free_page_tables(dev_priv, pdp->page_directory[i]); - free_pd(dev_priv, pdp->page_directory[i]); + gen8_free_page_tables(vm, pdp->page_directory[i]); + free_pd(vm, pdp->page_directory[i]); } - free_pdp(dev_priv, pdp); + free_pdp(vm, pdp); } static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) { - struct drm_i915_private *dev_priv = ppgtt->base.i915; int i; for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { if (WARN_ON(!ppgtt->pml4.pdps[i])) continue; - gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]); + gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]); } - cleanup_px(dev_priv, &ppgtt->pml4); + cleanup_px(&ppgtt->base, &ppgtt->pml4); } static void gen8_ppgtt_cleanup(struct i915_address_space *vm) @@ -1116,8 +1119,8 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) if (intel_vgpu_active(dev_priv)) gen8_ppgtt_notify_vgt(ppgtt, false); - if (!USES_FULL_48BIT_PPGTT(dev_priv)) - gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp); + if (!USES_FULL_48BIT_PPGTT(vm->i915)) + gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp); else gen8_ppgtt_cleanup_4lvl(ppgtt); @@ -1148,7 +1151,6 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, uint64_t length, unsigned long *new_pts) { - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_table *pt; uint32_t pde; @@ -1160,7 +1162,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, continue; } - pt = alloc_pt(dev_priv); + pt = alloc_pt(vm); if (IS_ERR(pt)) goto unwind_out; @@ -1174,7 +1176,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, unwind_out: for_each_set_bit(pde, new_pts, I915_PDES) - free_pt(dev_priv, pd->page_table[pde]); + free_pt(vm, pd->page_table[pde]); return -ENOMEM; } @@ -1209,7 +1211,6 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, uint64_t length, unsigned long *new_pds) { - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_directory *pd; uint32_t pdpe; uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); @@ -1220,7 +1221,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, if (test_bit(pdpe, pdp->used_pdpes)) continue; - pd = alloc_pd(dev_priv); + pd = alloc_pd(vm); if (IS_ERR(pd)) goto unwind_out; @@ -1234,7 +1235,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, unwind_out: for_each_set_bit(pdpe, new_pds, pdpes) - free_pd(dev_priv, pdp->page_directory[pdpe]); + free_pd(vm, pdp->page_directory[pdpe]); return -ENOMEM; } @@ -1262,7 +1263,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, uint64_t length, unsigned long *new_pdps) { - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_directory_pointer *pdp; uint32_t pml4e; @@ -1270,7 +1270,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { if (!test_bit(pml4e, pml4->used_pml4es)) { - pdp = alloc_pdp(dev_priv); + pdp = alloc_pdp(vm); if (IS_ERR(pdp)) goto unwind_out; @@ -1288,7 +1288,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, unwind_out: for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - free_pdp(dev_priv, pml4->pdps[pml4e]); + free_pdp(vm, pml4->pdps[pml4e]); return -ENOMEM; } @@ -1337,7 +1337,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned long *new_page_dirs, *new_page_tables; - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_directory *pd; const uint64_t orig_start = start; const uint64_t orig_length = length; @@ -1406,7 +1405,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, * point we're still relying on insert_entries() */ } - kunmap_px(ppgtt, page_directory); + kunmap_px(page_directory); __set_bit(pdpe, pdp->used_pdpes); gen8_setup_pdpe(ppgtt, pdp, pd, pdpe); } @@ -1421,12 +1420,11 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, for_each_set_bit(temp, new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES), I915_PDES) - free_pt(dev_priv, - pdp->page_directory[pdpe]->page_table[temp]); + free_pt(vm, pdp->page_directory[pdpe]->page_table[temp]); } for_each_set_bit(pdpe, new_page_dirs, pdpes) - free_pd(dev_priv, pdp->page_directory[pdpe]); + free_pd(vm, pdp->page_directory[pdpe]); free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); mark_tlbs_dirty(ppgtt); @@ -1439,7 +1437,6 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, uint64_t length) { DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory_pointer *pdp; uint64_t pml4e; int ret = 0; @@ -1463,7 +1460,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, if (ret) goto err_out; - gen8_setup_pml4e(ppgtt, pml4, pdp, pml4e); + gen8_setup_pml4e(pml4, pdp, pml4e); } bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, @@ -1473,7 +1470,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, err_out: for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - gen8_ppgtt_cleanup_3lvl(vm->i915, pml4->pdps[pml4e]); + gen8_ppgtt_cleanup_3lvl(vm, pml4->pdps[pml4e]); return ret; } @@ -1489,7 +1486,8 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); } -static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, +static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, + struct i915_page_directory_pointer *pdp, uint64_t start, uint64_t length, gen8_pte_t scratch_pte, struct seq_file *m) @@ -1555,7 +1553,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); if (!USES_FULL_48BIT_PPGTT(vm->i915)) { - gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); + gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); } else { uint64_t pml4e; struct i915_pml4 *pml4 = &ppgtt->pml4; @@ -1566,7 +1564,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) continue; seq_printf(m, " PML4E #%llu\n", pml4e); - gen8_dump_pdp(pdp, start, length, scratch_pte, m); + gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m); } } } @@ -1622,8 +1620,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.bind_vma = ppgtt_bind_vma; ppgtt->debug_dump = gen8_dump_ppgtt; + /* There are only few exceptions for gen >=6. chv and bxt. + * And we are not sure about the latter so play safe for now. + */ + if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) + ppgtt->base.pt_kmap_wc = true; + if (USES_FULL_48BIT_PPGTT(dev_priv)) { - ret = setup_px(dev_priv, &ppgtt->pml4); + ret = setup_px(&ppgtt->base, &ppgtt->pml4); if (ret) goto free_scratch; @@ -1712,7 +1716,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) } seq_puts(m, "\n"); } - kunmap_px(ppgtt, pt_vaddr); + kunmap_px(pt_vaddr); } } @@ -1909,7 +1913,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, for (i = first_pte; i < last_pte; i++) pt_vaddr[i] = scratch_pte; - kunmap_px(ppgtt, pt_vaddr); + kunmap_px(pt_vaddr); num_entries -= last_pte - first_pte; first_pte = 0; @@ -1948,12 +1952,12 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, } if (++act_pte == GEN6_PTES) { - kunmap_px(ppgtt, vaddr); + kunmap_px(vaddr); vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]); act_pte = 0; } } while (1); - kunmap_px(ppgtt, vaddr); + kunmap_px(vaddr); } static int gen6_alloc_va_range(struct i915_address_space *vm, @@ -1987,7 +1991,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, /* We've already allocated a page table */ WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); - pt = alloc_pt(dev_priv); + pt = alloc_pt(vm); if (IS_ERR(pt)) { ret = PTR_ERR(pt); goto unwind_out; @@ -2035,7 +2039,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, struct i915_page_table *pt = ppgtt->pd.page_table[pde]; ppgtt->pd.page_table[pde] = vm->scratch_pt; - free_pt(dev_priv, pt); + free_pt(vm, pt); } mark_tlbs_dirty(ppgtt); @@ -2044,16 +2048,15 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, static int gen6_init_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; int ret; - ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); + ret = setup_scratch_page(vm, I915_GFP_DMA); if (ret) return ret; - vm->scratch_pt = alloc_pt(dev_priv); + vm->scratch_pt = alloc_pt(vm); if (IS_ERR(vm->scratch_pt)) { - cleanup_scratch_page(dev_priv, &vm->scratch_page); + cleanup_scratch_page(vm); return PTR_ERR(vm->scratch_pt); } @@ -2064,17 +2067,14 @@ static int gen6_init_scratch(struct i915_address_space *vm) static void gen6_free_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; - - free_pt(dev_priv, vm->scratch_pt); - cleanup_scratch_page(dev_priv, &vm->scratch_page); + free_pt(vm, vm->scratch_pt); + cleanup_scratch_page(vm); } static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd = &ppgtt->pd; - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_table *pt; uint32_t pde; @@ -2082,7 +2082,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) gen6_for_all_pdes(pt, pd, pde) if (pt != vm->scratch_pt) - free_pt(dev_priv, pt); + free_pt(vm, pt); gen6_free_scratch(vm); } @@ -2191,6 +2191,7 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, struct drm_i915_private *dev_priv) { ppgtt->base.i915 = dev_priv; + ppgtt->base.dma = &dev_priv->drm.pdev->dev; if (INTEL_INFO(dev_priv)->gen < 8) return gen6_ppgtt_init(ppgtt); @@ -2212,10 +2213,14 @@ static void i915_address_space_init(struct i915_address_space *vm, INIT_LIST_HEAD(&vm->unbound_list); list_add_tail(&vm->global_link, &dev_priv->vm_list); + pagevec_init(&vm->free_pages, false); } static void i915_address_space_fini(struct i915_address_space *vm) { + if (pagevec_count(&vm->free_pages)) + vm_free_pages_release(vm); + i915_gem_timeline_fini(&vm->timeline); drm_mm_takedown(&vm->mm); list_del(&vm->global_link); @@ -2323,9 +2328,8 @@ void i915_ppgtt_release(struct kref *kref) WARN_ON(!list_empty(&ppgtt->base.inactive_list)); WARN_ON(!list_empty(&ppgtt->base.unbound_list)); - i915_address_space_fini(&ppgtt->base); - ppgtt->base.cleanup(&ppgtt->base); + i915_address_space_fini(&ppgtt->base); kfree(ppgtt); } @@ -2991,7 +2995,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return -ENOMEM; } - ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32); + ret = setup_scratch_page(&ggtt->base, GFP_DMA32); if (ret) { DRM_ERROR("Scratch setup failed\n"); /* iounmap will also get called at remove, but meh */ @@ -3080,7 +3084,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm) struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); iounmap(ggtt->gsm); - cleanup_scratch_page(vm->i915, &vm->scratch_page); + cleanup_scratch_page(vm); } static int gen8_gmch_probe(struct i915_ggtt *ggtt) @@ -3231,6 +3235,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) int ret; ggtt->base.i915 = dev_priv; + ggtt->base.dma = &dev_priv->drm.pdev->dev; if (INTEL_GEN(dev_priv) <= 5) ret = i915_gmch_probe(ggtt); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index fe922059a412..6162bedc0811 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -36,9 +36,11 @@ #include #include +#include #include "i915_gem_timeline.h" #include "i915_gem_request.h" +#include "i915_selftest.h" #define I915_GTT_PAGE_SIZE 4096UL #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE @@ -247,6 +249,7 @@ struct i915_address_space { struct drm_mm mm; struct i915_gem_timeline timeline; struct drm_i915_private *i915; + struct device *dma; /* Every address space belongs to a struct file - except for the global * GTT that is owned by the driver (and so @file is set to NULL). In * principle, no information should leak from one context to another @@ -297,6 +300,9 @@ struct i915_address_space { */ struct list_head unbound_list; + struct pagevec free_pages; + bool pt_kmap_wc; + /* FIXME: Need a more generic return type */ gen6_pte_t (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, @@ -326,6 +332,8 @@ struct i915_address_space { int (*bind_vma)(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); + + I915_SELFTEST_DECLARE(struct fault_attr fault_attr); }; #define i915_is_ggtt(V) (!(V)->file) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 320c6879fd83..e23753181720 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -832,17 +832,17 @@ static int shrink_hole(struct drm_i915_private *i915, unsigned long prime; int err; - i915->vm_fault.probability = 999; - atomic_set(&i915->vm_fault.times, -1); + vm->fault_attr.probability = 999; + atomic_set(&vm->fault_attr.times, -1); for_each_prime_number_from(prime, 0, ULONG_MAX - 1) { - i915->vm_fault.interval = prime; + vm->fault_attr.interval = prime; err = __shrink_hole(i915, vm, hole_start, hole_end, end_time); if (err) break; } - memset(&i915->vm_fault, 0, sizeof(i915->vm_fault)); + memset(&vm->fault_attr, 0, sizeof(vm->fault_attr)); return err; } From 9231da70b338b336b982c74fad4afab5b55e6534 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:41 +0000 Subject: [PATCH 0228/1299] drm/i915: Remove kmap/kunmap wrappers As these are now both plain and simple kmap_atomic/kunmap_atomic pairs, we can remove the wrappers for a small gain of clarity (in particular, not hiding the atomic critical sections!). Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 69 +++++++++++------------------ 1 file changed, 26 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4eb0e114a295..8ac62cabf4cc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -411,21 +411,7 @@ static void cleanup_page_dma(struct i915_address_space *vm, vm_free_page(vm, p->page); } -static void *kmap_page_dma(struct i915_page_dma *p) -{ - return kmap_atomic(p->page); -} - -/* We use the flushing unmap only with ppgtt structures: - * page directories, page tables and scratch pages. - */ -static void kunmap_page_dma(void *vaddr) -{ - kunmap_atomic(vaddr); -} - -#define kmap_px(px) kmap_page_dma(px_base(px)) -#define kunmap_px(vaddr) kunmap_page_dma((vaddr)) +#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) #define setup_px(vm, px) setup_page_dma((vm), px_base(px)) #define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px)) @@ -436,13 +422,13 @@ static void fill_page_dma(struct i915_address_space *vm, struct i915_page_dma *p, const u64 val) { - u64 * const vaddr = kmap_page_dma(p); + u64 * const vaddr = kmap_atomic(p->page); int i; for (i = 0; i < 512; i++) vaddr[i] = val; - kunmap_page_dma(vaddr); + kunmap_atomic(vaddr); } static void fill_page_dma_32(struct i915_address_space *vm, @@ -675,9 +661,9 @@ gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt, if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))) return; - page_directorypo = kmap_px(pdp); + page_directorypo = kmap_atomic_px(pdp); page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); - kunmap_px(page_directorypo); + kunmap_atomic(page_directorypo); } static void @@ -685,10 +671,10 @@ gen8_setup_pml4e(struct i915_pml4 *pml4, struct i915_page_directory_pointer *pdp, int index) { - gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); + gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4); pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); - kunmap_px(pagemap); + kunmap_atomic(pagemap); } /* Broadwell Page Directory Pointer Descriptors */ @@ -774,10 +760,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, return true; } - vaddr = kmap_px(pt); + vaddr = kmap_atomic_px(pt); while (pte < pte_end) vaddr[pte++] = scratch_pte; - kunmap_px(vaddr); + kunmap_atomic(vaddr); return false; } @@ -802,9 +788,9 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, if (gen8_ppgtt_clear_pt(vm, pt, start, length)) { __clear_bit(pde, pd->used_pdes); - pde_vaddr = kmap_px(pd); + pde_vaddr = kmap_atomic_px(pd); pde_vaddr[pde] = scratch_pde; - kunmap_px(pde_vaddr); + kunmap_atomic(pde_vaddr); free_pt(vm, pt); } } @@ -904,7 +890,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, bool ret; pd = pdp->page_directory[pdpe]; - vaddr = kmap_px(pd->page_table[pde]); + vaddr = kmap_atomic_px(pd->page_table[pde]); do { vaddr[pte] = pte_encode | iter->dma; iter->dma += PAGE_SIZE; @@ -932,12 +918,12 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, pde = 0; } - kunmap_px(vaddr); - vaddr = kmap_px(pd->page_table[pde]); + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(pd->page_table[pde]); pte = 0; } } while (1); - kunmap_px(vaddr); + kunmap_atomic(vaddr); return ret; } @@ -1370,7 +1356,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, /* Allocations have completed successfully, so set the bitmaps, and do * the mappings. */ gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - gen8_pde_t *const page_directory = kmap_px(pd); + gen8_pde_t *const page_directory = kmap_atomic_px(pd); struct i915_page_table *pt; uint64_t pd_len = length; uint64_t pd_start = start; @@ -1405,7 +1391,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, * point we're still relying on insert_entries() */ } - kunmap_px(page_directory); + kunmap_atomic(page_directory); __set_bit(pdpe, pdp->used_pdpes); gen8_setup_pdpe(ppgtt, pdp, pd, pdpe); } @@ -1512,7 +1498,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, if (!test_bit(pde, pd->used_pdes)) continue; - pt_vaddr = kmap_px(pt); + pt_vaddr = kmap_atomic_px(pt); for (pte = 0; pte < GEN8_PTES; pte += 4) { uint64_t va = (pdpe << GEN8_PDPE_SHIFT) | @@ -1536,9 +1522,6 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, } seq_puts(m, "\n"); } - /* don't use kunmap_px, it could trigger - * an unnecessary flush. - */ kunmap_atomic(pt_vaddr); } } @@ -1693,7 +1676,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) expected); seq_printf(m, "\tPDE: %x\n", pd_entry); - pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); + pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]); for (pte = 0; pte < GEN6_PTES; pte+=4) { unsigned long va = @@ -1716,7 +1699,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) } seq_puts(m, "\n"); } - kunmap_px(pt_vaddr); + kunmap_atomic(pt_vaddr); } } @@ -1908,12 +1891,12 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, if (last_pte > GEN6_PTES) last_pte = GEN6_PTES; - pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); for (i = first_pte; i < last_pte; i++) pt_vaddr[i] = scratch_pte; - kunmap_px(pt_vaddr); + kunmap_atomic(pt_vaddr); num_entries -= last_pte - first_pte; first_pte = 0; @@ -1934,7 +1917,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sgt_dma iter; gen6_pte_t *vaddr; - vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); iter.sg = pages->sgl; iter.dma = sg_dma_address(iter.sg); iter.max = iter.dma + iter.sg->length; @@ -1952,12 +1935,12 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, } if (++act_pte == GEN6_PTES) { - kunmap_px(vaddr); - vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]); + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]); act_pte = 0; } } while (1); - kunmap_px(vaddr); + kunmap_atomic(vaddr); } static int gen6_alloc_va_range(struct i915_address_space *vm, From ff685975d97f8cb28e2fe4e6d90a71fd93244815 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:42 +0000 Subject: [PATCH 0229/1299] drm/i915: Move allocate_va_range to GTT In the future, we need to call allocate_va_range on the aliasing-ppgtt which means moving the call down from the vma into the vm (which is more appropriate for calling the vm function). Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 39 ++++++++++++++++++++--------- drivers/gpu/drm/i915/i915_vma.c | 9 ------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8ac62cabf4cc..8a24bf006a82 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -190,11 +190,18 @@ static int ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 unused) { - u32 pte_flags = 0; + u32 pte_flags; + int ret; + + trace_i915_va_alloc(vma); + ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); + if (ret) + return ret; vma->pages = vma->obj->mm.pages; /* Currently applicable only to VLV */ + pte_flags = 0; if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -206,9 +213,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma, static void ppgtt_unbind_vma(struct i915_vma *vma) { - vma->vm->clear_range(vma->vm, - vma->node.start, - vma->size); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } static gen8_pte_t gen8_pte_encode(dma_addr_t addr, @@ -2650,9 +2655,10 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, { struct drm_i915_private *i915 = vma->vm->i915; u32 pte_flags; + int ret; if (unlikely(!vma->pages)) { - int ret = i915_get_ggtt_vma_pages(vma); + ret = i915_get_ggtt_vma_pages(vma); if (ret) return ret; } @@ -2662,6 +2668,22 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; + if (flags & I915_VMA_LOCAL_BIND) { + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; + + if (appgtt->base.allocate_va_range) { + ret = appgtt->base.allocate_va_range(&appgtt->base, + vma->node.start, + vma->node.size); + if (ret) + return ret; + } + + appgtt->base.insert_entries(&appgtt->base, + vma->pages, vma->node.start, + cache_level, pte_flags); + } + if (flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); vma->vm->insert_entries(vma->vm, @@ -2670,13 +2692,6 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, intel_runtime_pm_put(i915); } - if (flags & I915_VMA_LOCAL_BIND) { - struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; - appgtt->base.insert_entries(&appgtt->base, - vma->pages, vma->node.start, - cache_level, pte_flags); - } - return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 0dc994b76924..c1abfe7b48ea 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -263,15 +263,6 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, vma->vm->total))) return -ENODEV; - if (vma_flags == 0 && vma->vm->allocate_va_range) { - trace_i915_va_alloc(vma); - ret = vma->vm->allocate_va_range(vma->vm, - vma->node.start, - vma->node.size); - if (ret) - return ret; - } - trace_i915_vma_bind(vma, bind_flags); ret = vma->vm->bind_vma(vma, cache_level, bind_flags); if (ret) From 52c126ee907f436fc871bfc15836b8011f6eb76d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:43 +0000 Subject: [PATCH 0230/1299] drm/i915: Always preallocate gen6/7 ppgtt The hardware does not cope very well with us changing the PD within an active context (the context must be idle for it to re-read the PD). As we only check whether the page is idle before changing the entry (and on through the PD tree), we cannot reliably replace PD entries on gen6/gen7. To fully avoid changing the tree at runtime, preallocate it on init. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8a24bf006a82..1c78aef5709e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2103,6 +2103,12 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) if (ppgtt->node.start < ggtt->mappable_end) DRM_DEBUG("Forced to use aperture for PDEs\n"); + ppgtt->pd.base.ggtt_offset = + ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); + + ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + + ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); + return 0; err_out: @@ -2145,7 +2151,6 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ppgtt->base.allocate_va_range = gen6_alloc_va_range; ppgtt->base.clear_range = gen6_ppgtt_clear_range; ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.unbind_vma = ppgtt_unbind_vma; @@ -2155,22 +2160,21 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; ppgtt->debug_dump = gen6_dump_ppgtt; - ppgtt->pd.base.ggtt_offset = - ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); - - ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + - ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); - gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); - gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); + ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total); + if (ret) { + gen6_ppgtt_cleanup(&ppgtt->base); + return ret; + } + DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", ppgtt->node.size >> 20, ppgtt->node.start / PAGE_SIZE); - DRM_DEBUG("Adding PPGTT at offset %x\n", - ppgtt->pd.base.ggtt_offset << 10); + DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n", + ppgtt->pd.base.ggtt_offset << 10); return 0; } From f0a22974acbdd17b03cff4bdee880e4f08cccf6d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:44 +0000 Subject: [PATCH 0231/1299] drm/i915: Remove redundant clear of appgtt Upon creation of the va range, it is initialised to point at scratch. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-10-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1c78aef5709e..52586432d5e8 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2769,10 +2769,6 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) goto err_ppgtt; } - ppgtt->base.clear_range(&ppgtt->base, - ppgtt->base.start, - ppgtt->base.total); - i915->mm.aliasing_ppgtt = ppgtt; WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); From 16a011c8be2f36f972fd9295d0c350c7dffd4779 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:45 +0000 Subject: [PATCH 0232/1299] drm/i915: Tidy gen6_write_pde() Stop passing around unused parameters makes the code more compact. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-11-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 39 +++++++++++------------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 52586432d5e8..17c5c1a5b652 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1709,36 +1709,28 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) } /* Write pde (index) from the page directory @pd to the page table @pt */ -static void gen6_write_pde(struct i915_page_directory *pd, - const int pde, struct i915_page_table *pt) +static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt, + const unsigned int pde, + const struct i915_page_table *pt) { /* Caller needs to make sure the write completes if necessary */ - struct i915_hw_ppgtt *ppgtt = - container_of(pd, struct i915_hw_ppgtt, pd); - u32 pd_entry; - - pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); - pd_entry |= GEN6_PDE_VALID; - - writel(pd_entry, ppgtt->pd_addr + pde); + writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID, + ppgtt->pd_addr + pde); } /* Write all the page tables found in the ppgtt structure to incrementing page * directories. */ -static void gen6_write_page_range(struct drm_i915_private *dev_priv, - struct i915_page_directory *pd, +static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, uint32_t start, uint32_t length) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_page_table *pt; - uint32_t pde; + unsigned int pde; - gen6_for_each_pde(pt, pd, start, length, pde) - gen6_write_pde(pd, pde, pt); + gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) + gen6_write_pde(ppgtt, pde, pt); + wmb(); - /* Make sure write is complete before other code can use this page - * table. Also require for WC mapped PTEs */ - readl(ggtt->gsm); + mark_tlbs_dirty(ppgtt); } static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) @@ -2003,7 +1995,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, gen6_pte_count(start, length)); if (__test_and_clear_bit(pde, new_page_tables)) - gen6_write_pde(&ppgtt->pd, pde, pt); + gen6_write_pde(ppgtt, pde, pt); trace_i915_page_table_entry_map(vm, pde, pt, gen6_pte_index(start), @@ -2161,7 +2153,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->debug_dump = gen6_dump_ppgtt; gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); - gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); + gen6_write_page_range(ppgtt, 0, ppgtt->base.total); ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total); if (ret) { @@ -3395,8 +3387,6 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) struct i915_address_space *vm; list_for_each_entry(vm, &dev_priv->vm_list, global_link) { - /* TODO: Perhaps it shouldn't be gen6 specific */ - struct i915_hw_ppgtt *ppgtt; if (i915_is_ggtt(vm)) @@ -3404,8 +3394,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) else ppgtt = i915_vm_to_ppgtt(vm); - gen6_write_page_range(dev_priv, &ppgtt->pd, - 0, ppgtt->base.total); + gen6_write_page_range(ppgtt, 0, ppgtt->base.total); } } From dd19674bacba227ae5d3ce680cbc5668198894dc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:46 +0000 Subject: [PATCH 0233/1299] drm/i915: Remove bitmap tracking for used-ptes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only operate on known extents (both for alloc/clear) and so we can use both the knowledge of the bind/unbind range along with the knowledge of the existing pagetable to avoid having to allocate temporary and auxiliary bitmaps. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99295 Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Reviewed-by: Michał Winiarski Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-12-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 332 +++++++++------------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +- drivers/gpu/drm/i915/i915_trace.h | 19 +- 3 files changed, 118 insertions(+), 238 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 17c5c1a5b652..ae2ff36b275a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -457,62 +457,38 @@ static void cleanup_scratch_page(struct i915_address_space *vm) static struct i915_page_table *alloc_pt(struct i915_address_space *vm) { struct i915_page_table *pt; - const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES; - int ret = -ENOMEM; - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - if (!pt) + pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pt)) return ERR_PTR(-ENOMEM); - pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), - GFP_KERNEL); - - if (!pt->used_ptes) - goto fail_bitmap; - - ret = setup_px(vm, pt); - if (ret) - goto fail_page_m; + if (unlikely(setup_px(vm, pt))) { + kfree(pt); + return ERR_PTR(-ENOMEM); + } + pt->used_ptes = 0; return pt; - -fail_page_m: - kfree(pt->used_ptes); -fail_bitmap: - kfree(pt); - - return ERR_PTR(ret); } static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt) { cleanup_px(vm, pt); - kfree(pt->used_ptes); kfree(pt); } static void gen8_initialize_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - gen8_pte_t scratch_pte; - - scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); - - fill_px(vm, pt, scratch_pte); + fill_px(vm, pt, + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC)); } static void gen6_initialize_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - gen6_pte_t scratch_pte; - - WARN_ON(vm->scratch_page.daddr == 0); - - scratch_pte = vm->pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC, 0); - - fill32_px(vm, pt, scratch_pte); + fill32_px(vm, pt, + vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0)); } static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) @@ -556,11 +532,12 @@ static void free_pd(struct i915_address_space *vm, static void gen8_initialize_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { - gen8_pde_t scratch_pde; + unsigned int i; - scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); - - fill_px(vm, pd, scratch_pde); + fill_px(vm, pd, + gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC)); + for (i = 0; i < I915_PDES; i++) + pd->page_table[i] = vm->scratch_pt; } static int __pdp_init(struct drm_i915_private *dev_priv, @@ -744,8 +721,7 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) */ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, struct i915_page_table *pt, - uint64_t start, - uint64_t length) + u64 start, u64 length) { unsigned int num_entries = gen8_pte_count(start, length); unsigned int pte = gen8_pte_index(start); @@ -754,16 +730,11 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); gen8_pte_t *vaddr; - if (WARN_ON(!px_page(pt))) - return false; + GEM_BUG_ON(num_entries > pt->used_ptes); - GEM_BUG_ON(pte_end > GEN8_PTES); - - bitmap_clear(pt->used_ptes, pte, num_entries); - if (USES_FULL_PPGTT(vm->i915)) { - if (bitmap_empty(pt->used_ptes, GEN8_PTES)) - return true; - } + pt->used_ptes -= num_entries; + if (!pt->used_ptes) + return true; vaddr = kmap_atomic_px(pt); while (pte < pte_end) @@ -773,31 +744,38 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, return false; } +static void gen8_ppgtt_set_pde(struct i915_address_space *vm, + struct i915_page_directory *pd, + struct i915_page_table *pt, + unsigned int pde) +{ + gen8_pde_t *vaddr; + + pd->page_table[pde] = pt; + + vaddr = kmap_atomic_px(pd); + vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC); + kunmap_atomic(vaddr); +} + /* Removes entries from a single page dir, releasing it if it's empty. * Caller can use the return value to update higher-level entries */ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, struct i915_page_directory *pd, - uint64_t start, - uint64_t length) + u64 start, u64 length) { struct i915_page_table *pt; - uint64_t pde; - gen8_pde_t *pde_vaddr; - gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), - I915_CACHE_LLC); + u32 pde; gen8_for_each_pde(pt, pd, start, length, pde) { - if (WARN_ON(!pd->page_table[pde])) - break; + if (!gen8_ppgtt_clear_pt(vm, pt, start, length)) + continue; - if (gen8_ppgtt_clear_pt(vm, pt, start, length)) { - __clear_bit(pde, pd->used_pdes); - pde_vaddr = kmap_atomic_px(pd); - pde_vaddr[pde] = scratch_pde; - kunmap_atomic(pde_vaddr); - free_pt(vm, pt); - } + gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde); + __clear_bit(pde, pd->used_pdes); + + free_pt(vm, pt); } if (bitmap_empty(pd->used_pdes, I915_PDES)) @@ -1124,8 +1102,6 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) * @pd: Page directory for this address range. * @start: Starting virtual address to begin allocations. * @length: Size of the allocations. - * @new_pts: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. * * Allocate the required number of page tables. Extremely similar to * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by @@ -1138,37 +1114,30 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) */ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, struct i915_page_directory *pd, - uint64_t start, - uint64_t length, - unsigned long *new_pts) + u64 start, u64 length) { struct i915_page_table *pt; + u64 from = start; uint32_t pde; gen8_for_each_pde(pt, pd, start, length, pde) { /* Don't reallocate page tables */ - if (test_bit(pde, pd->used_pdes)) { - /* Scratch is never allocated this way */ - WARN_ON(pt == vm->scratch_pt); - continue; + if (!test_bit(pde, pd->used_pdes)) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) + goto unwind; + + gen8_initialize_pt(vm, pt); + pd->page_table[pde] = pt; } - - pt = alloc_pt(vm); - if (IS_ERR(pt)) - goto unwind_out; - - gen8_initialize_pt(vm, pt); - pd->page_table[pde] = pt; - __set_bit(pde, new_pts); + pt->used_ptes += gen8_pte_count(start, length); trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); } return 0; -unwind_out: - for_each_set_bit(pde, new_pts, I915_PDES) - free_pt(vm, pd->page_table[pde]); - +unwind: + gen8_ppgtt_clear_pd(vm, pd, from, start - from); return -ENOMEM; } @@ -1285,9 +1254,8 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, } static void -free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) +free_gen8_temp_bitmaps(unsigned long *new_pds) { - kfree(new_pts); kfree(new_pds); } @@ -1296,29 +1264,16 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) */ static int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, - unsigned long **new_pts, uint32_t pdpes) { unsigned long *pds; - unsigned long *pts; pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); if (!pds) return -ENOMEM; - pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), - GFP_TEMPORARY); - if (!pts) - goto err_out; - *new_pds = pds; - *new_pts = pts; - return 0; - -err_out: - free_gen8_temp_bitmaps(pds, pts); - return -ENOMEM; } static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, @@ -1327,7 +1282,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, uint64_t length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - unsigned long *new_page_dirs, *new_page_tables; + unsigned long *new_page_dirs; struct i915_page_directory *pd; const uint64_t orig_start = start; const uint64_t orig_length = length; @@ -1335,7 +1290,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); int ret; - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); + ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes); if (ret) return ret; @@ -1343,14 +1298,13 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, new_page_dirs); if (ret) { - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs); return ret; } /* For every page directory referenced, allocate page tables */ gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, - new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); + ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length); if (ret) goto err_out; } @@ -1376,11 +1330,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, WARN_ON(!pd_len); WARN_ON(!gen8_pte_count(pd_start, pd_len)); - /* Set our used ptes within the page table */ - bitmap_set(pt->used_ptes, - gen8_pte_index(pd_start), - gen8_pte_count(pd_start, pd_len)); - /* Our pde is now pointing to the pagetable, pt */ __set_bit(pde, pd->used_pdes); @@ -1389,8 +1338,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, I915_CACHE_LLC); trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, gen8_pte_index(start), - gen8_pte_count(start, length), - GEN8_PTES); + gen8_pte_count(start, length)); /* NB: We haven't yet mapped ptes to pages. At this * point we're still relying on insert_entries() */ @@ -1401,23 +1349,15 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, gen8_setup_pdpe(ppgtt, pdp, pd, pdpe); } - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs); mark_tlbs_dirty(ppgtt); return 0; err_out: - while (pdpe--) { - unsigned long temp; - - for_each_set_bit(temp, new_page_tables + pdpe * - BITS_TO_LONGS(I915_PDES), I915_PDES) - free_pt(vm, pdp->page_directory[pdpe]->page_table[temp]); - } - for_each_set_bit(pdpe, new_page_dirs, pdpes) free_pd(vm, pdp->page_directory[pdpe]); - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs); mark_tlbs_dirty(ppgtt); return ret; } @@ -1559,14 +1499,14 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) { - unsigned long *new_page_dirs, *new_page_tables; + unsigned long *new_page_dirs; uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev)); int ret; /* We allocate temp bitmap for page tables for no gain * but as this is for init only, lets keep the things simple */ - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); + ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes); if (ret) return ret; @@ -1579,7 +1519,7 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) if (!ret) *ppgtt->pdp.used_pdpes = *new_page_dirs; - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs); return ret; } @@ -1728,16 +1668,15 @@ static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) gen6_write_pde(ppgtt, pde, pt); - wmb(); mark_tlbs_dirty(ppgtt); + wmb(); } -static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) +static inline uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) { - BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); - - return (ppgtt->pd.base.ggtt_offset / 64) << 16; + GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); + return ppgtt->pd.base.ggtt_offset << 10; } static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, @@ -1869,35 +1808,36 @@ static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) /* PPGTT support for Sandybdrige/Gen6 and later */ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - gen6_pte_t *pt_vaddr, scratch_pte; - unsigned first_entry = start >> PAGE_SHIFT; - unsigned num_entries = length >> PAGE_SHIFT; - unsigned act_pt = first_entry / GEN6_PTES; - unsigned first_pte = first_entry % GEN6_PTES; - unsigned last_pte, i; - - scratch_pte = vm->pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC, 0); + unsigned int first_entry = start >> PAGE_SHIFT; + unsigned int pde = first_entry / GEN6_PTES; + unsigned int pte = first_entry % GEN6_PTES; + unsigned int num_entries = length >> PAGE_SHIFT; + gen6_pte_t scratch_pte = + vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); while (num_entries) { - last_pte = first_pte + num_entries; - if (last_pte > GEN6_PTES) - last_pte = GEN6_PTES; + struct i915_page_table *pt = ppgtt->pd.page_table[pde++]; + unsigned int end = min(pte + num_entries, GEN6_PTES); + gen6_pte_t *vaddr; - pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); + num_entries -= end - pte; - for (i = first_pte; i < last_pte; i++) - pt_vaddr[i] = scratch_pte; + /* Note that the hw doesn't support removing PDE on the fly + * (they are cached inside the context with no means to + * invalidate the cache), so we can only reset the PTE + * entries back to scratch. + */ - kunmap_atomic(pt_vaddr); + vaddr = kmap_atomic_px(pt); + do { + vaddr[pte++] = scratch_pte; + } while (pte < end); + kunmap_atomic(vaddr); - num_entries -= last_pte - first_pte; - first_pte = 0; - act_pt++; + pte = 0; } } @@ -1941,89 +1881,37 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, } static int gen6_alloc_va_range(struct i915_address_space *vm, - uint64_t start_in, uint64_t length_in) + u64 start, u64 length) { - DECLARE_BITMAP(new_page_tables, I915_PDES); - struct drm_i915_private *dev_priv = vm->i915; - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_table *pt; - uint32_t start, length, start_save, length_save; - uint32_t pde; - int ret; - - start = start_save = start_in; - length = length_save = length_in; - - bitmap_zero(new_page_tables, I915_PDES); - - /* The allocation is done in two stages so that we can bail out with - * minimal amount of pain. The first stage finds new page tables that - * need allocation. The second stage marks use ptes within the page - * tables. - */ - gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { - if (pt != vm->scratch_pt) { - WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); - continue; - } - - /* We've already allocated a page table */ - WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); - - pt = alloc_pt(vm); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto unwind_out; - } - - gen6_initialize_pt(vm, pt); - - ppgtt->pd.page_table[pde] = pt; - __set_bit(pde, new_page_tables); - trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); - } - - start = start_save; - length = length_save; + u64 from = start; + unsigned int pde; + bool flush = false; gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { - DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); + if (pt == vm->scratch_pt) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) + goto unwind_out; - bitmap_zero(tmp_bitmap, GEN6_PTES); - bitmap_set(tmp_bitmap, gen6_pte_index(start), - gen6_pte_count(start, length)); - - if (__test_and_clear_bit(pde, new_page_tables)) + gen6_initialize_pt(vm, pt); + ppgtt->pd.page_table[pde] = pt; gen6_write_pde(ppgtt, pde, pt); - - trace_i915_page_table_entry_map(vm, pde, pt, - gen6_pte_index(start), - gen6_pte_count(start, length), - GEN6_PTES); - bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, - GEN6_PTES); + flush = true; + } } - WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); + if (flush) { + mark_tlbs_dirty(ppgtt); + wmb(); + } - /* Make sure write is complete before other code can use this page - * table. Also require for WC mapped PTEs */ - readl(ggtt->gsm); - - mark_tlbs_dirty(ppgtt); return 0; unwind_out: - for_each_set_bit(pde, new_page_tables, I915_PDES) { - struct i915_page_table *pt = ppgtt->pd.page_table[pde]; - - ppgtt->pd.page_table[pde] = vm->scratch_pt; - free_pt(vm, pt); - } - - mark_tlbs_dirty(ppgtt); - return ret; + gen6_ppgtt_clear_range(vm, from, start); + return -ENOMEM; } static int gen6_init_scratch(struct i915_address_space *vm) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 6162bedc0811..5ad5b59a01b1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -69,7 +69,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN6_PTE_UNCACHED (1 << 1) #define GEN6_PTE_VALID (1 << 0) -#define I915_PTES(pte_len) (PAGE_SIZE / (pte_len)) +#define I915_PTES(pte_len) ((unsigned int)(PAGE_SIZE / (pte_len))) #define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1) #define I915_PDES 512 #define I915_PDE_MASK (I915_PDES - 1) @@ -220,8 +220,7 @@ struct i915_page_dma { struct i915_page_table { struct i915_page_dma base; - - unsigned long *used_ptes; + unsigned int used_ptes; }; struct i915_page_directory { diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 4461df5a94fe..de31c49781d3 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -245,15 +245,14 @@ DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc, DECLARE_EVENT_CLASS(i915_page_table_entry_update, TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count, u32 bits), - TP_ARGS(vm, pde, pt, first, count, bits), + struct i915_page_table *pt, u32 first, u32 count), + TP_ARGS(vm, pde, pt, first, count), TP_STRUCT__entry( __field(struct i915_address_space *, vm) __field(u32, pde) __field(u32, first) __field(u32, last) - __dynamic_array(char, cur_ptes, TRACE_PT_SIZE(bits)) ), TP_fast_assign( @@ -261,22 +260,16 @@ DECLARE_EVENT_CLASS(i915_page_table_entry_update, __entry->pde = pde; __entry->first = first; __entry->last = first + count - 1; - scnprintf(__get_str(cur_ptes), - TRACE_PT_SIZE(bits), - "%*pb", - bits, - pt->used_ptes); ), - TP_printk("vm=%p, pde=%d, updating %u:%u\t%s", - __entry->vm, __entry->pde, __entry->last, __entry->first, - __get_str(cur_ptes)) + TP_printk("vm=%p, pde=%d, updating %u:%u", + __entry->vm, __entry->pde, __entry->last, __entry->first) ); DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map, TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count, u32 bits), - TP_ARGS(vm, pde, pt, first, count, bits) + struct i915_page_table *pt, u32 first, u32 count), + TP_ARGS(vm, pde, pt, first, count) ); TRACE_EVENT(i915_gem_object_change_domain, From fe52e37fa85d8b1404bed1389dde16297dad706a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:47 +0000 Subject: [PATCH 0234/1299] drm/i915: Remove bitmap tracking for used-pdes We only operate on known extents (both for alloc/clear) and so we can use both the knowledge of the bind/unbind range along with the knowledge of the existing pagetable to avoid having to allocate temporary and auxiliary bitmaps. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-13-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 250 ++++++++++------------------ drivers/gpu/drm/i915/i915_gem_gtt.h | 6 +- 2 files changed, 86 insertions(+), 170 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ae2ff36b275a..f3171d56d914 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -494,39 +494,25 @@ static void gen6_initialize_pt(struct i915_address_space *vm, static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) { struct i915_page_directory *pd; - int ret = -ENOMEM; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) + pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pd)) return ERR_PTR(-ENOMEM); - pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), - sizeof(*pd->used_pdes), GFP_KERNEL); - if (!pd->used_pdes) - goto fail_bitmap; - - ret = setup_px(vm, pd); - if (ret) - goto fail_page_m; + if (unlikely(setup_px(vm, pd))) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + pd->used_pdes = 0; return pd; - -fail_page_m: - kfree(pd->used_pdes); -fail_bitmap: - kfree(pd); - - return ERR_PTR(ret); } static void free_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { - if (px_page(pd)) { - cleanup_px(vm, pd); - kfree(pd->used_pdes); - kfree(pd); - } + cleanup_px(vm, pd); + kfree(pd); } static void gen8_initialize_pd(struct i915_address_space *vm, @@ -540,10 +526,11 @@ static void gen8_initialize_pd(struct i915_address_space *vm, pd->page_table[i] = vm->scratch_pt; } -static int __pdp_init(struct drm_i915_private *dev_priv, +static int __pdp_init(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { - size_t pdpes = I915_PDPES_PER_PDP(dev_priv); + size_t pdpes = I915_PDPES_PER_PDP(vm->i915); + int i; pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), @@ -551,8 +538,8 @@ static int __pdp_init(struct drm_i915_private *dev_priv, if (!pdp->used_pdpes) return -ENOMEM; - pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), - GFP_KERNEL); + pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory), + GFP_KERNEL); if (!pdp->page_directory) { kfree(pdp->used_pdpes); /* the PDP might be the statically allocated top level. Keep it @@ -561,6 +548,9 @@ static int __pdp_init(struct drm_i915_private *dev_priv, return -ENOMEM; } + for (i = 0; i < pdpes; i++) + pdp->page_directory[i] = vm->scratch_pd; + return 0; } @@ -583,7 +573,7 @@ alloc_pdp(struct i915_address_space *vm) if (!pdp) return ERR_PTR(-ENOMEM); - ret = __pdp_init(vm->i915, pdp); + ret = __pdp_init(vm, pdp); if (ret) goto fail_bitmap; @@ -633,25 +623,9 @@ static void gen8_initialize_pml4(struct i915_address_space *vm, } static void -gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt, - struct i915_page_directory_pointer *pdp, - struct i915_page_directory *pd, - int index) -{ - gen8_ppgtt_pdpe_t *page_directorypo; - - if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))) - return; - - page_directorypo = kmap_atomic_px(pdp); - page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); - kunmap_atomic(page_directorypo); -} - -static void -gen8_setup_pml4e(struct i915_pml4 *pml4, - struct i915_page_directory_pointer *pdp, - int index) +gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4, + struct i915_page_directory_pointer *pdp, + int index) { gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4); @@ -758,9 +732,6 @@ static void gen8_ppgtt_set_pde(struct i915_address_space *vm, kunmap_atomic(vaddr); } -/* Removes entries from a single page dir, releasing it if it's empty. - * Caller can use the return value to update higher-level entries - */ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, struct i915_page_directory *pd, u64 start, u64 length) @@ -773,15 +744,28 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, continue; gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde); - __clear_bit(pde, pd->used_pdes); + pd->used_pdes--; free_pt(vm, pt); } - if (bitmap_empty(pd->used_pdes, I915_PDES)) - return true; + return !pd->used_pdes; +} - return false; +static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + struct i915_page_directory *pd, + unsigned int pdpe) +{ + gen8_ppgtt_pdpe_t *vaddr; + + pdp->page_directory[pdpe] = pd; + if (!USES_FULL_48BIT_PPGTT(vm->i915)) + return; + + vaddr = kmap_atomic_px(pdp); + vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); + kunmap_atomic(vaddr); } /* Removes entries from a single page dir pointer, releasing it if it's empty. @@ -789,41 +773,42 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, */ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length) + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd; - uint64_t pdpe; + unsigned int pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (WARN_ON(!pdp->page_directory[pdpe])) - break; + if (!gen8_ppgtt_clear_pd(vm, pd, start, length)) + continue; - if (gen8_ppgtt_clear_pd(vm, pd, start, length)) { - __clear_bit(pdpe, pdp->used_pdpes); - gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe); - free_pd(vm, pd); - } + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + __clear_bit(pdpe, pdp->used_pdpes); + + free_pd(vm, pd); } - mark_tlbs_dirty(ppgtt); - if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) return true; return false; } +static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, + u64 start, u64 length) +{ + gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length); +} + /* Removes entries from a single pml4. * This is the top-level structure in 4-level page tables used on gen8+. * Empty entries are always scratch pml4e. */ -static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length) +static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, + u64 start, u64 length) { + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; uint64_t pml4e; @@ -835,23 +820,12 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { __clear_bit(pml4e, pml4->used_pml4es); - gen8_setup_pml4e(pml4, vm->scratch_pdp, pml4e); + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); free_pdp(vm, pdp); } } } -static void gen8_ppgtt_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) -{ - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - - if (USES_FULL_48BIT_PPGTT(vm->i915)) - gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length); - else - gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length); -} - struct sgt_dma { struct scatterlist *sg; dma_addr_t dma, max; @@ -956,12 +930,9 @@ static void gen8_free_page_tables(struct i915_address_space *vm, if (!px_page(pd)) return; - for_each_set_bit(i, pd->used_pdes, I915_PDES) { - if (WARN_ON(!pd->page_table[i])) - continue; - - free_pt(vm, pd->page_table[i]); - pd->page_table[i] = NULL; + for (i = 0; i < I915_PDES; i++) { + if (pd->page_table[i] != vm->scratch_pt) + free_pt(vm, pd->page_table[i]); } } @@ -1056,7 +1027,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, int i; for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) { - if (WARN_ON(!pdp->page_directory[i])) + if (pdp->page_directory[i] == vm->scratch_pd) continue; gen8_free_page_tables(vm, pdp->page_directory[i]); @@ -1096,44 +1067,28 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) gen8_free_scratch(vm); } -/** - * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. - * @vm: Master vm structure. - * @pd: Page directory for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * - * Allocate the required number of page tables. Extremely similar to - * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by - * the page directory boundary (instead of the page directory pointer). That - * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is - * possible, and likely that the caller will need to use multiple calls of this - * function to achieve the appropriate allocation. - * - * Return: 0 if success; negative error code otherwise. - */ -static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, - struct i915_page_directory *pd, - u64 start, u64 length) +static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, + struct i915_page_directory *pd, + u64 start, u64 length) { struct i915_page_table *pt; u64 from = start; - uint32_t pde; + unsigned int pde; gen8_for_each_pde(pt, pd, start, length, pde) { - /* Don't reallocate page tables */ - if (!test_bit(pde, pd->used_pdes)) { + if (pt == vm->scratch_pt) { pt = alloc_pt(vm); if (IS_ERR(pt)) goto unwind; gen8_initialize_pt(vm, pt); - pd->page_table[pde] = pt; - } - pt->used_ptes += gen8_pte_count(start, length); - trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); - } + gen8_ppgtt_set_pde(vm, pd, pt, pde); + pd->used_pdes++; + } + + pt->used_ptes += gen8_pte_count(start, length); + } return 0; unwind: @@ -1210,7 +1165,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, * caller to free on error. * * Allocate the required number of page directory pointers. Extremely similar to - * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). + * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pd(). * The main difference is here we are limited by the pml4 boundary (instead of * the page directory pointer). * @@ -1262,9 +1217,8 @@ free_gen8_temp_bitmaps(unsigned long *new_pds) /* Fills in the page directory bitmap, and the array of page tables bitmap. Both * of these are based on the number of PDPEs in the system. */ -static -int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, - uint32_t pdpes) +static int __must_check +alloc_gen8_temp_bitmaps(unsigned long **new_pds, uint32_t pdpes) { unsigned long *pds; @@ -1284,8 +1238,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned long *new_page_dirs; struct i915_page_directory *pd; - const uint64_t orig_start = start; - const uint64_t orig_length = length; uint32_t pdpe; uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); int ret; @@ -1304,51 +1256,16 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, /* For every page directory referenced, allocate page tables */ gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length); + ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); if (ret) goto err_out; - } - start = orig_start; - length = orig_length; + if (test_and_set_bit(pdpe, pdp->used_pdpes)) + gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); + } /* Allocations have completed successfully, so set the bitmaps, and do * the mappings. */ - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - gen8_pde_t *const page_directory = kmap_atomic_px(pd); - struct i915_page_table *pt; - uint64_t pd_len = length; - uint64_t pd_start = start; - uint32_t pde; - - /* Every pd should be allocated, we just did that above. */ - WARN_ON(!pd); - - gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { - /* Same reasoning as pd */ - WARN_ON(!pt); - WARN_ON(!pd_len); - WARN_ON(!gen8_pte_count(pd_start, pd_len)); - - /* Our pde is now pointing to the pagetable, pt */ - __set_bit(pde, pd->used_pdes); - - /* Map the PDE to the page table */ - page_directory[pde] = gen8_pde_encode(px_dma(pt), - I915_CACHE_LLC); - trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, - gen8_pte_index(start), - gen8_pte_count(start, length)); - - /* NB: We haven't yet mapped ptes to pages. At this - * point we're still relying on insert_entries() */ - } - - kunmap_atomic(page_directory); - __set_bit(pdpe, pdp->used_pdpes); - gen8_setup_pdpe(ppgtt, pdp, pd, pdpe); - } - free_gen8_temp_bitmaps(new_page_dirs); mark_tlbs_dirty(ppgtt); return 0; @@ -1391,7 +1308,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, if (ret) goto err_out; - gen8_setup_pml4e(pml4, pdp, pml4e); + gen8_ppgtt_set_pml4e(pml4, pdp, pml4e); } bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, @@ -1440,7 +1357,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, uint32_t pte; gen8_pte_t *pt_vaddr; - if (!test_bit(pde, pd->used_pdes)) + if (pd->page_table[pde] == ppgtt->base.scratch_pt) continue; pt_vaddr = kmap_atomic_px(pt); @@ -1543,7 +1460,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.start = 0; ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.allocate_va_range = gen8_alloc_va_range; - ppgtt->base.clear_range = gen8_ppgtt_clear_range; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; ppgtt->debug_dump = gen8_dump_ppgtt; @@ -1565,8 +1481,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->switch_mm = gen8_48b_mm_switch; ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; + ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; } else { - ret = __pdp_init(dev_priv, &ppgtt->pdp); + ret = __pdp_init(&ppgtt->base, &ppgtt->pdp); if (ret) goto free_scratch; @@ -1583,6 +1500,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) } ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; + ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl; } if (intel_vgpu_active(dev_priv)) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 5ad5b59a01b1..a62b0ef1f3fc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -226,8 +226,8 @@ struct i915_page_table { struct i915_page_directory { struct i915_page_dma base; - unsigned long *used_pdes; struct i915_page_table *page_table[I915_PDES]; /* PDEs */ + unsigned int used_pdes; }; struct i915_page_directory_pointer { @@ -520,9 +520,7 @@ static inline size_t gen8_pte_count(uint64_t address, uint64_t length) static inline dma_addr_t i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) { - return test_bit(n, ppgtt->pdp.used_pdpes) ? - px_dma(ppgtt->pdp.page_directory[n]) : - px_dma(ppgtt->base.scratch_pd); + return px_dma(ppgtt->pdp.page_directory[n]); } static inline struct i915_ggtt * From e2b763caa6eb68ea56918ee6f79b40b82bdcf7c9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:48 +0000 Subject: [PATCH 0235/1299] drm/i915: Remove bitmap tracking for used-pdpes We only operate on known extents (both for alloc/clear) and so we can use both the knowledge of the bind/unbind range along with the knowledge of the existing pagetable to avoid having to allocate temporary and auxiliary bitmaps. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-14-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 279 +++++++++------------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +- 2 files changed, 86 insertions(+), 196 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f3171d56d914..46a791b29ae5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -529,24 +529,13 @@ static void gen8_initialize_pd(struct i915_address_space *vm, static int __pdp_init(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { - size_t pdpes = I915_PDPES_PER_PDP(vm->i915); - int i; - - pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), - sizeof(unsigned long), - GFP_KERNEL); - if (!pdp->used_pdpes) - return -ENOMEM; + const unsigned int pdpes = I915_PDPES_PER_PDP(vm->i915); + unsigned int i; pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory), - GFP_KERNEL); - if (!pdp->page_directory) { - kfree(pdp->used_pdpes); - /* the PDP might be the statically allocated top level. Keep it - * as clean as possible */ - pdp->used_pdpes = NULL; + GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pdp->page_directory)) return -ENOMEM; - } for (i = 0; i < pdpes; i++) pdp->page_directory[i] = vm->scratch_pd; @@ -556,7 +545,6 @@ static int __pdp_init(struct i915_address_space *vm, static void __pdp_fini(struct i915_page_directory_pointer *pdp) { - kfree(pdp->used_pdpes); kfree(pdp->page_directory); pdp->page_directory = NULL; } @@ -614,23 +602,12 @@ static void gen8_initialize_pdp(struct i915_address_space *vm, static void gen8_initialize_pml4(struct i915_address_space *vm, struct i915_pml4 *pml4) { - gen8_ppgtt_pml4e_t scratch_pml4e; + unsigned int i; - scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), - I915_CACHE_LLC); - - fill_px(vm, pml4, scratch_pml4e); -} - -static void -gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4, - struct i915_page_directory_pointer *pdp, - int index) -{ - gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4); - - pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); - kunmap_atomic(pagemap); + fill_px(vm, pml4, + gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC)); + for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) + pml4->pdps[i] = vm->scratch_pdp; } /* Broadwell Page Directory Pointer Descriptors */ @@ -783,15 +760,12 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, continue; gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); - __clear_bit(pdpe, pdp->used_pdpes); + pdp->used_pdpes--; free_pd(vm, pd); } - if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) - return true; - - return false; + return !pdp->used_pdpes; } static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, @@ -800,6 +774,19 @@ static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length); } +static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4, + struct i915_page_directory_pointer *pdp, + unsigned int pml4e) +{ + gen8_ppgtt_pml4e_t *vaddr; + + pml4->pdps[pml4e] = pdp; + + vaddr = kmap_atomic_px(pml4); + vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); + kunmap_atomic(vaddr); +} + /* Removes entries from a single pml4. * This is the top-level structure in 4-level page tables used on gen8+. * Empty entries are always scratch pml4e. @@ -810,19 +797,18 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; - uint64_t pml4e; + unsigned int pml4e; GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (WARN_ON(!pml4->pdps[pml4e])) - break; + if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length)) + continue; - if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { - __clear_bit(pml4e, pml4->used_pml4es); - gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); - free_pdp(vm, pdp); - } + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + __clear_bit(pml4e, pml4->used_pml4es); + + free_pdp(vm, pdp); } } @@ -1026,7 +1012,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, { int i; - for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) { + for (i = 0; i < I915_PDPES_PER_PDP(vm->i915); i++) { if (pdp->page_directory[i] == vm->scratch_pd) continue; @@ -1096,65 +1082,6 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, return -ENOMEM; } -/** - * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. - * @vm: Master vm structure. - * @pdp: Page directory pointer for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * @new_pds: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. - * - * Allocate the required number of page directories starting at the pde index of - * @start, and ending at the pde index @start + @length. This function will skip - * over already allocated page directories within the range, and only allocate - * new ones, setting the appropriate pointer within the pdp as well as the - * correct position in the bitmap @new_pds. - * - * The function will only allocate the pages within the range for a give page - * directory pointer. In other words, if @start + @length straddles a virtually - * addressed PDP boundary (512GB for 4k pages), there will be more allocations - * required by the caller, This is not currently possible, and the BUG in the - * code will prevent it. - * - * Return: 0 if success; negative error code otherwise. - */ -static int -gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length, - unsigned long *new_pds) -{ - struct i915_page_directory *pd; - uint32_t pdpe; - uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); - - WARN_ON(!bitmap_empty(new_pds, pdpes)); - - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (test_bit(pdpe, pdp->used_pdpes)) - continue; - - pd = alloc_pd(vm); - if (IS_ERR(pd)) - goto unwind_out; - - gen8_initialize_pd(vm, pd); - pdp->page_directory[pdpe] = pd; - __set_bit(pdpe, new_pds); - trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); - } - - return 0; - -unwind_out: - for_each_set_bit(pdpe, new_pds, pdpes) - free_pd(vm, pdp->page_directory[pdpe]); - - return -ENOMEM; -} - /** * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. * @vm: Master vm structure. @@ -1175,23 +1102,19 @@ static int gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, struct i915_pml4 *pml4, uint64_t start, - uint64_t length, - unsigned long *new_pdps) + uint64_t length) { struct i915_page_directory_pointer *pdp; uint32_t pml4e; - WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { if (!test_bit(pml4e, pml4->used_pml4es)) { pdp = alloc_pdp(vm); if (IS_ERR(pdp)) - goto unwind_out; + return PTR_ERR(pdp); gen8_initialize_pdp(vm, pdp); pml4->pdps[pml4e] = pdp; - __set_bit(pml4e, new_pdps); trace_i915_page_directory_pointer_entry_alloc(vm, pml4e, start, @@ -1200,34 +1123,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, } return 0; - -unwind_out: - for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - free_pdp(vm, pml4->pdps[pml4e]); - - return -ENOMEM; -} - -static void -free_gen8_temp_bitmaps(unsigned long *new_pds) -{ - kfree(new_pds); -} - -/* Fills in the page directory bitmap, and the array of page tables bitmap. Both - * of these are based on the number of PDPEs in the system. - */ -static int __must_check -alloc_gen8_temp_bitmaps(unsigned long **new_pds, uint32_t pdpes) -{ - unsigned long *pds; - - pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); - if (!pds) - return -ENOMEM; - - *new_pds = pds; - return 0; } static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, @@ -1236,47 +1131,37 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, uint64_t length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - unsigned long *new_page_dirs; struct i915_page_directory *pd; - uint32_t pdpe; - uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); + u64 from = start; + unsigned int pdpe; int ret; - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes); - if (ret) - return ret; - - /* Do the allocations first so we can easily bail out */ - ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, - new_page_dirs); - if (ret) { - free_gen8_temp_bitmaps(new_page_dirs); - return ret; - } - - /* For every page directory referenced, allocate page tables */ gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); - if (ret) - goto err_out; + if (pd == vm->scratch_pd) { + pd = alloc_pd(vm); + if (IS_ERR(pd)) + goto unwind; - if (test_and_set_bit(pdpe, pdp->used_pdpes)) + gen8_initialize_pd(vm, pd); gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); + pdp->used_pdpes++; + } + + ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); + if (unlikely(ret)) { + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + pdp->used_pdpes--; + free_pd(vm, pd); + goto unwind; + } } - /* Allocations have completed successfully, so set the bitmaps, and do - * the mappings. */ - free_gen8_temp_bitmaps(new_page_dirs); mark_tlbs_dirty(ppgtt); return 0; -err_out: - for_each_set_bit(pdpe, new_page_dirs, pdpes) - free_pd(vm, pdp->page_directory[pdpe]); - - free_gen8_temp_bitmaps(new_page_dirs); - mark_tlbs_dirty(ppgtt); - return ret; +unwind: + gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); + return -ENOMEM; } static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, @@ -1296,8 +1181,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, /* The pagedirectory and pagetable allocations are done in the shared 3 * and 4 level code. Just allocate the pdps. */ - ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, - new_pdps); + ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length); if (ret) return ret; @@ -1349,7 +1233,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, uint64_t pd_start = start; uint32_t pde; - if (!test_bit(pdpe, pdp->used_pdpes)) + if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd) continue; seq_printf(m, "\tPDPE #%d\n", pdpe); @@ -1414,31 +1298,36 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) } } -static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) +static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) { - unsigned long *new_page_dirs; - uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev)); - int ret; + struct i915_address_space *vm = &ppgtt->base; + struct i915_page_directory_pointer *pdp = &ppgtt->pdp; + struct i915_page_directory *pd; + u64 start = 0, length = ppgtt->base.total; + u64 from = start; + unsigned int pdpe; - /* We allocate temp bitmap for page tables for no gain - * but as this is for init only, lets keep the things simple - */ - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes); - if (ret) - return ret; + gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { + pd = alloc_pd(vm); + if (IS_ERR(pd)) + goto unwind; - /* Allocate for all pdps regardless of how the ppgtt - * was defined. - */ - ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, - 0, 1ULL << 32, - new_page_dirs); - if (!ret) - *ppgtt->pdp.used_pdpes = *new_page_dirs; + gen8_initialize_pd(vm, pd); + gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); + pdp->used_pdpes++; + } - free_gen8_temp_bitmaps(new_page_dirs); + pdp->used_pdpes++; /* never remove */ + return 0; - return ret; +unwind: + start -= from; + gen8_for_each_pdpe(pd, pdp, from, start, pdpe) { + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + free_pd(vm, pd); + } + pdp->used_pdpes = 0; + return -ENOMEM; } /* @@ -1494,9 +1383,11 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) GEN8_PML4E_SHIFT); if (intel_vgpu_active(dev_priv)) { - ret = gen8_preallocate_top_level_pdps(ppgtt); - if (ret) + ret = gen8_preallocate_top_level_pdp(ppgtt); + if (ret) { + __pdp_fini(&ppgtt->pdp); goto free_scratch; + } } ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index a62b0ef1f3fc..3628c7c564ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -232,9 +232,8 @@ struct i915_page_directory { struct i915_page_directory_pointer { struct i915_page_dma base; - - unsigned long *used_pdpes; struct i915_page_directory **page_directory; + unsigned int used_pdpes; }; struct i915_pml4 { From c5d092a4293fa33559d5464c5bd58a2e3a44d6d7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:49 +0000 Subject: [PATCH 0236/1299] drm/i915: Remove bitmap tracking for used-pml4 We only operate on known extents (both for alloc/clear) and so we can use both the knowledge of the bind/unbind range along with the knowledge of the existing pagetable to avoid having to allocate temporary and auxiliary bitmaps. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-15-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 132 ++++++++-------------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 - 2 files changed, 38 insertions(+), 96 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 46a791b29ae5..76ec3fa0a67a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -806,7 +806,6 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, continue; gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); - __clear_bit(pml4e, pml4->used_pml4es); free_pdp(vm, pdp); } @@ -1027,8 +1026,8 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) { int i; - for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { - if (WARN_ON(!ppgtt->pml4.pdps[i])) + for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { + if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp) continue; gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]); @@ -1082,53 +1081,9 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, return -ENOMEM; } -/** - * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. - * @vm: Master vm structure. - * @pml4: Page map level 4 for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * @new_pdps: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. - * - * Allocate the required number of page directory pointers. Extremely similar to - * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pd(). - * The main difference is here we are limited by the pml4 boundary (instead of - * the page directory pointer). - * - * Return: 0 if success; negative error code otherwise. - */ -static int -gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length) -{ - struct i915_page_directory_pointer *pdp; - uint32_t pml4e; - - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (!test_bit(pml4e, pml4->used_pml4es)) { - pdp = alloc_pdp(vm); - if (IS_ERR(pdp)) - return PTR_ERR(pdp); - - gen8_initialize_pdp(vm, pdp); - pml4->pdps[pml4e] = pdp; - trace_i915_page_directory_pointer_entry_alloc(vm, - pml4e, - start, - GEN8_PML4E_SHIFT); - } - } - - return 0; -} - -static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length) +static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + u64 start, u64 length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd; @@ -1164,58 +1119,46 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, return -ENOMEM; } -static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length) +static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm, + u64 start, u64 length) { - DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); + return gen8_ppgtt_alloc_pdp(vm, + &i915_vm_to_ppgtt(vm)->pdp, start, length); +} + +static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; - uint64_t pml4e; - int ret = 0; - - /* Do the pml4 allocations first, so we don't need to track the newly - * allocated tables below the pdp */ - bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); - - /* The pagedirectory and pagetable allocations are done in the shared 3 - * and 4 level code. Just allocate the pdps. - */ - ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length); - if (ret) - return ret; + u64 from = start; + u32 pml4e; + int ret; gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - WARN_ON(!pdp); + if (pml4->pdps[pml4e] == vm->scratch_pdp) { + pdp = alloc_pdp(vm); + if (IS_ERR(pdp)) + goto unwind; - ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); - if (ret) - goto err_out; + gen8_initialize_pdp(vm, pdp); + gen8_ppgtt_set_pml4e(pml4, pdp, pml4e); + } - gen8_ppgtt_set_pml4e(pml4, pdp, pml4e); + ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); + if (unlikely(ret)) { + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + free_pdp(vm, pdp); + goto unwind; + } } - bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, - GEN8_PML4ES_PER_PML4); - return 0; -err_out: - for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - gen8_ppgtt_cleanup_3lvl(vm, pml4->pdps[pml4e]); - - return ret; -} - -static int gen8_alloc_va_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) -{ - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - - if (USES_FULL_48BIT_PPGTT(vm->i915)) - return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); - else - return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); +unwind: + gen8_ppgtt_clear_4lvl(vm, from, start - from); + return -ENOMEM; } static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, @@ -1289,7 +1232,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) struct i915_page_directory_pointer *pdp; gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (!test_bit(pml4e, pml4->used_pml4es)) + if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp) continue; seq_printf(m, " PML4E #%llu\n", pml4e); @@ -1348,7 +1291,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.start = 0; ppgtt->base.cleanup = gen8_ppgtt_cleanup; - ppgtt->base.allocate_va_range = gen8_alloc_va_range; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; ppgtt->debug_dump = gen8_dump_ppgtt; @@ -1369,6 +1311,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.total = 1ULL << 48; ppgtt->switch_mm = gen8_48b_mm_switch; + ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; } else { @@ -1390,6 +1333,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) } } + ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl; ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 3628c7c564ae..933888725368 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -238,8 +238,6 @@ struct i915_page_directory_pointer { struct i915_pml4 { struct i915_page_dma base; - - DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4); struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4]; }; From ec151f31cd81cc99b957d6b528709d6ecfb25801 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:50 +0000 Subject: [PATCH 0237/1299] drm/i915: Remove superfluous posting reads after clear GGTT The barrier here is not required - we apply the barrier before the range is ever reused by the GPU instead. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-16-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 76ec3fa0a67a..234a65e8f7b1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2184,7 +2184,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, for (i = 0; i < num_entries; i++) gen8_set_pte(>t_base[i], scratch_pte); - readl(gtt_base); } static void gen6_ggtt_clear_range(struct i915_address_space *vm, @@ -2209,7 +2208,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, for (i = 0; i < num_entries; i++) iowrite32(scratch_pte, >t_base[i]); - readl(gtt_base); } static void i915_ggtt_insert_page(struct i915_address_space *vm, @@ -2233,7 +2231,6 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm, AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); - } static void i915_ggtt_clear_range(struct i915_address_space *vm, From 75afcf72c714cd324b0be5f23916d2d501bdb9e6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:51 +0000 Subject: [PATCH 0238/1299] drm/i915: Always mark the PDP as dirty when altered We want to reload the PDP (and flush the TLB) when the addresses are changed. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-17-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 234a65e8f7b1..9db033c8e7c5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1085,7 +1085,6 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp, u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd; u64 from = start; unsigned int pdpe; @@ -1100,6 +1099,8 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, gen8_initialize_pd(vm, pd); gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); pdp->used_pdpes++; + + mark_tlbs_dirty(i915_vm_to_ppgtt(vm)); } ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); @@ -1111,7 +1112,6 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, } } - mark_tlbs_dirty(ppgtt); return 0; unwind: From 3dc523eacef1f73c71b30babad91f028e2946b7d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:52 +0000 Subject: [PATCH 0239/1299] drm/i915: Remove defunct GTT tracepoints The tracepoints are now entirely synonymous with binding and unbinding the VMA (and the tracepoints there). Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-18-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 -- drivers/gpu/drm/i915/i915_trace.h | 97 ----------------------------- 2 files changed, 101 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9db033c8e7c5..0748c4406f27 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -193,7 +193,6 @@ static int ppgtt_bind_vma(struct i915_vma *vma, u32 pte_flags; int ret; - trace_i915_va_alloc(vma); ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); if (ret) return ret; @@ -1321,9 +1320,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.total = 1ULL << 32; ppgtt->switch_mm = gen8_legacy_mm_switch; - trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, - 0, 0, - GEN8_PML4E_SHIFT); if (intel_vgpu_active(dev_priv)) { ret = gen8_preallocate_top_level_pdp(ppgtt); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index de31c49781d3..7a547cdfc381 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -175,103 +175,6 @@ TRACE_EVENT(i915_vma_unbind, __entry->obj, __entry->offset, __entry->size, __entry->vm) ); -TRACE_EVENT(i915_va_alloc, - TP_PROTO(struct i915_vma *vma), - TP_ARGS(vma), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u64, start) - __field(u64, end) - ), - - TP_fast_assign( - __entry->vm = vma->vm; - __entry->start = vma->node.start; - __entry->end = vma->node.start + vma->node.size - 1; - ), - - TP_printk("vm=%p (%c), 0x%llx-0x%llx", - __entry->vm, i915_is_ggtt(__entry->vm) ? 'G' : 'P', __entry->start, __entry->end) -); - -DECLARE_EVENT_CLASS(i915_px_entry, - TP_PROTO(struct i915_address_space *vm, u32 px, u64 start, u64 px_shift), - TP_ARGS(vm, px, start, px_shift), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u32, px) - __field(u64, start) - __field(u64, end) - ), - - TP_fast_assign( - __entry->vm = vm; - __entry->px = px; - __entry->start = start; - __entry->end = ((start + (1ULL << px_shift)) & ~((1ULL << px_shift)-1)) - 1; - ), - - TP_printk("vm=%p, pde=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -DEFINE_EVENT(i915_px_entry, i915_page_table_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 pde_shift), - TP_ARGS(vm, pde, start, pde_shift) -); - -DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pdpe, u64 start, u64 pdpe_shift), - TP_ARGS(vm, pdpe, start, pdpe_shift), - - TP_printk("vm=%p, pdpe=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pml4e, u64 start, u64 pml4e_shift), - TP_ARGS(vm, pml4e, start, pml4e_shift), - - TP_printk("vm=%p, pml4e=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -/* Avoid extra math because we only support two sizes. The format is defined by - * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */ -#define TRACE_PT_SIZE(bits) \ - ((((bits) == 1024) ? 288 : 144) + 1) - -DECLARE_EVENT_CLASS(i915_page_table_entry_update, - TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count), - TP_ARGS(vm, pde, pt, first, count), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u32, pde) - __field(u32, first) - __field(u32, last) - ), - - TP_fast_assign( - __entry->vm = vm; - __entry->pde = pde; - __entry->first = first; - __entry->last = first + count - 1; - ), - - TP_printk("vm=%p, pde=%d, updating %u:%u", - __entry->vm, __entry->pde, __entry->last, __entry->first) -); - -DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map, - TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count), - TP_ARGS(vm, pde, pt, first, count) -); - TRACE_EVENT(i915_gem_object_change_domain, TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write), TP_ARGS(obj, old_read, old_write), From 998f6c00a1dcb8c63de3744c37155eefcf27a421 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:53 +0000 Subject: [PATCH 0240/1299] drm/i915: Remove unused ppgtt->enable() We never assign or use the ppgtt->enable() callback, so remove it. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-19-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 933888725368..6210550e15cf 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -385,7 +385,6 @@ struct i915_hw_ppgtt { gen6_pte_t __iomem *pd_addr; - int (*enable)(struct i915_hw_ppgtt *ppgtt); int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); From 381b943b07027f54083ed685368e261c1089ce53 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:54 +0000 Subject: [PATCH 0241/1299] drm/i915: Remove i915_address_space.start Once upon a time, back in the UMS days, we supported userspace initialising the GTT and sharing portions of the GTT with other users. Now, we own the GTT (both global and per-process) and the tables always start at 0 - so we can remove i915_address_space.start and forget about this old complication. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-20-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 16 ++++++---------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - drivers/gpu/drm/i915/i915_vgpu.c | 9 ++++----- 5 files changed, 12 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0de0596d41ac..20b3d1328a01 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -476,7 +476,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) dpy_count, dpy_size); seq_printf(m, "%llu [%llu] gtt total\n", - ggtt->base.total, ggtt->mappable_end - ggtt->base.start); + ggtt->base.total, ggtt->mappable_end); seq_putc(m, '\n'); print_batch_pool_stats(m, dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index ca25b1f7f6d1..a0de5734f7d0 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -274,7 +274,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, check_color = vm->mm.color_adjust; if (check_color) { /* Expand search to cover neighbouring guard pages (or lack!) */ - if (start > vm->start) + if (start) start -= I915_GTT_PAGE_SIZE; /* Always look at the page afterwards to avoid the end-of-GTT */ diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0748c4406f27..ebd87c337394 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1218,10 +1218,9 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) { struct i915_address_space *vm = &ppgtt->base; - uint64_t start = ppgtt->base.start; - uint64_t length = ppgtt->base.total; const gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + u64 start = 0, length = ppgtt->base.total; if (!USES_FULL_48BIT_PPGTT(vm->i915)) { gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); @@ -1288,7 +1287,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ppgtt->base.start = 0; ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; @@ -1349,9 +1347,8 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) struct i915_address_space *vm = &ppgtt->base; struct i915_page_table *unused; gen6_pte_t scratch_pte; - uint32_t pd_entry; - uint32_t pte, pde; - uint32_t start = ppgtt->base.start, length = ppgtt->base.total; + u32 pd_entry, pte, pde; + u32 start = 0, length = ppgtt->base.total; scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); @@ -1785,7 +1782,6 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; ppgtt->base.cleanup = gen6_ppgtt_cleanup; - ppgtt->base.start = 0; ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; ppgtt->debug_dump = gen6_dump_ppgtt; @@ -1826,7 +1822,7 @@ static void i915_address_space_init(struct i915_address_space *vm, { i915_gem_timeline_init(dev_priv, &vm->timeline, name); - drm_mm_init(&vm->mm, vm->start, vm->total); + drm_mm_init(&vm->mm, 0, vm->total); vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; INIT_LIST_HEAD(&vm->active_list); @@ -2012,7 +2008,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); - ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); + ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); i915_ggtt_invalidate(dev_priv); } @@ -2980,7 +2976,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); /* First fill our portion of the GTT with scratch pages */ - ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); + ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 6210550e15cf..e85ff6c97208 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -256,7 +256,6 @@ struct i915_address_space { */ struct drm_i915_file_private *file; struct list_head global_link; - u64 start; /* Start offset always 0 for dri2 */ u64 total; /* size addr space maps (ex. 2GB for ggtt) */ bool closed; diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index d0abfd08a01c..14014068dfcf 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -179,7 +179,7 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt, int intel_vgt_balloon(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - unsigned long ggtt_end = ggtt->base.start + ggtt->base.total; + unsigned long ggtt_end = ggtt->base.total; unsigned long mappable_base, mappable_size, mappable_end; unsigned long unmappable_base, unmappable_size, unmappable_end; @@ -202,8 +202,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) DRM_INFO("Unmappable graphic memory: base 0x%lx size %ldKiB\n", unmappable_base, unmappable_size / 1024); - if (mappable_base < ggtt->base.start || - mappable_end > ggtt->mappable_end || + if (mappable_end > ggtt->mappable_end || unmappable_base < ggtt->mappable_end || unmappable_end > ggtt_end) { DRM_ERROR("Invalid ballooning configuration!\n"); @@ -231,9 +230,9 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) } /* Mappable graphic memory ballooning */ - if (mappable_base > ggtt->base.start) { + if (mappable_base) { ret = vgt_balloon_space(ggtt, &bl_info.space[0], - ggtt->base.start, mappable_base); + 0, mappable_base); if (ret) goto err; From e565ceb086d70e577e41315b921014dcfb991a6b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:55 +0000 Subject: [PATCH 0242/1299] drm/i915: Only preallocate the aliasing GTT to the extents of the global GTT As the aliasing GTT is only accessed via the global GTT, we will never use more of it than we expose via the Global GTT and so we only need to preallocate sufficient space within the ppgtt for the full GTT. Equally, if the aliasing GTT is smaller than the global GTT, we have a serious issue and must bail. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-21-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ebd87c337394..7fbb10804319 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2384,9 +2384,19 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); + if (WARN_ON(ppgtt->base.total < ggtt->base.total)) { + err = -ENODEV; + goto err_ppgtt; + } + if (ppgtt->base.allocate_va_range) { + /* Note we only pre-allocate as far as the end of the global + * GTT. On 48b / 4-level page-tables, the difference is very, + * very significant! We have to preallocate as GVT/vgpu does + * not like the page directory disappearing. + */ err = ppgtt->base.allocate_va_range(&ppgtt->base, - 0, ppgtt->base.total); + 0, ggtt->base.total); if (err) goto err_ppgtt; } From 57202f47af85381f31f317a2a75fe76ea4c76925 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:56 +0000 Subject: [PATCH 0243/1299] drm/i915: Differentiate the aliasing_ppgtt with an invalid filp Use an invalid filp so that the aliasing_ppgtt can be clearly identified. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-22-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7fbb10804319..84b50d636688 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2380,7 +2380,7 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) struct i915_hw_ppgtt *ppgtt; int err; - ppgtt = i915_ppgtt_create(i915, NULL, "[alias]"); + ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]"); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); From 75c7b0b8620ce5a672da02e721a775acb7863c64 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:57 +0000 Subject: [PATCH 0244/1299] drm/i915: Use preferred kernel types in i915_gem_gtt.c Make checkpatch happy and make the use of u32/u64 consistent throughout i915_gem_gtt.[ch] Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-23-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 83 +++++++++++++++-------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 56 ++++++++++--------- 2 files changed, 69 insertions(+), 70 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 84b50d636688..47a38272f54c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -888,7 +888,7 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, struct sg_table *pages, - uint64_t start, + u64 start, enum i915_cache_level cache_level, u32 unused) { @@ -1162,25 +1162,25 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory_pointer *pdp, - uint64_t start, uint64_t length, + u64 start, u64 length, gen8_pte_t scratch_pte, struct seq_file *m) { struct i915_page_directory *pd; - uint32_t pdpe; + u32 pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { struct i915_page_table *pt; - uint64_t pd_len = length; - uint64_t pd_start = start; - uint32_t pde; + u64 pd_len = length; + u64 pd_start = start; + u32 pde; if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd) continue; seq_printf(m, "\tPDPE #%d\n", pdpe); gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { - uint32_t pte; + u32 pte; gen8_pte_t *pt_vaddr; if (pd->page_table[pde] == ppgtt->base.scratch_pt) @@ -1188,10 +1188,9 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, pt_vaddr = kmap_atomic_px(pt); for (pte = 0; pte < GEN8_PTES; pte += 4) { - uint64_t va = - (pdpe << GEN8_PDPE_SHIFT) | - (pde << GEN8_PDE_SHIFT) | - (pte << GEN8_PTE_SHIFT); + u64 va = (pdpe << GEN8_PDPE_SHIFT | + pde << GEN8_PDE_SHIFT | + pte << GEN8_PTE_SHIFT); int i; bool found = false; @@ -1225,7 +1224,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) if (!USES_FULL_48BIT_PPGTT(vm->i915)) { gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); } else { - uint64_t pml4e; + u64 pml4e; struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; @@ -1407,7 +1406,7 @@ static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt, /* Write all the page tables found in the ppgtt structure to incrementing page * directories. */ static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, - uint32_t start, uint32_t length) + u32 start, u32 length) { struct i915_page_table *pt; unsigned int pde; @@ -1419,7 +1418,7 @@ static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, wmb(); } -static inline uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) +static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) { GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); return ppgtt->pd.base.ggtt_offset << 10; @@ -1513,7 +1512,7 @@ static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv) static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - uint32_t ecochk, ecobits; + u32 ecochk, ecobits; enum intel_engine_id id; ecobits = I915_READ(GAC_ECO_BITS); @@ -1537,7 +1536,7 @@ static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) { - uint32_t ecochk, gab_ctl, ecobits; + u32 ecochk, gab_ctl, ecobits; ecobits = I915_READ(GAC_ECO_BITS); I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | @@ -1589,8 +1588,9 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, u32 flags) + u64 start, + enum i915_cache_level cache_level, + u32 flags) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned first_entry = start >> PAGE_SHIFT; @@ -1690,7 +1690,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd = &ppgtt->pd; struct i915_page_table *pt; - uint32_t pde; + u32 pde; drm_mm_remove_node(&ppgtt->node); @@ -1748,10 +1748,10 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) } static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, - uint64_t start, uint64_t length) + u64 start, u64 length) { struct i915_page_table *unused; - uint32_t pde; + u32 pde; gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; @@ -2045,7 +2045,7 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void gen8_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level level, u32 unused) { @@ -2060,8 +2060,9 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level level, u32 unused) + u64 start, + enum i915_cache_level level, + u32 unused) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; @@ -2086,7 +2087,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct insert_entries { struct i915_address_space *vm; struct sg_table *st; - uint64_t start; + u64 start; enum i915_cache_level level; u32 flags; }; @@ -2101,7 +2102,7 @@ static int gen8_ggtt_insert_entries__cb(void *_arg) static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, + u64 start, enum i915_cache_level level, u32 flags) { @@ -2111,7 +2112,7 @@ static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, static void gen6_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level level, u32 flags) { @@ -2132,8 +2133,9 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm, */ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level level, u32 flags) + u64 start, + enum i915_cache_level level, + u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; @@ -2152,12 +2154,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, } static void nop_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) + u64 start, u64 length) { } static void gen8_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) + u64 start, u64 length) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; @@ -2179,8 +2181,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, } static void gen6_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; @@ -2204,7 +2205,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, static void i915_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level cache_level, u32 unused) { @@ -2216,8 +2217,9 @@ static void i915_ggtt_insert_page(struct i915_address_space *vm, static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, u32 unused) + u64 start, + enum i915_cache_level cache_level, + u32 unused) { unsigned int flags = (cache_level == I915_CACHE_NONE) ? AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; @@ -2226,8 +2228,7 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm, } static void i915_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); } @@ -2641,7 +2642,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) * writing this data shouldn't be harmful even in those cases. */ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) { - uint64_t pat; + u64 pat; pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ @@ -2676,7 +2677,7 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) { - uint64_t pat; + u64 pat; /* * Map WB on BDW to snooped on CHV. @@ -3073,7 +3074,7 @@ static noinline struct sg_table * intel_rotate_pages(struct intel_rotation_info *rot_info, struct drm_i915_gem_object *obj) { - const size_t n_pages = obj->base.size / PAGE_SIZE; + const unsigned long n_pages = obj->base.size / PAGE_SIZE; unsigned int size = intel_rotation_info_size(rot_info); struct sgt_iter sgt_iter; dma_addr_t dma_addr; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index e85ff6c97208..f7d4e194a227 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -53,11 +53,11 @@ struct drm_i915_file_private; struct drm_i915_fence_reg; -typedef uint32_t gen6_pte_t; -typedef uint64_t gen8_pte_t; -typedef uint64_t gen8_pde_t; -typedef uint64_t gen8_ppgtt_pdpe_t; -typedef uint64_t gen8_ppgtt_pml4e_t; +typedef u32 gen6_pte_t; +typedef u64 gen8_pte_t; +typedef u64 gen8_pde_t; +typedef u64 gen8_ppgtt_pdpe_t; +typedef u64 gen8_ppgtt_pml4e_t; #define ggtt_total_entries(ggtt) ((ggtt)->base.total >> PAGE_SHIFT) @@ -143,7 +143,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN8_PPAT_WC (1<<0) #define GEN8_PPAT_UC (0<<0) #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) -#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8)) +#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) struct sg_table; @@ -210,7 +210,7 @@ struct i915_page_dma { /* For gen6/gen7 only. This is the offset in the GGTT * where the page directory entries for PPGTT begin */ - uint32_t ggtt_offset; + u32 ggtt_offset; }; }; @@ -305,20 +305,19 @@ struct i915_address_space { /* flags for pte_encode */ #define PTE_READ_ONLY (1<<0) int (*allocate_va_range)(struct i915_address_space *vm, - uint64_t start, - uint64_t length); + u64 start, u64 length); void (*clear_range)(struct i915_address_space *vm, - uint64_t start, - uint64_t length); + u64 start, u64 length); void (*insert_page)(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level cache_level, u32 flags); void (*insert_entries)(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level cache_level, u32 flags); + u64 start, + enum i915_cache_level cache_level, + u32 flags); void (*cleanup)(struct i915_address_space *vm); /** Unmap an object from an address space. This usually consists of * setting the valid PTE entries to a reserved scratch page. */ @@ -411,9 +410,9 @@ struct i915_hw_ppgtt { (pt = (pd)->page_table[iter], true); \ ++iter) -static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift) +static inline u32 i915_pte_index(u64 address, unsigned int pde_shift) { - const uint32_t mask = NUM_PTE(pde_shift) - 1; + const u32 mask = NUM_PTE(pde_shift) - 1; return (address >> PAGE_SHIFT) & mask; } @@ -422,11 +421,10 @@ static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift) * does not cross a page table boundary, so the max value would be * GEN6_PTES for GEN6, and GEN8_PTES for GEN8. */ -static inline uint32_t i915_pte_count(uint64_t addr, size_t length, - uint32_t pde_shift) +static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift) { - const uint64_t mask = ~((1ULL << pde_shift) - 1); - uint64_t end; + const u64 mask = ~((1ULL << pde_shift) - 1); + u64 end; WARN_ON(length == 0); WARN_ON(offset_in_page(addr|length)); @@ -439,22 +437,22 @@ static inline uint32_t i915_pte_count(uint64_t addr, size_t length, return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift); } -static inline uint32_t i915_pde_index(uint64_t addr, uint32_t shift) +static inline u32 i915_pde_index(u64 addr, u32 shift) { return (addr >> shift) & I915_PDE_MASK; } -static inline uint32_t gen6_pte_index(uint32_t addr) +static inline u32 gen6_pte_index(u32 addr) { return i915_pte_index(addr, GEN6_PDE_SHIFT); } -static inline size_t gen6_pte_count(uint32_t addr, uint32_t length) +static inline u32 gen6_pte_count(u32 addr, u32 length) { return i915_pte_count(addr, length, GEN6_PDE_SHIFT); } -static inline uint32_t gen6_pde_index(uint32_t addr) +static inline u32 gen6_pde_index(u32 addr) { return i915_pde_index(addr, GEN6_PDE_SHIFT); } @@ -487,27 +485,27 @@ static inline uint32_t gen6_pde_index(uint32_t addr) temp = min(temp - start, length); \ start += temp, length -= temp; }), ++iter) -static inline uint32_t gen8_pte_index(uint64_t address) +static inline u32 gen8_pte_index(u64 address) { return i915_pte_index(address, GEN8_PDE_SHIFT); } -static inline uint32_t gen8_pde_index(uint64_t address) +static inline u32 gen8_pde_index(u64 address) { return i915_pde_index(address, GEN8_PDE_SHIFT); } -static inline uint32_t gen8_pdpe_index(uint64_t address) +static inline u32 gen8_pdpe_index(u64 address) { return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK; } -static inline uint32_t gen8_pml4e_index(uint64_t address) +static inline u32 gen8_pml4e_index(u64 address) { return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK; } -static inline size_t gen8_pte_count(uint64_t address, uint64_t length) +static inline u64 gen8_pte_count(u64 address, u64 length) { return i915_pte_count(address, length, GEN8_PDE_SHIFT); } From d7e8ef02a6b3d11f0c3ad99c0f656341151c50b9 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Tue, 7 Feb 2017 16:54:11 -0800 Subject: [PATCH 0245/1299] drm/i915/dp: Reset the link params on HPD/connected boot/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The max link parameters should be set/reset only on HPD or connected boot case or on system resume. Add a flag reset_link_params to intel_dp to decide when to reset the max link parameters. This prevents the parameters from getting reset/overwritten through all other connector->funcs->detect() calls. This is important when link training fails and the max link params are modified to the lower fallback values. Cc: Ville Syrjala Reviewed-by: Ville Syrjälä Signed-off-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1486515251-23469-1-git-send-email-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 16 ++++++++++++---- drivers/gpu/drm/i915/intel_drv.h | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 6ef4f3810082..024798a9c016 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4624,11 +4624,15 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) yesno(intel_dp_source_supports_hbr2(intel_dp)), yesno(drm_dp_tps3_supported(intel_dp->dpcd))); - /* Set the max lane count for sink */ - intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + if (intel_dp->reset_link_params) { + /* Set the max lane count for sink */ + intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); - /* Set the max link BW for sink */ - intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); + /* Set the max link BW for sink */ + intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); + + intel_dp->reset_link_params = false; + } intel_dp_print_rates(intel_dp); @@ -5010,6 +5014,8 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) if (lspcon->active) lspcon_resume(lspcon); + intel_dp->reset_link_params = true; + pps_lock(intel_dp); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) @@ -5079,6 +5085,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) long_hpd ? "long" : "short"); if (long_hpd) { + intel_dp->reset_link_params = true; intel_dp->detect_done = false; return IRQ_NONE; } @@ -5947,6 +5954,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dig_port->max_lanes, port_name(port))) return false; + intel_dp->reset_link_params = true; intel_dp->pps_pipe = INVALID_PIPE; intel_dp->active_pipe = INVALID_PIPE; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7845d40f203e..f1cbeae5cf88 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -933,6 +933,7 @@ struct intel_dp { bool has_audio; bool detect_done; bool channel_eq_status; + bool reset_link_params; enum hdmi_force_audio force_audio; bool limited_color_range; bool color_range_auto; From 645a2f6e03ed4a998da4b3642d80dd25d371b6b1 Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Wed, 8 Feb 2017 16:20:50 +0530 Subject: [PATCH 0246/1299] drm/i915: Check for platform specific GPIO config Panel GPIO control should be done based on platform. Add a check to restrict VLV and CHT specific GPIO confirguration, so that they dont apply to other platforms. The VBT spec fails to mention the PMIC backlight control option is valid only for VLV/CHT, and the field may be set to "PMIC" for BXT even if PMIC is not desired or possible. Signed-off-by: Uma Shankar Signed-off-by: Vidya Srinivas [Jani: amended commit message a bit and fixed indentation.] Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1486551058-22596-2-git-send-email-vidya.srinivas@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index c98234eca2a6..c3d97e0bc59d 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1584,7 +1584,8 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) * In case of BYT with CRC PMIC, we need to use GPIO for * Panel control. */ - if (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC) { + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC)) { intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", GPIOD_OUT_HIGH); From 06a20d2d2d4187b9bc1a4c2f62e989a97a086a76 Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Wed, 8 Feb 2017 16:20:51 +0530 Subject: [PATCH 0247/1299] drm/i915: Fix PLL 8x/3 divider for MIPI video mode MIPI Video Mode for high res panels (requiring dual link), need a 8X/3 divider to be programmed as 0x2. Modifying the same in this patch. Signed-off-by: Uma Shankar Signed-off-by: Vidya Srinivas Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1486551058-22596-3-git-send-email-vidya.srinivas@intel.com --- drivers/gpu/drm/i915/intel_dsi_pll.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 61440e5c2563..3a7308681360 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -416,11 +416,7 @@ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, rx_div_lower = rx_div & RX_DIVIDER_BIT_1_2; rx_div_upper = (rx_div & RX_DIVIDER_BIT_3_4) >> 2; - /* As per bpsec program the 8/3X clock divider to the below value */ - if (dev_priv->vbt.dsi.config->is_cmd_mode) - mipi_8by3_divider = 0x2; - else - mipi_8by3_divider = 0x3; + mipi_8by3_divider = 0x2; tmp |= BXT_MIPI_8X_BY3_DIVIDER(port, mipi_8by3_divider); tmp |= BXT_MIPI_TX_ESCLK_DIVIDER(port, tx_div); From e3dc847a5f85b43ee2bfc8eae407a7e383483228 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 15 Feb 2017 11:38:58 +0100 Subject: [PATCH 0248/1299] vti6: Don't report path MTU below IPV6_MIN_MTU. In vti6_xmit(), the check for IPV6_MIN_MTU before we send a ICMPV6_PKT_TOOBIG message is missing. So we might report a PMTU below 1280. Fix this by adding the required check. Fixes: ccd740cbc6e ("vti6: Add pmtu handling to vti6_xmit.") Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index d82042c8d8fd..9156d6b9eb24 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -484,11 +484,15 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (!skb->ignore_df && skb->len > mtu) { skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); - if (skb->protocol == htons(ETH_P_IPV6)) + if (skb->protocol == htons(ETH_P_IPV6)) { + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - else + } else { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + } return -EMSGSIZE; } From 8aeaf64c96a8698f05664a8726337855a4d4fdf2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 15 Feb 2017 17:21:37 +0200 Subject: [PATCH 0249/1299] drm/i915/bxt: apply clock gating workaround to all revisions No need to cater for old A revisions. Reviewed-by: Imre Deak Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487172099-24873-2-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3d311e100643..fe243c65de1a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -99,9 +99,8 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) * Wa: Backlight PWM may stop in the asserted state, causing backlight * to stay fully on. */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) - I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | - PWM1_GATING_DIS | PWM2_GATING_DIS); + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + PWM1_GATING_DIS | PWM2_GATING_DIS); } static void glk_init_clock_gating(struct drm_i915_private *dev_priv) From 362f8b9edb1066b12a9bfa14e72d0ccb66fc742b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 15 Feb 2017 17:21:38 +0200 Subject: [PATCH 0250/1299] drm/i915/bxt: remove snooping workaround on old A revisions No need to cater for old A revisions. Reviewed-by: Imre Deak Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487172099-24873-3-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_device_info.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 0891cc0e8626..2e1fd857e625 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -410,10 +410,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->has_snoop = !info->has_llc; - /* Snooping is broken on BXT A stepping. */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - info->has_snoop = false; - DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask); DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask)); DRM_DEBUG_DRIVER("subslice total: %u\n", From f2254d293790a030ccf12ff4bb0d77b679be3f4d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 15 Feb 2017 17:21:39 +0200 Subject: [PATCH 0251/1299] drm/i915/bxt: remove WaRsDisableCoarsePowerGating for early BXT No need to cater for old A revisions. Reviewed-by: Imre Deak Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487172099-24873-4-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f582f5492e37..2a7b277a86de 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2827,9 +2827,7 @@ intel_info(const struct drm_i915_private *dev_priv) /* WaRsDisableCoarsePowerGating:skl,bxt */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) || \ - IS_SKL_GT3(dev_priv) || \ - IS_SKL_GT4(dev_priv)) + (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) /* * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts From 262fd485ac6b476479f41f00bb104f6a1766ae66 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 13:15:47 +0000 Subject: [PATCH 0252/1299] drm/i915: Only enable hotplug interrupts if the display interrupts are enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to prevent accessing the hpd registers outside of the display power wells, we should refrain from writing to the registers before the display interrupts are enabled. [ 4.740136] WARNING: CPU: 1 PID: 221 at drivers/gpu/drm/i915/intel_uncore.c:795 __unclaimed_reg_debug+0x44/0x50 [i915] [ 4.740155] Unclaimed read from register 0x1e1110 [ 4.740168] Modules linked in: i915(+) intel_gtt drm_kms_helper prime_numbers [ 4.740190] CPU: 1 PID: 221 Comm: systemd-udevd Not tainted 4.10.0-rc6+ #384 [ 4.740203] Hardware name: / , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 4.740220] Call Trace: [ 4.740236] dump_stack+0x4d/0x6f [ 4.740251] __warn+0xc1/0xe0 [ 4.740265] warn_slowpath_fmt+0x4a/0x50 [ 4.740281] ? insert_work+0x77/0xc0 [ 4.740355] ? fwtable_write32+0x90/0x130 [i915] [ 4.740431] __unclaimed_reg_debug+0x44/0x50 [i915] [ 4.740507] fwtable_read32+0xd8/0x130 [i915] [ 4.740575] i915_hpd_irq_setup+0xa5/0x100 [i915] [ 4.740649] intel_hpd_init+0x68/0x80 [i915] [ 4.740716] i915_driver_load+0xe19/0x1380 [i915] [ 4.740784] i915_pci_probe+0x32/0x90 [i915] [ 4.740799] pci_device_probe+0x8b/0xf0 [ 4.740815] driver_probe_device+0x2b6/0x450 [ 4.740828] __driver_attach+0xda/0xe0 [ 4.740841] ? driver_probe_device+0x450/0x450 [ 4.740853] bus_for_each_dev+0x5b/0x90 [ 4.740865] driver_attach+0x19/0x20 [ 4.740878] bus_add_driver+0x166/0x260 [ 4.740892] driver_register+0x5b/0xd0 [ 4.740906] ? 0xffffffffa0166000 [ 4.740920] __pci_register_driver+0x47/0x50 [ 4.740985] i915_init+0x5c/0x5e [i915] [ 4.740999] do_one_initcall+0x3e/0x160 [ 4.741015] ? __vunmap+0x7c/0xc0 [ 4.741029] ? kmem_cache_alloc+0xcf/0x120 [ 4.741045] do_init_module+0x55/0x1c4 [ 4.741060] load_module+0x1f3f/0x25b0 [ 4.741073] ? __symbol_put+0x40/0x40 [ 4.741086] ? kernel_read_file+0x100/0x190 [ 4.741100] SYSC_finit_module+0xbc/0xf0 [ 4.741112] SyS_finit_module+0x9/0x10 [ 4.741125] entry_SYSCALL_64_fastpath+0x17/0x98 [ 4.741135] RIP: 0033:0x7f8559a140f9 [ 4.741145] RSP: 002b:00007fff7509a3e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 4.741161] RAX: ffffffffffffffda RBX: 00007f855aba02d1 RCX: 00007f8559a140f9 [ 4.741172] RDX: 0000000000000000 RSI: 000055b6db0914f0 RDI: 0000000000000011 [ 4.741183] RBP: 0000000000020000 R08: 0000000000000000 R09: 000000000000000e [ 4.741193] R10: 0000000000000011 R11: 0000000000000246 R12: 000055b6db0854d0 [ 4.741204] R13: 000055b6db091150 R14: 0000000000000000 R15: 000055b6db035924 v2: Set dev_priv->display_irqs_enabled to true for all platforms other than vlv/chv that manually control the display power domain. Fixes: 19625e85c6ec ("drm/i915: Enable polling when we don't have hpd") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97798 Suggested-by: Ville Syrjälä Signed-off-by: Chris Wilson Cc: Lyude Cc: Daniel Vetter Cc: Ville Syrjälä Cc: Hans de Goede Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20170215131547.5064-1-chris@chris-wilson.co.uk Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_irq.c | 10 ++++++++++ drivers/gpu/drm/i915/intel_hotplug.c | 14 ++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 84fda3fce4b8..0d8ebd147da6 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4269,6 +4269,16 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (!IS_GEN2(dev_priv)) dev->vblank_disable_immediate = true; + /* Most platforms treat the display irq block as an always-on + * power domain. vlv/chv can disable it at runtime and need + * special care to avoid writing any of the display block registers + * outside of the power domain. We defer setting up the display irqs + * in this case to the runtime pm. + */ + dev_priv->display_irqs_enabled = true; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->display_irqs_enabled = false; + dev_priv->hotplug.hpd_storm_threshold = HPD_STORM_DEFAULT_THRESHOLD; dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 6a9c16508ab5..0756bdc672b3 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -224,7 +224,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) } } } - if (dev_priv->display.hpd_irq_setup) + if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); @@ -430,7 +430,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, } } - if (storm_detected) + if (storm_detected && dev_priv->display_irqs_enabled) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock(&dev_priv->irq_lock); @@ -476,10 +476,12 @@ void intel_hpd_init(struct drm_i915_private *dev_priv) * Interrupt setup is already guaranteed to be single-threaded, this is * just to make the assert_spin_locked checks happy. */ - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.hpd_irq_setup) - dev_priv->display.hpd_irq_setup(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) { + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->display_irqs_enabled) + dev_priv->display.hpd_irq_setup(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); + } } static void i915_hpd_poll_init_work(struct work_struct *work) From 3582ad1361644ba0f937cf6aa59d5c9ead6763ca Mon Sep 17 00:00:00 2001 From: "sagar.a.kamble@intel.com" Date: Fri, 3 Feb 2017 13:58:33 +0530 Subject: [PATCH 0253/1299] drm/i915: Do RPM Wake during GuC/HuC status read HUC_STATUS, GUC_STATUS, SOFT_SCRATCH registers are read in debugfs and getparam ioctl. This patch covers those accesses by RPM get/put. v2: Covering access in i915_getparam(I915_PARAM_HUC_STATUS) (ChrisW) Cc: Arkadiusz Hiler Cc: Anusha Srivatsa Cc: Fiedorowicz, Lukasz Signed-off-by: Sagar Arun Kamble Reviewed-by: Arkadiusz Hiler Signed-off-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1486110513-12130-1-git-send-email-sagar.a.kamble@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++++++ drivers/gpu/drm/i915/i915_drv.c | 5 ++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 20b3d1328a01..11f02fb1f335 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2412,7 +2412,9 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) seq_printf(m, "\tRSA: offset is %d; size = %d\n", huc_fw->rsa_offset, huc_fw->rsa_size); + intel_runtime_pm_get(dev_priv); seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); + intel_runtime_pm_put(dev_priv); return 0; } @@ -2444,6 +2446,8 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) seq_printf(m, "\tRSA: offset is %d; size = %d\n", guc_fw->rsa_offset, guc_fw->rsa_size); + intel_runtime_pm_get(dev_priv); + tmp = I915_READ(GUC_STATUS); seq_printf(m, "\nGuC status 0x%08x:\n", tmp); @@ -2457,6 +2461,8 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) for (i = 0; i < 16; i++) seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i))); + intel_runtime_pm_put(dev_priv); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 50de00921cc9..c992d406ae74 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -319,10 +319,9 @@ static int i915_getparam(struct drm_device *dev, void *data, value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool; break; case I915_PARAM_HUC_STATUS: - /* The register is already force-woken. We dont need - * any rpm here - */ + intel_runtime_pm_get(dev_priv); value = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; + intel_runtime_pm_put(dev_priv); break; case I915_PARAM_MMAP_GTT_VERSION: /* Though we've started our numbering from 1, and so class all From 6043801f937ada9c9ed9dfa3c6ce542a79643401 Mon Sep 17 00:00:00 2001 From: Deepak M Date: Tue, 14 Feb 2017 18:46:16 +0530 Subject: [PATCH 0254/1299] drm/i915: Set the Z inversion overlap field Dual link Z-inversion overlap field is present in MIPI_CTRL register unlike the older platforms, hence setting the same in this patch. Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487078180-15147-5-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index c3d97e0bc59d..99dd7e12abc0 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -456,12 +456,21 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder) if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) { u32 temp; - - temp = I915_READ(VLV_CHICKEN_3); - temp &= ~PIXEL_OVERLAP_CNT_MASK | + if (IS_GEN9_LP(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) { + temp = I915_READ(MIPI_CTRL(port)); + temp &= ~BXT_PIXEL_OVERLAP_CNT_MASK | + intel_dsi->pixel_overlap << + BXT_PIXEL_OVERLAP_CNT_SHIFT; + I915_WRITE(MIPI_CTRL(port), temp); + } + } else { + temp = I915_READ(VLV_CHICKEN_3); + temp &= ~PIXEL_OVERLAP_CNT_MASK | intel_dsi->pixel_overlap << PIXEL_OVERLAP_CNT_SHIFT; - I915_WRITE(VLV_CHICKEN_3, temp); + I915_WRITE(VLV_CHICKEN_3, temp); + } } for_each_dsi_port(port, intel_dsi->ports) { From eba4daf0dc5861703000f58b1d51110ced2b2fb5 Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Wed, 8 Feb 2017 16:20:54 +0530 Subject: [PATCH 0255/1299] drm/i915/bxt: Fix BXT DSI ULPS sequence Fix the Sequence to program BXT DSI Latch and ULPS. Signed-off-by: Uma Shankar Signed-off-by: Vidya Srinivas Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1486551058-22596-6-git-send-email-vidya.srinivas@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 99dd7e12abc0..e7e823d00444 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -366,32 +366,19 @@ static void bxt_dsi_device_ready(struct intel_encoder *encoder) DRM_DEBUG_KMS("\n"); - /* Exit Low power state in 4 steps*/ + /* Enable MIPI PHY transparent latch */ for_each_dsi_port(port, intel_dsi->ports) { - - /* 1. Enable MIPI PHY transparent latch */ val = I915_READ(BXT_MIPI_PORT_CTRL(port)); I915_WRITE(BXT_MIPI_PORT_CTRL(port), val | LP_OUTPUT_HOLD); usleep_range(2000, 2500); + } - /* 2. Enter ULPS */ + /* Clear ULPS and set device ready */ + for_each_dsi_port(port, intel_dsi->ports) { val = I915_READ(MIPI_DEVICE_READY(port)); val &= ~ULPS_STATE_MASK; - val |= (ULPS_STATE_ENTER | DEVICE_READY); I915_WRITE(MIPI_DEVICE_READY(port), val); - /* at least 2us - relaxed for hrtimer subsystem optimization */ - usleep_range(10, 50); - - /* 3. Exit ULPS */ - val = I915_READ(MIPI_DEVICE_READY(port)); - val &= ~ULPS_STATE_MASK; - val |= (ULPS_STATE_EXIT | DEVICE_READY); - I915_WRITE(MIPI_DEVICE_READY(port), val); - usleep_range(1000, 1500); - - /* Clear ULPS and set device ready */ - val = I915_READ(MIPI_DEVICE_READY(port)); - val &= ~ULPS_STATE_MASK; + usleep_range(2000, 2500); val |= DEVICE_READY; I915_WRITE(MIPI_DEVICE_READY(port), val); } From bbdf0b2ff32aa75c7bd167569130e9391d2e6282 Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Wed, 8 Feb 2017 16:20:56 +0530 Subject: [PATCH 0256/1299] drm/i915/bxt: Disable device ready before shutdown command Disable device ready before MIPI port shutdown command. This helps to avoid mipi split screen issues. Signed-off-by: Uma Shankar Signed-off-by: Vidya Srinivas Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1486551058-22596-8-git-send-email-vidya.srinivas@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index e7e823d00444..c26fe4fc0819 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -612,6 +612,8 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state) { + struct drm_device *dev = encoder->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; @@ -619,6 +621,15 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, intel_panel_disable_backlight(intel_dsi->attached_connector); + /* + * Disable Device ready before the port shutdown in order + * to avoid split screen + */ + if (IS_BROXTON(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) + I915_WRITE(MIPI_DEVICE_READY(port), 0); + } + if (is_vid_mode(intel_dsi)) { /* Send Shutdown command to the panel in LP mode */ for_each_dsi_port(port, intel_dsi->ports) From c1d2061b28c2aa25ec39b60d9c248e6beebd7315 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Feb 2017 12:54:41 +0000 Subject: [PATCH 0257/1299] drm/i915: Squelch any ktime/jiffie rounding errors for wait-ioctl We wait upon jiffies, but report the time elapsed using a high-resolution timer. This discrepancy can lead to us timing out the wait prior to us reporting the elapsed time as complete. This restores the squelching lost in commit e95433c73a11 ("drm/i915: Rearrange i915_wait_request() accounting with callers"). Fixes: e95433c73a11 ("drm/i915: Rearrange i915_wait_request() accounting with callers") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: # v4.10-rc1+ Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20170216125441.30923-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 71297920fdf4..e6b8a4f56bbe 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3072,6 +3072,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); if (args->timeout_ns < 0) args->timeout_ns = 0; + + /* + * Apparently ktime isn't accurate enough and occasionally has a + * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch + * things up to make the test happy. We allow up to 1 jiffy. + * + * This is a regression from the timespec->ktime conversion. + */ + if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) + args->timeout_ns = 0; } i915_gem_object_put(obj); From 581ab1fe520b05447b4869fb7e3136c3a600abad Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 16:39:00 +0000 Subject: [PATCH 0258/1299] drm/i915: Unwind conversion to i915_gem_phys_ops on failure The physical object is treated as permanently pinned. If we fail to take this initial pin during i915_gem_object_attach_phys() we need to revert it back to an ordinary shmemfs object before reporting the failure. v2: git-add Reported-by: Mika Kuoppala Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170215163900.11606-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e6b8a4f56bbe..5f7b8c88eb7e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -313,6 +313,8 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .release = i915_gem_object_release_phys, }; +static const struct drm_i915_gem_object_ops i915_gem_object_ops; + int i915_gem_object_unbind(struct drm_i915_gem_object *obj) { struct i915_vma *vma; @@ -586,9 +588,18 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, if (obj->mm.pages) return -EBUSY; + GEM_BUG_ON(obj->ops != &i915_gem_object_ops); obj->ops = &i915_gem_phys_ops; - return i915_gem_object_pin_pages(obj); + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err_xfer; + + return 0; + +err_xfer: + obj->ops = &i915_gem_object_ops; + return ret; } static int From 70001cd256548391add796cce40bd2e2394d4297 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Feb 2017 09:46:21 +0000 Subject: [PATCH 0259/1299] drm/i915: Remove struct_mutex for destroying framebuffers We do not need to hold struct_mutex for destroying drm_i915_gem_objects any longer, and with a little care taken over tracking obj->framebuffer_references, we can relinquish BKL locking around the destroy of intel_framebuffer. v2: Use atomic check for WARN_ON framebuffer miscounting Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170216094621.3426-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_object.h | 2 +- drivers/gpu/drm/i915/i915_gem_shrinker.c | 2 +- drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 9 ++++----- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 02365a5f7c80..ef4893a4f08c 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -165,7 +165,7 @@ struct drm_i915_gem_object { struct reservation_object *resv; /** References from framebuffers, locks out tiling changes. */ - unsigned long framebuffer_references; + atomic_t framebuffer_references; /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index f249a1eb46ac..8bc515e8b2a2 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -207,7 +207,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!(flags & I915_SHRINK_ACTIVE) && (i915_gem_object_is_active(obj) || - obj->framebuffer_references)) + atomic_read(&obj->framebuffer_references))) continue; if (!can_release_pages(obj)) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 8b2b507bdf7e..46ade36dcee6 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -238,7 +238,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, if ((tiling | stride) == obj->tiling_and_stride) return 0; - if (obj->framebuffer_references) + if (atomic_read(&obj->framebuffer_references)) return -EBUSY; /* We need to rebind the object if its current allocation diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 660581fb329a..e483540d2439 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14265,14 +14265,13 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) { - struct drm_device *dev = fb->dev; struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); drm_framebuffer_cleanup(fb); - mutex_lock(&dev->struct_mutex); - WARN_ON(!intel_fb->obj->framebuffer_references--); + + WARN_ON(atomic_dec_return(&intel_fb->obj->framebuffer_references) < 0); i915_gem_object_put(intel_fb->obj); - mutex_unlock(&dev->struct_mutex); + kfree(intel_fb); } @@ -14509,7 +14508,7 @@ static int intel_framebuffer_init(struct drm_device *dev, return ret; } - intel_fb->obj->framebuffer_references++; + atomic_inc(&intel_fb->obj->framebuffer_references); return 0; } From 24dbf51a5517b79de9585dd216488aed09fd5ee8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 10:59:18 +0000 Subject: [PATCH 0260/1299] drm/i915: struct_mutex is not required for allocating the framebuffer We do not need the BKL struct_mutex in order to allocate a GEM object, nor to create the framebuffer, so resist the temptation to take the BKL willy nilly. As this changes the locking contract around internal API calls, the patch is a little larger than a plain removal of a pair of mutex_lock/unlock. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170215105919.7347-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_display.c | 109 ++++++++++++--------------- drivers/gpu/drm/i915/intel_drv.h | 7 +- drivers/gpu/drm/i915/intel_fbdev.c | 21 +++--- 3 files changed, 59 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e483540d2439..5443ba8430da 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -96,10 +96,9 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc, static void ironlake_pch_clock_get(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); -static int intel_framebuffer_init(struct drm_device *dev, - struct intel_framebuffer *ifb, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj); +static int intel_framebuffer_init(struct intel_framebuffer *ifb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd); static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc); static void intel_set_pipe_timings(struct intel_crtc *intel_crtc); static void intel_set_pipe_src_size(struct intel_crtc *intel_crtc); @@ -2052,11 +2051,13 @@ static void intel_tile_dims(const struct drm_i915_private *dev_priv, } unsigned int -intel_fb_align_height(struct drm_device *dev, unsigned int height, - uint32_t pixel_format, uint64_t fb_modifier) +intel_fb_align_height(struct drm_i915_private *dev_priv, + unsigned int height, + uint32_t pixel_format, + uint64_t fb_modifier) { unsigned int cpp = drm_format_plane_cpp(pixel_format, 0); - unsigned int tile_height = intel_tile_height(to_i915(dev), fb_modifier, cpp); + unsigned int tile_height = intel_tile_height(dev_priv, fb_modifier, cpp); return ALIGN(height, tile_height); } @@ -2622,15 +2623,13 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; mutex_lock(&dev->struct_mutex); - obj = i915_gem_object_create_stolen_for_preallocated(dev_priv, base_aligned, base_aligned, size_aligned); - if (!obj) { - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); + if (!obj) return false; - } if (plane_config->tiling == I915_TILING_X) obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X; @@ -2642,20 +2641,17 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, mode_cmd.modifier[0] = fb->modifier; mode_cmd.flags = DRM_MODE_FB_MODIFIERS; - if (intel_framebuffer_init(dev, to_intel_framebuffer(fb), - &mode_cmd, obj)) { + if (intel_framebuffer_init(to_intel_framebuffer(fb), obj, &mode_cmd)) { DRM_DEBUG_KMS("intel fb init failed\n"); goto out_unref_obj; } - mutex_unlock(&dev->struct_mutex); DRM_DEBUG_KMS("initial plane fb obj %p\n", obj); return true; out_unref_obj: i915_gem_object_put(obj); - mutex_unlock(&dev->struct_mutex); return false; } @@ -7434,7 +7430,8 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, val = I915_READ(DSPSTRIDE(pipe)); fb->pitches[0] = val & 0xffffffc0; - aligned_height = intel_fb_align_height(dev, fb->height, + aligned_height = intel_fb_align_height(dev_priv, + fb->height, fb->format->format, fb->modifier); @@ -8475,7 +8472,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, fb->format->format); fb->pitches[0] = (val & 0x3ff) * stride_mult; - aligned_height = intel_fb_align_height(dev, fb->height, + aligned_height = intel_fb_align_height(dev_priv, + fb->height, fb->format->format, fb->modifier); @@ -8573,7 +8571,8 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc, val = I915_READ(DSPSTRIDE(pipe)); fb->pitches[0] = val & 0xffffffc0; - aligned_height = intel_fb_align_height(dev, fb->height, + aligned_height = intel_fb_align_height(dev_priv, + fb->height, fb->format->format, fb->modifier); @@ -9399,9 +9398,8 @@ static struct drm_display_mode load_detect_mode = { }; struct drm_framebuffer * -__intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) +intel_framebuffer_create(struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd) { struct intel_framebuffer *intel_fb; int ret; @@ -9410,7 +9408,7 @@ __intel_framebuffer_create(struct drm_device *dev, if (!intel_fb) return ERR_PTR(-ENOMEM); - ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj); + ret = intel_framebuffer_init(intel_fb, obj, mode_cmd); if (ret) goto err; @@ -9421,23 +9419,6 @@ __intel_framebuffer_create(struct drm_device *dev, return ERR_PTR(ret); } -static struct drm_framebuffer * -intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) -{ - struct drm_framebuffer *fb; - int ret; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ERR_PTR(ret); - fb = __intel_framebuffer_create(dev, mode_cmd, obj); - mutex_unlock(&dev->struct_mutex); - - return fb; -} - static u32 intel_framebuffer_pitch_for_width(int width, int bpp) { @@ -9472,7 +9453,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev, bpp); mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth); - fb = intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) i915_gem_object_put(obj); @@ -14319,7 +14300,7 @@ static u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, uint64_t fb_modifier, uint32_t pixel_format) { - u32 gen = INTEL_INFO(dev_priv)->gen; + u32 gen = INTEL_GEN(dev_priv); if (gen >= 9) { int cpp = drm_format_plane_cpp(pixel_format, 0); @@ -14346,18 +14327,17 @@ u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, } } -static int intel_framebuffer_init(struct drm_device *dev, - struct intel_framebuffer *intel_fb, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) +static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); unsigned int tiling = i915_gem_object_get_tiling(obj); - int ret; u32 pitch_limit, stride_alignment; struct drm_format_name_buf format_name; + int ret = -EINVAL; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + atomic_inc(&obj->framebuffer_references); if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { /* @@ -14367,14 +14347,14 @@ static int intel_framebuffer_init(struct drm_device *dev, if (tiling != I915_TILING_NONE && tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { DRM_DEBUG("tiling_mode doesn't match fb modifier\n"); - return -EINVAL; + goto err; } } else { if (tiling == I915_TILING_X) { mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; } else if (tiling == I915_TILING_Y) { DRM_DEBUG("No Y tiling for legacy addfb\n"); - return -EINVAL; + goto err; } } @@ -14385,7 +14365,7 @@ static int intel_framebuffer_init(struct drm_device *dev, if (INTEL_GEN(dev_priv) < 9) { DRM_DEBUG("Unsupported tiling 0x%llx!\n", mode_cmd->modifier[0]); - return -EINVAL; + goto err; } case DRM_FORMAT_MOD_NONE: case I915_FORMAT_MOD_X_TILED: @@ -14393,7 +14373,7 @@ static int intel_framebuffer_init(struct drm_device *dev, default: DRM_DEBUG("Unsupported fb modifier 0x%llx!\n", mode_cmd->modifier[0]); - return -EINVAL; + goto err; } /* @@ -14412,7 +14392,7 @@ static int intel_framebuffer_init(struct drm_device *dev, if (mode_cmd->pitches[0] & (stride_alignment - 1)) { DRM_DEBUG("pitch (%d) must be at least %u byte aligned\n", mode_cmd->pitches[0], stride_alignment); - return -EINVAL; + goto err; } pitch_limit = intel_fb_pitch_limit(dev_priv, mode_cmd->modifier[0], @@ -14422,7 +14402,7 @@ static int intel_framebuffer_init(struct drm_device *dev, mode_cmd->modifier[0] != DRM_FORMAT_MOD_NONE ? "tiled" : "linear", mode_cmd->pitches[0], pitch_limit); - return -EINVAL; + goto err; } /* @@ -14434,7 +14414,7 @@ static int intel_framebuffer_init(struct drm_device *dev, DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", mode_cmd->pitches[0], i915_gem_object_get_stride(obj)); - return -EINVAL; + goto err; } /* Reject formats not supported by any plane early. */ @@ -14493,24 +14473,29 @@ static int intel_framebuffer_init(struct drm_device *dev, /* FIXME need to adjust LINOFF/TILEOFF accordingly. */ if (mode_cmd->offsets[0] != 0) - return -EINVAL; + goto err; - drm_helper_mode_fill_fb_struct(dev, &intel_fb->base, mode_cmd); + drm_helper_mode_fill_fb_struct(&dev_priv->drm, + &intel_fb->base, mode_cmd); intel_fb->obj = obj; ret = intel_fill_fb_info(dev_priv, &intel_fb->base); if (ret) return ret; - ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs); + ret = drm_framebuffer_init(obj->base.dev, + &intel_fb->base, + &intel_fb_funcs); if (ret) { DRM_ERROR("framebuffer init failed %d\n", ret); - return ret; + goto err; } - atomic_inc(&intel_fb->obj->framebuffer_references); - return 0; + +err: + atomic_dec(&obj->framebuffer_references); + return ret; } static struct drm_framebuffer * @@ -14526,7 +14511,7 @@ intel_user_framebuffer_create(struct drm_device *dev, if (!obj) return ERR_PTR(-ENOENT); - fb = intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f1cbeae5cf88..7e27aef0849b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1233,7 +1233,7 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); -unsigned int intel_fb_align_height(struct drm_device *dev, +unsigned int intel_fb_align_height(struct drm_i915_private *dev_priv, unsigned int height, uint32_t pixel_format, uint64_t fb_format_modifier); @@ -1341,9 +1341,8 @@ struct i915_vma * intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); void intel_unpin_fb_vma(struct i915_vma *vma); struct drm_framebuffer * -__intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj); +intel_framebuffer_create(struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd); void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe); void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe); void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 5f8e1d65a4b7..365f08ddf365 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -121,7 +121,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_mode_fb_cmd2 mode_cmd = {}; - struct drm_i915_gem_object *obj = NULL; + struct drm_i915_gem_object *obj; int size, ret; /* we don't do packed 24bpp */ @@ -136,14 +136,13 @@ static int intelfb_alloc(struct drm_fb_helper *helper, mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); - mutex_lock(&dev->struct_mutex); - size = mode_cmd.pitches[0] * mode_cmd.height; size = PAGE_ALIGN(size); /* If the FB is too big, just don't use it since fbdev is not very * important and we should probably use that space with FBC or other * features. */ + obj = NULL; if (size * 2 < ggtt->stolen_usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); if (obj == NULL) @@ -151,24 +150,22 @@ static int intelfb_alloc(struct drm_fb_helper *helper, if (IS_ERR(obj)) { DRM_ERROR("failed to allocate framebuffer\n"); ret = PTR_ERR(obj); - goto out; + goto err; } - fb = __intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) { - i915_gem_object_put(obj); ret = PTR_ERR(fb); - goto out; + goto err_obj; } - mutex_unlock(&dev->struct_mutex); - ifbdev->fb = to_intel_framebuffer(fb); return 0; -out: - mutex_unlock(&dev->struct_mutex); +err_obj: + i915_gem_object_put(obj); +err: return ret; } @@ -631,7 +628,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, } cur_size = intel_crtc->config->base.adjusted_mode.crtc_vdisplay; - cur_size = intel_fb_align_height(dev, cur_size, + cur_size = intel_fb_align_height(to_i915(dev), cur_size, fb->base.format->format, fb->base.modifier); cur_size *= fb->base.pitches[0]; From fabef825626d7bd05a321e4427fdf31a169b5173 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 10:59:19 +0000 Subject: [PATCH 0261/1299] drm/i915: Drop struct_mutex around frontbuffer flushes Since the frontbuffer has self-contained locking, it does not require us to hold the BKL struct_mutex as we send invalidate and flush messages. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170215105919.7347-3-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_fbdev.c | 43 ++++++++++++------------------ 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 365f08ddf365..c404ab524ccc 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -45,6 +45,14 @@ #include #include "i915_drv.h" +static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev) +{ + struct drm_i915_gem_object *obj = ifbdev->fb->obj; + unsigned int origin = ifbdev->vma->fence ? ORIGIN_GTT : ORIGIN_CPU; + + intel_fb_obj_invalidate(obj, origin); +} + static int intel_fbdev_set_par(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; @@ -53,12 +61,8 @@ static int intel_fbdev_set_par(struct fb_info *info) int ret; ret = drm_fb_helper_set_par(info); - - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -71,12 +75,8 @@ static int intel_fbdev_blank(int blank, struct fb_info *info) int ret; ret = drm_fb_helper_blank(blank, info); - - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -87,15 +87,11 @@ static int intel_fbdev_pan_display(struct fb_var_screeninfo *var, struct drm_fb_helper *fb_helper = info->par; struct intel_fbdev *ifbdev = container_of(fb_helper, struct intel_fbdev, helper); - int ret; - ret = drm_fb_helper_pan_display(var, info); - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + ret = drm_fb_helper_pan_display(var, info); + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -838,11 +834,6 @@ void intel_fbdev_restore_mode(struct drm_device *dev) if (!ifbdev->fb) return; - if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper)) { - DRM_DEBUG("failed to restore crtc mode\n"); - } else { - mutex_lock(&dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&dev->struct_mutex); - } + if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper) == 0) + intel_fbdev_invalidate(ifbdev); } From f655e67ac8d797425abb0404d0878758f3f71c1a Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 16 Feb 2017 14:10:01 +0800 Subject: [PATCH 0262/1299] drm/i915/gvt: Fix check error on opregion.c As we switched to memremap for opregion, shouldn't use any __iomem for that, and move to use memcpy instead. This fixed static check errors for: CHECK drivers/gpu/drm/i915//gvt/opregion.c drivers/gpu/drm/i915//gvt/opregion.c:142:31: warning: incorrect type in argument 1 (different address spaces) drivers/gpu/drm/i915//gvt/opregion.c:142:31: expected void *addr drivers/gpu/drm/i915//gvt/opregion.c:142:31: got void [noderef] *opregion_va drivers/gpu/drm/i915//gvt/opregion.c:160:35: warning: incorrect type in assignment (different address spaces) drivers/gpu/drm/i915//gvt/opregion.c:160:35: expected void [noderef] *opregion_va drivers/gpu/drm/i915//gvt/opregion.c:160:35: got void * Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 2 +- drivers/gpu/drm/i915/gvt/opregion.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index e227caf5859e..5ee660077b0c 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -203,7 +203,7 @@ struct intel_gvt_firmware { }; struct intel_gvt_opregion { - void __iomem *opregion_va; + void *opregion_va; u32 opregion_pa; }; diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index d9fb41ab7119..5d1caf9daba9 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -27,7 +27,6 @@ static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) { - void __iomem *host_va = vgpu->gvt->opregion.opregion_va; u8 *buf; int i; @@ -43,8 +42,8 @@ static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) if (!vgpu_opregion(vgpu)->va) return -ENOMEM; - memcpy_fromio(vgpu_opregion(vgpu)->va, host_va, - INTEL_GVT_OPREGION_SIZE); + memcpy(vgpu_opregion(vgpu)->va, vgpu->gvt->opregion.opregion_va, + INTEL_GVT_OPREGION_SIZE); for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; From fd64be636708d808852c4c8c1efce0a0a51c24c5 Mon Sep 17 00:00:00 2001 From: Min He Date: Fri, 17 Feb 2017 15:02:36 +0800 Subject: [PATCH 0263/1299] drm/i915/gvt: introduced failsafe mode into vgpu New failsafe mode is introduced, when we detect guest not supporting GVT-g. In failsafe mode, we will ignore all the MMIO and cfg space read/write from guest. This patch can fix the issue that when guest kernel or graphics driver version is too low, there will be a lot of kernel traces in host. V5: rebased onto latest gvt-staging V4: changed coding style by Zhenyu and Ping's advice V3: modified coding style and error messages according to Zhenyu's comment V2: 1) implemented MMIO/GTT/WP pages read/write logic; 2) used a unified function to enter failsafe mode Signed-off-by: Min He Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cfg_space.c | 3 ++ drivers/gpu/drm/i915/gvt/gvt.h | 6 +++ drivers/gpu/drm/i915/gvt/handlers.c | 36 ++++++++++++++++ drivers/gpu/drm/i915/gvt/mmio.c | 62 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gvt/vgpu.c | 6 ++- 5 files changed, 112 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 4a6a2ed65732..a77e050b85a3 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -237,6 +237,9 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, { int ret; + if (vgpu->failsafe) + return 0; + if (WARN_ON(bytes > 4)) return -EINVAL; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 5ee660077b0c..48ef0771d772 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -143,6 +143,8 @@ struct intel_vgpu { int id; unsigned long handle; /* vGPU handle used by hypervisor MPT modules */ bool active; + bool pv_notified; + bool failsafe; bool resetting; void *sched_data; @@ -449,6 +451,10 @@ struct intel_gvt_ops { }; +enum { + GVT_FAILSAFE_UNSUPPORTED_GUEST, +}; + #include "mpt.h" #endif diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 1d450627ff65..6f098bb110bd 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -150,10 +150,34 @@ static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg) #define fence_num_to_offset(num) \ (num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0))) + +static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) +{ + switch (reason) { + case GVT_FAILSAFE_UNSUPPORTED_GUEST: + pr_err("Detected your guest driver doesn't support GVT-g.\n"); + break; + default: + break; + } + pr_err("Now vgpu %d will enter failsafe mode.\n", vgpu->id); + vgpu->failsafe = true; +} + static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, unsigned int fence_num, void *p_data, unsigned int bytes) { if (fence_num >= vgpu_fence_sz(vgpu)) { + + /* When guest access oob fence regs without access + * pv_info first, we treat guest not supporting GVT, + * and we will let vgpu enter failsafe mode. + */ + if (!vgpu->pv_notified) { + enter_failsafe_mode(vgpu, + GVT_FAILSAFE_UNSUPPORTED_GUEST); + return -EINVAL; + } gvt_err("vgpu%d: found oob fence register access\n", vgpu->id); gvt_err("vgpu%d: total fence num %d access fence num %d\n", @@ -1001,6 +1025,7 @@ static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, if (invalid_read) gvt_err("invalid pvinfo read: [%x:%x] = %x\n", offset, bytes, *(u32 *)p_data); + vgpu->pv_notified = true; return 0; } @@ -1318,6 +1343,17 @@ static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, bool enable_execlist; write_vreg(vgpu, offset, p_data, bytes); + + /* when PPGTT mode enabled, we will check if guest has called + * pvinfo, if not, we will treat this guest as non-gvtg-aware + * guest, and stop emulating its cfg space, mmio, gtt, etc. + */ + if (((data & _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)) || + (data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE))) + && !vgpu->pv_notified) { + enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); + return 0; + } if ((data & _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)) || (data & _MASKED_BIT_DISABLE(GFX_RUN_LIST_ENABLE))) { enable_execlist = !!(data & GFX_RUN_LIST_ENABLE); diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 4df078bc5d04..b2d72dad1537 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -57,6 +57,58 @@ int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa) (reg >= gvt->device_info.gtt_start_offset \ && reg < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) +static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, + void *p_data, unsigned int bytes, bool read) +{ + struct intel_gvt *gvt = NULL; + void *pt = NULL; + unsigned int offset = 0; + + if (!vgpu || !p_data) + return; + + gvt = vgpu->gvt; + mutex_lock(&gvt->lock); + offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); + if (reg_is_mmio(gvt, offset)) { + if (read) + intel_vgpu_default_mmio_read(vgpu, offset, p_data, + bytes); + else + intel_vgpu_default_mmio_write(vgpu, offset, p_data, + bytes); + } else if (reg_is_gtt(gvt, offset) && + vgpu->gtt.ggtt_mm->virtual_page_table) { + offset -= gvt->device_info.gtt_start_offset; + pt = vgpu->gtt.ggtt_mm->virtual_page_table + offset; + if (read) + memcpy(p_data, pt, bytes); + else + memcpy(pt, p_data, bytes); + + } else if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { + struct intel_vgpu_guest_page *gp; + + /* Since we enter the failsafe mode early during guest boot, + * guest may not have chance to set up its ppgtt table, so + * there should not be any wp pages for guest. Keep the wp + * related code here in case we need to handle it in furture. + */ + gp = intel_vgpu_find_guest_page(vgpu, pa >> PAGE_SHIFT); + if (gp) { + /* remove write protection to prevent furture traps */ + intel_vgpu_clean_guest_page(vgpu, gp); + if (read) + intel_gvt_hypervisor_read_gpa(vgpu, pa, + p_data, bytes); + else + intel_gvt_hypervisor_write_gpa(vgpu, pa, + p_data, bytes); + } + } + mutex_unlock(&gvt->lock); +} + /** * intel_vgpu_emulate_mmio_read - emulate MMIO read * @vgpu: a vGPU @@ -75,6 +127,11 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, unsigned int offset = 0; int ret = -EINVAL; + + if (vgpu->failsafe) { + failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, true); + return 0; + } mutex_lock(&gvt->lock); if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { @@ -188,6 +245,11 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, u32 old_vreg = 0, old_sreg = 0; int ret = -EINVAL; + if (vgpu->failsafe) { + failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, false); + return 0; + } + mutex_lock(&gvt->lock); if (atomic_read(&vgpu->gtt.n_write_protected_guest_page)) { diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 95a97aa0051e..dcfcce1dc00e 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -387,8 +387,12 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, populate_pvinfo_page(vgpu); intel_vgpu_reset_display(vgpu); - if (dmlr) + if (dmlr) { intel_vgpu_reset_cfg_space(vgpu); + /* only reset the failsafe mode when dmlr reset */ + vgpu->failsafe = false; + vgpu->pv_notified = false; + } } vgpu->resetting = false; From d1be371d4f4c12d11023c9fc795e5d460d960680 Mon Sep 17 00:00:00 2001 From: "Zhao, Xinda" Date: Fri, 17 Feb 2017 14:38:33 +0800 Subject: [PATCH 0264/1299] drm/i915/gvt: handle fence reg access during GPU reset Lots of reduntant log info will be printed out during GPU reset, including accessing untracked mmio register and fence register, variable disable_warn_untrack is added previously to handle the situation, but the accessing of fence register is ignored in the previously patch, so add it back. Besides, set the variable disable_warn_untrack to the defalut value after GPU reset is finished. Signed-off-by: Zhao, Xinda Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 15 +++++++++------ drivers/gpu/drm/i915/gvt/mmio.c | 2 ++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 6f098bb110bd..fd7e789a72c3 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -173,16 +173,19 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, * pv_info first, we treat guest not supporting GVT, * and we will let vgpu enter failsafe mode. */ - if (!vgpu->pv_notified) { + if (!vgpu->pv_notified) enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); - return -EINVAL; + + if (!vgpu->mmio.disable_warn_untrack) { + gvt_err("vgpu%d: found oob fence register access\n", + vgpu->id); + gvt_err("vgpu%d: total fence %d, access fence %d\n", + vgpu->id, vgpu_fence_sz(vgpu), + fence_num); } - gvt_err("vgpu%d: found oob fence register access\n", - vgpu->id); - gvt_err("vgpu%d: total fence num %d access fence num %d\n", - vgpu->id, vgpu_fence_sz(vgpu), fence_num); memset(p_data, 0, bytes); + return -EINVAL; } return 0; } diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index b2d72dad1537..99abb01fa9eb 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -384,6 +384,8 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu) /* set the bit 0:2(Core C-State ) to C0 */ vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0; + + vgpu->mmio.disable_warn_untrack = false; } /** From 2f35afe94af0f7dc451253d848af442e5d7a7715 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Feb 2017 12:23:21 +0000 Subject: [PATCH 0265/1299] drm/i915: Make int __intel_ring_space static It is only used within intel_ringbuffer.c Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f7499829ecc2..8c17db72489f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -39,7 +39,7 @@ */ #define LEGACY_REQUEST_SIZE 200 -int __intel_ring_space(int head, int tail, int size) +static int __intel_ring_space(int head, int tail, int size) { int space = head - tail; if (space <= 0) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 16714096810d..1d9fd4e42622 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -523,7 +523,6 @@ intel_ring_offset(struct drm_i915_gem_request *req, void *addr) return offset & (req->ring->size - 1); } -int __intel_ring_space(int head, int tail, int size); void intel_ring_update_space(struct intel_ring *ring); void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); From 8ee7c6e23bb1b3c37ef27e81395db056bd7eac53 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Feb 2017 12:23:22 +0000 Subject: [PATCH 0266/1299] drm/i915: Simplify cleanup path in intel_engines_init We can call the engine cleanup vfunc instead of duplicating the decision making here. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_engine_cs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 538d845d7251..012ac9483491 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -210,10 +210,8 @@ int intel_engines_init(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { if (id >= err_id) kfree(engine); - else if (i915.enable_execlists) - intel_logical_ring_cleanup(engine); else - intel_engine_cleanup(engine); + dev_priv->gt.cleanup_engine(engine); } return err; } From 133b4bd74d89220c612d4adfabb2f41f6f432184 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Feb 2017 12:23:23 +0000 Subject: [PATCH 0267/1299] drm/i915: Move common workaround code to intel_engine_cs It is used by all submission backends. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_engine_cs.c | 550 ++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 550 ------------------------ 2 files changed, 550 insertions(+), 550 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 012ac9483491..46c94740be53 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -523,6 +523,556 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, } } +static int wa_add(struct drm_i915_private *dev_priv, + i915_reg_t addr, + const u32 mask, const u32 val) +{ + const u32 idx = dev_priv->workarounds.count; + + if (WARN_ON(idx >= I915_MAX_WA_REGS)) + return -ENOSPC; + + dev_priv->workarounds.reg[idx].addr = addr; + dev_priv->workarounds.reg[idx].value = val; + dev_priv->workarounds.reg[idx].mask = mask; + + dev_priv->workarounds.count++; + + return 0; +} + +#define WA_REG(addr, mask, val) do { \ + const int r = wa_add(dev_priv, (addr), (mask), (val)); \ + if (r) \ + return r; \ + } while (0) + +#define WA_SET_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) + +#define WA_CLR_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) + +#define WA_SET_FIELD_MASKED(addr, mask, value) \ + WA_REG(addr, mask, _MASKED_FIELD(mask, value)) + +#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) +#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) + +#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) + +static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, + i915_reg_t reg) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct i915_workarounds *wa = &dev_priv->workarounds; + const uint32_t index = wa->hw_whitelist_count[engine->id]; + + if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) + return -EINVAL; + + WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), + i915_mmio_reg_offset(reg)); + wa->hw_whitelist_count[engine->id]++; + + return 0; +} + +static int gen8_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + + /* WaDisableAsyncFlipPerfMode:bdw,chv */ + WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); + + /* WaDisablePartialInstShootdown:bdw,chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Use Force Non-Coherent whenever executing a 3D context. This is a + * workaround for for a possible hang in the unlikely event a TLB + * invalidation occurs during a PSD flush. + */ + /* WaForceEnableNonCoherent:bdw,chv */ + /* WaHdcDisableFetchWhenMasked:bdw,chv */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_DONOT_FETCH_MEM_WHEN_MASKED | + HDC_FORCE_NON_COHERENT); + + /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: + * "The Hierarchical Z RAW Stall Optimization allows non-overlapping + * polygons in the same 8x4 pixel/sample area to be processed without + * stalling waiting for the earlier ones to write to Hierarchical Z + * buffer." + * + * This optimization is off by default for BDW and CHV; turn it on. + */ + WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + + /* Wa4x4STCOptimizationDisable:bdw,chv */ + WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); + + return 0; +} + +static int bdw_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen8_init_workarounds(engine); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* WaDisableDopClockGating:bdw + * + * Also see the related UCGTCL1 write in broadwell_init_clock_gating() + * to disable EUTC clock gating. + */ + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); + + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + WA_SET_BIT_MASKED(HDC_CHICKEN0, + /* WaForceContextSaveRestoreNonCoherent:bdw */ + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ + (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); + + return 0; +} + +static int chv_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen8_init_workarounds(engine); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* Improve HiZ throughput on CHV. */ + WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); + + return 0; +} + +static int gen9_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk */ + I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); + + /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk */ + I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + + /* WaDisableKillLogic:bxt,skl,kbl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + ECOCHK_DIS_TLB); + + /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk */ + /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + FLOW_CONTROL_ENABLE | + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); + + /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_DG_MIRROR_FIX_ENABLE); + + /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, + GEN9_RHWO_OPTIMIZATION_DISABLE); + /* + * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set + * but we do that in per ctx batchbuffer as there is an issue + * with this register not getting restored on ctx restore + */ + } + + /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ + WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_GPGPU_PREEMPTION); + + /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */ + /* WaDisablePartialResolveInVc:skl,bxt,kbl */ + WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | + GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); + + /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk */ + WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_CCS_TLB_PREFETCH_ENABLE); + + /* WaDisableMaskBasedCammingInRCC:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, + PIXEL_MASK_CAMMING_DISABLE); + + /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); + + /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are + * both tied to WaForceContextSaveRestoreNonCoherent + * in some hsds for skl. We keep the tie for all gen9. The + * documentation is a bit hazy and so we want to get common behaviour, + * even though there is no clear evidence we would need both on kbl/bxt. + * This area has been source of system hangs so we play it safe + * and mimic the skl regardless of what bspec says. + * + * Use Force Non-Coherent whenever executing a 3D context. This + * is a workaround for a possible hang in the unlikely event + * a TLB invalidation occurs during a PSD flush. + */ + + /* WaForceEnableNonCoherent:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT); + + /* WaDisableHDCInvalidation:skl,bxt,kbl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + BDW_DISABLE_HDC_INVALIDATION); + + /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */ + if (IS_SKYLAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || + IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + + /* WaOCLCoherentLineFlush:skl,bxt,kbl */ + I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | + GEN8_LQSC_FLUSH_COHERENT_LINES)); + + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk */ + ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); + if (ret) + return ret; + + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */ + ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); + if (ret) + return ret; + + /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk */ + ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); + if (ret) + return ret; + + return 0; +} + +static int skl_tune_iz_hashing(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + u8 vals[3] = { 0, 0, 0 }; + unsigned int i; + + for (i = 0; i < 3; i++) { + u8 ss; + + /* + * Only consider slices where one, and only one, subslice has 7 + * EUs + */ + if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) + continue; + + /* + * subslice_7eu[i] != 0 (because of the check above) and + * ss_max == 4 (maximum number of subslices possible per slice) + * + * -> 0 <= ss <= 3; + */ + ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; + vals[i] = 3 - ss; + } + + if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) + return 0; + + /* Tune IZ hashing. See intel_device_info_runtime_init() */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN9_IZ_HASHING_MASK(2) | + GEN9_IZ_HASHING_MASK(1) | + GEN9_IZ_HASHING_MASK(0), + GEN9_IZ_HASHING(2, vals[2]) | + GEN9_IZ_HASHING(1, vals[1]) | + GEN9_IZ_HASHING(0, vals[0])); + + return 0; +} + +static int skl_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* + * Actual WA is to disable percontext preemption granularity control + * until D0 which is the default case so this is equivalent to + * !WaDisablePerCtxtPreemptionGranularityControl:skl + */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); + + /* WaEnableGapsTsvCreditFix:skl */ + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | + GEN9_GAPS_TSV_CREDIT_DISABLE)); + + /* WaDisableGafsUnitClkGating:skl */ + WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:skl */ + if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaDisableLSQCROPERFforOCL:skl */ + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + + return skl_tune_iz_hashing(engine); +} + +static int bxt_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaStoreMultiplePTEenable:bxt */ + /* This is a requirement according to Hardware specification */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); + + /* WaSetClckGatingDisableMedia:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & + ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE)); + } + + /* WaDisableThreadStallDopClockGating:bxt */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); + + /* WaDisablePooledEuLoadBalancingFix:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { + WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2, + GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); + } + + /* WaDisableSbeCacheDispatchPortSharing:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) { + WA_SET_BIT_MASKED( + GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + } + + /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ + /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ + /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ + /* WaDisableLSQCROPERFforOCL:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1); + if (ret) + return ret; + + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + } + + /* WaProgramL3SqcReg1DefaultForPerf:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) + I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | + L3_HIGH_PRIO_CREDITS(2)); + + /* WaToEnableHwFixForPushConstHWBug:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaInPlaceDecompressionHang:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + return 0; +} + +static int kbl_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaEnableGapsTsvCreditFix:kbl */ + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | + GEN9_GAPS_TSV_CREDIT_DISABLE)); + + /* WaDisableDynamicCreditSharing:kbl */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + WA_SET_BIT(GAMT_CHKN_BIT_REG, + GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); + + /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FENCE_DEST_SLM_DISABLE); + + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableGafsUnitClkGating:kbl */ + WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaDisableSbeCacheDispatchPortSharing:kbl */ + WA_SET_BIT_MASKED( + GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + + /* WaInPlaceDecompressionHang:kbl */ + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaDisableLSQCROPERFforOCL:kbl */ + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + + return 0; +} + +static int glk_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaToEnableHwFixForPushConstHWBug:glk */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + return 0; +} + +int init_workarounds_ring(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + WARN_ON(engine->id != RCS); + + dev_priv->workarounds.count = 0; + dev_priv->workarounds.hw_whitelist_count[RCS] = 0; + + if (IS_BROADWELL(dev_priv)) + return bdw_init_workarounds(engine); + + if (IS_CHERRYVIEW(dev_priv)) + return chv_init_workarounds(engine); + + if (IS_SKYLAKE(dev_priv)) + return skl_init_workarounds(engine); + + if (IS_BROXTON(dev_priv)) + return bxt_init_workarounds(engine); + + if (IS_KABYLAKE(dev_priv)) + return kbl_init_workarounds(engine); + + if (IS_GEMINILAKE(dev_priv)) + return glk_init_workarounds(engine); + + return 0; +} + +int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) +{ + struct i915_workarounds *w = &req->i915->workarounds; + u32 *cs; + int ret, i; + + if (w->count == 0) + return 0; + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + cs = intel_ring_begin(req, (w->count * 2 + 2)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(w->count); + for (i = 0; i < w->count; i++) { + *cs++ = i915_mmio_reg_offset(w->reg[i].addr); + *cs++ = w->reg[i].value; + } + *cs++ = MI_NOOP; + + intel_ring_advance(req, cs); + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); + + return 0; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8c17db72489f..629fe6584e61 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -644,41 +644,6 @@ static void reset_ring_common(struct intel_engine_cs *engine, } } -int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) -{ - struct i915_workarounds *w = &req->i915->workarounds; - u32 *cs; - int ret, i; - - if (w->count == 0) - return 0; - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - cs = intel_ring_begin(req, (w->count * 2 + 2)); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(w->count); - for (i = 0; i < w->count; i++) { - *cs++ = i915_mmio_reg_offset(w->reg[i].addr); - *cs++ = w->reg[i].value; - } - *cs++ = MI_NOOP; - - intel_ring_advance(req, cs); - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); - - return 0; -} - static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) { int ret; @@ -694,521 +659,6 @@ static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) return 0; } -static int wa_add(struct drm_i915_private *dev_priv, - i915_reg_t addr, - const u32 mask, const u32 val) -{ - const u32 idx = dev_priv->workarounds.count; - - if (WARN_ON(idx >= I915_MAX_WA_REGS)) - return -ENOSPC; - - dev_priv->workarounds.reg[idx].addr = addr; - dev_priv->workarounds.reg[idx].value = val; - dev_priv->workarounds.reg[idx].mask = mask; - - dev_priv->workarounds.count++; - - return 0; -} - -#define WA_REG(addr, mask, val) do { \ - const int r = wa_add(dev_priv, (addr), (mask), (val)); \ - if (r) \ - return r; \ - } while (0) - -#define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) - -#define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) - -#define WA_SET_FIELD_MASKED(addr, mask, value) \ - WA_REG(addr, mask, _MASKED_FIELD(mask, value)) - -#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) -#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) - -#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) - -static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, - i915_reg_t reg) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct i915_workarounds *wa = &dev_priv->workarounds; - const uint32_t index = wa->hw_whitelist_count[engine->id]; - - if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) - return -EINVAL; - - WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - i915_mmio_reg_offset(reg)); - wa->hw_whitelist_count[engine->id]++; - - return 0; -} - -static int gen8_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); - - /* WaDisableAsyncFlipPerfMode:bdw,chv */ - WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); - - /* WaDisablePartialInstShootdown:bdw,chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ - /* WaForceEnableNonCoherent:bdw,chv */ - /* WaHdcDisableFetchWhenMasked:bdw,chv */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_DONOT_FETCH_MEM_WHEN_MASKED | - HDC_FORCE_NON_COHERENT); - - /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: - * "The Hierarchical Z RAW Stall Optimization allows non-overlapping - * polygons in the same 8x4 pixel/sample area to be processed without - * stalling waiting for the earlier ones to write to Hierarchical Z - * buffer." - * - * This optimization is off by default for BDW and CHV; turn it on. - */ - WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); - - /* Wa4x4STCOptimizationDisable:bdw,chv */ - WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4); - - return 0; -} - -static int bdw_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* WaDisableDopClockGating:bdw - * - * Also see the related UCGTCL1 write in broadwell_init_clock_gating() - * to disable EUTC clock gating. - */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); - - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - WA_SET_BIT_MASKED(HDC_CHICKEN0, - /* WaForceContextSaveRestoreNonCoherent:bdw */ - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ - (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); - - return 0; -} - -static int chv_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* Improve HiZ throughput on CHV. */ - WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); - - return 0; -} - -static int gen9_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk */ - I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); - - /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk */ - I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); - - /* WaDisableKillLogic:bxt,skl,kbl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - ECOCHK_DIS_TLB); - - /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk */ - /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - FLOW_CONTROL_ENABLE | - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - - /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_DG_MIRROR_FIX_ENABLE); - - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, - GEN9_RHWO_OPTIMIZATION_DISABLE); - /* - * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set - * but we do that in per ctx batchbuffer as there is an issue - * with this register not getting restored on ctx restore - */ - } - - /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_ENABLE_GPGPU_PREEMPTION); - - /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */ - /* WaDisablePartialResolveInVc:skl,bxt,kbl */ - WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | - GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); - - /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk */ - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_CCS_TLB_PREFETCH_ENABLE); - - /* WaDisableMaskBasedCammingInRCC:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, - PIXEL_MASK_CAMMING_DISABLE); - - /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); - - /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are - * both tied to WaForceContextSaveRestoreNonCoherent - * in some hsds for skl. We keep the tie for all gen9. The - * documentation is a bit hazy and so we want to get common behaviour, - * even though there is no clear evidence we would need both on kbl/bxt. - * This area has been source of system hangs so we play it safe - * and mimic the skl regardless of what bspec says. - * - * Use Force Non-Coherent whenever executing a 3D context. This - * is a workaround for a possible hang in the unlikely event - * a TLB invalidation occurs during a PSD flush. - */ - - /* WaForceEnableNonCoherent:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_NON_COHERENT); - - /* WaDisableHDCInvalidation:skl,bxt,kbl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - BDW_DISABLE_HDC_INVALIDATION); - - /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */ - if (IS_SKYLAKE(dev_priv) || - IS_KABYLAKE(dev_priv) || - IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); - - /* WaOCLCoherentLineFlush:skl,bxt,kbl */ - I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_FLUSH_COHERENT_LINES)); - - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk */ - ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); - if (ret) - return ret; - - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */ - ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; - - /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk */ - ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); - if (ret) - return ret; - - return 0; -} - -static int skl_tune_iz_hashing(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - u8 vals[3] = { 0, 0, 0 }; - unsigned int i; - - for (i = 0; i < 3; i++) { - u8 ss; - - /* - * Only consider slices where one, and only one, subslice has 7 - * EUs - */ - if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) - continue; - - /* - * subslice_7eu[i] != 0 (because of the check above) and - * ss_max == 4 (maximum number of subslices possible per slice) - * - * -> 0 <= ss <= 3; - */ - ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; - vals[i] = 3 - ss; - } - - if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) - return 0; - - /* Tune IZ hashing. See intel_device_info_runtime_init() */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN9_IZ_HASHING_MASK(2) | - GEN9_IZ_HASHING_MASK(1) | - GEN9_IZ_HASHING_MASK(0), - GEN9_IZ_HASHING(2, vals[2]) | - GEN9_IZ_HASHING(1, vals[1]) | - GEN9_IZ_HASHING(0, vals[0])); - - return 0; -} - -static int skl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* - * Actual WA is to disable percontext preemption granularity control - * until D0 which is the default case so this is equivalent to - * !WaDisablePerCtxtPreemptionGranularityControl:skl - */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - - /* WaEnableGapsTsvCreditFix:skl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableGafsUnitClkGating:skl */ - WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); - - /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - /* WaDisableLSQCROPERFforOCL:skl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return skl_tune_iz_hashing(engine); -} - -static int bxt_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaStoreMultiplePTEenable:bxt */ - /* This is a requirement according to Hardware specification */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); - - /* WaSetClckGatingDisableMedia:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & - ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE)); - } - - /* WaDisableThreadStallDopClockGating:bxt */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); - - /* WaDisablePooledEuLoadBalancingFix:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { - WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2, - GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); - } - - /* WaDisableSbeCacheDispatchPortSharing:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) { - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - } - - /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ - /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ - /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ - /* WaDisableLSQCROPERFforOCL:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1); - if (ret) - return ret; - - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - } - - /* WaProgramL3SqcReg1DefaultForPerf:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) - I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | - L3_HIGH_PRIO_CREDITS(2)); - - /* WaToEnableHwFixForPushConstHWBug:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaInPlaceDecompressionHang:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - return 0; -} - -static int kbl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:kbl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) - WA_SET_BIT(GAMT_CHKN_BIT_REG, - GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); - - /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FENCE_DEST_SLM_DISABLE); - - /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableGafsUnitClkGating:kbl */ - WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); - - /* WaDisableSbeCacheDispatchPortSharing:kbl */ - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - - /* WaInPlaceDecompressionHang:kbl */ - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - /* WaDisableLSQCROPERFforOCL:kbl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return 0; -} - -static int glk_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaToEnableHwFixForPushConstHWBug:glk */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - return 0; -} - -int init_workarounds_ring(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WARN_ON(engine->id != RCS); - - dev_priv->workarounds.count = 0; - dev_priv->workarounds.hw_whitelist_count[RCS] = 0; - - if (IS_BROADWELL(dev_priv)) - return bdw_init_workarounds(engine); - - if (IS_CHERRYVIEW(dev_priv)) - return chv_init_workarounds(engine); - - if (IS_SKYLAKE(dev_priv)) - return skl_init_workarounds(engine); - - if (IS_BROXTON(dev_priv)) - return bxt_init_workarounds(engine); - - if (IS_KABYLAKE(dev_priv)) - return kbl_init_workarounds(engine); - - if (IS_GEMINILAKE(dev_priv)) - return glk_init_workarounds(engine); - - return 0; -} - static int init_render_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; From 097d4f1c121f324d655d17b9bc5238107f7a1761 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 17 Feb 2017 07:58:59 +0000 Subject: [PATCH 0268/1299] drm/i915: Tidy workaround batch buffer emission Use the "*batch++ = " style as in the ring emission for better readability and also simplify the logic a bit by consolidating the offset and size calculations and overflow checking. The latter is a programming error so it is not required to check for it after each write to the object, but rather do it once the whole state has been written and fail the driver if something went wrong. v2: Rebase. v3: Keep track of offsets and sizes in bytes for simplicity and rename function pointer variable to _fn suffix. (Chris Wilson) v4: Fix size calc broken in v3 and add alignment warning. (Chris Wilson) v5: Fix return code. v6: I added an exit from loop in v5 but forgot to put back the object teardown. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson (v5) Cc: Chris Wilson Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_lrc.c | 310 +++++++++++-------------------- 1 file changed, 113 insertions(+), 197 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ee431d39ce06..5154661a38cf 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -890,18 +890,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) return ret; } -#define wa_ctx_emit(batch, index, cmd) \ - do { \ - int __index = (index)++; \ - if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \ - return -ENOSPC; \ - } \ - batch[__index] = (cmd); \ - } while (0) - -#define wa_ctx_emit_reg(batch, index, reg) \ - wa_ctx_emit((batch), (index), i915_mmio_reg_offset(reg)) - /* * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after * PIPE_CONTROL instruction. This is required for the flush to happen correctly @@ -918,56 +906,31 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) * This WA is also required for Gen9 so extracting as a function avoids * code duplication. */ -static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, - uint32_t *batch, - uint32_t index) +static u32 * +gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) { - uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES); + *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = 0; - wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); - wa_ctx_emit(batch, index, 0); + *batch++ = MI_LOAD_REGISTER_IMM(1); + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, l3sqc4_flush); + *batch++ = GFX_OP_PIPE_CONTROL(6); + *batch++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_DC_FLUSH_ENABLE; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE)); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = 0; - wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); - wa_ctx_emit(batch, index, 0); - - return index; -} - -static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx, - uint32_t offset, - uint32_t start_alignment) -{ - return wa_ctx->offset = ALIGN(offset, start_alignment); -} - -static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx, - uint32_t offset, - uint32_t size_alignment) -{ - wa_ctx->size = offset - wa_ctx->offset; - - WARN(wa_ctx->size % size_alignment, - "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n", - wa_ctx->size, size_alignment); - return 0; + return batch; } /* @@ -985,42 +948,28 @@ static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx, * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together * makes a complete batch buffer. */ -static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t scratch_addr; - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaDisableCtxRestoreArbitration:bdw,chv */ - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE); + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ - if (IS_BROADWELL(engine->i915)) { - int rc = gen8_emit_flush_coherentl3_wa(engine, batch, index); - if (rc < 0) - return rc; - index = rc; - } + if (IS_BROADWELL(engine->i915)) + batch = gen8_emit_flush_coherentl3_wa(engine, batch); + *batch++ = GFX_OP_PIPE_CONTROL(6); + *batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE; /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ /* Actual scratch location is at 128 bytes offset */ - scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - wa_ctx_emit(batch, index, scratch_addr); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + *batch++ = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; /* Pad to end of cacheline */ - while (index % CACHELINE_DWORDS) - wa_ctx_emit(batch, index, MI_NOOP); + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; /* * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because @@ -1028,7 +977,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, * in the register CTX_RCS_INDIRECT_CTX */ - return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); + return batch; } /* @@ -1040,58 +989,38 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. */ -static int gen8_init_perctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaDisableCtxRestoreArbitration:bdw,chv */ - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE); + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *batch++ = MI_BATCH_BUFFER_END; - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); - - return wa_ctx_end(wa_ctx, *offset = index, 1); + return batch; } -static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { - int ret; - struct drm_i915_private *dev_priv = engine->i915; - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ - ret = gen8_emit_flush_coherentl3_wa(engine, batch, index); - if (ret < 0) - return ret; - index = ret; + batch = gen8_emit_flush_coherentl3_wa(engine, batch); /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE( - GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE)); - wa_ctx_emit(batch, index, MI_NOOP); + *batch++ = MI_LOAD_REGISTER_IMM(1); + *batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2); + *batch++ = _MASKED_BIT_DISABLE( + GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE); + *batch++ = MI_NOOP; /* WaClearSlmSpaceAtContextSwitch:kbl */ /* Actual scratch location is at 128 bytes offset */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { - u32 scratch_addr = - i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - wa_ctx_emit(batch, index, scratch_addr); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) { + *batch++ = GFX_OP_PIPE_CONTROL(6); + *batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE; + *batch++ = i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; } /* WaMediaPoolStateCmdInWABB:bxt,glk */ @@ -1109,41 +1038,37 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, * possible configurations, to avoid duplication they are * not shown here again. */ - u32 eu_pool_config = 0x00777000; - wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_STATE); - wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_ENABLE); - wa_ctx_emit(batch, index, eu_pool_config); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + *batch++ = GEN9_MEDIA_POOL_STATE; + *batch++ = GEN9_MEDIA_POOL_ENABLE; + *batch++ = 0x00777000; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; } /* Pad to end of cacheline */ - while (index % CACHELINE_DWORDS) - wa_ctx_emit(batch, index, MI_NOOP); + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; - return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); + return batch; } -static int gen9_init_perctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); + *batch++ = MI_BATCH_BUFFER_END; - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); - - return wa_ctx_end(wa_ctx, *offset = index, 1); + return batch; } -static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) +#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) + +static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) { struct drm_i915_gem_object *obj; struct i915_vma *vma; int err; - obj = i915_gem_object_create(engine->i915, PAGE_ALIGN(size)); + obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -1165,78 +1090,70 @@ static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) return err; } -static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine) +static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) { i915_vma_unpin_and_release(&engine->wa_ctx.vma); } +typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); + static int intel_init_workaround_bb(struct intel_engine_cs *engine) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - uint32_t *batch; - uint32_t offset; + struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx, + &wa_ctx->per_ctx }; + wa_bb_func_t wa_bb_fn[2]; struct page *page; + void *batch, *batch_ptr; + unsigned int i; int ret; - WARN_ON(engine->id != RCS); + if (WARN_ON(engine->id != RCS || !engine->scratch)) + return -EINVAL; - /* update this when WA for higher Gen are added */ - if (INTEL_GEN(engine->i915) > 9) { - DRM_ERROR("WA batch buffer is not initialized for Gen%d\n", - INTEL_GEN(engine->i915)); + switch (INTEL_GEN(engine->i915)) { + case 9: + wa_bb_fn[0] = gen9_init_indirectctx_bb; + wa_bb_fn[1] = gen9_init_perctx_bb; + break; + case 8: + wa_bb_fn[0] = gen8_init_indirectctx_bb; + wa_bb_fn[1] = gen8_init_perctx_bb; + break; + default: + MISSING_CASE(INTEL_GEN(engine->i915)); return 0; } - /* some WA perform writes to scratch page, ensure it is valid */ - if (!engine->scratch) { - DRM_ERROR("scratch page not allocated for %s\n", engine->name); - return -EINVAL; - } - - ret = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE); + ret = lrc_setup_wa_ctx(engine); if (ret) { DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret); return ret; } page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0); - batch = kmap_atomic(page); - offset = 0; + batch = batch_ptr = kmap_atomic(page); - if (IS_GEN8(engine->i915)) { - ret = gen8_init_indirectctx_bb(engine, - &wa_ctx->indirect_ctx, - batch, - &offset); - if (ret) - goto out; - - ret = gen8_init_perctx_bb(engine, - &wa_ctx->per_ctx, - batch, - &offset); - if (ret) - goto out; - } else if (IS_GEN9(engine->i915)) { - ret = gen9_init_indirectctx_bb(engine, - &wa_ctx->indirect_ctx, - batch, - &offset); - if (ret) - goto out; - - ret = gen9_init_perctx_bb(engine, - &wa_ctx->per_ctx, - batch, - &offset); - if (ret) - goto out; + /* + * Emit the two workaround batch buffers, recording the offset from the + * start of the workaround batch buffer object for each and their + * respective sizes. + */ + for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { + wa_bb[i]->offset = batch_ptr - batch; + if (WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, CACHELINE_BYTES))) { + ret = -EINVAL; + break; + } + batch_ptr = wa_bb_fn[i](engine, batch_ptr); + wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); } -out: + BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE); + kunmap_atomic(batch); if (ret) - lrc_destroy_wa_ctx_obj(engine); + lrc_destroy_wa_ctx(engine); return ret; } @@ -1685,7 +1602,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); - lrc_destroy_wa_ctx_obj(engine); + lrc_destroy_wa_ctx(engine); engine->i915 = NULL; dev_priv->engine[engine->id] = NULL; kfree(engine); @@ -1971,15 +1888,14 @@ static void execlists_init_reg_state(u32 *reg_state, u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); reg_state[CTX_RCS_INDIRECT_CTX+1] = - (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) | - (wa_ctx->indirect_ctx.size / CACHELINE_DWORDS); + (ggtt_offset + wa_ctx->indirect_ctx.offset) | + (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = intel_lr_indirect_ctx_offset(engine) << 6; reg_state[CTX_BB_PER_CTX_PTR+1] = - (ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) | - 0x01; + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } } reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; From 9f235dfa495856ed7b264ee08920601df57333da Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Feb 2017 12:23:25 +0000 Subject: [PATCH 0269/1299] drm/i915: Consolidate gen8_emit_pipe_control We have a few open coded instances in the execlists code and an almost suitable helper in intel_ringbuf.c We can consolidate to a single helper if we change the existing helper to emit directly to ring buffer memory and move the space reservation outside it. v2: Drop memcpy for memset. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170216122325.31391-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 77 +++++++++---------------- drivers/gpu/drm/i915/intel_ringbuffer.c | 45 +++++---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++++ 3 files changed, 52 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5154661a38cf..afcb1f189162 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -918,12 +918,10 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; - *batch++ = GFX_OP_PIPE_CONTROL(6); - *batch++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_DC_FLUSH_ENABLE; - *batch++ = 0; - *batch++ = 0; - *batch++ = 0; - *batch++ = 0; + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); @@ -957,15 +955,15 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) if (IS_BROADWELL(engine->i915)) batch = gen8_emit_flush_coherentl3_wa(engine, batch); - *batch++ = GFX_OP_PIPE_CONTROL(6); - *batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE; /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ /* Actual scratch location is at 128 bytes offset */ - *batch++ = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; - *batch++ = 0; - *batch++ = 0; - *batch++ = 0; + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES); /* Pad to end of cacheline */ while ((unsigned long)batch % CACHELINE_BYTES) @@ -1013,14 +1011,13 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) /* WaClearSlmSpaceAtContextSwitch:kbl */ /* Actual scratch location is at 128 bytes offset */ if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) { - *batch++ = GFX_OP_PIPE_CONTROL(6); - *batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE; - *batch++ = i915_ggtt_offset(engine->scratch) + - 2 * CACHELINE_BYTES; - *batch++ = 0; - *batch++ = 0; - *batch++ = 0; + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES); } /* WaMediaPoolStateCmdInWABB:bxt,glk */ @@ -1456,39 +1453,17 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, if (IS_ERR(cs)) return PTR_ERR(cs); - if (vf_flush_wa) { - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - } + if (vf_flush_wa) + cs = gen8_emit_pipe_control(cs, 0, 0); - if (dc_flush_wa) { - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = PIPE_CONTROL_DC_FLUSH_ENABLE; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - } + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = flags; - *cs++ = scratch_addr; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); - if (dc_flush_wa) { - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = PIPE_CONTROL_CS_STALL; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - } + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); intel_ring_advance(request, cs); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 629fe6584e61..d56f384938f7 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -334,35 +334,16 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) } static int -gen8_emit_pipe_control(struct drm_i915_gem_request *req, - u32 flags, u32 scratch_addr) +gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { + u32 flags; u32 *cs; - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = flags; - *cs++ = scratch_addr; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - intel_ring_advance(req, cs); - - return 0; -} - -static int -gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) -{ - u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; - int ret; - - flags |= PIPE_CONTROL_CS_STALL; + flags = PIPE_CONTROL_CS_STALL; if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; @@ -381,15 +362,19 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ - ret = gen8_emit_pipe_control(req, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD, - 0); - if (ret) - return ret; + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD, + 0); } - return gen8_emit_pipe_control(req, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, + i915_ggtt_offset(req->engine->scratch) + + 2 * CACHELINE_BYTES); + + intel_ring_advance(req, cs); + + return 0; } static void ring_setup_phys_status_page(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 1d9fd4e42622..4ca5308e12bb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -631,4 +631,15 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915); +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +{ + memset(batch, 0, 6 * sizeof(u32)); + + batch[0] = GFX_OP_PIPE_CONTROL(6); + batch[1] = flags; + batch[2] = offset; + + return batch + 6; +} + #endif /* _INTEL_RINGBUFFER_H_ */ From 8d371db4b0a7aa919df9be2be430187fe3e70399 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 17 Feb 2017 14:06:28 +0200 Subject: [PATCH 0270/1299] drm/i915/glk: Load the degamma LUT even in legacy gamma mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Geminilake, the degamma table is enabled or disabled by the pipe CSC enable bit, so its active even when running in the legacy gamma mode. So always set sane values for that table, since the default value is all zeroes. This fixes blank screens after a suspend/resume cycle while legacy gamma is in use. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170217120630.6143-2-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_color.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 0627eee1cb64..b9e5266d933b 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -483,12 +483,13 @@ static void glk_load_luts(struct drm_crtc_state *state) struct intel_crtc_state *intel_state = to_intel_crtc_state(state); enum pipe pipe = to_intel_crtc(crtc)->pipe; + glk_load_degamma_lut(state); + if (crtc_state_is_legacy(state)) { haswell_load_luts(state); return; } - glk_load_degamma_lut(state); bdw_load_gamma_lut(state, 0); intel_state->gamma_mode = GAMMA_MODE_MODE_10BIT; From 3bb56da78153150d72c67ceb8b7df49f36ae269c Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 17 Feb 2017 14:06:29 +0200 Subject: [PATCH 0271/1299] drm/i915/glk: Enable pipe CSC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the pre-csc degamma table is set up correctly in Geminilake, pipe CSC can be enabled without causing a black screen. v2: Rebase. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170217120630.6143-3-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 1 + drivers/gpu/drm/i915/intel_sprite.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5443ba8430da..b60373812d34 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3327,6 +3327,7 @@ static void skylake_update_primary_plane(struct drm_plane *plane, if (IS_GEMINILAKE(dev_priv)) { I915_WRITE(PLANE_COLOR_CTL(pipe, plane_id), PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PIPE_CSC_ENABLE | PLANE_COLOR_PLANE_GAMMA_DISABLE); } else { plane_ctl |= diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index b16a29591803..27e0752d1578 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -224,6 +224,7 @@ skl_update_plane(struct drm_plane *drm_plane, if (IS_GEMINILAKE(dev_priv)) { I915_WRITE(PLANE_COLOR_CTL(pipe, plane_id), PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PIPE_CSC_ENABLE | PLANE_COLOR_PLANE_GAMMA_DISABLE); } else { plane_ctl |= From 2246bea6cf773552106d98ab31ba7952c4ddb3f9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 15:13:00 +0000 Subject: [PATCH 0272/1299] drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease When the timer expires for checking on interrupt processing, check to see if any interrupts arrived within the last time period. If real interrupts are still being delivered, we can be reassured that we haven't missed the final interrupt as the waiter will still be woken. Only once all activity ceases, do we have to worry about the waiter never being woken and so need to install a timer to kick the waiter for a slow arrival of a seqno. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170217151304.16665-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 1 + drivers/gpu/drm/i915/intel_breadcrumbs.c | 22 +++++++++------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0d8ebd147da6..0657ac44a582 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1033,6 +1033,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { + atomic_inc(&engine->irq_count); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); if (intel_engine_wakeup(engine)) trace_i915_gem_request_notify(engine); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 74cb7b91b5db..4395b177493e 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -26,6 +26,11 @@ #include "i915_drv.h" +static unsigned long wait_timeout(void) +{ + return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); +} + static void intel_breadcrumbs_hangcheck(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; @@ -34,8 +39,9 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) if (!b->irq_enabled) return; - if (time_before(jiffies, b->timeout)) { - mod_timer(&b->hangcheck, b->timeout); + if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) { + b->hangcheck_interrupts = atomic_read(&engine->irq_count); + mod_timer(&b->hangcheck, wait_timeout()); return; } @@ -55,11 +61,6 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) i915_queue_hangcheck(engine->i915); } -static unsigned long wait_timeout(void) -{ - return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); -} - static void intel_breadcrumbs_fake_irq(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; @@ -140,8 +141,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) i915_queue_hangcheck(i915); } else { /* Ensure we never sleep indefinitely */ - GEM_BUG_ON(!time_after(b->timeout, jiffies)); - mod_timer(&b->hangcheck, b->timeout); + mod_timer(&b->hangcheck, wait_timeout()); } } @@ -258,7 +258,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, GEM_BUG_ON(!next && !first); if (next && next != &wait->node) { GEM_BUG_ON(first); - b->timeout = wait_timeout(); b->first_wait = to_wait(next); rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); /* As there is a delay between reading the current @@ -286,7 +285,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, if (first) { GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); - b->timeout = wait_timeout(); b->first_wait = wait; rcu_assign_pointer(b->irq_seqno_bh, wait->tsk); /* After assigning ourselves as the new bottom-half, we must @@ -396,7 +394,6 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, * the interrupt, or if we have to handle an * exception rather than a seqno completion. */ - b->timeout = wait_timeout(); b->first_wait = to_wait(next); rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); if (b->first_wait->seqno != wait->seqno) @@ -627,7 +624,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) __intel_breadcrumbs_disable_irq(b); if (intel_engine_has_waiter(engine)) { - b->timeout = wait_timeout(); __intel_breadcrumbs_enable_irq(b); if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) wake_up_process(b->first_wait->tsk); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4ca5308e12bb..4091c97be6ec 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -213,6 +213,7 @@ struct intel_engine_cs { struct intel_render_state *render_state; + atomic_t irq_count; unsigned long irq_posted; #define ENGINE_IRQ_BREADCRUMB 0 #define ENGINE_IRQ_EXECLIST 1 @@ -245,7 +246,7 @@ struct intel_engine_cs { struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ - unsigned long timeout; + unsigned int hangcheck_interrupts; bool irq_enabled : 1; bool rpm_wakelock : 1; From c33ed067d13b7fae9e19ef1a9e302b8fc90c43d8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 15:13:01 +0000 Subject: [PATCH 0273/1299] drm/i915: Break i915_spin_request() if we see an interrupt If an interrupt has been posted, and we were spinning on the active seqno waiting for it to advance but it did not, then we can expect that it will not see its advance in the immediate future and should call into the irq-seqno barrier. We can stop spinning at this point, and leave the difficulty of handling the coherency to the caller. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170217151304.16665-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 2f6cfa47dc61..a5fac40d2a4f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -963,7 +963,8 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) bool __i915_spin_request(const struct drm_i915_gem_request *req, int state, unsigned long timeout_us) { - unsigned int cpu; + struct intel_engine_cs *engine = req->engine; + unsigned int irq, cpu; /* When waiting for high frequency requests, e.g. during synchronous * rendering split between the CPU and GPU, the finite amount of time @@ -975,11 +976,20 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, * takes to sleep on a request, on the order of a microsecond. */ + irq = atomic_read(&engine->irq_count); timeout_us += local_clock_us(&cpu); do { if (__i915_gem_request_completed(req)) return true; + /* Seqno are meant to be ordered *before* the interrupt. If + * we see an interrupt without a corresponding seqno advance, + * assume we won't see one in the near future but require + * the engine->seqno_barrier() to fixup coherency. + */ + if (atomic_read(&engine->irq_count) != irq) + break; + if (signal_pending_state(state, current)) break; From 8998567b51141f79309d1267640c919dfd23d3a4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 15:13:02 +0000 Subject: [PATCH 0274/1299] drm/i915: Defer declaration of missed-interrupt until the waiter is asleep If the waiter was currently running, assume it hasn't had a chance to process the pending interrupt (e.g, low priority task on a loaded system) and wait until it sleeps before declaring a missed interrupt. References: https://bugs.freedesktop.org/show_bug.cgi?id=99816 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170217151304.16665-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 4395b177493e..f78b9baf61b6 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -45,6 +45,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) return; } + /* If the waiter was currently running, assume it hasn't had a chance + * to process the pending interrupt (e.g, low priority task on a loaded + * system) and wait until it sleeps before declaring a missed interrupt. + */ + if (!intel_engine_wakeup(engine)) { + mod_timer(&b->hangcheck, wait_timeout()); + return; + } + DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); From 6ef98ea0dacd8bee62d6823b1dea80269137c240 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 15:13:03 +0000 Subject: [PATCH 0275/1299] drm/i915: Only start with the fake-irq timer if interrupts are dead As a backup to waiting on a user-interrupt from the GPU, we use a heavy and frequent timer to wake up the waiting process should we detect an inconsistency whilst waiting. After seeing a "missed interrupt", the next time we wait, we restart the heavy timer. This patch is more reluctant to restart the timer and will only do so if we have not see any interrupts since when we started the fake irq timer. If we are seeing interrupts, then the waiters are being woken normally and we had an incoherency that caused to miss last time - that is unlikely to reoccur and so taking the risk of stalling again seems pragmatic. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170217151304.16665-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index f78b9baf61b6..5932e2dc0c6f 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -107,6 +107,23 @@ static void irq_disable(struct intel_engine_cs *engine) spin_unlock(&engine->i915->irq_lock); } +static bool use_fake_irq(const struct intel_breadcrumbs *b) +{ + const struct intel_engine_cs *engine = + container_of(b, struct intel_engine_cs, breadcrumbs); + + if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) + return false; + + /* Only start with the heavy weight fake irq timer if we have not + * seen any interrupts since enabling it the first time. If the + * interrupts are still arriving, it means we made a mistake in our + * engine->seqno_barrier(), a timing error that should be transient + * and unlikely to reoccur. + */ + return atomic_read(&engine->irq_count) == b->hangcheck_interrupts; +} + static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = @@ -144,8 +161,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) b->irq_enabled = true; } - if (!b->irq_enabled || - test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { + if (!b->irq_enabled || use_fake_irq(b)) { mod_timer(&b->fake_irq, jiffies + 1); i915_queue_hangcheck(i915); } else { From e54ca97747770a6b08413ae9afa776c0d8c4f6c3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 15:13:04 +0000 Subject: [PATCH 0276/1299] drm/i915: Remove completed fences after a wait If we wait upon the full (i.e. all shared fences, or upon an exclusive fence) reservation object successfully, we know that all fences beneath it have been signaled, so long as no new fences were added whilst we slept. If the reservation_object remains the same, as detected by its seqcount, we can then reap all the fences upon completion. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170217151304.16665-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5f7b8c88eb7e..d93032875f28 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -427,7 +427,9 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, long timeout, struct intel_rps_client *rps) { + unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; + bool prune_fences = false; if (flags & I915_WAIT_ALL) { struct dma_fence **shared; @@ -452,15 +454,26 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, for (; i < count; i++) dma_fence_put(shared[i]); kfree(shared); + + prune_fences = count && timeout >= 0; } else { excl = reservation_object_get_excl_rcu(resv); } - if (excl && timeout >= 0) + if (excl && timeout >= 0) { timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); + prune_fences = timeout >= 0; + } dma_fence_put(excl); + if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { + reservation_object_lock(resv, NULL); + if (!__read_seqcount_retry(&resv->seq, seq)) + reservation_object_add_excl_fence(resv, NULL); + reservation_object_unlock(resv); + } + return timeout; } From f4a791819ed00a749a90387aa139706a507aa690 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 16 Feb 2017 17:38:35 -0800 Subject: [PATCH 0277/1299] drm/i915: DMC 1.03 for Geminilake There is a new version of DMC available for Geminilake. It's release notes only mention: - Enhancement in the FW to restore the PG2 state v2: Fixed the platform name on commit message. Noticed by Jani S. v3: cook on top of drm-tip without depending on kbl one so CI can check. v4: make v3 on top of v2. Cc: David Weinehall Cc: Jani Saarinen Cc: Ander Conselvan de Oliveira Signed-off-by: Rodrigo Vivi Reviewed-by: Ander Conselvan de Oliveira Signed-off-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1487295515-15396-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_csr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 9dcc434d3b74..14659c7e2858 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -34,9 +34,9 @@ * low-power state and comes back to normal. */ -#define I915_CSR_GLK "i915/glk_dmc_ver1_01.bin" +#define I915_CSR_GLK "i915/glk_dmc_ver1_03.bin" MODULE_FIRMWARE(I915_CSR_GLK); -#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) +#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 3) #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" MODULE_FIRMWARE(I915_CSR_KBL); From 5a55b527af43423fea51e9927185a1a6e9c80ee7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 14:14:55 +0000 Subject: [PATCH 0278/1299] drm/i915: Only apply legacy PDE overflow detection to 3lvl machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent the overflow check from firing on machines with the full 4lvl page tables, that are not restricted to GEN8_LEGACY_PDES. v2: Also fix the off-by-one in the compare Fixes: 894ccebee2b0 ("drm/i915: Micro-optimise gen8_ppgtt_insert_entries()") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170217141455.19877-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 47a38272f54c..a5162cbc0ea0 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -854,7 +854,8 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, break; } - GEM_BUG_ON(pdpe > GEN8_LEGACY_PDPES); + GEM_BUG_ON(!i915_vm_is_48bit(&ppgtt->base) && + pdpe >= GEN8_LEGACY_PDPES); pd = pdp->page_directory[pdpe]; pde = 0; } From 48cb91203e1811e24532ab9204d45f645b69a0e7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 18 Feb 2017 15:00:50 +0000 Subject: [PATCH 0279/1299] drm/i915: Remove unneeded struct_mutex around rpm We don't need struct_mutex for acquiring an rpm wakeref, and do not need to serialise those register read (it's the wrong mutex for those registers in any case). Begone! Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170218150050.10414-1-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/i915_debugfs.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 11f02fb1f335..404394ef4b14 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1110,7 +1110,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; int ret = 0; intel_runtime_pm_get(dev_priv); @@ -1173,10 +1172,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) } /* RPSTAT1 is in the GT power well */ - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - goto out; - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); reqf = I915_READ(GEN6_RPNSWREQ); @@ -1211,7 +1206,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) cagf = intel_gpu_freq(dev_priv, cagf); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - mutex_unlock(&dev->struct_mutex); if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { pm_ier = I915_READ(GEN6_PMIER); @@ -1301,7 +1295,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq); seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq); -out: intel_runtime_pm_put(dev_priv); return ret; } From 23f4a287fc97d94b6110ee0e17ec92670cc8de89 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 18 Feb 2017 11:27:08 +0000 Subject: [PATCH 0280/1299] drm/i915: Prevent divide-by-zero in debugfs/i915_rps_boost_info Either by chance, or by misread, the current evaluation interval may be zero. If that is the case, don't divide by it! Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170218112708.24504-1-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 404394ef4b14..768461f7c7c6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2356,10 +2356,10 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", rps_power_to_str(dev_priv->rps.power)); seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", - 100 * rpup / rpupei, + rpup && rpupei ? 100 * rpup / rpupei : 0, dev_priv->rps.up_threshold); seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", - 100 * rpdown / rpdownei, + rpdown && rpdownei ? 100 * rpdown / rpdownei : 0, dev_priv->rps.down_threshold); } else { seq_puts(m, "\nRPS Autotuning inactive\n"); From 784f2f1a9da22b0ab00c1b364ee2c10573f5c248 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 20 Feb 2017 10:46:57 +0000 Subject: [PATCH 0281/1299] drm/i915: Fix typo in semaphore debug message Pronounces less rude when fixed. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170220104657.5237-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c992d406ae74..747cd5dfb910 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -999,7 +999,7 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores); - DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores)); + DRM_DEBUG_DRIVER("use GPU semaphores? %s\n", yesno(i915.semaphores)); } /** From cfd1c48805208b1f1d5f4df153a15fa4a280eb54 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Feb 2017 09:47:07 +0000 Subject: [PATCH 0282/1299] drm/i915: Move the common RPS warnings to intel_set_rps() Instead of having each back-end provide identical guards, just have a singular set in intel_set_rps() to verify that the caller is obeying the rules. Signed-off-by: Chris Wilson Reviewed-by: Radoslaw Szwichtenberg Link: http://patchwork.freedesktop.org/patch/msgid/20170220094713.22874-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fe243c65de1a..e1878cb5d569 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4914,10 +4914,6 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq); - WARN_ON(val < dev_priv->rps.min_freq); - /* min/max delay may still have been modified so be sure to * write the limits value. */ @@ -4955,10 +4951,6 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) { int err; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq); - WARN_ON(val < dev_priv->rps.min_freq); - if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1), "Odd GPU freq value\n")) val &= ~1; @@ -5109,6 +5101,10 @@ int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { int err; + lockdep_assert_held(&dev_priv->rps.hw_lock); + GEM_BUG_ON(val > dev_priv->rps.max_freq); + GEM_BUG_ON(val < dev_priv->rps.min_freq); + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) err = valleyview_set_rps(dev_priv, val); else From 76e4e4b532bc9f289039c569063ec4b2e0ef2cc4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Feb 2017 09:47:08 +0000 Subject: [PATCH 0283/1299] drm/i915: Store the requested frequency whilst RPS is disabled If intel_set_rps() is called whilst the hw is disabled, just store the requested frequency (from the user) for application when we wake the hw up. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170220094713.22874-2-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/intel_pm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e1878cb5d569..af11c4090c07 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5105,6 +5105,11 @@ int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) GEM_BUG_ON(val > dev_priv->rps.max_freq); GEM_BUG_ON(val < dev_priv->rps.min_freq); + if (!dev_priv->rps.enabled) { + dev_priv->rps.cur_freq = val; + return 0; + } + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) err = valleyview_set_rps(dev_priv, val); else From e18b9431e46ac0deada6694e047cf80c043a26c0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Feb 2017 09:47:09 +0000 Subject: [PATCH 0284/1299] drm/i915: Remove unrequired POSTING_READ from gen6_set_rps() The uncached mmio is sufficient to queue the mmio writes without raising forcewake. The forced flush along with acquiring forcewake from the posting read is not required for adjusting the RPS frequency. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170220094713.22874-3-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/intel_pm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index af11c4090c07..169c4908ad5b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4939,8 +4939,6 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - POSTING_READ(GEN6_RPNSWREQ); - dev_priv->rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); From 3c1b38e63018ea717148a0a6817eec27b020212d Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 17 Feb 2017 17:39:42 +0200 Subject: [PATCH 0285/1299] drm/i915: Remove redundant toggling from the power well sync_hw hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doing an explicit enable/disable in the power well sync_hw hook based on the power well's reference count is redundant, since by the time these hooks are called all the power wells are enabled and have a reference. So remove the redundant toggling. This is needed by a follow-up patchset that adds power wells which we can't enable/disable during power domain initialization and so want to preserve their state until modeset init time. Cc: Ander Conselvan de Oliveira Cc: David Weinehall Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1487345986-26511-2-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 52 +++++-------------------- 1 file changed, 10 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 879567987201..0f64bc12f308 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -847,8 +847,6 @@ static void skl_set_power_well(struct drm_i915_private *dev_priv, static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - hsw_set_power_well(dev_priv, power_well, power_well->count > 0); - /* * We're taking over the BIOS, so clear any requests made by it since * the driver is in charge now. @@ -881,8 +879,6 @@ static bool skl_power_well_enabled(struct drm_i915_private *dev_priv, static void skl_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - skl_set_power_well(dev_priv, power_well, power_well->count > 0); - /* Clear any request made by BIOS as driver is taking over */ I915_WRITE(HSW_PWR_WELL_BIOS, 0); } @@ -917,16 +913,6 @@ static bool bxt_dpio_cmn_power_well_enabled(struct drm_i915_private *dev_priv, return bxt_ddi_phy_is_enabled(dev_priv, power_well->data); } -static void bxt_dpio_cmn_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - if (power_well->count > 0) - bxt_dpio_cmn_power_well_enable(dev_priv, power_well); - else - bxt_dpio_cmn_power_well_disable(dev_priv, power_well); -} - - static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv) { struct i915_power_well *power_well; @@ -989,13 +975,9 @@ static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv, gen9_enable_dc5(dev_priv); } -static void gen9_dc_off_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) { - if (power_well->count > 0) - gen9_dc_off_power_well_enable(dev_priv, power_well); - else - gen9_dc_off_power_well_disable(dev_priv, power_well); } static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv, @@ -1045,12 +1027,6 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->rps.hw_lock); } -static void vlv_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - vlv_set_power_well(dev_priv, power_well, power_well->count > 0); -} - static void vlv_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -1661,14 +1637,6 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->rps.hw_lock); } -static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON_ONCE(power_well->id != PIPE_A); - - chv_set_pipe_power_well(dev_priv, power_well, power_well->count > 0); -} - static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -1914,21 +1882,21 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { - .sync_hw = i9xx_always_on_power_well_noop, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = i9xx_always_on_power_well_noop, .disable = i9xx_always_on_power_well_noop, .is_enabled = i9xx_always_on_power_well_enabled, }; static const struct i915_power_well_ops chv_pipe_power_well_ops = { - .sync_hw = chv_pipe_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = chv_pipe_power_well_enable, .disable = chv_pipe_power_well_disable, .is_enabled = chv_pipe_power_well_enabled, }; static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = chv_dpio_cmn_power_well_enable, .disable = chv_dpio_cmn_power_well_disable, .is_enabled = vlv_power_well_enabled, @@ -1958,14 +1926,14 @@ static const struct i915_power_well_ops skl_power_well_ops = { }; static const struct i915_power_well_ops gen9_dc_off_power_well_ops = { - .sync_hw = gen9_dc_off_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = gen9_dc_off_power_well_enable, .disable = gen9_dc_off_power_well_disable, .is_enabled = gen9_dc_off_power_well_enabled, }; static const struct i915_power_well_ops bxt_dpio_cmn_power_well_ops = { - .sync_hw = bxt_dpio_cmn_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = bxt_dpio_cmn_power_well_enable, .disable = bxt_dpio_cmn_power_well_disable, .is_enabled = bxt_dpio_cmn_power_well_enabled, @@ -2000,21 +1968,21 @@ static struct i915_power_well bdw_power_wells[] = { }; static const struct i915_power_well_ops vlv_display_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_display_power_well_enable, .disable = vlv_display_power_well_disable, .is_enabled = vlv_power_well_enabled, }; static const struct i915_power_well_ops vlv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_dpio_cmn_power_well_enable, .disable = vlv_dpio_cmn_power_well_disable, .is_enabled = vlv_power_well_enabled, }; static const struct i915_power_well_ops vlv_dpio_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_power_well_enable, .disable = vlv_power_well_disable, .is_enabled = vlv_power_well_enabled, From 75ccb2ecb8fc8cfcc676afd24ccad4777f130d58 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 17 Feb 2017 17:39:43 +0200 Subject: [PATCH 0286/1299] drm/i915: Call the sync_hw hook for power wells without a domain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far the sync_hw hook wasn't called for power wells not belonging to any power domain, that is the GEN9 PW1 and MISC_IO power wells. This wasn't a problem so far since the goal of the sync_hw hook - to clear the corresponding BIOS request bit - was guaranteed by clearing the whole BIOS request register elsewhere. This will change with the next patch, so fix up the inconsistency. While at it clean up the power well iterator helpers and move them to the rest of iterators. v2: - Clean up the power well iterator helpers. (Ander) - Move the helpers to i915_drv.h. Cc: Ander Conselvan de Oliveira Cc: David Weinehall Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1487345986-26511-3-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 20 ++++++++++++++++++ drivers/gpu/drm/i915/intel_runtime_pm.c | 28 ++++--------------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2a7b277a86de..1fe4010d4f2b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -500,6 +500,26 @@ struct i915_hotplug { for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ for_each_if (BIT_ULL(domain) & (mask)) +#define for_each_power_well(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells; \ + (__power_well) - (__dev_priv)->power_domains.power_wells < \ + (__dev_priv)->power_domains.power_well_count; \ + (__power_well)++) + +#define for_each_power_well_rev(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells + \ + (__dev_priv)->power_domains.power_well_count - 1; \ + (__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \ + (__power_well)--) + +#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well(__dev_priv, __power_well) \ + for_each_if ((__power_well)->domains & (__domain_mask)) + +#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well_rev(__dev_priv, __power_well) \ + for_each_if ((__power_well)->domains & (__domain_mask)) + struct drm_i915_private; struct i915_mm_struct; struct i915_mmu_object; diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 0f64bc12f308..9bbbdbc1c843 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -49,19 +49,6 @@ * present for a given platform. */ -#define for_each_power_well(i, power_well, domain_mask, power_domains) \ - for (i = 0; \ - i < (power_domains)->power_well_count && \ - ((power_well) = &(power_domains)->power_wells[i]); \ - i++) \ - for_each_if ((power_well)->domains & (domain_mask)) - -#define for_each_power_well_rev(i, power_well, domain_mask, power_domains) \ - for (i = (power_domains)->power_well_count - 1; \ - i >= 0 && ((power_well) = &(power_domains)->power_wells[i]);\ - i--) \ - for_each_if ((power_well)->domains & (domain_mask)) - bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, int power_well_id); @@ -198,19 +185,15 @@ static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - struct i915_power_domains *power_domains; struct i915_power_well *power_well; bool is_enabled; - int i; if (dev_priv->pm.suspended) return false; - power_domains = &dev_priv->power_domains; - is_enabled = true; - for_each_power_well_rev(i, power_well, BIT_ULL(domain), power_domains) { + for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain)) { if (power_well->always_on) continue; @@ -1663,9 +1646,8 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *power_well; - int i; - for_each_power_well(i, power_well, BIT_ULL(domain), power_domains) + for_each_power_domain_well(dev_priv, power_well, BIT_ULL(domain)) intel_power_well_get(dev_priv, power_well); power_domains->domain_use_count[domain]++; @@ -1749,7 +1731,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains; struct i915_power_well *power_well; - int i; power_domains = &dev_priv->power_domains; @@ -1760,7 +1741,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, intel_display_power_domain_str(domain)); power_domains->domain_use_count[domain]--; - for_each_power_well_rev(i, power_well, BIT_ULL(domain), power_domains) + for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain)) intel_power_well_put(dev_priv, power_well); mutex_unlock(&power_domains->lock); @@ -2424,10 +2405,9 @@ static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *power_well; - int i; mutex_lock(&power_domains->lock); - for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { + for_each_power_well(dev_priv, power_well) { power_well->ops->sync_hw(dev_priv, power_well); power_well->hw_enabled = power_well->ops->is_enabled(dev_priv, power_well); From 14544e134905b4276dc72122ad8c09965bbc7f41 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 17 Feb 2017 17:39:44 +0200 Subject: [PATCH 0287/1299] drm/i915/gen9: Fix clearing of the BIOS power well request register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atm, in the power well sync_hw hook we are clearing all BIOS request bits, not just the one corresponding to the given power well. This could turn off an unrelated power well inadvertently if it didn't have a request bit set in the driver request register. This didn't cause a problem so far, since we enabled all power wells explicitly before clearing the BIOS request register. A follow-up patchset will add power wells that won't get enabled this way, so fix up the inconsistency. Note that this patch only makes the clearing of the BIOS req register more logical. Power wells without a reference would still get disabled by the end of power domain initialization, that is fixed by the next patch. v2: - Clarify in the commit log that this patch doesn't address the case of power wells without a reference. (Ander) Cc: Ander Conselvan de Oliveira Cc: David Weinehall Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1487345986-26511-4-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 9bbbdbc1c843..62c99a96c8e7 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -862,8 +862,13 @@ static bool skl_power_well_enabled(struct drm_i915_private *dev_priv, static void skl_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { + uint32_t mask = SKL_POWER_WELL_REQ(power_well->id); + uint32_t bios_req = I915_READ(HSW_PWR_WELL_BIOS); + /* Clear any request made by BIOS as driver is taking over */ - I915_WRITE(HSW_PWR_WELL_BIOS, 0); + if (bios_req & mask) { + I915_WRITE(HSW_PWR_WELL_BIOS, bios_req & ~mask); + } } static void skl_power_well_enable(struct drm_i915_private *dev_priv, From 16e849145dc2ba9d9fc2027c530fc0a682d5ae61 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 17 Feb 2017 17:39:45 +0200 Subject: [PATCH 0288/1299] drm/i915: Preserve the state of power wells not explicitly enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atm, power wells that BIOS has enabled, but which we don't explicitly enable during power domain initialization would get disabled as we clear the BIOS request bit in the given power well sync_hw hook. To prevent this copy over any set request bits in the BIOS request register to the driver request register and clear the BIOS request bit only afterwards. This doesn't make a difference now, since we enable all power wells during power domain initialization. A follow-up patchset will add power wells for which this isn't true, so fix up the inconsistency. Cc: Ander Conselvan de Oliveira Cc: David Weinehall Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1487345986-26511-5-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 62c99a96c8e7..44d4da3ad7a1 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -830,12 +830,14 @@ static void skl_set_power_well(struct drm_i915_private *dev_priv, static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - /* - * We're taking over the BIOS, so clear any requests made by it since - * the driver is in charge now. - */ - if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) + /* Take over the request bit if set by BIOS. */ + if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) { + if (!(I915_READ(HSW_PWR_WELL_DRIVER) & + HSW_PWR_WELL_ENABLE_REQUEST)) + I915_WRITE(HSW_PWR_WELL_DRIVER, + HSW_PWR_WELL_ENABLE_REQUEST); I915_WRITE(HSW_PWR_WELL_BIOS, 0); + } } static void hsw_power_well_enable(struct drm_i915_private *dev_priv, @@ -865,8 +867,12 @@ static void skl_power_well_sync_hw(struct drm_i915_private *dev_priv, uint32_t mask = SKL_POWER_WELL_REQ(power_well->id); uint32_t bios_req = I915_READ(HSW_PWR_WELL_BIOS); - /* Clear any request made by BIOS as driver is taking over */ + /* Take over the request bit if set by BIOS. */ if (bios_req & mask) { + uint32_t drv_req = I915_READ(HSW_PWR_WELL_DRIVER); + + if (!(drv_req & mask)) + I915_WRITE(HSW_PWR_WELL_DRIVER, drv_req | mask); I915_WRITE(HSW_PWR_WELL_BIOS, bios_req & ~mask); } } From 8d8c386c38692c1183b2c0ecffb84de91b8b32b0 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 17 Feb 2017 17:39:46 +0200 Subject: [PATCH 0289/1299] drm/i915: Add power well SW/HW state verification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verify that the refcount of all power wells match their HW enabled state at the end of modeset HW state readout. Also add documentation on how the reference count for each power well is supposed to be acquired during initialization and HW state readout. Suggested by Ander. Cc: Ander Conselvan de Oliveira Cc: David Weinehall Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1487345986-26511-6-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_display.c | 2 + drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_runtime_pm.c | 85 ++++++++++++++++++++++++- 3 files changed, 87 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b60373812d34..e1e828a8c524 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15540,6 +15540,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev) } intel_display_set_init_power(dev_priv, false); + intel_power_domains_verify_state(dev_priv); + intel_fbc_init_pipe_state(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7e27aef0849b..11f2e4163bd4 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1693,6 +1693,7 @@ int intel_power_domains_init(struct drm_i915_private *); void intel_power_domains_fini(struct drm_i915_private *); void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume); void intel_power_domains_suspend(struct drm_i915_private *dev_priv); +void intel_power_domains_verify_state(struct drm_i915_private *dev_priv); void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume); void bxt_display_core_uninit(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 44d4da3ad7a1..6b52258152b7 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -2683,7 +2683,10 @@ static void vlv_cmnlane_wa(struct drm_i915_private *dev_priv) * @resume: Called from resume code paths or not * * This function initializes the hardware power domain state and enables all - * power domains using intel_display_set_init_power(). + * power wells belonging to the INIT power domain. Power wells in other + * domains (and not in the INIT domain) are referenced or disabled during the + * modeset state HW readout. After that the reference count of each power well + * must match its HW enabled state, see intel_power_domains_verify_state(). */ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) { @@ -2736,6 +2739,86 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv) bxt_display_core_uninit(dev_priv); } +static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + + for_each_power_well(dev_priv, power_well) { + enum intel_display_power_domain domain; + + DRM_DEBUG_DRIVER("%-25s %d\n", + power_well->name, power_well->count); + + for_each_power_domain(domain, power_well->domains) + DRM_DEBUG_DRIVER(" %-23s %d\n", + intel_display_power_domain_str(domain), + power_domains->domain_use_count[domain]); + } +} + +/** + * intel_power_domains_verify_state - verify the HW/SW state for all power wells + * @dev_priv: i915 device instance + * + * Verify if the reference count of each power well matches its HW enabled + * state and the total refcount of the domains it belongs to. This must be + * called after modeset HW state sanitization, which is responsible for + * acquiring reference counts for any power wells in use and disabling the + * ones left on by BIOS but not required by any active output. + */ +void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + bool dump_domain_info; + + mutex_lock(&power_domains->lock); + + dump_domain_info = false; + for_each_power_well(dev_priv, power_well) { + enum intel_display_power_domain domain; + int domains_count; + bool enabled; + + /* + * Power wells not belonging to any domain (like the MISC_IO + * and PW1 power wells) are under FW control, so ignore them, + * since their state can change asynchronously. + */ + if (!power_well->domains) + continue; + + enabled = power_well->ops->is_enabled(dev_priv, power_well); + if ((power_well->count || power_well->always_on) != enabled) + DRM_ERROR("power well %s state mismatch (refcount %d/enabled %d)", + power_well->name, power_well->count, enabled); + + domains_count = 0; + for_each_power_domain(domain, power_well->domains) + domains_count += power_domains->domain_use_count[domain]; + + if (power_well->count != domains_count) { + DRM_ERROR("power well %s refcount/domain refcount mismatch " + "(refcount %d/domains refcount %d)\n", + power_well->name, power_well->count, + domains_count); + dump_domain_info = true; + } + } + + if (dump_domain_info) { + static bool dumped; + + if (!dumped) { + intel_power_domains_dump_info(dev_priv); + dumped = true; + } + } + + mutex_unlock(&power_domains->lock); +} + /** * intel_runtime_pm_get - grab a runtime pm reference * @dev_priv: i915 device instance From 944a36d472be642d0d082d2480fe2b40046602a9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 16:38:33 +0000 Subject: [PATCH 0290/1299] drm/i915: Assert that the request->tail is always qword aligned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware requires that the tail pointer only advance in qword units, so assert that the value we write is aligned to qwords, and similarly enforce this restriction onto the request->tail. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170217163833.731-1-chris@chris-wilson.co.uk Reviewed-by: Michał Winiarski --- drivers/gpu/drm/i915/intel_lrc.c | 4 ++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index afcb1f189162..d6b67be4715d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -325,6 +325,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; + GEM_BUG_ON(!IS_ALIGNED(rq->tail, 8)); reg_state[CTX_RING_TAIL+1] = rq->tail; /* True 32b PPGTT with dynamic page allocation: update PDP @@ -1283,6 +1284,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, /* Reset WaIdleLiteRestore:bdw,skl as well */ request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) @@ -1494,6 +1496,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); gen8_emit_wa_tail(request, cs); } @@ -1521,6 +1524,7 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); gen8_emit_wa_tail(request, cs); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d56f384938f7..f62afffef682 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -784,6 +784,7 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) i915_gem_request_submit(request); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); I915_WRITE_TAIL(request->engine, request->tail); } @@ -795,6 +796,7 @@ static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) *cs++ = MI_USER_INTERRUPT; req->tail = intel_ring_offset(req, cs); + GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); } static const int i9xx_emit_breadcrumb_sz = 4; @@ -833,6 +835,7 @@ static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, *cs++ = MI_NOOP; req->tail = intel_ring_offset(req, cs); + GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); } static const int gen8_render_emit_breadcrumb_sz = 8; From e2989f140e707f31afa5fa617512961b3f692aca Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 Feb 2017 09:17:23 +0000 Subject: [PATCH 0291/1299] drm/i915: Use reservation_object_lock() Replace the calls to ww_mutex_lock(&resv->lock) with the helper reservation_object_lock(resv) and similarly for unlock. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170221091723.6219-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index b3bc119ec1bb..99ceae7855f8 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -122,9 +122,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, if (tmp->base.size >= size) { /* Clear the set of shared fences early */ - ww_mutex_lock(&tmp->resv->lock, NULL); + reservation_object_lock(tmp->resv, NULL); reservation_object_add_excl_fence(tmp->resv, NULL); - ww_mutex_unlock(&tmp->resv->lock); + reservation_object_unlock(tmp->resv); obj = tmp; break; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index da0846fe2ad6..baf9039774d7 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1301,12 +1301,12 @@ static void eb_export_fence(struct drm_i915_gem_object *obj, * handle an error right now. Worst case should be missed * synchronisation leading to rendering corruption. */ - ww_mutex_lock(&resv->lock, NULL); + reservation_object_lock(resv, NULL); if (flags & EXEC_OBJECT_WRITE) reservation_object_add_excl_fence(resv, &req->fence); else if (reservation_object_reserve_shared(resv) == 0) reservation_object_add_shared_fence(resv, &req->fence); - ww_mutex_unlock(&resv->lock); + reservation_object_unlock(resv); } static void From 56e51bf03666728d4eb1d8319215865c8dbf4338 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 09:58:39 +0000 Subject: [PATCH 0292/1299] drm/i915: Tidy execlists_init_reg_state Compact the name of the macro and reg_state variable, and cache some data in local variables to make the function more compact and more readable. v2: Fixup some checkpatch warnings. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170221095839.30525-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 127 ++++++++++++++----------------- 1 file changed, 56 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d6b67be4715d..5553f551a1bf 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -190,7 +190,7 @@ #define CTX_R_PWR_CLK_STATE 0x42 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 -#define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \ +#define CTX_REG(reg_state, pos, reg, val) do { \ (reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \ (reg_state)[(pos)+1] = (val); \ } while (0) @@ -1812,104 +1812,89 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) return indirect_ctx_offset; } -static void execlists_init_reg_state(u32 *reg_state, +static void execlists_init_reg_state(u32 *regs, struct i915_gem_context *ctx, struct intel_engine_cs *engine, struct intel_ring *ring) { struct drm_i915_private *dev_priv = engine->i915; struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt; + u32 base = engine->mmio_base; + bool rcs = engine->id == RCS; + + /* A context is actually a big batch buffer with several + * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The + * values we are setting here are only for the first context restore: + * on a subsequent save, the GPU will recreate this batchbuffer with new + * values (including all the missing MI_LOAD_REGISTER_IMM commands that + * we are not initializing here). + */ + regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | + MI_LRI_FORCE_POSTED; + + CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine), + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + (HAS_RESOURCE_STREAMER(dev_priv) ? + CTX_CTRL_RS_CTX_ENABLE : 0))); + CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); + CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); + CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); + CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), + RING_CTL_SIZE(ring->size) | RING_VALID); + CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); + CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); + CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); + CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); + CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); + CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); + if (rcs) { + CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); + CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0); + CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET, + RING_INDIRECT_CTX_OFFSET(base), 0); - /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM - * commands followed by (reg, value) pairs. The values we are setting here are - * only for the first context restore: on a subsequent save, the GPU will - * recreate this batchbuffer with new values (including all the missing - * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */ - reg_state[CTX_LRI_HEADER_0] = - MI_LOAD_REGISTER_IMM(engine->id == RCS ? 14 : 11) | MI_LRI_FORCE_POSTED; - ASSIGN_CTX_REG(reg_state, CTX_CONTEXT_CONTROL, - RING_CONTEXT_CONTROL(engine), - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | - (HAS_RESOURCE_STREAMER(dev_priv) ? - CTX_CTRL_RS_CTX_ENABLE : 0))); - ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(engine->mmio_base), - 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(engine->mmio_base), - 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START, - RING_START(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, - RING_CTL(engine->mmio_base), - RING_CTL_SIZE(ring->size) | RING_VALID); - ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, - RING_BBADDR_UDW(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, - RING_BBADDR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_BB_STATE, - RING_BBSTATE(engine->mmio_base), - RING_BB_PPGTT); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_U, - RING_SBBADDR_UDW(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_L, - RING_SBBADDR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_STATE, - RING_SBBSTATE(engine->mmio_base), 0); - if (engine->id == RCS) { - ASSIGN_CTX_REG(reg_state, CTX_BB_PER_CTX_PTR, - RING_BB_PER_CTX_PTR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX, - RING_INDIRECT_CTX(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET, - RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0); if (engine->wa_ctx.vma) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - reg_state[CTX_RCS_INDIRECT_CTX+1] = + regs[CTX_RCS_INDIRECT_CTX + 1] = (ggtt_offset + wa_ctx->indirect_ctx.offset) | (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = + regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] = intel_lr_indirect_ctx_offset(engine) << 6; - reg_state[CTX_BB_PER_CTX_PTR+1] = + regs[CTX_BB_PER_CTX_PTR + 1] = (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } } - reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; - ASSIGN_CTX_REG(reg_state, CTX_CTX_TIMESTAMP, - RING_CTX_TIMESTAMP(engine->mmio_base), 0); + + regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; + + CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); /* PDP values well be assigned later if needed */ - ASSIGN_CTX_REG(reg_state, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), - 0); + CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0); + CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0); + CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0); + CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0); + CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0); + CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0); + CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0); + CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0); if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and * other PDP Descriptors are ignored. */ - ASSIGN_CTX_PML4(ppgtt, reg_state); + ASSIGN_CTX_PML4(ppgtt, regs); } - if (engine->id == RCS) { - reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - make_rpcs(dev_priv)); + if (rcs) { + regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); + CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, + make_rpcs(dev_priv)); } } From e235b53019a3d1225601d535795f9a4f7f44587b Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 09:13:43 +0000 Subject: [PATCH 0293/1299] drm/i915/tracepoints: Tidy request event class At the moment only the global seqno is logged which is not set until the request is ready for submission. Add the per-contex seqno and the context hardware id which are both interesting data points. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_trace.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 7a547cdfc381..4454872d487f 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -451,18 +451,23 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_STRUCT__entry( __field(u32, dev) + __field(u32, ctx) __field(u32, ring) __field(u32, seqno) + __field(u32, global) ), TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->ctx = req->ctx->hw_id; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; + __entry->seqno = req->fence.seqno; + __entry->global = req->global_seqno; ), - TP_printk("dev=%u, ring=%u, seqno=%u", - __entry->dev, __entry->ring, __entry->seqno) + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->global) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_add, From 1cce8922df06ca662da9dff817cae7f3aca4276c Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 09:13:44 +0000 Subject: [PATCH 0294/1299] drm/i915/tracepoints: Adjust i915_gem_ring_dispatch Rename it to i915_gem_request_queue and fix the logged info equivalent to the i915_gem_request even class. Also moved it a bit further apart from the i915_gem_request_add tracepoint since they otherwise provide similar information too close in time. v2: Remove sw fence singalling. We will rely on the soon to come GuC scheduling backend to enable that. (Chris Wilson) v3: Log hex with 0x prefix for clarity. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- drivers/gpu/drm/i915/i915_trace.h | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index baf9039774d7..dfed503301a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1493,8 +1493,6 @@ execbuf_submit(struct i915_execbuffer_params *params, if (ret) return ret; - trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); - i915_gem_execbuffer_move_to_active(vmas, params->request); return 0; @@ -1836,6 +1834,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->dispatch_flags = dispatch_flags; params->ctx = ctx; + trace_i915_gem_request_queue(params->request, dispatch_flags); + ret = execbuf_submit(params, args, &eb->vmas); err_request: __i915_add_request(params->request, ret == 0); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 4454872d487f..d9ade8c38f92 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -399,13 +399,14 @@ TRACE_EVENT(i915_gem_ring_sync_to, __entry->seqno) ); -TRACE_EVENT(i915_gem_ring_dispatch, +TRACE_EVENT(i915_gem_request_queue, TP_PROTO(struct drm_i915_gem_request *req, u32 flags), TP_ARGS(req, flags), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) __field(u32, flags) ), @@ -413,13 +414,14 @@ TRACE_EVENT(i915_gem_ring_dispatch, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; + __entry->ctx = req->ctx->hw_id; + __entry->seqno = req->fence.seqno; __entry->flags = flags; - dma_fence_enable_sw_signaling(&req->fence); ), - TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", - __entry->dev, __entry->ring, __entry->seqno, __entry->flags) + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->flags) ); TRACE_EVENT(i915_gem_ring_flush, From 936925029808242d6cd0950736046abde5b89e0e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 11:00:24 +0000 Subject: [PATCH 0295/1299] drm/i915/tracepoints: Tidy i915_gem_request_wait_begin Provide the same information as the other request event classes. v2: Pass in flags so we can properly report the blocking status. (Chris Wilson) v3: Log hex with 0x prefix for clarity. v4: Derive blocking status from flags. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_request.c | 2 +- drivers/gpu/drm/i915/i915_trace.h | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index a5fac40d2a4f..543cef57972b 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1092,7 +1092,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (!timeout) return -ETIME; - trace_i915_gem_request_wait_begin(req); + trace_i915_gem_request_wait_begin(req, flags); if (!i915_sw_fence_done(&req->execute)) { timeout = __i915_request_wait_for_execute(req, flags, timeout); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index d9ade8c38f92..557195b56087 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -508,14 +508,16 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_complete, ); TRACE_EVENT(i915_gem_request_wait_begin, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req), + TP_PROTO(struct drm_i915_gem_request *req, unsigned int flags), + TP_ARGS(req, flags), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) - __field(bool, blocking) + __field(u32, global) + __field(unsigned int, flags) ), /* NB: the blocking information is racy since mutex_is_locked @@ -527,14 +529,16 @@ TRACE_EVENT(i915_gem_request_wait_begin, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; - __entry->blocking = - mutex_is_locked(&req->i915->drm.struct_mutex); + __entry->ctx = req->ctx->hw_id; + __entry->seqno = req->fence.seqno; + __entry->global = req->global_seqno; + __entry->flags = flags; ), - TP_printk("dev=%u, ring=%u, seqno=%u, blocking=%s", - __entry->dev, __entry->ring, - __entry->seqno, __entry->blocking ? "yes (NB)" : "no") + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, blocking=%u, flags=0x%x", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->global, !!(__entry->flags & I915_WAIT_LOCKED), + __entry->flags) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end, From 90aa412d50a94b5d48de8991614877d3242f46b1 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 09:13:46 +0000 Subject: [PATCH 0296/1299] drm/i915/tracepoints: Remove unused i915_gem_request_complete Tracepoint is not used and won't be suitable for its replacement. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_trace.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 557195b56087..85accea70b4d 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -502,11 +502,6 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, TP_ARGS(req) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_complete, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) -); - TRACE_EVENT(i915_gem_request_wait_begin, TP_PROTO(struct drm_i915_gem_request *req, unsigned int flags), TP_ARGS(req, flags), From 354d036fcf70654cff2e2cbdda54a835d219b9d2 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 11:01:42 +0000 Subject: [PATCH 0297/1299] drm/i915/tracepoints: Add request submit and execute tracepoints These new tracepoints are emitted once the request is ready to be submitted to the GPU and once the request is about to be submitted to the GPU, respectively. Former condition triggers as soon as all the fences and dependencies have been resolved, and the latter once the backend is about to submit it to the GPU. New tracepoint are enabled via the new DRM_I915_LOW_LEVEL_TRACEPOINTS Kconfig option which is disabled by default to alleviate the performance impact concerns. v2: Move execute tracepoint to __i915_gem_request_submit. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/Kconfig.debug | 11 +++++++++++ drivers/gpu/drm/i915/i915_gem_request.c | 2 ++ drivers/gpu/drm/i915/i915_trace.h | 24 ++++++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 68ff072f8b76..e091809a9a9e 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -76,3 +76,14 @@ config DRM_I915_SELFTEST Recommended for driver developers only. If in doubt, say "N". + +config DRM_I915_LOW_LEVEL_TRACEPOINTS + bool "Enable low level request tracing events" + depends on DRM_I915 + default n + help + Choose this option to turn on low level request tracing events. + This provides the ability to precisely monitor engine utilisation + and also analyze the request dependency resolving timeline. + + If in doubt, say "N". diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 543cef57972b..12b06e055490 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -426,6 +426,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) spin_unlock(&request->timeline->lock); i915_sw_fence_commit(&request->execute); + trace_i915_gem_request_execute(request); } void i915_gem_request_submit(struct drm_i915_gem_request *request) @@ -449,6 +450,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) switch (state) { case FENCE_COMPLETE: + trace_i915_gem_request_submit(request); request->engine->submit_request(request); break; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 85accea70b4d..e3c0f9e94488 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -477,6 +477,30 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_add, TP_ARGS(req) ); +#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) +DEFINE_EVENT(i915_gem_request, i915_gem_request_submit, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); + +DEFINE_EVENT(i915_gem_request, i915_gem_request_execute, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); +#else +#if !defined(TRACE_HEADER_MULTI_READ) +static inline void +trace_i915_gem_request_submit(struct drm_i915_gem_request *req) +{ +} + +static inline void +trace_i915_gem_request_execute(struct drm_i915_gem_request *req) +{ +} +#endif +#endif + TRACE_EVENT(i915_gem_request_notify, TP_PROTO(struct intel_engine_cs *engine), TP_ARGS(engine), From dffabc8f4ee3fce98821bc212bf3c5e31247e2b9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 09:13:48 +0000 Subject: [PATCH 0298/1299] drm/i915/tracepoints: Rename i915_gem_request_notify i915_gem_ring_notify is more appropriate since we do not have the request information at this point, but it is simply a signal from the engine that some request has been completed. v2: * Always trace and log if there were any waiters. * Rename to intel_engine_notify. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_irq.c | 6 ++++-- drivers/gpu/drm/i915/i915_trace.h | 13 ++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0657ac44a582..312d30e96dc2 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1033,10 +1033,12 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { + bool waiters; + atomic_inc(&engine->irq_count); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - if (intel_engine_wakeup(engine)) - trace_i915_gem_request_notify(engine); + waiters = intel_engine_wakeup(engine); + trace_intel_engine_notify(engine, waiters); } static void vlv_c0_read(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index e3c0f9e94488..ca2facac4bca 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -501,24 +501,27 @@ trace_i915_gem_request_execute(struct drm_i915_gem_request *req) #endif #endif -TRACE_EVENT(i915_gem_request_notify, - TP_PROTO(struct intel_engine_cs *engine), - TP_ARGS(engine), +TRACE_EVENT(intel_engine_notify, + TP_PROTO(struct intel_engine_cs *engine, bool waiters), + TP_ARGS(engine, waiters), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) __field(u32, seqno) + __field(bool, waiters) ), TP_fast_assign( __entry->dev = engine->i915->drm.primary->index; __entry->ring = engine->id; __entry->seqno = intel_engine_get_seqno(engine); + __entry->waiters = waiters; ), - TP_printk("dev=%u, ring=%u, seqno=%u", - __entry->dev, __entry->ring, __entry->seqno) + TP_printk("dev=%u, ring=%u, seqno=%u, waiters=%u", + __entry->dev, __entry->ring, __entry->seqno, + __entry->waiters) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, From d7d96833f277827871f9103def3552fec79cf012 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 11:03:00 +0000 Subject: [PATCH 0299/1299] drm/i915/tracepoints: Add backend level request in and out tracepoints Two new tracepoints placed at the call sites where requests are actually passed to the GPU enable userspace to track engine utilisation. These tracepoints are only enabled when the DRM_I915_LOW_LEVEL_TRACEPOINTS Kconfig option is enabled. v2: Fix compilation with !CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS. v3: Name global seqno consistently across tracepoints. v4: Remove port info from request out tracepoint. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_guc_submission.c | 2 + drivers/gpu/drm/i915/i915_trace.h | 49 ++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 2 + 3 files changed, 53 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 8ced9e26f075..beec88a30347 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -518,6 +518,8 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) if (i915_vma_is_map_and_fenceable(rq->ring->vma)) POSTING_READ_FW(GUC_STATUS); + trace_i915_gem_request_in(rq, 0); + b_ret = guc_ring_doorbell(client); client->submissions[engine_id] += 1; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index ca2facac4bca..0a5f27b34a77 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -487,6 +487,45 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_execute, TP_PROTO(struct drm_i915_gem_request *req), TP_ARGS(req) ); + +DECLARE_EVENT_CLASS(i915_gem_request_hw, + TP_PROTO(struct drm_i915_gem_request *req, + unsigned int port), + TP_ARGS(req, port), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, ring) + __field(u32, seqno) + __field(u32, global_seqno) + __field(u32, ctx) + __field(u32, port) + ), + + TP_fast_assign( + __entry->dev = req->i915->drm.primary->index; + __entry->ring = req->engine->id; + __entry->ctx = req->ctx->hw_id; + __entry->seqno = req->fence.seqno; + __entry->global_seqno = req->global_seqno; + __entry->port = port; + ), + + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", + __entry->dev, __entry->ring, __entry->ctx, + __entry->seqno, __entry->global_seqno, + __entry->port) +); + +DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in, + TP_PROTO(struct drm_i915_gem_request *req, unsigned int port), + TP_ARGS(req, port) +); + +DEFINE_EVENT(i915_gem_request, i915_gem_request_out, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); #else #if !defined(TRACE_HEADER_MULTI_READ) static inline void @@ -498,6 +537,16 @@ static inline void trace_i915_gem_request_execute(struct drm_i915_gem_request *req) { } + +static inline void +trace_i915_gem_request_in(struct drm_i915_gem_request *req, unsigned int port) +{ +} + +static inline void +trace_i915_gem_request_out(struct drm_i915_gem_request *req) +{ +} #endif #endif diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5553f551a1bf..39329d40da46 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -479,6 +479,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) cursor->priotree.priority = INT_MAX; __i915_gem_request_submit(cursor); + trace_i915_gem_request_in(cursor, port - engine->execlist_port); last = cursor; submit = true; } @@ -593,6 +594,7 @@ static void intel_lrc_irq_handler(unsigned long data) execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_OUT); + trace_i915_gem_request_out(port[0].request); i915_gem_request_put(port[0].request); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); From 99c181a0fac084470842133bdeb3f78de3203ba9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 09:13:50 +0000 Subject: [PATCH 0300/1299] drm/i915/tracepoints: Add hw_id to context tracepoints It is useful to provide this info to match the one provided in the request tracepoints. Signed-off-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170221091350.14605-1-tvrtko.ursulin@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_trace.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 0a5f27b34a77..0c2ed1289bfc 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -747,17 +747,19 @@ DECLARE_EVENT_CLASS(i915_context, TP_STRUCT__entry( __field(u32, dev) __field(struct i915_gem_context *, ctx) + __field(u32, hw_id) __field(struct i915_address_space *, vm) ), TP_fast_assign( - __entry->ctx = ctx; - __entry->vm = ctx->ppgtt ? &ctx->ppgtt->base : NULL; __entry->dev = ctx->i915->drm.primary->index; + __entry->ctx = ctx; + __entry->hw_id = ctx->hw_id; + __entry->vm = ctx->ppgtt ? &ctx->ppgtt->base : NULL; ), - TP_printk("dev=%u, ctx=%p, ctx_vm=%p", - __entry->dev, __entry->ctx, __entry->vm) + TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u", + __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id) ) DEFINE_EVENT(i915_context, i915_context_create, From 718e884a0183b09a2af8c06818df5d60f94243ce Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Thu, 16 Feb 2017 14:36:40 +0800 Subject: [PATCH 0301/1299] drm/i915/gvt: set ring buffer size to default for guc submission When not using GuC submission, the ring buffer size for GVT context is 512KB which is the max size. When switching to GuC submission, the ring buffer size is required to be less than 16KB. So use the GVT context default ring buffer size if GuC submission is enabled. Signed-off-by: Chuanxiao Dong Link: http://patchwork.freedesktop.org/patch/msgid/20170216063639.GA17107@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_context.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 262452055563..99c46f4dbde6 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -431,7 +431,8 @@ i915_gem_context_create_gvt(struct drm_device *dev) i915_gem_context_set_closed(ctx); /* not user accessible */ i915_gem_context_clear_bannable(ctx); i915_gem_context_set_force_single_submission(ctx); - ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ + if (!i915.enable_guc_submission) + ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); out: From 1d6aa7a339713179e5aa15f11a70e244b2334d32 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 Feb 2017 16:26:19 +0000 Subject: [PATCH 0302/1299] drm/i915: Add i915_param charp macro magic Handling the dynamic charp module parameter requires us to copy it for the error state, or remember to lock it when reading (in case it used with 0600). v2: Use __always_inline and __builtin_strcmp Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170221162619.15954-1-chris@chris-wilson.co.uk Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/i915_debugfs.c | 2 ++ drivers/gpu/drm/i915/i915_gpu_error.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 768461f7c7c6..655e60d609c2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -72,6 +72,8 @@ static __always_inline void seq_print_param(struct seq_file *m, seq_printf(m, "i915.%s=%d\n", name, *(const int *)x); else if (!__builtin_strcmp(type, "unsigned int")) seq_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "char *")) + seq_printf(m, "i915.%s=%s\n", name, *(const char **)x); else BUILD_BUG(); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 3a3c7c3c4931..2b1d15668192 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -557,6 +557,8 @@ static __always_inline void err_print_param(struct drm_i915_error_state_buf *m, err_printf(m, "i915.%s=%d\n", name, *(const int *)x); else if (!__builtin_strcmp(type, "unsigned int")) err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "char *")) + err_printf(m, "i915.%s=%s\n", name, *(const char **)x); else BUILD_BUG(); } @@ -810,6 +812,12 @@ static void i915_error_object_free(struct drm_i915_error_object *obj) kfree(obj); } +static __always_inline void free_param(const char *type, void *x) +{ + if (!__builtin_strcmp(type, "char *")) + kfree(*(void **)x); +} + void __i915_gpu_state_free(struct kref *error_ref) { struct i915_gpu_state *error = @@ -840,6 +848,11 @@ void __i915_gpu_state_free(struct kref *error_ref) kfree(error->overlay); kfree(error->display); + +#define FREE(T, x) free_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(FREE); +#undef FREE + kfree(error); } @@ -1614,6 +1627,12 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, sizeof(error->device_info)); } +static __always_inline void dup_param(const char *type, void *x) +{ + if (!__builtin_strcmp(type, "char *")) + *(void **)x = kstrdup(*(void **)x, GFP_ATOMIC); +} + static int capture(void *data) { struct i915_gpu_state *error = data; @@ -1625,6 +1644,9 @@ static int capture(void *data) error->i915->gt.last_init_time)); error->params = i915; +#define DUP(T, x) dup_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(DUP); +#undef DUP i915_capture_gen_state(error->i915, error); i915_capture_reg_state(error->i915, error); From 208b84a3757e15a1c002820aefa941a3293b9efa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Feb 2017 11:40:44 +0000 Subject: [PATCH 0303/1299] drm/i915: Remove change_domain tracepoint The change_domain tracepoint has been inaccurate for a few years - it doesn't fully capture the domains, especially with userspace bypassing them. It is defunct, misleading and time to be removed. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 30 ---------------------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 --- drivers/gpu/drm/i915/i915_trace.h | 24 ----------------- 3 files changed, 57 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d93032875f28..9c5f091c771f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3214,9 +3214,6 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT)); obj->base.write_domain = 0; - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - I915_GEM_DOMAIN_GTT); } /** Flushes the CPU write domain for the object if it's dirty. */ @@ -3230,9 +3227,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) intel_fb_obj_flush(obj, false, ORIGIN_CPU); obj->base.write_domain = 0; - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - I915_GEM_DOMAIN_CPU); } /** @@ -3246,7 +3240,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { - uint32_t old_write_domain, old_read_domains; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3284,9 +3277,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) mb(); - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ @@ -3298,10 +3288,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) obj->mm.dirty = true; } - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - i915_gem_object_unpin_pages(obj); return 0; } @@ -3538,7 +3524,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view) { struct i915_vma *vma; - u32 old_read_domains, old_write_domain; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3603,19 +3588,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); } - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ obj->base.write_domain = 0; obj->base.read_domains |= I915_GEM_DOMAIN_GTT; - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - return vma; err_unpin_display: @@ -3651,7 +3629,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) { - uint32_t old_write_domain, old_read_domains; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3670,9 +3647,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) i915_gem_object_flush_gtt_write_domain(obj); - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { i915_gem_clflush_object(obj, false); @@ -3693,10 +3667,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) obj->base.write_domain = I915_GEM_DOMAIN_CPU; } - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index dfed503301a6..6fb29832bc63 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1317,8 +1317,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - u32 old_read = obj->base.read_domains; - u32 old_write = obj->base.write_domain; obj->base.write_domain = obj->base.pending_write_domain; if (obj->base.write_domain) @@ -1329,7 +1327,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, i915_vma_move_to_active(vma, req, vma->exec_entry->flags); eb_export_fence(obj, req, vma->exec_entry->flags); - trace_i915_gem_object_change_domain(obj, old_read, old_write); } } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 0c2ed1289bfc..2dbef3147a60 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -175,30 +175,6 @@ TRACE_EVENT(i915_vma_unbind, __entry->obj, __entry->offset, __entry->size, __entry->vm) ); -TRACE_EVENT(i915_gem_object_change_domain, - TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write), - TP_ARGS(obj, old_read, old_write), - - TP_STRUCT__entry( - __field(struct drm_i915_gem_object *, obj) - __field(u32, read_domains) - __field(u32, write_domain) - ), - - TP_fast_assign( - __entry->obj = obj; - __entry->read_domains = obj->base.read_domains | (old_read << 16); - __entry->write_domain = obj->base.write_domain | (old_write << 16); - ), - - TP_printk("obj=%p, read=%02x=>%02x, write=%02x=>%02x", - __entry->obj, - __entry->read_domains >> 16, - __entry->read_domains & 0xffff, - __entry->write_domain >> 16, - __entry->write_domain & 0xffff) -); - TRACE_EVENT(i915_gem_object_pwrite, TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), TP_ARGS(obj, offset, len), From e59dc1721180b9499ce409945667763628c678c8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Feb 2017 11:40:45 +0000 Subject: [PATCH 0304/1299] drm/i915: Move cpu_cache_is_coherent() to header For use in the next patch, take the current is-coherent helper and add it to i915_gem_object.h Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ drivers/gpu/drm/i915/i915_gem.c | 20 ++++++-------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1fe4010d4f2b..590c524a1531 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4097,4 +4097,10 @@ int remap_io_mapping(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, struct io_mapping *iomap); +static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj) +{ + return (obj->cache_level != I915_CACHE_NONE || + HAS_LLC(to_i915(obj->base.dev))); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9c5f091c771f..312154b3d04c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -48,18 +48,12 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915); static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static bool cpu_cache_is_coherent(struct drm_device *dev, - enum i915_cache_level level) -{ - return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE; -} - static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) return false; - if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + if (!i915_gem_object_is_coherent(obj)) return true; return obj->pin_display; @@ -255,7 +249,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, if (needs_clflush && (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && - !cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + !i915_gem_object_is_coherent(obj)) drm_clflush_sg(pages); obj->base.read_domains = I915_GEM_DOMAIN_CPU; @@ -796,8 +790,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, * anyway again before the next pread happens. */ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, - obj->cache_level); + *needs_clflush = !i915_gem_object_is_coherent(obj); if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { ret = i915_gem_object_set_to_cpu_domain(obj, false); @@ -853,8 +846,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, * before writing. */ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev, - obj->cache_level); + *needs_clflush |= !i915_gem_object_is_coherent(obj); if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { ret = i915_gem_object_set_to_cpu_domain(obj, true); @@ -3173,7 +3165,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, * snooping behaviour occurs naturally as the result of our domain * tracking. */ - if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { + if (!force && i915_gem_object_is_coherent(obj)) { obj->cache_dirty = true; return; } @@ -3412,7 +3404,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, } if (obj->base.write_domain == I915_GEM_DOMAIN_CPU && - cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + i915_gem_object_is_coherent(obj)) obj->cache_dirty = true; list_for_each_entry(vma, &obj->vma_list, obj_link) From 5a97bcc69cc02d4da9599ed2c256b65fc3bd4ee7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Feb 2017 11:40:46 +0000 Subject: [PATCH 0305/1299] drm/i915: Amalgamate flushing of display objects We have three different paths by which userspace wants to flush the display plane (i.e. objects with obj->pin_display). Use a common helper to identify those paths and to simplify a later change. v2: Include the conditional in the name, i915_gem_object_flush_if_display Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 41 ++++++++++++++++---------- drivers/gpu/drm/i915/i915_gem_object.h | 2 ++ drivers/gpu/drm/i915/intel_display.c | 9 ++---- 3 files changed, 30 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 312154b3d04c..c9ee2a99ffc0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1613,23 +1613,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_sw_finish *args = data; struct drm_i915_gem_object *obj; - int err = 0; obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; /* Pinned buffers may be scanout, so flush the cache */ - if (READ_ONCE(obj->pin_display)) { - err = i915_mutex_lock_interruptible(dev); - if (!err) { - i915_gem_object_flush_cpu_write_domain(obj); - mutex_unlock(&dev->struct_mutex); - } - } - + i915_gem_object_flush_if_display(obj); i915_gem_object_put(obj); - return err; + + return 0; } /** @@ -3221,6 +3214,27 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) obj->base.write_domain = 0; } +static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) +{ + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) + return; + + i915_gem_clflush_object(obj, true); + intel_fb_obj_flush(obj, false, ORIGIN_CPU); + + obj->base.write_domain = 0; +} + +void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) +{ + if (!READ_ONCE(obj->pin_display)) + return; + + mutex_lock(&obj->base.dev->struct_mutex); + __i915_gem_object_flush_for_display(obj); + mutex_unlock(&obj->base.dev->struct_mutex); +} + /** * Moves a single object to the GTT read, and possibly write domain. * @obj: object to act on @@ -3575,15 +3589,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ - if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj, true); - intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); - } + __i915_gem_object_flush_for_display(obj); + intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - obj->base.write_domain = 0; obj->base.read_domains |= I915_GEM_DOMAIN_GTT; return vma; diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index ef4893a4f08c..2fb30a5eb510 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -364,5 +364,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) return engine; } +void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); + #endif diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e1e828a8c524..4d67cbb3185c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14278,15 +14278,10 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb, struct drm_clip_rect *clips, unsigned num_clips) { - struct drm_device *dev = fb->dev; - struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); - struct drm_i915_gem_object *obj = intel_fb->obj; + struct drm_i915_gem_object *obj = intel_fb_obj(fb); - mutex_lock(&dev->struct_mutex); - if (obj->pin_display && obj->cache_dirty) - i915_gem_clflush_object(obj, true); + i915_gem_object_flush_if_display(obj); intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); - mutex_unlock(&dev->struct_mutex); return 0; } From f6aaba4dfbc8eaa1b2b756b989fb423a789ee4e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Feb 2017 11:40:47 +0000 Subject: [PATCH 0306/1299] drm/i915: Skip clflushes for all non-page backed objects Generalise the skip for physical and stolen objects by skipping anything we do not have a valid address for inside the sg. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c9ee2a99ffc0..00213c282796 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3140,14 +3140,19 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, * to GPU, and we can ignore the cache flush because it'll happen * again at bind time. */ - if (!obj->mm.pages) + if (!obj->mm.pages) { + GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); return; + } /* * Stolen memory is always coherent with the GPU as it is explicitly * marked as wc by the system, or the system is cache-coherent. + * Similarly, we only access struct pages through the CPU cache, so + * anything not backed by physical memory we consider to be always + * coherent and not need clflushing. */ - if (obj->stolen || obj->phys_handle) + if (!i915_gem_object_has_struct_page(obj)) return; /* If the GPU is snooping the contents of the CPU cache, From 57822dc6b9cfeb5300e467ff83d8371aead90047 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Feb 2017 11:40:48 +0000 Subject: [PATCH 0307/1299] drm/i915: Perform object clflushing asynchronously Flushing the cachelines for an object is slow, can be as much as 100ms for a large framebuffer. We currently do this under the struct_mutex BKL on execution or on pageflip. But now with the ability to add fences to obj->resv for both flips and execbuf (and we naturally wait on the fence before CPU access), we can move the clflush operation to a workqueue and signal a fence for completion, thereby doing the work asynchronously and not blocking the driver or its clients. v2: Introduce i915_gem_clflush.h and use a new name, split out some extras into separate patches. Suggested-by: Akash Goel Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 54 +----- drivers/gpu/drm/i915/i915_gem_clflush.c | 189 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_clflush.h | 37 ++++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +- drivers/gpu/drm/i915/intel_display.c | 44 ++--- 7 files changed, 264 insertions(+), 72 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.c create mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index d0c9577c7533..53e30fcb2751 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o # GEM code i915-y += i915_cmd_parser.o \ i915_gem_batch_pool.o \ + i915_gem_clflush.o \ i915_gem_context.o \ i915_gem_dmabuf.o \ i915_gem_evict.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 590c524a1531..70fc2fa0eaa8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3383,7 +3383,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); + void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 00213c282796..fad0f5adb970 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,6 +29,7 @@ #include #include #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" @@ -3133,46 +3134,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) return 0; } -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, - bool force) -{ - /* If we don't have a page list set up, then we're not pinned - * to GPU, and we can ignore the cache flush because it'll happen - * again at bind time. - */ - if (!obj->mm.pages) { - GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); - return; - } - - /* - * Stolen memory is always coherent with the GPU as it is explicitly - * marked as wc by the system, or the system is cache-coherent. - * Similarly, we only access struct pages through the CPU cache, so - * anything not backed by physical memory we consider to be always - * coherent and not need clflushing. - */ - if (!i915_gem_object_has_struct_page(obj)) - return; - - /* If the GPU is snooping the contents of the CPU cache, - * we do not need to manually clear the CPU cache lines. However, - * the caches are only snooped when the render cache is - * flushed/invalidated. As we always have to emit invalidations - * and flushes when moving into and out of the RENDER domain, correct - * snooping behaviour occurs naturally as the result of our domain - * tracking. - */ - if (!force && i915_gem_object_is_coherent(obj)) { - obj->cache_dirty = true; - return; - } - - trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->mm.pages); - obj->cache_dirty = false; -} - /** Flushes the GTT write domain for the object if it's dirty. */ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) @@ -3213,9 +3174,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj, obj->pin_display); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); - + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); obj->base.write_domain = 0; } @@ -3224,9 +3183,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) return; - i915_gem_clflush_object(obj, true); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); - + i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); obj->base.write_domain = 0; } @@ -3657,8 +3614,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - i915_gem_clflush_object(obj, false); - + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } @@ -4526,6 +4482,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_clflush_init(dev_priv); + if (!i915.enable_execlists) { dev_priv->gt.resume = intel_legacy_submission_resume; dev_priv->gt.cleanup_engine = intel_engine_cleanup; diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c new file mode 100644 index 000000000000..c5fee02f3173 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -0,0 +1,189 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "i915_drv.h" +#include "intel_frontbuffer.h" +#include "i915_gem_clflush.h" + +static DEFINE_SPINLOCK(clflush_lock); +static u64 clflush_context; + +struct clflush { + struct dma_fence dma; /* Must be first for dma_fence_free() */ + struct i915_sw_fence wait; + struct work_struct work; + struct drm_i915_gem_object *obj; +}; + +static const char *i915_clflush_get_driver_name(struct dma_fence *fence) +{ + return DRIVER_NAME; +} + +static const char *i915_clflush_get_timeline_name(struct dma_fence *fence) +{ + return "clflush"; +} + +static bool i915_clflush_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static void i915_clflush_release(struct dma_fence *fence) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), dma); + + i915_sw_fence_fini(&clflush->wait); + + BUILD_BUG_ON(offsetof(typeof(*clflush), dma)); + dma_fence_free(&clflush->dma); +} + +static const struct dma_fence_ops i915_clflush_ops = { + .get_driver_name = i915_clflush_get_driver_name, + .get_timeline_name = i915_clflush_get_timeline_name, + .enable_signaling = i915_clflush_enable_signaling, + .wait = dma_fence_default_wait, + .release = i915_clflush_release, +}; + +static void __i915_do_clflush(struct drm_i915_gem_object *obj) +{ + drm_clflush_sg(obj->mm.pages); + obj->cache_dirty = false; + + intel_fb_obj_flush(obj, false, ORIGIN_CPU); +} + +static void i915_clflush_work(struct work_struct *work) +{ + struct clflush *clflush = container_of(work, typeof(*clflush), work); + struct drm_i915_gem_object *obj = clflush->obj; + + if (!obj->cache_dirty) + goto out; + + if (i915_gem_object_pin_pages(obj)) { + DRM_ERROR("Failed to acquire obj->pages for clflushing\n"); + goto out; + } + + __i915_do_clflush(obj); + + i915_gem_object_unpin_pages(obj); + +out: + i915_gem_object_put(obj); + + dma_fence_signal(&clflush->dma); + dma_fence_put(&clflush->dma); +} + +static int __i915_sw_fence_call +i915_clflush_notify(struct i915_sw_fence *fence, + enum i915_sw_fence_notify state) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), wait); + + switch (state) { + case FENCE_COMPLETE: + schedule_work(&clflush->work); + break; + + case FENCE_FREE: + dma_fence_put(&clflush->dma); + break; + } + + return NOTIFY_DONE; +} + +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + struct clflush *clflush; + + /* + * Stolen memory is always coherent with the GPU as it is explicitly + * marked as wc by the system, or the system is cache-coherent. + * Similarly, we only access struct pages through the CPU cache, so + * anything not backed by physical memory we consider to be always + * coherent and not need clflushing. + */ + if (!i915_gem_object_has_struct_page(obj)) + return; + + obj->cache_dirty = true; + + /* If the GPU is snooping the contents of the CPU cache, + * we do not need to manually clear the CPU cache lines. However, + * the caches are only snooped when the render cache is + * flushed/invalidated. As we always have to emit invalidations + * and flushes when moving into and out of the RENDER domain, correct + * snooping behaviour occurs naturally as the result of our domain + * tracking. + */ + if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj)) + return; + + trace_i915_gem_object_clflush(obj); + + clflush = NULL; + if (!(flags & I915_CLFLUSH_SYNC)) + clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); + if (clflush) { + dma_fence_init(&clflush->dma, + &i915_clflush_ops, + &clflush_lock, + clflush_context, + 0); + i915_sw_fence_init(&clflush->wait, i915_clflush_notify); + + clflush->obj = i915_gem_object_get(obj); + INIT_WORK(&clflush->work, i915_clflush_work); + + dma_fence_get(&clflush->dma); + + i915_sw_fence_await_reservation(&clflush->wait, + obj->resv, NULL, + false, I915_FENCE_TIMEOUT, + GFP_KERNEL); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &clflush->dma); + reservation_object_unlock(obj->resv); + + i915_sw_fence_commit(&clflush->wait); + } else if (obj->mm.pages) { + __i915_do_clflush(obj); + } else { + GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); + } +} + +void i915_gem_clflush_init(struct drm_i915_private *i915) +{ + clflush_context = dma_fence_context_alloc(1); +} diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h new file mode 100644 index 000000000000..b62d61a2d15f --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_clflush.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_GEM_CLFLUSH_H__ +#define __I915_GEM_CLFLUSH_H__ + +struct drm_i915_private; +struct drm_i915_gem_object; + +void i915_gem_clflush_init(struct drm_i915_private *i915); +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags); +#define I915_CLFLUSH_FORCE BIT(0) +#define I915_CLFLUSH_SYNC BIT(1) + +#endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6fb29832bc63..35d2cb979452 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -35,6 +35,7 @@ #include #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" @@ -1114,13 +1115,15 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) continue; + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) { + i915_gem_clflush_object(obj, 0); + obj->base.write_domain = 0; + } + ret = i915_gem_request_await_object (req, obj, obj->base.pending_write_domain); if (ret) return ret; - - if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - i915_gem_clflush_object(obj, false); } /* Unconditionally flush any chipset caches (for streaming writes). */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4d67cbb3185c..2806d6b0570d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -37,6 +37,7 @@ #include "intel_frontbuffer.h" #include #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "intel_dsi.h" #include "i915_trace.h" #include @@ -13188,6 +13189,29 @@ intel_prepare_plane_fb(struct drm_plane *plane, struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb); int ret; + if (obj) { + if (plane->type == DRM_PLANE_TYPE_CURSOR && + INTEL_INFO(dev_priv)->cursor_needs_physical) { + const int align = IS_I830(dev_priv) ? 16 * 1024 : 256; + + ret = i915_gem_object_attach_phys(obj, align); + if (ret) { + DRM_DEBUG_KMS("failed to attach phys object\n"); + return ret; + } + } else { + struct i915_vma *vma; + + vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); + if (IS_ERR(vma)) { + DRM_DEBUG_KMS("failed to pin object\n"); + return PTR_ERR(vma); + } + + to_intel_plane_state(new_state)->vma = vma; + } + } + if (!obj && !old_obj) return 0; @@ -13240,26 +13264,6 @@ intel_prepare_plane_fb(struct drm_plane *plane, i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); } - if (plane->type == DRM_PLANE_TYPE_CURSOR && - INTEL_INFO(dev_priv)->cursor_needs_physical) { - int align = IS_I830(dev_priv) ? 16 * 1024 : 256; - ret = i915_gem_object_attach_phys(obj, align); - if (ret) { - DRM_DEBUG_KMS("failed to attach phys object\n"); - return ret; - } - } else { - struct i915_vma *vma; - - vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); - if (IS_ERR(vma)) { - DRM_DEBUG_KMS("failed to pin object\n"); - return PTR_ERR(vma); - } - - to_intel_plane_state(new_state)->vma = vma; - } - return 0; } From d59b21ec6f33b6df7933e86b9906e9f4ee9b218d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Feb 2017 11:40:49 +0000 Subject: [PATCH 0308/1299] drm/i915: Remove 'retire' parameter from intel_fb_obj_flush Setting retire=true is identical to using origin=ORIGIN_CS, so make the same simplification to intel_fb_obj_flush() as already employed for intel_fb_obj_invalidate(). Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 12 ++++++------ drivers/gpu/drm/i915/i915_gem_clflush.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_frontbuffer.c | 3 +-- drivers/gpu/drm/i915/intel_frontbuffer.h | 8 ++------ 5 files changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fad0f5adb970..e29b6f0dda29 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -628,7 +628,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, drm_clflush_virt_range(vaddr, args->size); i915_gem_chipset_flush(to_i915(obj->base.dev)); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); return 0; } @@ -1275,7 +1275,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, user_data += page_length; offset += page_length; } - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); mutex_lock(&i915->drm.struct_mutex); out_unpin: @@ -1411,7 +1411,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, offset = 0; } - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); i915_gem_obj_finish_shmem_access(obj); return ret; } @@ -3162,7 +3162,7 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); - intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT)); + intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); obj->base.write_domain = 0; } @@ -3552,7 +3552,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ __i915_gem_object_flush_for_display(obj); - intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); /* It should now be out of any other write domains, and we can update * the domain values for our changes. @@ -3953,7 +3953,7 @@ frontbuffer_retire(struct i915_gem_active *active, struct drm_i915_gem_object *obj = container_of(active, typeof(*obj), frontbuffer_write); - intel_fb_obj_flush(obj, true, ORIGIN_CS); + intel_fb_obj_flush(obj, ORIGIN_CS); } void i915_gem_object_init(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index c5fee02f3173..d925fb582ba7 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -74,7 +74,7 @@ static void __i915_do_clflush(struct drm_i915_gem_object *obj) drm_clflush_sg(obj->mm.pages); obj->cache_dirty = false; - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); } static void i915_clflush_work(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2806d6b0570d..33bde1751679 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14285,7 +14285,7 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb, struct drm_i915_gem_object *obj = intel_fb_obj(fb); i915_gem_object_flush_if_display(obj); - intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); return 0; } diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index 966de4c7c7a2..fcfc217e754e 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -114,13 +114,12 @@ static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv, } void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin, unsigned int frontbuffer_bits) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - if (retire) { + if (origin == ORIGIN_CS) { spin_lock(&dev_priv->fb_tracking.lock); /* Filter out new bits since rendering started. */ frontbuffer_bits &= dev_priv->fb_tracking.busy_bits; diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h index 7bab41218cf7..63cd9a753a72 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.h +++ b/drivers/gpu/drm/i915/intel_frontbuffer.h @@ -38,7 +38,6 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, enum fb_op_origin origin, unsigned int frontbuffer_bits); void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin, unsigned int frontbuffer_bits); @@ -69,15 +68,12 @@ static inline bool intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, /** * intel_fb_obj_flush - flush frontbuffer object * @obj: GEM object to flush - * @retire: set when retiring asynchronous rendering * @origin: which operation caused the flush * * This function gets called every time rendering on the given object has - * completed and frontbuffer caching can be started again. If @retire is true - * then any delayed flushes will be unblocked. + * completed and frontbuffer caching can be started again. */ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin) { unsigned int frontbuffer_bits; @@ -86,7 +82,7 @@ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, if (!frontbuffer_bits) return; - __intel_fb_obj_flush(obj, retire, origin, frontbuffer_bits); + __intel_fb_obj_flush(obj, origin, frontbuffer_bits); } #endif /* __INTEL_FRONTBUFFER_H__ */ From c74fd80f2f41d05f350bb478151021f88551afe8 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 13 Jan 2017 15:07:51 -0500 Subject: [PATCH 0309/1299] xen: do not re-use pirq number cached in pci device msi msg data Revert the main part of commit: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") That commit introduced reading the pci device's msi message data to see if a pirq was previously configured for the device's msi/msix, and re-use that pirq. At the time, that was the correct behavior. However, a later change to Qemu caused it to call into the Xen hypervisor to unmap all pirqs for a pci device, when the pci device disables its MSI/MSIX vectors; specifically the Qemu commit: c976437c7dba9c7444fb41df45468968aaa326ad ("qemu-xen: free all the pirqs for msi/msix when driver unload") Once Qemu added this pirq unmapping, it was no longer correct for the kernel to re-use the pirq number cached in the pci device msi message data. All Qemu releases since 2.1.0 contain the patch that unmaps the pirqs when the pci device disables its MSI/MSIX vectors. This bug is causing failures to initialize multiple NVMe controllers under Xen, because the NVMe driver sets up a single MSIX vector for each controller (concurrently), and then after using that to talk to the controller for some configuration data, it disables the single MSIX vector and re-configures all the MSIX vectors it needs. So the MSIX setup code tries to re-use the cached pirq from the first vector for each controller, but the hypervisor has already given away that pirq to another controller, and its initialization fails. This is discussed in more detail at: https://lists.xen.org/archives/html/xen-devel/2017-01/msg00447.html Fixes: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") Signed-off-by: Dan Streetman Reviewed-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Boris Ostrovsky --- arch/x86/pci/xen.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index e1fb269c87af..292ab0364a89 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -234,23 +234,14 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 1; for_each_pci_msi_entry(msidesc, dev) { - __pci_read_msi_msg(msidesc, &msg); - pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | - ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); - if (msg.data != XEN_PIRQ_MSI_DATA || - xen_irq_from_pirq(pirq) < 0) { - pirq = xen_allocate_pirq_msi(dev, msidesc); - if (pirq < 0) { - irq = -ENODEV; - goto error; - } - xen_msi_compose_msg(dev, pirq, &msg); - __pci_write_msi_msg(msidesc, &msg); - dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); - } else { - dev_dbg(&dev->dev, - "xen: msi already bound to pirq=%d\n", pirq); + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) { + irq = -ENODEV; + goto error; } + xen_msi_compose_msg(dev, pirq, &msg); + __pci_write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, (type == PCI_CAP_ID_MSI) ? nvec : 1, (type == PCI_CAP_ID_MSIX) ? From 7ee686034b8b008f19bfd913473acffc1ad51735 Mon Sep 17 00:00:00 2001 From: Lyude Date: Wed, 22 Feb 2017 22:31:02 -0500 Subject: [PATCH 0310/1299] drm/i915/dp: Ratelimit DP aux timeout messages Right now this is just leaving a lot of spam in dmesg that makes real issues more difficult to debug. As well (as noted by the comment right above the DRM_DEBUG_KMS() call) this is normal behavior when there's nothing connected to the DisplayPort connector. Signed-off-by: Lyude --- drivers/gpu/drm/i915/intel_dp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 024798a9c016..fca0fcade2d6 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1121,7 +1121,8 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, /* Timeouts occur when the device isn't connected, so they're * "normal" -- don't fill the kernel log with these */ if (status & DP_AUX_CH_CTL_TIME_OUT_ERROR) { - DRM_DEBUG_KMS("dp_aux_ch timeout status 0x%08x\n", status); + DRM_DEBUG_KMS_RATELIMITED("dp_aux_ch timeout status 0x%08x\n", + status); ret = -ETIMEDOUT; goto out; } From 4509276ee824bb967885c095c610767e42345c36 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Feb 2017 12:47:18 +0000 Subject: [PATCH 0311/1299] drm/i915: Remove Braswell GGTT update w/a Testing with concurrent GGTT accesses no longer show the coherency problems from yonder, commit 5bab6f60cb4d ("drm/i915: Serialise updates to GGTT with access through GGTT on Braswell"). My presumption is that the root cause was more likely fixed by commit 3b5724d702ef ("drm/i915: Wait for writes through the GTT to land before reading back"), along with the use of WC updates to the global gTT in commit 8448661d65f6 ("drm/i915: Convert clflushed pagetables over to WC maps". Given that the original symptoms can no longer be reproduced, time to remove the workaround. Testcase: igt/gem_concurrent_blit Signed-off-by: Chris Wilson Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170220124718.14796-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_gtt.c | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a5162cbc0ea0..24fa262c2b61 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2085,32 +2085,6 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, ggtt->invalidate(vm->i915); } -struct insert_entries { - struct i915_address_space *vm; - struct sg_table *st; - u64 start; - enum i915_cache_level level; - u32 flags; -}; - -static int gen8_ggtt_insert_entries__cb(void *_arg) -{ - struct insert_entries *arg = _arg; - gen8_ggtt_insert_entries(arg->vm, arg->st, - arg->start, arg->level, arg->flags); - return 0; -} - -static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, - struct sg_table *st, - u64 start, - enum i915_cache_level level, - u32 flags) -{ - struct insert_entries arg = { vm, st, start, level, flags }; - stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); -} - static void gen6_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, @@ -2762,8 +2736,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.clear_range = gen8_ggtt_clear_range; ggtt->base.insert_entries = gen8_ggtt_insert_entries; - if (IS_CHERRYVIEW(dev_priv)) - ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; ggtt->invalidate = gen6_ggtt_invalidate; From a33fc7a0482a40068c022aefcefd50f9f0f44f87 Mon Sep 17 00:00:00 2001 From: Min He Date: Fri, 17 Feb 2017 16:42:38 +0800 Subject: [PATCH 0312/1299] drm/i915/gvt: enter failsafe mode when guest requires more resources Windows guest will notitfy GVT-g to request more resources through g2v interface, when its resources are not enough. This patch is to handle this case and let vgpu enter failsafe mode to avoid too many error messages. Signed-off-by: Min He Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 1 + drivers/gpu/drm/i915/gvt/handlers.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 48ef0771d772..a41b322d8957 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -453,6 +453,7 @@ struct intel_gvt_ops { enum { GVT_FAILSAFE_UNSUPPORTED_GUEST, + GVT_FAILSAFE_INSUFFICIENT_RESOURCE, }; #include "mpt.h" diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index fd7e789a72c3..e1a382645112 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -157,6 +157,8 @@ static void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) case GVT_FAILSAFE_UNSUPPORTED_GUEST: pr_err("Detected your guest driver doesn't support GVT-g.\n"); break; + case GVT_FAILSAFE_INSUFFICIENT_RESOURCE: + pr_err("Graphics resource is not enough for the guest\n"); default: break; } @@ -1106,6 +1108,9 @@ static int pvinfo_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, case _vgtif_reg(execlist_context_descriptor_lo): case _vgtif_reg(execlist_context_descriptor_hi): break; + case _vgtif_reg(rsv5[0])..._vgtif_reg(rsv5[3]): + enter_failsafe_mode(vgpu, GVT_FAILSAFE_INSUFFICIENT_RESOURCE); + break; default: gvt_err("invalid pvinfo write offset %x bytes %x data %x\n", offset, bytes, data); From 9272f73f79bd780502134f227fa52fd280ecda17 Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Fri, 17 Feb 2017 19:29:52 +0800 Subject: [PATCH 0313/1299] drm/i915/gvt: add a NULL pointer check to avoid kernel panic Due to the request replay, context switch interrupt may come after gvt free the workload thus can cause a kernel NULL pointer kernel panic. This patch will add a simple check to avoid this for a short term. From long term, gvt workload lifecycle doesn't match with i915 request and need to find a proper way to manage this. v4: simplify the NULL pointer check. v5: add unlikely to optimize. Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d6b6d0efdd1a..e355a82ccabd 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -139,6 +139,9 @@ static int shadow_context_status_change(struct notifier_block *nb, struct intel_vgpu_workload *workload = scheduler->current_workload[req->engine->id]; + if (unlikely(!workload)) + return NOTIFY_OK; + switch (action) { case INTEL_CONTEXT_SCHEDULE_IN: intel_gvt_load_render_mmio(workload->vgpu, From 593e59b4b941910e4f7ba87d3c02d63e38e960d0 Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Mon, 20 Feb 2017 15:51:13 +0800 Subject: [PATCH 0314/1299] drm/i915/gvt: fix unhandled mmio warnings some registers were missing or treated as BDW only. This patch is to fix it avoid unhandled mmio wanrings v2: update commit message according to zhenyu's comment Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index e1a382645112..e1abece853d4 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1519,6 +1519,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x2124, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK, NULL, NULL); @@ -2390,9 +2392,9 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(CHICKEN_PIPESL_1(PIPE_A), D_BDW); - MMIO_D(CHICKEN_PIPESL_1(PIPE_B), D_BDW); - MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW); + MMIO_D(CHICKEN_PIPESL_1(PIPE_A), D_BDW_PLUS); + MMIO_D(CHICKEN_PIPESL_1(PIPE_B), D_BDW_PLUS); + MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS); MMIO_D(WM_MISC, D_BDW); MMIO_D(BDW_EDP_PSR_BASE, D_BDW); @@ -2406,7 +2408,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN8_EU_DISABLE1, D_BDW_PLUS); MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS); - MMIO_D(0xfdc, D_BDW); + MMIO_D(0xfdc, D_BDW_PLUS); MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_D(GEN7_ROW_CHICKEN2, D_BDW_PLUS); MMIO_D(GEN8_UCGCTL6, D_BDW_PLUS); @@ -2423,6 +2425,10 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(0x24d8, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x24dc, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_D(0x22040, D_BDW_PLUS); + MMIO_D(0x44484, D_BDW_PLUS); + MMIO_D(0x4448c, D_BDW_PLUS); + MMIO_D(0x83a4, D_BDW); MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS); @@ -2668,6 +2674,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL); MMIO_D(0x44500, D_SKL); + MMIO_D(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS); return 0; } From d8e9b2b9097c117880dc22933239d05199c60b96 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 20 Feb 2017 14:58:25 +0100 Subject: [PATCH 0315/1299] drm/i915/gvt: Fix superfluous newline in GVT_DISPLAY_READY env var send_display_send_uevent() sends two environment variable, and the first one GVT_DISPLAY_READY is set including a new line at the end of the string; that is obviously superfluous and wrong -- at least, it *looks* so when you only read the code. However, it doesn't appear in the actual output by a (supposedly unexpected) trick. The code uses snprintf() and truncates the string in size 20 bytes. This makes the string as GVT_DISPLAY_READY=0 or ...=1 including the trailing NUL-letter. That is, the '\n' found in the format string is always cut off as a result. Although the code gives the correct result, it is confusing. This patch addresses it, just removing the superfluous '\n' from the format string for avoiding further confusion. If the argument "ready" were not a bool, the size 20 should be corrected as well. But it's a bool, so we can leave the magic number 20 as is for now. FWIW, the bug was spotted by a new GCC7 warning: drivers/gpu/drm/i915/gvt/handlers.c: In function 'pvinfo_mmio_write': drivers/gpu/drm/i915/gvt/handlers.c:1042:34: error: 'snprintf' output truncated before the last format character [-Werror=format-truncation=] snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready); ^~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gvt/handlers.c:1042:2: note: 'snprintf' output 21 bytes into a destination of size 20 snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fixes: 04d348ae3f0a ("drm/i915/gvt: vGPU display virtualization") Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1025903 Reported-by: Richard Biener Cc: Signed-off-by: Takashi Iwai Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index e1abece853d4..a1880424ad32 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1069,7 +1069,7 @@ static int send_display_ready_uevent(struct intel_vgpu *vgpu, int ready) char vmid_str[20]; char display_ready_str[20]; - snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d\n", ready); + snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d", ready); env[0] = display_ready_str; snprintf(vmid_str, 20, "VMID=%d", vgpu->id); From 4c4b22abb3d405c5c194b02255eecc1a9eff42cf Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 21 Feb 2017 09:39:00 +0800 Subject: [PATCH 0316/1299] drm/i915/gvt: add more registers to context save/restore list the value of those registers should be applied to hardware on context restoring Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/render.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index 2b3a642284b6..73f052a4f424 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -53,6 +53,14 @@ static struct render_mmio gen8_render_mmio_list[] = { {RCS, _MMIO(0x24d4), 0, false}, {RCS, _MMIO(0x24d8), 0, false}, {RCS, _MMIO(0x24dc), 0, false}, + {RCS, _MMIO(0x24e0), 0, false}, + {RCS, _MMIO(0x24e4), 0, false}, + {RCS, _MMIO(0x24e8), 0, false}, + {RCS, _MMIO(0x24ec), 0, false}, + {RCS, _MMIO(0x24f0), 0, false}, + {RCS, _MMIO(0x24f4), 0, false}, + {RCS, _MMIO(0x24f8), 0, false}, + {RCS, _MMIO(0x24fc), 0, false}, {RCS, _MMIO(0x7004), 0xffff, true}, {RCS, _MMIO(0x7008), 0xffff, true}, {RCS, _MMIO(0x7000), 0xffff, true}, @@ -76,6 +84,14 @@ static struct render_mmio gen9_render_mmio_list[] = { {RCS, _MMIO(0x24d4), 0, false}, {RCS, _MMIO(0x24d8), 0, false}, {RCS, _MMIO(0x24dc), 0, false}, + {RCS, _MMIO(0x24e0), 0, false}, + {RCS, _MMIO(0x24e4), 0, false}, + {RCS, _MMIO(0x24e8), 0, false}, + {RCS, _MMIO(0x24ec), 0, false}, + {RCS, _MMIO(0x24f0), 0, false}, + {RCS, _MMIO(0x24f4), 0, false}, + {RCS, _MMIO(0x24f8), 0, false}, + {RCS, _MMIO(0x24fc), 0, false}, {RCS, _MMIO(0x7004), 0xffff, true}, {RCS, _MMIO(0x7008), 0xffff, true}, {RCS, _MMIO(0x7000), 0xffff, true}, From e6cedfea6b8dcb205f75ea632570f52d2ffd1251 Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 21 Feb 2017 10:38:53 +0800 Subject: [PATCH 0317/1299] drm/i915/gvt: force-nopriv register handling add a whitelist to check the content of force-nonpriv registers v3: per He Min's comment, modify in_whitelist()'s return type to bool, and use negative value as the return value for failure for force_nonpriv_write(). v2: 1. split a big patch into two smaller ones per zhenyu's comment. this patch is the mmio handling part for force-nopriv registers 2. per zhenyu's comment, combine all non-priv registers into a single MMIO_DFH entry Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 74 +++++++++++++++++++++++++++-- 1 file changed, 70 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index a1880424ad32..af0c0d1ec9a8 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -398,6 +398,74 @@ static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, return 0; } +/* ascendingly sorted */ +static i915_reg_t force_nonpriv_white_list[] = { + GEN9_CS_DEBUG_MODE1, //_MMIO(0x20ec) + GEN9_CTX_PREEMPT_REG,//_MMIO(0x2248) + GEN8_CS_CHICKEN1,//_MMIO(0x2580) + _MMIO(0x2690), + _MMIO(0x2694), + _MMIO(0x2698), + _MMIO(0x4de0), + _MMIO(0x4de4), + _MMIO(0x4dfc), + GEN7_COMMON_SLICE_CHICKEN1,//_MMIO(0x7010) + _MMIO(0x7014), + HDC_CHICKEN0,//_MMIO(0x7300) + GEN8_HDC_CHICKEN1,//_MMIO(0x7304) + _MMIO(0x7700), + _MMIO(0x7704), + _MMIO(0x7708), + _MMIO(0x770c), + _MMIO(0xb110), + GEN8_L3SQCREG4,//_MMIO(0xb118) + _MMIO(0xe100), + _MMIO(0xe18c), + _MMIO(0xe48c), + _MMIO(0xe5f4), +}; + +/* a simple bsearch */ +static inline bool in_whitelist(unsigned int reg) +{ + int left = 0, right = ARRAY_SIZE(force_nonpriv_white_list); + i915_reg_t *array = force_nonpriv_white_list; + + while (left < right) { + int mid = (left + right)/2; + + if (reg > array[mid].reg) + left = mid + 1; + else if (reg < array[mid].reg) + right = mid; + else + return true; + } + return false; +} + +static int force_nonpriv_write(struct intel_vgpu *vgpu, + unsigned int offset, void *p_data, unsigned int bytes) +{ + u32 reg_nonpriv = *(u32 *)p_data; + int ret = -EINVAL; + + if ((bytes != 4) || ((offset & (bytes - 1)) != 0)) { + gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n", + vgpu->id, offset, bytes); + return ret; + } + + if (in_whitelist(reg_nonpriv)) { + ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data, + bytes); + } else { + gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x\n", + vgpu->id, reg_nonpriv); + } + return ret; +} + static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { @@ -2420,10 +2488,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(0xb10c, D_BDW); MMIO_D(0xb110, D_BDW); - MMIO_DFH(0x24d0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x24d4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x24d8, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x24dc, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, + NULL, force_nonpriv_write); MMIO_D(0x22040, D_BDW_PLUS); MMIO_D(0x44484, D_BDW_PLUS); From 187447a106fc9caca45f10413845678d3666556c Mon Sep 17 00:00:00 2001 From: Pei Zhang Date: Tue, 21 Feb 2017 21:58:14 +0800 Subject: [PATCH 0318/1299] drm/i915/gvt: add cmd_access to GEN7_HALF_SLICE_CHICKEN1 Linux guest is using this MMIO in lri command. Add cmd_access flag for this mmio in gvt to avoid error log. v2: change the mmio address to its macro name Signed-off-by: Pei Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index af0c0d1ec9a8..bfe12ddb0210 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1607,7 +1607,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x243c, D_ALL); MMIO_DFH(0x7018, D_ALL, F_MODE_MASK, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe100, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* display */ MMIO_F(0x60220, 0x20, 0, 0, 0, D_ALL, NULL, NULL); From ec162aa84c9057e196f3a844c0a8d569f5095e6c Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 21 Feb 2017 14:00:37 +0800 Subject: [PATCH 0319/1299] drm/i915/gvt: set default value to 0 for unhandled mmio regs for a handled mmio reg, its default value is read from hardware, while for an unhandled mmio regs, its default value would be random if not explicitly set to 0 Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/firmware.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c index 1cb29b2d7dc6..933a7c211a1c 100644 --- a/drivers/gpu/drm/i915/gvt/firmware.c +++ b/drivers/gpu/drm/i915/gvt/firmware.c @@ -80,7 +80,7 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt) int ret; size = sizeof(*h) + info->mmio_size + info->cfg_space_size - 1; - firmware = vmalloc(size); + firmware = vzalloc(size); if (!firmware) return -ENOMEM; From bab059304314e127cf4a5330c6bd5a71fd27c022 Mon Sep 17 00:00:00 2001 From: "Zhao, Xinda" Date: Tue, 21 Feb 2017 15:07:31 +0800 Subject: [PATCH 0320/1299] drm/i915/gvt: decrease priority of output msg for untracked mmio When untracked mmio is visited, too many log info will be printed out, it may confuse the user, but most of the time, it is not the urgent case, so use gvt_dbg_mmio() instead. Signed-off-by: Zhao, Xinda Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mmio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 99abb01fa9eb..60b698cb8365 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -298,7 +298,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, mmio = intel_gvt_find_mmio_info(gvt, rounddown(offset, 4)); if (!mmio && !vgpu->mmio.disable_warn_untrack) - gvt_err("vgpu%d: write untracked MMIO %x len %d val %x\n", + gvt_dbg_mmio("vgpu%d: write untracked MMIO %x len %d val %x\n", vgpu->id, offset, bytes, *(u32 *)p_data); if (!intel_gvt_mmio_is_unalign(gvt, offset)) { From da9cc8de22aa6bd6ed51c406432d599ab520a6e3 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Tue, 21 Feb 2017 15:52:56 +0800 Subject: [PATCH 0321/1299] drm/i915/gvt: clear the vGPU reset logic Releasing shadow PPGTT pages is not enough when vGPU reset, the guest page table tracking data should has same life-cycle with all the shadow PPGTT pages; Otherwise there is no chance to re-shadow the PPGTT pages without free the guest page table tracking data. This patch clear the PPGTT reset logic and make the vGPU reset in working order. v2: refactor some logic to avoid code duplicated. v3: remove useless macro and add comments from Christophe. v4: keep reset logic in reset function. Signed-off-by: Ping Gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 35 ++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 28c92346db0e..b5c833287d39 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -2015,6 +2015,22 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) return create_scratch_page_tree(vgpu); } +static void intel_vgpu_free_mm(struct intel_vgpu *vgpu, int type) +{ + struct list_head *pos, *n; + struct intel_vgpu_mm *mm; + + list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) { + mm = container_of(pos, struct intel_vgpu_mm, list); + if (mm->type == type) { + vgpu->gvt->gtt.mm_free_page_table(mm); + list_del(&mm->list); + list_del(&mm->lru_list); + kfree(mm); + } + } +} + /** * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virulization * @vgpu: a vGPU @@ -2027,19 +2043,11 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) */ void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu) { - struct list_head *pos, *n; - struct intel_vgpu_mm *mm; - ppgtt_free_all_shadow_page(vgpu); release_scratch_page_tree(vgpu); - list_for_each_safe(pos, n, &vgpu->gtt.mm_list_head) { - mm = container_of(pos, struct intel_vgpu_mm, list); - vgpu->gvt->gtt.mm_free_page_table(mm); - list_del(&mm->list); - list_del(&mm->lru_list); - kfree(mm); - } + intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); + intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_GGTT); } static void clean_spt_oos(struct intel_gvt *gvt) @@ -2322,6 +2330,13 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu, bool dmlr) int i; ppgtt_free_all_shadow_page(vgpu); + + /* Shadow pages are only created when there is no page + * table tracking data, so remove page tracking data after + * removing the shadow pages. + */ + intel_vgpu_free_mm(vgpu, INTEL_GVT_MM_PPGTT); + if (!dmlr) return; From d8a355be0b2b5613f7b34aee1394369d45d50586 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Wed, 22 Feb 2017 11:03:24 +0800 Subject: [PATCH 0322/1299] drm/i915/gvt: refine pcode write emulation In GVT-g we always emulate as pcode read/write success and ready for access anytime, since we don't touch real physical registers here. Add 'SKL_PCODE_CDCLK_CONTROL' write emulation, without it will cause skl_set_cdclk fail in guest. Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index bfe12ddb0210..f89b183488e9 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1315,6 +1315,9 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, else *data0 = 0x61514b3d; break; + case SKL_PCODE_CDCLK_CONTROL: + *data0 = SKL_CDCLK_READY_FOR_CHANGE; + break; case 0x5: *data0 |= 0x1; break; @@ -1322,8 +1325,13 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, gvt_dbg_core("VM(%d) write %x to mailbox, return data0 %x\n", vgpu->id, value, *data0); - - value &= ~(1 << 31); + /** + * PCODE_READY clear means ready for pcode read/write, + * PCODE_ERROR_MASK clear means no error happened. In GVT-g we + * always emulate as pcode read/write success and ready for access + * anytime, since we don't touch real physical registers here. + */ + value &= ~(GEN6_PCODE_READY | GEN6_PCODE_ERROR_MASK); return intel_vgpu_default_mmio_write(vgpu, offset, &value, bytes); } From 7c28135c77414327523b89bfc3f13096e095f5ac Mon Sep 17 00:00:00 2001 From: "Zhao, Xinda" Date: Tue, 21 Feb 2017 15:54:56 +0800 Subject: [PATCH 0323/1299] drm/i915/gvt: remove unnecessary error msg from gtt write The guest VM may initialize the whole GTT table during boot up, so the warning msg in emulate_gtt_mmio_write is not necessary, it is the expected behavior and it may confuse the user if error msg is printed out, so remove the msg from emulate_gtt_mmio_write(), Signed-off-by: Zhao, Xinda Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index b5c833287d39..6a5ff23ded90 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1825,11 +1825,8 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, gma = g_gtt_index << GTT_PAGE_SHIFT; /* the VM may configure the whole GM space when ballooning is used */ - if (WARN_ONCE(!vgpu_gmadr_is_valid(vgpu, gma), - "vgpu%d: found oob ggtt write, offset %x\n", - vgpu->id, off)) { + if (!vgpu_gmadr_is_valid(vgpu, gma)) return 0; - } ggtt_get_guest_entry(ggtt_mm, &e, g_gtt_index); From 191020b670f84f5e2edfaa906c3801df20485610 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 23 Feb 2017 14:46:23 +0800 Subject: [PATCH 0324/1299] drm/i915/gvt: adjust to fixed vGPU types Previous vGPU type create tried to determine vGPU type name e.g _1, _2 based on the number of mdev devices can be created, but different type might have very different resource size depending on physical device. We need to split type name vs. actual mdev resource and create fixed vGPU type with determined size for consistence. With this we'd like to fix vGPU types for _1, _2, _4 and _8 now, each type has fixed defined resource size. Available mdev instances that could be created is determined by physical resource, and user should query for that before creating. Cc: Kevin Tian Reviewed-by: Kevin Tian Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.h | 1 - drivers/gpu/drm/i915/gvt/vgpu.c | 58 +++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index a41b322d8957..faff044c6434 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -212,7 +212,6 @@ struct intel_gvt_opregion { #define NR_MAX_INTEL_VGPU_TYPES 20 struct intel_vgpu_type { char name[16]; - unsigned int max_instance; unsigned int avail_instance; unsigned int low_gm_size; unsigned int high_gm_size; diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index dcfcce1dc00e..c27c13c3cc48 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -64,6 +64,19 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE); } +static struct { + unsigned int low_mm; + unsigned int high_mm; + unsigned int fence; + char *name; +} vgpu_types[] = { +/* Fixed vGPU type table */ + { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, "8" }, + { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, "4" }, + { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, "2" }, + { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, "1" }, +}; + /** * intel_gvt_init_vgpu_types - initialize vGPU type list * @gvt : GVT device @@ -78,9 +91,8 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) unsigned int min_low; /* vGPU type name is defined as GVTg_Vx_y which contains - * physical GPU generation type and 'y' means maximum vGPU - * instances user can create on one physical GPU for this - * type. + * physical GPU generation type (e.g V4 as BDW server, V5 as + * SKL server). * * Depend on physical SKU resource, might see vGPU types like * GVTg_V4_8, GVTg_V4_4, GVTg_V4_2, etc. We can create @@ -92,7 +104,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) */ low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE; high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE; - num_types = 4; + num_types = sizeof(vgpu_types) / sizeof(vgpu_types[0]); gvt->types = kzalloc(num_types * sizeof(struct intel_vgpu_type), GFP_KERNEL); @@ -101,25 +113,24 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) min_low = MB_TO_BYTES(32); for (i = 0; i < num_types; ++i) { - if (low_avail / min_low == 0) + if (low_avail / vgpu_types[i].low_mm == 0) break; - gvt->types[i].low_gm_size = min_low; - gvt->types[i].high_gm_size = max((min_low<<3), MB_TO_BYTES(384U)); - gvt->types[i].fence = 4; - gvt->types[i].max_instance = min(low_avail / min_low, - high_avail / gvt->types[i].high_gm_size); - gvt->types[i].avail_instance = gvt->types[i].max_instance; + + gvt->types[i].low_gm_size = vgpu_types[i].low_mm; + gvt->types[i].high_gm_size = vgpu_types[i].high_mm; + gvt->types[i].fence = vgpu_types[i].fence; + gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm, + high_avail / vgpu_types[i].high_mm); if (IS_GEN8(gvt->dev_priv)) - sprintf(gvt->types[i].name, "GVTg_V4_%u", - gvt->types[i].max_instance); + sprintf(gvt->types[i].name, "GVTg_V4_%s", + vgpu_types[i].name); else if (IS_GEN9(gvt->dev_priv)) - sprintf(gvt->types[i].name, "GVTg_V5_%u", - gvt->types[i].max_instance); + sprintf(gvt->types[i].name, "GVTg_V5_%s", + vgpu_types[i].name); - min_low <<= 1; - gvt_dbg_core("type[%d]: %s max %u avail %u low %u high %u fence %u\n", - i, gvt->types[i].name, gvt->types[i].max_instance, + gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u\n", + i, gvt->types[i].name, gvt->types[i].avail_instance, gvt->types[i].low_gm_size, gvt->types[i].high_gm_size, gvt->types[i].fence); @@ -138,7 +149,7 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) { int i; unsigned int low_gm_avail, high_gm_avail, fence_avail; - unsigned int low_gm_min, high_gm_min, fence_min, total_min; + unsigned int low_gm_min, high_gm_min, fence_min; /* Need to depend on maxium hw resource size but keep on * static config for now. @@ -154,12 +165,11 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt) low_gm_min = low_gm_avail / gvt->types[i].low_gm_size; high_gm_min = high_gm_avail / gvt->types[i].high_gm_size; fence_min = fence_avail / gvt->types[i].fence; - total_min = min(min(low_gm_min, high_gm_min), fence_min); - gvt->types[i].avail_instance = min(gvt->types[i].max_instance, - total_min); + gvt->types[i].avail_instance = min(min(low_gm_min, high_gm_min), + fence_min); - gvt_dbg_core("update type[%d]: %s max %u avail %u low %u high %u fence %u\n", - i, gvt->types[i].name, gvt->types[i].max_instance, + gvt_dbg_core("update type[%d]: %s avail %u low %u high %u fence %u\n", + i, gvt->types[i].name, gvt->types[i].avail_instance, gvt->types[i].low_gm_size, gvt->types[i].high_gm_size, gvt->types[i].fence); } From 19c3164db457e0fc65d4501fd354506228576241 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 23 Feb 2017 09:15:57 +0200 Subject: [PATCH 0325/1299] drm/i915/glk: Fix watermark computations for third sprite plane MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geminilake has a third sprite plane (or fourth universal plane) that is independent from the cursor. Make sure that for_each_plane_id_on_crtc() is aware of that extra plane so that the watermark code takes it into account. Fixes: e9c9882556fc ("drm/i915/glk: Configure number of sprite planes properly") Cc: Ander Conselvan de Oliveira Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223071600.14356-2-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 70fc2fa0eaa8..e89f56476759 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -293,6 +293,7 @@ enum plane_id { PLANE_PRIMARY, PLANE_SPRITE0, PLANE_SPRITE1, + PLANE_SPRITE2, PLANE_CURSOR, I915_MAX_PLANES, }; From 5b7280f03cdcfcc8af7f087771bb3e49c4caa81b Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 23 Feb 2017 09:15:58 +0200 Subject: [PATCH 0326/1299] drm/i915/glk: Fix maximum scaling factor for Geminilake scalers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geminilake can output two pixels per clock, and that affects the maximum scaling factor for its scalers. Take that into account and avoid the following warning: WARNING: CPU: 1 PID: 593 at drivers/gpu/drm/i915/intel_display.c:13223 skl_max_scale.part.129+0x78/0x80 [i915] WARN_ON_ONCE(!crtc_clock || cdclk < crtc_clock) Modules linked in: x86_pkg_temp_thermal i915 coretemp kvm_intel kvm i2c_algo_bit drm_kms_helper irqbypass crct10dif_pclmul prime_numbers crc32_pclmul drm ghash_clmulni_intel shpchp tpm_tis tpm_tis_core tpm nfsd authw CPU: 1 PID: 593 Comm: kworker/u8:3 Tainted: G W 4.10.0-rc8ander+ #330 Hardware name: Intel Corp. Geminilake/GLK RVP1 DDR4 (05), BIOS GELKRVPA.X64.0035.B33.1702150552 02/15/2017 Workqueue: events_unbound async_run_entry_fn Call Trace: dump_stack+0x86/0xc3 __warn+0xcb/0xf0 warn_slowpath_fmt+0x5f/0x80 skl_max_scale.part.129+0x78/0x80 [i915] intel_check_primary_plane+0xa6/0xc0 [i915] intel_plane_atomic_check_with_state+0xd1/0x1a0 [i915] ? drm_printk+0xb5/0xc0 [drm] intel_plane_atomic_check+0x3d/0x80 [i915] drm_atomic_helper_check_planes+0x7c/0x200 [drm_kms_helper] intel_atomic_check+0xa5b/0x11a0 [i915] drm_atomic_check_only+0x353/0x600 [drm] ? drm_atomic_add_affected_connectors+0x10c/0x120 [drm] drm_atomic_commit+0x18/0x50 [drm] restore_fbdev_mode+0x14c/0x2a0 [drm_kms_helper] drm_fb_helper_restore_fbdev_mode_unlocked+0x34/0x80 [drm_kms_helper] drm_fb_helper_set_par+0x2d/0x60 [drm_kms_helper] intel_fbdev_set_par+0x1a/0x70 [i915] fbcon_init+0x582/0x610 visual_init+0xd6/0x130 do_bind_con_driver+0x1da/0x3c0 do_take_over_console+0x116/0x180 do_fbcon_takeover+0x5c/0xb0 fbcon_event_notify+0x772/0x8a0 ? __blocking_notifier_call_chain+0x35/0x70 notifier_call_chain+0x4a/0x70 __blocking_notifier_call_chain+0x4d/0x70 blocking_notifier_call_chain+0x16/0x20 fb_notifier_call_chain+0x1b/0x20 register_framebuffer+0x278/0x360 drm_fb_helper_initial_config+0x253/0x440 [drm_kms_helper] intel_fbdev_initial_config+0x18/0x30 [i915] async_run_entry_fn+0x39/0x170 process_one_work+0x212/0x670 ? process_one_work+0x197/0x670 worker_thread+0x4e/0x490 kthread+0x101/0x140 ? process_one_work+0x670/0x670 ? kthread_create_on_node+0x60/0x60 ret_from_fork+0x31/0x40 v2: s/max_pixclk/max_dotclk/ (Ville) Cc: Rodrigo Vivi Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223071600.14356-3-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 33bde1751679..1e0280e5c9c7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13291,16 +13291,22 @@ intel_cleanup_plane_fb(struct drm_plane *plane, int skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv; int max_scale; - int crtc_clock, cdclk; + int crtc_clock, max_dotclk; if (!intel_crtc || !crtc_state->base.enable) return DRM_PLANE_HELPER_NO_SCALING; - crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; - cdclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; + dev_priv = to_i915(intel_crtc->base.dev); - if (WARN_ON_ONCE(!crtc_clock || cdclk < crtc_clock)) + crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; + max_dotclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; + + if (IS_GEMINILAKE(dev_priv)) + max_dotclk *= 2; + + if (WARN_ON_ONCE(!crtc_clock || max_dotclk < crtc_clock)) return DRM_PLANE_HELPER_NO_SCALING; /* @@ -13309,7 +13315,8 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state * or * cdclk/crtc_clock */ - max_scale = min((1 << 16) * 3 - 1, (1 << 8) * ((cdclk << 8) / crtc_clock)); + max_scale = min((1 << 16) * 3 - 1, + (1 << 8) * ((max_dotclk << 8) / crtc_clock)); return max_scale; } From 6ebc69238df36bc61380a68f607a1c2039af134f Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 23 Feb 2017 09:15:59 +0200 Subject: [PATCH 0327/1299] drm/i915/glk: Pass dev_priv to intel_atomic_setup_scalers() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass dev_priv to intel_atomic_setup_scalers(). The next patch will need a dev_priv pointer. Cc: Ville Syrjälä Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223071600.14356-4-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_atomic.c | 10 +++++----- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index aa9160e7f1d8..58916e32c6c4 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -121,7 +121,7 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, /** * intel_atomic_setup_scalers() - setup scalers for crtc per staged requests - * @dev: DRM device + * @dev_priv: i915 device * @crtc: intel crtc * @crtc_state: incoming crtc_state to validate and setup scalers * @@ -136,9 +136,9 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, * 0 - scalers were setup succesfully * error code - otherwise */ -int intel_atomic_setup_scalers(struct drm_device *dev, - struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state) +int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, + struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state) { struct drm_plane *plane = NULL; struct intel_plane *intel_plane; @@ -199,7 +199,7 @@ int intel_atomic_setup_scalers(struct drm_device *dev, */ if (!plane) { struct drm_plane_state *state; - plane = drm_plane_from_index(dev, i); + plane = drm_plane_from_index(&dev_priv->drm, i); state = drm_atomic_get_plane_state(drm_state, plane); if (IS_ERR(state)) { DRM_DEBUG_KMS("Failed to add [PLANE:%d] to drm_state\n", diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1e0280e5c9c7..5a91f6741fed 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11036,7 +11036,7 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, ret = skl_update_scaler_crtc(pipe_config); if (!ret) - ret = intel_atomic_setup_scalers(dev, intel_crtc, + ret = intel_atomic_setup_scalers(dev_priv, intel_crtc, pipe_config); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 11f2e4163bd4..9b5fcc1423eb 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1894,9 +1894,9 @@ intel_atomic_get_existing_plane_state(struct drm_atomic_state *state, return to_intel_plane_state(plane_state); } -int intel_atomic_setup_scalers(struct drm_device *dev, - struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state); +int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, + struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state); /* intel_atomic_plane.c */ struct intel_plane_state *intel_create_plane_state(struct drm_plane *plane); From 08f5ba97aa9c975e8b3113620123c25998348dc4 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 23 Feb 2017 09:16:00 +0200 Subject: [PATCH 0328/1299] drm/i915/glk: Fix Geminilake scalers mode programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geminilake scalers can do 7x7 filtering for all supported input sizes, so it doesn't need the "high quality" mode programming, which was actually removed from that platform. v2: Split dev_priv parameter change out. (Ville) Cc: Ville Syrjälä , Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223071600.14356-5-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_atomic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index 58916e32c6c4..e65818e5d2ab 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -247,7 +247,9 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, } /* set scaler mode */ - if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) { + if (IS_GEMINILAKE(dev_priv)) { + scaler_state->scalers[*scaler_id].mode = 0; + } else if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) { /* * when only 1 scaler is in use on either pipe A or B, * scaler 0 operates in high quality (HQ) mode. From a5570fe5c2ca012f92666eb790973827554e19df Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 11:51:02 +0000 Subject: [PATCH 0329/1299] Revert "drm/i915/dp: Ratelimit DP aux timeout messages" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 7ee686034b8b "drm/i915/dp: Ratelimit DP aux timeout messages" as although it successfully squelches the debug messages, when it does so it generates a warning instead. CI lights up orange with all the warnings! In its current incarnation DRM_DEBUG_RATELIMITED is not usable for us, and we need to first teach lib/ratelimit.c not to warn when used for debug messages. Fixes: 7ee686034b8b ("drm/i915/dp: Ratelimit DP aux timeout messages") Signed-off-by: Chris Wilson Cc: Lyude Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Link: http://patchwork.freedesktop.org/patch/msgid/20170223115102.7059-1-chris@chris-wilson.co.uk Acked-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_dp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index fca0fcade2d6..024798a9c016 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1121,8 +1121,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, /* Timeouts occur when the device isn't connected, so they're * "normal" -- don't fill the kernel log with these */ if (status & DP_AUX_CH_CTL_TIME_OUT_ERROR) { - DRM_DEBUG_KMS_RATELIMITED("dp_aux_ch timeout status 0x%08x\n", - status); + DRM_DEBUG_KMS("dp_aux_ch timeout status 0x%08x\n", status); ret = -ETIMEDOUT; goto out; } From 309663ab7b4f0de1540aff212fd067e3dd92acf3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:07 +0000 Subject: [PATCH 0330/1299] drm/i915: Check against the signaled bit for fences/requests When dma_fence_signal() is called, it sets a flag to indicate the fence is complete. Before the dma_fence is signaled, the seqno check will first be passed. During an unlocked check (such as inside a waiter), it is possible for the fence to be signaled even though the seqno has been reset (by engine wraparound). In this case the waiter will be kicked, but for an extra layer of protection we can check the persistent signaled bit from the fence. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e89f56476759..9220a28fbaa8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4013,6 +4013,15 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; + /* Note that the engine may have wrapped around the seqno, and + * so our request->global_seqno will be ahead of the hardware, + * even though it completed the request before wrapping. We catch + * this by kicking all the waiters before resetting the seqno + * in hardware, and also signal the fence. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags)) + return true; + /* Before we do the heavier coherent read of the seqno, * check the value (hopefully) in the CPU cacheline. */ From 9b6586ae9f6bf2115cf414d6004bbe69a4255692 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:08 +0000 Subject: [PATCH 0331/1299] drm/i915: Keep a global seqno per-engine Replace the global device seqno with one for each engine, and account for in-flight seqno on each separately. This is consistent with dma-fence as each timeline has separate fence-contexts for each engine and a seqno is only ordered within a fence-context (i.e. seqno do not need to be ordered wrt to other engines, just ordered within a single engine). This is required to enable request rewinding for preemption on individual engines (we have to rewind the global seqno to avoid overflow, and we do not have to rewind all engines just to preempt one.) v2: Rename active_seqno to inflight_seqnos to more clearly indicate that it is a counter and not equivalent to the existing seqno. Update functions that operated on active_seqno similarly. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 11 +--- drivers/gpu/drm/i915/i915_gem_request.c | 83 +++++++++++++----------- drivers/gpu/drm/i915/i915_gem_request.h | 8 +-- drivers/gpu/drm/i915/i915_gem_timeline.h | 9 ++- drivers/gpu/drm/i915/intel_breadcrumbs.c | 35 +++++----- drivers/gpu/drm/i915/intel_engine_cs.c | 2 - drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +- 7 files changed, 71 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 655e60d609c2..1a28b5279bec 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1079,15 +1079,6 @@ static const struct file_operations i915_error_state_fops = { }; #endif -static int -i915_next_seqno_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno); - return 0; -} - static int i915_next_seqno_set(void *data, u64 val) { @@ -1106,7 +1097,7 @@ i915_next_seqno_set(void *data, u64 val) } DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, - i915_next_seqno_get, i915_next_seqno_set, + NULL, i915_next_seqno_set, "0x%llx\n"); static int i915_frequency_info(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 12b06e055490..b94ae6cf9837 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -198,6 +198,12 @@ i915_priotree_init(struct i915_priotree *pt) pt->priority = INT_MIN; } +static void unreserve_seqno(struct intel_engine_cs *engine) +{ + GEM_BUG_ON(!engine->timeline->inflight_seqnos); + engine->timeline->inflight_seqnos--; +} + void i915_gem_retire_noop(struct i915_gem_active *active, struct drm_i915_gem_request *request) { @@ -237,6 +243,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) &request->i915->gt.idle_work, msecs_to_jiffies(100)); } + unreserve_seqno(request->engine); /* Walk through the active list, calling retire on each. This allows * objects to track their GPU activity and mark themselves as idle @@ -307,7 +314,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) } while (tmp != req); } -static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) +static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) { struct i915_gem_timeline *timeline = &i915->gt.global_timeline; struct intel_engine_cs *engine; @@ -325,15 +332,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) GEM_BUG_ON(i915->gt.active_requests > 1); /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) { - while (intel_breadcrumbs_busy(i915)) - cond_resched(); /* spin until threads are complete */ - } - atomic_set(&timeline->seqno, seqno); + for_each_engine(engine, i915, id) { + struct intel_timeline *tl = &timeline->engine[id]; - /* Finally reset hw state */ - for_each_engine(engine, i915, id) + if (!i915_seqno_passed(seqno, tl->seqno)) { + /* spin until threads are complete */ + while (intel_breadcrumbs_busy(engine)) + cond_resched(); + } + + /* Finally reset hw state */ + tl->seqno = seqno; intel_engine_init_global_seqno(engine, seqno); + } list_for_each_entry(timeline, &i915->gt.timelines, link) { for_each_engine(engine, i915, id) { @@ -358,37 +369,38 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) /* HWS page needs to be set less than what we * will inject to ring */ - return i915_gem_init_global_seqno(dev_priv, seqno - 1); + return reset_all_global_seqno(dev_priv, seqno - 1); } -static int reserve_global_seqno(struct drm_i915_private *i915) +static int reserve_seqno(struct intel_engine_cs *engine) { - u32 active_requests = ++i915->gt.active_requests; - u32 seqno = atomic_read(&i915->gt.global_timeline.seqno); + u32 active = ++engine->timeline->inflight_seqnos; + u32 seqno = engine->timeline->seqno; int ret; /* Reservation is fine until we need to wrap around */ - if (likely(seqno + active_requests > seqno)) + if (likely(!add_overflows(seqno, active))) return 0; - ret = i915_gem_init_global_seqno(i915, 0); + /* Even though we are tracking inflight seqno individually on each + * engine, other engines may be observing us using hw semaphores and + * so we need to idle all engines before wrapping around this engine. + * As all engines are then idle, we can reset the seqno on all, so + * we don't stall in quick succession if each engine is being + * similarly utilized. + */ + ret = reset_all_global_seqno(engine->i915, 0); if (ret) { - i915->gt.active_requests--; + engine->timeline->inflight_seqnos--; return ret; } return 0; } -static u32 __timeline_get_seqno(struct i915_gem_timeline *tl) +static u32 timeline_get_seqno(struct intel_timeline *tl) { - /* seqno only incremented under a mutex */ - return ++tl->seqno.counter; -} - -static u32 timeline_get_seqno(struct i915_gem_timeline *tl) -{ - return atomic_inc_return(&tl->seqno); + return ++tl->seqno; } void __i915_gem_request_submit(struct drm_i915_gem_request *request) @@ -402,14 +414,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) GEM_BUG_ON(timeline == request->timeline); assert_spin_locked(&timeline->lock); - seqno = timeline_get_seqno(timeline->common); + seqno = timeline_get_seqno(timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno)); - request->previous_seqno = timeline->last_submitted_seqno; - timeline->last_submitted_seqno = seqno; - /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); request->global_seqno = seqno; @@ -516,7 +524,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) return ERR_PTR(ret); - ret = reserve_global_seqno(dev_priv); + ret = reserve_seqno(engine); if (ret) goto err_unpin; @@ -568,7 +576,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, &i915_fence_ops, &req->lock, req->timeline->fence_context, - __timeline_get_seqno(req->timeline->common)); + timeline_get_seqno(req->timeline)); /* We bump the ref for the fence chain */ i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); @@ -613,6 +621,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, */ req->head = req->ring->tail; + /* Check that we didn't interrupt ourselves with a new request */ + GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); return req; err_ctx: @@ -623,7 +633,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, kmem_cache_free(dev_priv->requests, req); err_unreserve: - dev_priv->gt.active_requests--; + unreserve_seqno(engine); err_unpin: engine->context_unpin(engine, ctx); return ERR_PTR(ret); @@ -836,8 +846,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * our i915_gem_request_alloc() and called __i915_add_request() before * us, the timeline will hold its seqno which is later than ours. */ - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, - request->fence.seqno)); + GEM_BUG_ON(timeline->seqno != request->fence.seqno); /* * To ensure that this call will not fail, space for its emissions @@ -891,16 +900,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) list_add_tail(&request->link, &timeline->requests); spin_unlock_irq(&timeline->lock); - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, - request->fence.seqno)); - - timeline->last_submitted_seqno = request->fence.seqno; + GEM_BUG_ON(timeline->seqno != request->fence.seqno); i915_gem_active_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); request->emitted_jiffies = jiffies; - i915_gem_mark_busy(engine); + if (!request->i915->gt.active_requests++) + i915_gem_mark_busy(engine); /* Let the backend know a new request has arrived that may need * to adjust the existing execution schedule due to a high priority diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index ea511f06efaf..9049936c571c 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -145,12 +145,6 @@ struct drm_i915_gem_request { u32 global_seqno; - /** GEM sequence number associated with the previous request, - * when the HWS breadcrumb is equal to this the GPU is processing - * this request. - */ - u32 previous_seqno; - /** Position in the ring of the start of the request */ u32 head; @@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req) { GEM_BUG_ON(!req->global_seqno); return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->previous_seqno); + req->global_seqno - 1); } static inline bool diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index f2e51f42cc2f..6c53e14cab2a 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -33,7 +33,13 @@ struct i915_gem_timeline; struct intel_timeline { u64 fence_context; - u32 last_submitted_seqno; + u32 seqno; + + /** + * Count of outstanding requests, from the time they are constructed + * to the moment they are retired. Loosely coupled to hardware. + */ + u32 inflight_seqnos; spinlock_t lock; @@ -56,7 +62,6 @@ struct intel_timeline { struct i915_gem_timeline { struct list_head link; - atomic_t seqno; struct drm_i915_private *i915; const char *name; diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 5932e2dc0c6f..4f4e703d1b14 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -676,31 +676,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) cancel_fake_irq(engine); } -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915) +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int mask = 0; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + bool busy = false; - for_each_engine(engine, i915, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; + spin_lock_irq(&b->lock); - spin_lock_irq(&b->lock); - - if (b->first_wait) { - wake_up_process(b->first_wait->tsk); - mask |= intel_engine_flag(engine); - } - - if (b->first_signal) { - wake_up_process(b->signaler); - mask |= intel_engine_flag(engine); - } - - spin_unlock_irq(&b->lock); + if (b->first_wait) { + wake_up_process(b->first_wait->tsk); + busy |= intel_engine_flag(engine); } - return mask; + if (b->first_signal) { + wake_up_process(b->signaler); + busy |= intel_engine_flag(engine); + } + + spin_unlock_irq(&b->lock); + + return busy; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 46c94740be53..c4d4698f98e7 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -252,8 +252,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) engine->irq_seqno_barrier(engine); GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); - engine->timeline->last_submitted_seqno = seqno; - engine->hangcheck.seqno = seqno; /* After manually advancing the seqno, fake the interrupt in case diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4091c97be6ec..b91c2c7ef5a6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -556,7 +556,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) * wtih serialising this hint with anything, so document it as * a hint and nothing more. */ - return READ_ONCE(engine->timeline->last_submitted_seqno); + return READ_ONCE(engine->timeline->seqno); } int init_workarounds_ring(struct intel_engine_cs *engine); @@ -630,7 +630,7 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915); +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine); static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) { From 12d3173b2e55b4abeb6710dbd7b823f5241c4a45 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:09 +0000 Subject: [PATCH 0332/1299] drm/i915: Move reserve_seqno() next to unreserve_seqno() Move the companion functions next to each other. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 161 ++++++++++++------------ 1 file changed, 77 insertions(+), 84 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index b94ae6cf9837..f33cfc19550c 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -198,6 +198,83 @@ i915_priotree_init(struct i915_priotree *pt) pt->priority = INT_MIN; } +static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) +{ + struct i915_gem_timeline *timeline = &i915->gt.global_timeline; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int ret; + + /* Carefully retire all requests without writing to the rings */ + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + if (ret) + return ret; + + i915_gem_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests > 1); + + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ + for_each_engine(engine, i915, id) { + struct intel_timeline *tl = &timeline->engine[id]; + + if (!i915_seqno_passed(seqno, tl->seqno)) { + /* spin until threads are complete */ + while (intel_breadcrumbs_busy(engine)) + cond_resched(); + } + + /* Finally reset hw state */ + tl->seqno = seqno; + intel_engine_init_global_seqno(engine, seqno); + } + + list_for_each_entry(timeline, &i915->gt.timelines, link) { + for_each_engine(engine, i915, id) { + struct intel_timeline *tl = &timeline->engine[id]; + + memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno)); + } + } + + return 0; +} + +int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + if (seqno == 0) + return -EINVAL; + + /* HWS page needs to be set less than what we + * will inject to ring + */ + return reset_all_global_seqno(dev_priv, seqno - 1); +} + +static int reserve_seqno(struct intel_engine_cs *engine) +{ + u32 active = ++engine->timeline->inflight_seqnos; + u32 seqno = engine->timeline->seqno; + int ret; + + /* Reservation is fine until we need to wrap around */ + if (likely(!add_overflows(seqno, active))) + return 0; + + ret = reset_all_global_seqno(engine->i915, 0); + if (ret) { + engine->timeline->inflight_seqnos--; + return ret; + } + + return 0; +} + static void unreserve_seqno(struct intel_engine_cs *engine) { GEM_BUG_ON(!engine->timeline->inflight_seqnos); @@ -314,90 +391,6 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) } while (tmp != req); } -static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) -{ - struct i915_gem_timeline *timeline = &i915->gt.global_timeline; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int ret; - - /* Carefully retire all requests without writing to the rings */ - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); - if (ret) - return ret; - - i915_gem_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests > 1); - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - for_each_engine(engine, i915, id) { - struct intel_timeline *tl = &timeline->engine[id]; - - if (!i915_seqno_passed(seqno, tl->seqno)) { - /* spin until threads are complete */ - while (intel_breadcrumbs_busy(engine)) - cond_resched(); - } - - /* Finally reset hw state */ - tl->seqno = seqno; - intel_engine_init_global_seqno(engine, seqno); - } - - list_for_each_entry(timeline, &i915->gt.timelines, link) { - for_each_engine(engine, i915, id) { - struct intel_timeline *tl = &timeline->engine[id]; - - memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno)); - } - } - - return 0; -} - -int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - return reset_all_global_seqno(dev_priv, seqno - 1); -} - -static int reserve_seqno(struct intel_engine_cs *engine) -{ - u32 active = ++engine->timeline->inflight_seqnos; - u32 seqno = engine->timeline->seqno; - int ret; - - /* Reservation is fine until we need to wrap around */ - if (likely(!add_overflows(seqno, active))) - return 0; - - /* Even though we are tracking inflight seqno individually on each - * engine, other engines may be observing us using hw semaphores and - * so we need to idle all engines before wrapping around this engine. - * As all engines are then idle, we can reset the seqno on all, so - * we don't stall in quick succession if each engine is being - * similarly utilized. - */ - ret = reset_all_global_seqno(engine->i915, 0); - if (ret) { - engine->timeline->inflight_seqnos--; - return ret; - } - - return 0; -} - static u32 timeline_get_seqno(struct intel_timeline *tl) { return ++tl->seqno; From 4b36b2e506779008c302784437dc102d1ccc7537 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:10 +0000 Subject: [PATCH 0333/1299] drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue Use a local variable to avoid having to type out the full name of the gpu_error wait_queue. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index f33cfc19550c..a40b825763a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1077,6 +1077,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, { const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue; DEFINE_WAIT(reset); struct intel_wait wait; @@ -1112,7 +1113,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, set_current_state(state); if (flags & I915_WAIT_LOCKED) - add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); + add_wait_queue(errq, &reset); intel_wait_init(&wait, req->global_seqno); if (intel_engine_add_wait(req->engine, &wait)) @@ -1163,8 +1164,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, i915_reset_in_progress(&req->i915->gpu_error)) { __set_current_state(TASK_RUNNING); i915_reset(req->i915); - reset_wait_queue(&req->i915->gpu_error.wait_queue, - &reset); + reset_wait_queue(errq, &reset); continue; } @@ -1175,7 +1175,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, intel_engine_remove_wait(req->engine, &wait); if (flags & I915_WAIT_LOCKED) - remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); + remove_wait_queue(errq, &reset); __set_current_state(TASK_RUNNING); complete: From 7de53bf7e6f3f823267f77b1fe19e6db7532000d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:11 +0000 Subject: [PATCH 0334/1299] drm/i915: Add ourselves to the gpu error waitqueue for the entire wait Add ourselves to the gpu error waitqueue earlier on, even before we determine we have to wait on the seqno. This is so that we can then share the waitqueue between stages in subsequent patches. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index a40b825763a3..80142a6027da 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1097,6 +1097,9 @@ long i915_wait_request(struct drm_i915_gem_request *req, trace_i915_gem_request_wait_begin(req, flags); + if (flags & I915_WAIT_LOCKED) + add_wait_queue(errq, &reset); + if (!i915_sw_fence_done(&req->execute)) { timeout = __i915_request_wait_for_execute(req, flags, timeout); if (timeout < 0) @@ -1112,9 +1115,6 @@ long i915_wait_request(struct drm_i915_gem_request *req, goto complete; set_current_state(state); - if (flags & I915_WAIT_LOCKED) - add_wait_queue(errq, &reset); - intel_wait_init(&wait, req->global_seqno); if (intel_engine_add_wait(req->engine, &wait)) /* In order to check that we haven't missed the interrupt @@ -1174,11 +1174,11 @@ long i915_wait_request(struct drm_i915_gem_request *req, } intel_engine_remove_wait(req->engine, &wait); - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(errq, &reset); __set_current_state(TASK_RUNNING); complete: + if (flags & I915_WAIT_LOCKED) + remove_wait_queue(errq, &reset); trace_i915_gem_request_wait_end(req); return timeout; From 541ca6ed79f0058f8885b2b41409204731946767 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:12 +0000 Subject: [PATCH 0335/1299] drm/i915: Inline __i915_gem_request_wait_for_execute() It had only one callsite and existed to keep the code clearer. Now having shared the wait-on-error between phases and with plans to change the wait-for-execute in the next few patches, remove the out of line wait loop and move it into the main body of i915_wait_request. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 78 +++++++++---------------- 1 file changed, 29 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 80142a6027da..34d4c123546e 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1004,54 +1004,6 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, return false; } -static long -__i915_request_wait_for_execute(struct drm_i915_gem_request *request, - unsigned int flags, - long timeout) -{ - const int state = flags & I915_WAIT_INTERRUPTIBLE ? - TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - wait_queue_head_t *q = &request->i915->gpu_error.wait_queue; - DEFINE_WAIT(reset); - DEFINE_WAIT(wait); - - if (flags & I915_WAIT_LOCKED) - add_wait_queue(q, &reset); - - do { - prepare_to_wait(&request->execute.wait, &wait, state); - - if (i915_sw_fence_done(&request->execute)) - break; - - if (flags & I915_WAIT_LOCKED && - i915_reset_in_progress(&request->i915->gpu_error)) { - __set_current_state(TASK_RUNNING); - i915_reset(request->i915); - reset_wait_queue(q, &reset); - continue; - } - - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - break; - } - - if (!timeout) { - timeout = -ETIME; - break; - } - - timeout = io_schedule_timeout(timeout); - } while (1); - finish_wait(&request->execute.wait, &wait); - - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(q, &reset); - - return timeout; -} - /** * i915_wait_request - wait until execution of request has finished * @req: the request to wait upon @@ -1101,7 +1053,35 @@ long i915_wait_request(struct drm_i915_gem_request *req, add_wait_queue(errq, &reset); if (!i915_sw_fence_done(&req->execute)) { - timeout = __i915_request_wait_for_execute(req, flags, timeout); + DEFINE_WAIT(exec); + + do { + prepare_to_wait(&req->execute.wait, &exec, state); + if (i915_sw_fence_done(&req->execute)) + break; + + if (flags & I915_WAIT_LOCKED && + i915_reset_in_progress(&req->i915->gpu_error)) { + __set_current_state(TASK_RUNNING); + i915_reset(req->i915); + reset_wait_queue(errq, &reset); + continue; + } + + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; + break; + } + + if (!timeout) { + timeout = -ETIME; + break; + } + + timeout = io_schedule_timeout(timeout); + } while (1); + finish_wait(&req->execute.wait, &exec); + if (timeout < 0) goto complete; From fe49789fab974dd04c0fef26a4ce7f7f411b40dd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:13 +0000 Subject: [PATCH 0336/1299] drm/i915: Deconstruct execute fence On reflection, we are only using the execute fence as a waitqueue on the global_seqno and not using it for dependency tracking between fences (unlike the submit and dma fences). By only treating it as a waitqueue, we can then treat it similar to the other waitqueues during submit, making the code simpler. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 50 ++++++------------------- drivers/gpu/drm/i915/i915_gem_request.h | 10 +---- 2 files changed, 13 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 34d4c123546e..37ef7084fce5 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -69,7 +69,6 @@ static void i915_fence_release(struct dma_fence *fence) * caught trying to reuse dead objects. */ i915_sw_fence_fini(&req->submit); - i915_sw_fence_fini(&req->execute); kmem_cache_free(req->i915->requests, req); } @@ -294,7 +293,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); - GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute)); GEM_BUG_ON(!i915_gem_request_completed(request)); GEM_BUG_ON(!request->i915->gt.active_requests); @@ -402,6 +400,8 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) struct intel_timeline *timeline; u32 seqno; + trace_i915_gem_request_execute(request); + /* Transfer from per-context onto the global per-engine timeline */ timeline = engine->timeline; GEM_BUG_ON(timeline == request->timeline); @@ -426,8 +426,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); - i915_sw_fence_commit(&request->execute); - trace_i915_gem_request_execute(request); + wake_up_all(&request->execute); } void i915_gem_request_submit(struct drm_i915_gem_request *request) @@ -463,24 +462,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } -static int __i915_sw_fence_call -execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) -{ - struct drm_i915_gem_request *request = - container_of(fence, typeof(*request), execute); - - switch (state) { - case FENCE_COMPLETE: - break; - - case FENCE_FREE: - i915_gem_request_put(request); - break; - } - - return NOTIFY_DONE; -} - /** * i915_gem_request_alloc - allocate a request structure * @@ -573,13 +554,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, /* We bump the ref for the fence chain */ i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); - i915_sw_fence_init(&i915_gem_request_get(req)->execute, execute_notify); - - /* Ensure that the execute fence completes after the submit fence - - * as we complete the execute fence from within the submit fence - * callback, its completion would otherwise be visible first. - */ - i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq); + init_waitqueue_head(&req->execute); i915_priotree_init(&req->priotree); @@ -1031,6 +1006,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue; DEFINE_WAIT(reset); + DEFINE_WAIT(exec); struct intel_wait wait; might_sleep(); @@ -1052,12 +1028,11 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (flags & I915_WAIT_LOCKED) add_wait_queue(errq, &reset); - if (!i915_sw_fence_done(&req->execute)) { - DEFINE_WAIT(exec); - + reset_wait_queue(&req->execute, &exec); + if (!req->global_seqno) { do { - prepare_to_wait(&req->execute.wait, &exec, state); - if (i915_sw_fence_done(&req->execute)) + set_current_state(state); + if (req->global_seqno) break; if (flags & I915_WAIT_LOCKED && @@ -1080,15 +1055,14 @@ long i915_wait_request(struct drm_i915_gem_request *req, timeout = io_schedule_timeout(timeout); } while (1); - finish_wait(&req->execute.wait, &exec); + finish_wait(&req->execute, &exec); if (timeout < 0) goto complete; - GEM_BUG_ON(!i915_sw_fence_done(&req->execute)); + GEM_BUG_ON(!req->global_seqno); } - GEM_BUG_ON(!i915_sw_fence_done(&req->submit)); - GEM_BUG_ON(!req->global_seqno); + GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit)); /* Optimistic short spin before touching IRQs */ if (i915_spin_request(req, state, 5)) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 9049936c571c..467d3e13fce0 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -119,18 +119,10 @@ struct drm_i915_gem_request { * The submit fence is used to await upon all of the request's * dependencies. When it is signaled, the request is ready to run. * It is used by the driver to then queue the request for execution. - * - * The execute fence is used to signal when the request has been - * sent to hardware. - * - * It is illegal for the submit fence of one request to wait upon the - * execute fence of an earlier request. It should be sufficient to - * wait upon the submit fence of the earlier request. */ struct i915_sw_fence submit; - struct i915_sw_fence execute; wait_queue_t submitq; - wait_queue_t execq; + wait_queue_head_t execute; /* A list of everyone we wait upon, and everyone who waits upon us. * Even though we will not be submitted to the hardware before the From 754c9fd5764909bc7c3ba9779355d55cb357be8a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:14 +0000 Subject: [PATCH 0337/1299] drm/i915: Protect the request->global_seqno with the engine->timeline lock A request is assigned a global seqno only when it is on the hardware execution queue. The global seqno can be used to maintain a list of requests on the same engine in retirement order, for example for constructing a priority queue for waiting. Prior to its execution, or if it is subsequently removed in the event of preemption, its global seqno is zero. As both insertion and removal from the execution queue may operate in IRQ context, it is not guarded by the usual struct_mutex BKL. Instead those relying on the global seqno must be prepared for its value to change between reads. Only when the request is complete can the global seqno be stable (due to the memory barriers on submitting the commands to the hardware to write the breadcrumb, if the HWS shows that it has passed the global seqno and the global seqno is unchanged after the read, it is indeed complete). Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 17 ++++- drivers/gpu/drm/i915/i915_gem.c | 18 +++-- drivers/gpu/drm/i915/i915_gem_request.c | 45 ++++++++----- drivers/gpu/drm/i915/i915_gem_request.h | 66 +++++++++++++++---- drivers/gpu/drm/i915/intel_breadcrumbs.c | 11 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 39 ++++++++++- .../drm/i915/selftests/intel_breadcrumbs.c | 6 +- 7 files changed, 156 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9220a28fbaa8..e5b34eaf41ca 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4009,9 +4009,10 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } static inline bool -__i915_request_irq_complete(struct drm_i915_gem_request *req) +__i915_request_irq_complete(const struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; + u32 seqno; /* Note that the engine may have wrapped around the seqno, and * so our request->global_seqno will be ahead of the hardware, @@ -4022,10 +4023,20 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags)) return true; + /* The request was dequeued before we were awoken. We check after + * inspecting the hw to confirm that this was the same request + * that generated the HWS update. The memory barriers within + * the request execution are sufficient to ensure that a check + * after reading the value from hw matches this request. + */ + seqno = i915_gem_request_global_seqno(req); + if (!seqno) + return false; + /* Before we do the heavier coherent read of the seqno, * check the value (hopefully) in the CPU cacheline. */ - if (__i915_gem_request_completed(req)) + if (__i915_gem_request_completed(req, seqno)) return true; /* Ensure our read of the seqno is coherent so that we @@ -4076,7 +4087,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) wake_up_process(tsk); rcu_read_unlock(); - if (__i915_gem_request_completed(req)) + if (__i915_gem_request_completed(req, seqno)) return true; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e29b6f0dda29..a8167003c10b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -397,7 +397,7 @@ i915_gem_object_wait_fence(struct dma_fence *fence, if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) i915_gem_request_retire_upto(rq); - if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) { + if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) { /* The GPU is now idle and this client has stalled. * Since no other client has submitted a request in the * meantime, assume that this client is the only one @@ -2608,7 +2608,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request; + struct drm_i915_gem_request *request, *active = NULL; + unsigned long flags; /* We are called by the error capture and reset at a random * point in time. In particular, note that neither is crucially @@ -2618,17 +2619,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ + spin_lock_irqsave(&engine->timeline->lock, flags); list_for_each_entry(request, &engine->timeline->requests, link) { - if (__i915_gem_request_completed(request)) + if (__i915_gem_request_completed(request, + request->global_seqno)) continue; GEM_BUG_ON(request->engine != engine); GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)); - return request; - } - return NULL; + active = request; + break; + } + spin_unlock_irqrestore(&engine->timeline->lock, flags); + + return active; } static bool engine_stalled(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 37ef7084fce5..d394cbe8de11 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -418,7 +418,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) intel_engine_enable_signaling(request); spin_unlock(&request->lock); - GEM_BUG_ON(!request->global_seqno); engine->emit_breadcrumb(request, request->ring->vaddr + request->postfix); @@ -505,7 +504,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, /* Move the oldest request to the slab-cache (if not in use!) */ req = list_first_entry_or_null(&engine->timeline->requests, typeof(*req), link); - if (req && __i915_gem_request_completed(req)) + if (req && i915_gem_request_completed(req)) i915_gem_request_retire(req); /* Beware: Dragons be flying overhead. @@ -611,6 +610,7 @@ static int i915_gem_request_await_request(struct drm_i915_gem_request *to, struct drm_i915_gem_request *from) { + u32 seqno; int ret; GEM_BUG_ON(to == from); @@ -633,14 +633,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret < 0 ? ret : 0; } - if (!from->global_seqno) { + seqno = i915_gem_request_global_seqno(from); + if (!seqno) { ret = i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, GFP_KERNEL); return ret < 0 ? ret : 0; } - if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id]) + if (seqno <= to->timeline->sync_seqno[from->engine->id]) return 0; trace_i915_gem_ring_sync_to(to, from); @@ -658,7 +659,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret; } - to->timeline->sync_seqno[from->engine->id] = from->global_seqno; + to->timeline->sync_seqno[from->engine->id] = seqno; return 0; } @@ -938,7 +939,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) } bool __i915_spin_request(const struct drm_i915_gem_request *req, - int state, unsigned long timeout_us) + u32 seqno, int state, unsigned long timeout_us) { struct intel_engine_cs *engine = req->engine; unsigned int irq, cpu; @@ -956,7 +957,11 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, irq = atomic_read(&engine->irq_count); timeout_us += local_clock_us(&cpu); do { - if (__i915_gem_request_completed(req)) + if (seqno != i915_gem_request_global_seqno(req)) + break; + + if (i915_seqno_passed(intel_engine_get_seqno(req->engine), + seqno)) return true; /* Seqno are meant to be ordered *before* the interrupt. If @@ -1028,11 +1033,14 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (flags & I915_WAIT_LOCKED) add_wait_queue(errq, &reset); + intel_wait_init(&wait); + reset_wait_queue(&req->execute, &exec); - if (!req->global_seqno) { + if (!intel_wait_update_request(&wait, req)) { do { set_current_state(state); - if (req->global_seqno) + + if (intel_wait_update_request(&wait, req)) break; if (flags & I915_WAIT_LOCKED && @@ -1060,7 +1068,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (timeout < 0) goto complete; - GEM_BUG_ON(!req->global_seqno); + GEM_BUG_ON(!intel_wait_has_seqno(&wait)); } GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit)); @@ -1069,7 +1077,6 @@ long i915_wait_request(struct drm_i915_gem_request *req, goto complete; set_current_state(state); - intel_wait_init(&wait, req->global_seqno); if (intel_engine_add_wait(req->engine, &wait)) /* In order to check that we haven't missed the interrupt * as we enabled it, we need to kick ourselves to do a @@ -1090,7 +1097,8 @@ long i915_wait_request(struct drm_i915_gem_request *req, timeout = io_schedule_timeout(timeout); - if (intel_wait_complete(&wait)) + if (intel_wait_complete(&wait) && + intel_wait_check_request(&wait, req)) break; set_current_state(state); @@ -1141,14 +1149,21 @@ long i915_wait_request(struct drm_i915_gem_request *req, static void engine_retire_requests(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request, *next; + u32 seqno = intel_engine_get_seqno(engine); + LIST_HEAD(retire); + spin_lock_irq(&engine->timeline->lock); list_for_each_entry_safe(request, next, &engine->timeline->requests, link) { - if (!__i915_gem_request_completed(request)) - return; + if (!i915_seqno_passed(seqno, request->global_seqno)) + break; - i915_gem_request_retire(request); + list_move_tail(&request->link, &retire); } + spin_unlock_irq(&engine->timeline->lock); + + list_for_each_entry_safe(request, next, &retire, link) + i915_gem_request_retire(request); } void i915_gem_retire_requests(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 467d3e13fce0..b81f6709905c 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -135,6 +135,11 @@ struct drm_i915_gem_request { struct i915_priotree priotree; struct i915_dependency dep; + /** GEM sequence number associated with this request on the + * global execution timeline. It is zero when the request is not + * on the HW queue (i.e. not on the engine timeline list). + * Its value is guarded by the timeline spinlock. + */ u32 global_seqno; /** Position in the ring of the start of the request */ @@ -229,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, *pdst = src; } +/** + * i915_gem_request_global_seqno - report the current global seqno + * @request - the request + * + * A request is assigned a global seqno only when it is on the hardware + * execution queue. The global seqno can be used to maintain a list of + * requests on the same engine in retirement order, for example for + * constructing a priority queue for waiting. Prior to its execution, or + * if it is subsequently removed in the event of preemption, its global + * seqno is zero. As both insertion and removal from the execution queue + * may operate in IRQ context, it is not guarded by the usual struct_mutex + * BKL. Instead those relying on the global seqno must be prepared for its + * value to change between reads. Only when the request is complete can + * the global seqno be stable (due to the memory barriers on submitting + * the commands to the hardware to write the breadcrumb, if the HWS shows + * that it has passed the global seqno and the global seqno is unchanged + * after the read, it is indeed complete). + */ +static u32 +i915_gem_request_global_seqno(const struct drm_i915_gem_request *request) +{ + return READ_ONCE(request->global_seqno); +} + int i915_gem_request_await_object(struct drm_i915_gem_request *to, struct drm_i915_gem_object *obj, @@ -269,46 +298,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) } static inline bool -__i915_gem_request_started(const struct drm_i915_gem_request *req) +__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno) { - GEM_BUG_ON(!req->global_seqno); + GEM_BUG_ON(!seqno); return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->global_seqno - 1); + seqno - 1); } static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) { - if (!req->global_seqno) + u32 seqno; + + seqno = i915_gem_request_global_seqno(req); + if (!seqno) return false; - return __i915_gem_request_started(req); + return __i915_gem_request_started(req, seqno); } static inline bool -__i915_gem_request_completed(const struct drm_i915_gem_request *req) +__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno) { - GEM_BUG_ON(!req->global_seqno); - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->global_seqno); + GEM_BUG_ON(!seqno); + return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) && + seqno == i915_gem_request_global_seqno(req); } static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) { - if (!req->global_seqno) + u32 seqno; + + seqno = i915_gem_request_global_seqno(req); + if (!seqno) return false; - return __i915_gem_request_completed(req); + return __i915_gem_request_completed(req, seqno); } bool __i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us); + u32 seqno, int state, unsigned long timeout_us); static inline bool i915_spin_request(const struct drm_i915_gem_request *request, int state, unsigned long timeout_us) { - return (__i915_gem_request_started(request) && - __i915_spin_request(request, state, timeout_us)); + u32 seqno; + + seqno = i915_gem_request_global_seqno(request); + return (__i915_gem_request_started(request, seqno) && + __i915_spin_request(request, seqno, state, timeout_us)); } /* We treat requests as fences. This is not be to confused with our diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 4f4e703d1b14..d5bf4c0b2deb 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -545,6 +545,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node *parent, **p; bool first, wakeup; + u32 seqno; /* Note that we may be called from an interrupt handler on another * device (e.g. nouveau signaling a fence completion causing us @@ -555,11 +556,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ assert_spin_locked(&request->lock); - if (!request->global_seqno) + + seqno = i915_gem_request_global_seqno(request); + if (!seqno) return; request->signaling.wait.tsk = b->signaler; - request->signaling.wait.seqno = request->global_seqno; + request->signaling.wait.seqno = seqno; i915_gem_request_get(request); spin_lock(&b->lock); @@ -583,8 +586,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) p = &b->signals.rb_node; while (*p) { parent = *p; - if (i915_seqno_passed(request->global_seqno, - to_signaler(parent)->global_seqno)) { + if (i915_seqno_passed(seqno, + to_signaler(parent)->signaling.wait.seqno)) { p = &parent->rb_right; first = false; } else { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index b91c2c7ef5a6..04e7d5a9ee3a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -582,12 +582,49 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) +static inline void intel_wait_init(struct intel_wait *wait) +{ + wait->tsk = current; +} + +static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) { wait->tsk = current; wait->seqno = seqno; } +static inline bool intel_wait_has_seqno(const struct intel_wait *wait) +{ + return wait->seqno; +} + +static inline bool +intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) +{ + wait->seqno = seqno; + return intel_wait_has_seqno(wait); +} + +static inline bool +intel_wait_update_request(struct intel_wait *wait, + const struct drm_i915_gem_request *rq) +{ + return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq)); +} + +static inline bool +intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno) +{ + return wait->seqno == seqno; +} + +static inline bool +intel_wait_check_request(const struct intel_wait *wait, + const struct drm_i915_gem_request *rq) +{ + return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq)); +} + static inline bool intel_wait_complete(const struct intel_wait *wait) { return RB_EMPTY_NODE(&wait->node); diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 6426acc9fdca..621be1ca53d8 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -131,7 +131,7 @@ static int igt_random_insert_remove(void *arg) goto out_bitmap; for (n = 0; n < count; n++) - intel_wait_init(&waiters[n], seqno_bias + n); + intel_wait_init_for_seqno(&waiters[n], seqno_bias + n); err = check_rbtree(engine, bitmap, waiters, count); if (err) @@ -197,7 +197,7 @@ static int igt_insert_complete(void *arg) goto out_waiters; for (n = 0; n < count; n++) { - intel_wait_init(&waiters[n], n + seqno_bias); + intel_wait_init_for_seqno(&waiters[n], n + seqno_bias); intel_engine_add_wait(engine, &waiters[n]); __set_bit(n, bitmap); } @@ -318,7 +318,7 @@ static int igt_wakeup_thread(void *arg) while (wait_for_ready(w)) { GEM_BUG_ON(kthread_should_stop()); - intel_wait_init(&wait, w->seqno); + intel_wait_init_for_seqno(&wait, w->seqno); intel_engine_add_wait(w->engine, &wait); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); From cced5e2f098d9db26cf4119e309068b79cca1274 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:15 +0000 Subject: [PATCH 0338/1299] drm/i915: Take a reference whilst processing the signaler request The plan in the near-future is to allow requests to be removed from the signaler. We can no longer then rely on holding a reference to the request for the duration it is in the signaling tree, and instead must obtain a reference to the request for the current operation using RCU. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-10-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 22 +++++++++++++++------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index d5bf4c0b2deb..a35d59d00bfb 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -496,7 +496,11 @@ static int intel_breadcrumbs_signaler(void *arg) * need to wait for a new interrupt from the GPU or for * a new client. */ - request = READ_ONCE(b->first_signal); + rcu_read_lock(); + request = rcu_dereference(b->first_signal); + if (request) + request = i915_gem_request_get_rcu(request); + rcu_read_unlock(); if (signal_complete(request)) { local_bh_disable(); dma_fence_signal(&request->fence); @@ -515,24 +519,28 @@ static int intel_breadcrumbs_signaler(void *arg) * the oldest before picking the next one. */ spin_lock_irq(&b->lock); - if (request == b->first_signal) { + if (request == rcu_access_pointer(b->first_signal)) { struct rb_node *rb = rb_next(&request->signaling.node); - b->first_signal = rb ? to_signaler(rb) : NULL; + rcu_assign_pointer(b->first_signal, + rb ? to_signaler(rb) : NULL); } rb_erase(&request->signaling.node, &b->signals); spin_unlock_irq(&b->lock); i915_gem_request_put(request); } else { - if (kthread_should_stop()) + if (kthread_should_stop()) { + GEM_BUG_ON(request); break; + } schedule(); if (kthread_should_park()) kthread_parkme(); } + i915_gem_request_put(request); } while (1); __set_current_state(TASK_RUNNING); @@ -597,7 +605,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) rb_link_node(&request->signaling.node, parent, p); rb_insert_color(&request->signaling.node, &b->signals); if (first) - smp_store_mb(b->first_signal, request); + rcu_assign_pointer(b->first_signal, request); spin_unlock(&b->lock); @@ -670,7 +678,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) /* The engines should be idle and all requests accounted for! */ WARN_ON(READ_ONCE(b->first_wait)); WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); - WARN_ON(READ_ONCE(b->first_signal)); + WARN_ON(rcu_access_pointer(b->first_signal)); WARN_ON(!RB_EMPTY_ROOT(&b->signals)); if (!IS_ERR_OR_NULL(b->signaler)) @@ -691,7 +699,7 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) busy |= intel_engine_flag(engine); } - if (b->first_signal) { + if (rcu_access_pointer(b->first_signal)) { wake_up_process(b->signaler); busy |= intel_engine_flag(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 04e7d5a9ee3a..fbf34af7bc77 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -242,7 +242,7 @@ struct intel_engine_cs { struct rb_root signals; /* sorted by retirement */ struct intel_wait *first_wait; /* oldest waiter by retirement */ struct task_struct *signaler; /* used for fence signalling */ - struct drm_i915_gem_request *first_signal; + struct drm_i915_gem_request __rcu *first_signal; struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ From 9eb143bbec7dfdfce8aa7f6a3b0f40af4e95e7b7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:16 +0000 Subject: [PATCH 0339/1299] drm/i915: Allow a request to be cancelled If we preempt a request and remove it from the execution queue, we need to undo its global seqno and restart any waiters. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-11-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 71 +++++++++++++++++++----- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index a35d59d00bfb..dd39e4f7a560 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -356,22 +356,15 @@ static inline int wakeup_priority(struct intel_breadcrumbs *b, return tsk->prio; } -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) +static void __intel_engine_remove_wait(struct intel_engine_cs *engine, + struct intel_wait *wait) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - /* Quick check to see if this waiter was already decoupled from - * the tree by the bottom-half to avoid contention on the spinlock - * by the herd. - */ - if (RB_EMPTY_NODE(&wait->node)) - return; - - spin_lock_irq(&b->lock); + assert_spin_locked(&b->lock); if (RB_EMPTY_NODE(&wait->node)) - goto out_unlock; + goto out; if (b->first_wait == wait) { const int priority = wakeup_priority(b, wait->tsk); @@ -436,11 +429,27 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); rb_erase(&wait->node, &b->waiters); -out_unlock: +out: GEM_BUG_ON(b->first_wait == wait); GEM_BUG_ON(rb_first(&b->waiters) != (b->first_wait ? &b->first_wait->node : NULL)); GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters)); +} + +void intel_engine_remove_wait(struct intel_engine_cs *engine, + struct intel_wait *wait) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + /* Quick check to see if this waiter was already decoupled from + * the tree by the bottom-half to avoid contention on the spinlock + * by the herd. + */ + if (RB_EMPTY_NODE(&wait->node)) + return; + + spin_lock_irq(&b->lock); + __intel_engine_remove_wait(engine, wait); spin_unlock_irq(&b->lock); } @@ -506,11 +515,13 @@ static int intel_breadcrumbs_signaler(void *arg) dma_fence_signal(&request->fence); local_bh_enable(); /* kick start the tasklets */ + spin_lock_irq(&b->lock); + /* Wake up all other completed waiters and select the * next bottom-half for the next user interrupt. */ - intel_engine_remove_wait(engine, - &request->signaling.wait); + __intel_engine_remove_wait(engine, + &request->signaling.wait); /* Find the next oldest signal. Note that as we have * not been holding the lock, another client may @@ -518,7 +529,6 @@ static int intel_breadcrumbs_signaler(void *arg) * we just completed - so double check we are still * the oldest before picking the next one. */ - spin_lock_irq(&b->lock); if (request == rcu_access_pointer(b->first_signal)) { struct rb_node *rb = rb_next(&request->signaling.node); @@ -526,6 +536,8 @@ static int intel_breadcrumbs_signaler(void *arg) rb ? to_signaler(rb) : NULL); } rb_erase(&request->signaling.node, &b->signals); + RB_CLEAR_NODE(&request->signaling.node); + spin_unlock_irq(&b->lock); i915_gem_request_put(request); @@ -613,6 +625,35 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) wake_up_process(b->signaler); } +void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + assert_spin_locked(&request->lock); + GEM_BUG_ON(!request->signaling.wait.seqno); + + spin_lock(&b->lock); + + if (!RB_EMPTY_NODE(&request->signaling.node)) { + if (request == rcu_access_pointer(b->first_signal)) { + struct rb_node *rb = + rb_next(&request->signaling.node); + rcu_assign_pointer(b->first_signal, + rb ? to_signaler(rb) : NULL); + } + rb_erase(&request->signaling.node, &b->signals); + RB_CLEAR_NODE(&request->signaling.node); + i915_gem_request_put(request); + } + + __intel_engine_remove_wait(engine, &request->signaling.wait); + + spin_unlock(&b->lock); + + request->signaling.wait.seqno = 0; +} + int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index fbf34af7bc77..0f29e07a9581 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -635,6 +635,7 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_enable_signaling(struct drm_i915_gem_request *request); +void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) { From d6a2289d9d6b3ea47514cf29fed56340fb8fec7e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:17 +0000 Subject: [PATCH 0340/1299] drm/i915: Remove the preempted request from the execution queue After the request is cancelled, we then need to remove it from the global execution timeline and return it to the context timeline, the inverse of submit_request(). v2: Move manipulation of struct intel_wait to helpers Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-12-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_request.c | 55 ++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_request.h | 3 ++ drivers/gpu/drm/i915/intel_breadcrumbs.c | 17 +++++++- 3 files changed, 73 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index d394cbe8de11..013abdfa65a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -441,6 +441,55 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_timeline *timeline; + + assert_spin_locked(&engine->timeline->lock); + + /* Only unwind in reverse order, required so that the per-context list + * is kept in seqno/ring order. + */ + GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); + engine->timeline->seqno--; + + /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + request->global_seqno = 0; + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + intel_engine_cancel_signaling(request); + spin_unlock(&request->lock); + + /* Transfer back from the global per-engine timeline to per-context */ + timeline = request->timeline; + GEM_BUG_ON(timeline == engine->timeline); + + spin_lock(&timeline->lock); + list_move(&request->link, &timeline->requests); + spin_unlock(&timeline->lock); + + /* We don't need to wake_up any waiters on request->execute, they + * will get woken by any other event or us re-adding this request + * to the engine timeline (__i915_gem_request_submit()). The waiters + * should be quite adapt at finding that the request now has a new + * global_seqno to the one they went to sleep on. + */ +} + +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&engine->timeline->lock, flags); + + __i915_gem_request_unsubmit(request); + + spin_unlock_irqrestore(&engine->timeline->lock, flags); +} + static int __i915_sw_fence_call submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { @@ -1035,6 +1084,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, intel_wait_init(&wait); +restart: reset_wait_queue(&req->execute, &exec); if (!intel_wait_update_request(&wait, req)) { do { @@ -1133,6 +1183,11 @@ long i915_wait_request(struct drm_i915_gem_request *req, /* Only spin if we know the GPU is processing this request */ if (i915_spin_request(req, state, 2)) break; + + if (!intel_wait_check_request(&wait, req)) { + intel_engine_remove_wait(req->engine, &wait); + goto restart; + } } intel_engine_remove_wait(req->engine, &wait); diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index b81f6709905c..5f73d8c0a38a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -274,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); void __i915_gem_request_submit(struct drm_i915_gem_request *request); void i915_gem_request_submit(struct drm_i915_gem_request *request); +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request); +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request); + struct intel_rps_client; #define NO_WAITBOOST ERR_PTR(-1) #define IS_RPS_CLIENT(p) (!IS_ERR(p)) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index dd39e4f7a560..027c93e34c97 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -453,7 +453,12 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, spin_unlock_irq(&b->lock); } -static bool signal_complete(struct drm_i915_gem_request *request) +static bool signal_valid(const struct drm_i915_gem_request *request) +{ + return intel_wait_check_request(&request->signaling.wait, request); +} + +static bool signal_complete(const struct drm_i915_gem_request *request) { if (!request) return false; @@ -462,7 +467,7 @@ static bool signal_complete(struct drm_i915_gem_request *request) * signalled that this wait is already completed. */ if (intel_wait_complete(&request->signaling.wait)) - return true; + return signal_valid(request); /* Carefully check if the request is complete, giving time for the * seqno to be visible or if the GPU hung. @@ -542,13 +547,21 @@ static int intel_breadcrumbs_signaler(void *arg) i915_gem_request_put(request); } else { + DEFINE_WAIT(exec); + if (kthread_should_stop()) { GEM_BUG_ON(request); break; } + if (request) + add_wait_queue(&request->execute, &exec); + schedule(); + if (request) + remove_wait_queue(&request->execute, &exec); + if (kthread_should_park()) kthread_parkme(); } From 591c0fb85d1c351d28334fb249bf1faaafcf326b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:18 +0000 Subject: [PATCH 0341/1299] drm/i915: Exercise request cancellation using a mock selftest Add a mock selftest to preempt a request and check that we cancel it, requeue the request and then complete its execution. v2: Error leaks no more. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-13-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- .../gpu/drm/i915/selftests/i915_gem_request.c | 64 +++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_request.c | 19 ++++++ drivers/gpu/drm/i915/selftests/mock_request.h | 2 + 3 files changed, 85 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 9d056a86723d..42bdeac93324 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -26,6 +26,7 @@ #include "../i915_selftest.h" +#include "mock_context.h" #include "mock_gem_device.h" static int igt_add_request(void *arg) @@ -181,12 +182,75 @@ static int igt_fence_wait(void *arg) return err; } +static int igt_request_rewind(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request, *vip; + struct i915_gem_context *ctx[2]; + int err = -EINVAL; + + mutex_lock(&i915->drm.struct_mutex); + ctx[0] = mock_context(i915, "A"); + request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ); + if (!request) { + err = -ENOMEM; + goto err_context_0; + } + + i915_gem_request_get(request); + i915_add_request(request); + + ctx[1] = mock_context(i915, "B"); + vip = mock_request(i915->engine[RCS], ctx[1], 0); + if (!vip) { + err = -ENOMEM; + goto err_context_1; + } + + /* Simulate preemption by manual reordering */ + if (!mock_cancel_request(request)) { + pr_err("failed to cancel request (already executed)!\n"); + i915_add_request(vip); + goto err_context_1; + } + i915_gem_request_get(vip); + i915_add_request(vip); + request->engine->submit_request(request); + + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_wait_request(vip, 0, HZ) == -ETIME) { + pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n", + vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS])); + goto err; + } + + if (i915_gem_request_completed(request)) { + pr_err("low priority request already completed\n"); + goto err; + } + + err = 0; +err: + i915_gem_request_put(vip); + mutex_lock(&i915->drm.struct_mutex); +err_context_1: + mock_context_close(ctx[1]); + i915_gem_request_put(request); +err_context_0: + mock_context_close(ctx[0]); + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + int i915_gem_request_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_add_request), SUBTEST(igt_wait_request), SUBTEST(igt_fence_wait), + SUBTEST(igt_request_rewind), }; struct drm_i915_private *i915; int err; diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index e23242d1b88a..0e8d2e7f8c70 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -22,6 +22,7 @@ * */ +#include "mock_engine.h" #include "mock_request.h" struct drm_i915_gem_request * @@ -42,3 +43,21 @@ mock_request(struct intel_engine_cs *engine, return &mock->base; } + +bool mock_cancel_request(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + struct mock_engine *engine = + container_of(request->engine, typeof(*engine), base); + bool was_queued; + + spin_lock_irq(&engine->hw_lock); + was_queued = !list_empty(&mock->link); + list_del_init(&mock->link); + spin_unlock_irq(&engine->hw_lock); + + if (was_queued) + i915_gem_request_unsubmit(request); + + return was_queued; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h index cc76d4f4eb4e..4dea74c8e96d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.h +++ b/drivers/gpu/drm/i915/selftests/mock_request.h @@ -41,4 +41,6 @@ mock_request(struct intel_engine_cs *engine, struct i915_gem_context *context, unsigned long delay); +bool mock_cancel_request(struct drm_i915_gem_request *request); + #endif /* !__MOCK_REQUEST__ */ From a49625f90662799426dd99daab97559ef620dce1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:19 +0000 Subject: [PATCH 0342/1299] drm/i915: Replace reset_wait_queue with default_wake_function If we change the wait_queue_t from using the autoremove_wake_function to the default_wake_function, we no longer have to restore the wait_queue_t entry on the wait_queue_head_t list after being woken up by it, as we are unusual in sleeping multiple times on the same wait_queue_t. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-14-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 30 ++++++------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 013abdfa65a6..43904f412e76 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -945,16 +945,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ } -static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) -{ - unsigned long flags; - - spin_lock_irqsave(&q->lock, flags); - if (list_empty(&wait->task_list)) - __add_wait_queue(q, wait); - spin_unlock_irqrestore(&q->lock, flags); -} - static unsigned long local_clock_us(unsigned int *cpu) { unsigned long t; @@ -1059,8 +1049,8 @@ long i915_wait_request(struct drm_i915_gem_request *req, const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue; - DEFINE_WAIT(reset); - DEFINE_WAIT(exec); + DEFINE_WAIT_FUNC(reset, default_wake_function); + DEFINE_WAIT_FUNC(exec, default_wake_function); struct intel_wait wait; might_sleep(); @@ -1079,13 +1069,13 @@ long i915_wait_request(struct drm_i915_gem_request *req, trace_i915_gem_request_wait_begin(req, flags); + add_wait_queue(&req->execute, &exec); if (flags & I915_WAIT_LOCKED) add_wait_queue(errq, &reset); intel_wait_init(&wait); restart: - reset_wait_queue(&req->execute, &exec); if (!intel_wait_update_request(&wait, req)) { do { set_current_state(state); @@ -1097,26 +1087,21 @@ long i915_wait_request(struct drm_i915_gem_request *req, i915_reset_in_progress(&req->i915->gpu_error)) { __set_current_state(TASK_RUNNING); i915_reset(req->i915); - reset_wait_queue(errq, &reset); continue; } if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; - break; + goto complete; } if (!timeout) { timeout = -ETIME; - break; + goto complete; } timeout = io_schedule_timeout(timeout); } while (1); - finish_wait(&req->execute, &exec); - - if (timeout < 0) - goto complete; GEM_BUG_ON(!intel_wait_has_seqno(&wait)); } @@ -1176,7 +1161,6 @@ long i915_wait_request(struct drm_i915_gem_request *req, i915_reset_in_progress(&req->i915->gpu_error)) { __set_current_state(TASK_RUNNING); i915_reset(req->i915); - reset_wait_queue(errq, &reset); continue; } @@ -1191,11 +1175,11 @@ long i915_wait_request(struct drm_i915_gem_request *req, } intel_engine_remove_wait(req->engine, &wait); - __set_current_state(TASK_RUNNING); - complete: + __set_current_state(TASK_RUNNING); if (flags & I915_WAIT_LOCKED) remove_wait_queue(errq, &reset); + remove_wait_queue(&req->execute, &exec); trace_i915_gem_request_wait_end(req); return timeout; From e07051142cbcb991e4fedf86afd8f00425dbc89a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:20 +0000 Subject: [PATCH 0343/1299] drm/i915: Refactor direct GPU reset from request waiters Combine the common code for the pair of waiters into a single function. v2: Rename reset_request to wait_request_check_and_reset Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-15-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_request.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 43904f412e76..0582bb7a0b5b 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1023,6 +1023,16 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, return false; } +static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *request) +{ + if (likely(!i915_reset_in_progress(&request->i915->gpu_error))) + return false; + + __set_current_state(TASK_RUNNING); + i915_reset(request->i915); + return true; +} + /** * i915_wait_request - wait until execution of request has finished * @req: the request to wait upon @@ -1084,11 +1094,8 @@ long i915_wait_request(struct drm_i915_gem_request *req, break; if (flags & I915_WAIT_LOCKED && - i915_reset_in_progress(&req->i915->gpu_error)) { - __set_current_state(TASK_RUNNING); - i915_reset(req->i915); + __i915_wait_request_check_and_reset(req)) continue; - } if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; @@ -1158,11 +1165,8 @@ long i915_wait_request(struct drm_i915_gem_request *req, * itself, or indirectly by recovering the GPU). */ if (flags & I915_WAIT_LOCKED && - i915_reset_in_progress(&req->i915->gpu_error)) { - __set_current_state(TASK_RUNNING); - i915_reset(req->i915); + __i915_wait_request_check_and_reset(req)) continue; - } /* Only spin if we know the GPU is processing this request */ if (i915_spin_request(req, state, 2)) From 24f417ec0c41a8e2b27ae7d2d64fe0ea13f322dd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:21 +0000 Subject: [PATCH 0344/1299] drm/i915: Immediately process a reset before starting waiting As we handoff the GPU reset to the waiter, we need to check we don't miss a wakeup if it has already been sent prior to us starting the wait. v2: Tweak checking for reset to be clear to the need before sleeping after changing the task state. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-16-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_request.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 0582bb7a0b5b..9669747d1822 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1126,6 +1126,9 @@ long i915_wait_request(struct drm_i915_gem_request *req, */ goto wakeup; + if (flags & I915_WAIT_LOCKED) + __i915_wait_request_check_and_reset(req); + for (;;) { if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; From 0f2f61d4a8d4611155e7d1633db271ead9057981 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:22 +0000 Subject: [PATCH 0345/1299] drm/i915: Remove one level of indention from wait-for-execute Now that the code is getting simpler, we can reduce the indentation when waiting for the global_seqno. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-17-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 39 ++++++++++++------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 9669747d1822..fbfeb5f8d069 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1086,32 +1086,29 @@ long i915_wait_request(struct drm_i915_gem_request *req, intel_wait_init(&wait); restart: - if (!intel_wait_update_request(&wait, req)) { - do { - set_current_state(state); + do { + set_current_state(state); + if (intel_wait_update_request(&wait, req)) + break; - if (intel_wait_update_request(&wait, req)) - break; + if (flags & I915_WAIT_LOCKED && + __i915_wait_request_check_and_reset(req)) + continue; - if (flags & I915_WAIT_LOCKED && - __i915_wait_request_check_and_reset(req)) - continue; + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; + goto complete; + } - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - goto complete; - } + if (!timeout) { + timeout = -ETIME; + goto complete; + } - if (!timeout) { - timeout = -ETIME; - goto complete; - } + timeout = io_schedule_timeout(timeout); + } while (1); - timeout = io_schedule_timeout(timeout); - } while (1); - - GEM_BUG_ON(!intel_wait_has_seqno(&wait)); - } + GEM_BUG_ON(!intel_wait_has_seqno(&wait)); GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit)); /* Optimistic short spin before touching IRQs */ From 8d0e9bcb2b0cbc2d5c3668e1a445571b4b73317f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 12:20:37 +0000 Subject: [PATCH 0346/1299] drm/i915: Suppress fbc suggestion to increase stolen if disabled If the reserved region of memory has not been setup (most probably because it has been limited by hardware or virtualisation), don't tell the user to try and increase the amount of memory reserved for graphics. Signed-off-by: Chris Wilson Cc: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/20170223122037.16174-1-chris@chris-wilson.co.uk eviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_fbc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 22c56ae086f2..17d418b23d77 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -627,7 +627,8 @@ static int intel_fbc_alloc_cfb(struct intel_crtc *crtc) kfree(compressed_llb); i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb); err_llb: - pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); + if (drm_mm_initialized(&dev_priv->mm.stolen)) + pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); return -ENOSPC; } From bca5609fcc401b0055a3c01ecc33c5f6fb67af7b Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 24 Feb 2017 10:58:20 +0800 Subject: [PATCH 0347/1299] drm/i915/gvt: Add more edid definition support We'll need to apply different resolution for vgpu types, so this adds more EDID types definition. v2: fix typo for actual 1920x1200 resolution Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 112 +++++++++++++++++++---------- drivers/gpu/drm/i915/gvt/display.h | 6 ++ 2 files changed, 80 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 6d8fde880c39..d346e43656f7 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -83,44 +83,80 @@ static int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe) return 0; } +static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = { + { +/* EDID with 1024x768 as its resolution */ + /*Header*/ + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, + /* Vendor & Product Identification */ + 0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17, + /* Version & Revision */ + 0x01, 0x04, + /* Basic Display Parameters & Features */ + 0xa5, 0x34, 0x20, 0x78, 0x23, + /* Color Characteristics */ + 0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54, + /* Established Timings: maximum resolution is 1024x768 */ + 0x21, 0x08, 0x00, + /* Standard Timings. All invalid */ + 0x00, 0xc0, 0x00, 0xc0, 0x00, 0x40, 0x00, 0x80, 0x00, 0x00, + 0x00, 0x40, 0x00, 0x00, 0x00, 0x01, + /* 18 Byte Data Blocks 1: invalid */ + 0x00, 0x00, 0x80, 0xa0, 0x70, 0xb0, + 0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a, + /* 18 Byte Data Blocks 2: invalid */ + 0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + /* 18 Byte Data Blocks 3: invalid */ + 0x00, 0x00, 0x00, 0xfc, 0x00, 0x48, + 0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20, + /* 18 Byte Data Blocks 4: invalid */ + 0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30, + 0x44, 0x58, 0x51, 0x0a, 0x20, 0x20, + /* Extension Block Count */ + 0x00, + /* Checksum */ + 0xef, + }, + { /* EDID with 1920x1200 as its resolution */ -static unsigned char virtual_dp_monitor_edid[] = { - /*Header*/ - 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, - /* Vendor & Product Identification */ - 0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17, - /* Version & Revision */ - 0x01, 0x04, - /* Basic Display Parameters & Features */ - 0xa5, 0x34, 0x20, 0x78, 0x23, - /* Color Characteristics */ - 0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54, - /* Established Timings: maximum resolution is 1024x768 */ - 0x21, 0x08, 0x00, - /* - * Standard Timings. - * below new resolutions can be supported: - * 1920x1080, 1280x720, 1280x960, 1280x1024, - * 1440x900, 1600x1200, 1680x1050 - */ - 0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00, - 0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01, - /* 18 Byte Data Blocks 1: max resolution is 1920x1200 */ - 0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0, - 0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a, - /* 18 Byte Data Blocks 2: invalid */ - 0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - /* 18 Byte Data Blocks 3: invalid */ - 0x00, 0x00, 0x00, 0xfc, 0x00, 0x48, - 0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20, - /* 18 Byte Data Blocks 4: invalid */ - 0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30, - 0x44, 0x58, 0x51, 0x0a, 0x20, 0x20, - /* Extension Block Count */ - 0x00, - /* Checksum */ - 0x45, + /*Header*/ + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, + /* Vendor & Product Identification */ + 0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17, + /* Version & Revision */ + 0x01, 0x04, + /* Basic Display Parameters & Features */ + 0xa5, 0x34, 0x20, 0x78, 0x23, + /* Color Characteristics */ + 0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54, + /* Established Timings: maximum resolution is 1024x768 */ + 0x21, 0x08, 0x00, + /* + * Standard Timings. + * below new resolutions can be supported: + * 1920x1080, 1280x720, 1280x960, 1280x1024, + * 1440x900, 1600x1200, 1680x1050 + */ + 0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00, + 0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01, + /* 18 Byte Data Blocks 1: max resolution is 1920x1200 */ + 0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0, + 0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a, + /* 18 Byte Data Blocks 2: invalid */ + 0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + /* 18 Byte Data Blocks 3: invalid */ + 0x00, 0x00, 0x00, 0xfc, 0x00, 0x48, + 0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20, + /* 18 Byte Data Blocks 4: invalid */ + 0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30, + 0x44, 0x58, 0x51, 0x0a, 0x20, 0x20, + /* Extension Block Count */ + 0x00, + /* Checksum */ + 0x45, + }, }; #define DPCD_HEADER_SIZE 0xb @@ -189,7 +225,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, return -ENOMEM; } - memcpy(port->edid->edid_block, virtual_dp_monitor_edid, + memcpy(port->edid->edid_block, virtual_dp_monitor_edid[GVT_EDID_1920_1200], EDID_SIZE); port->edid->data_valid = true; diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index 8b234ea961f6..84f160bb7bc2 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -154,6 +154,12 @@ struct intel_vgpu_port { int type; }; +enum intel_vgpu_edid { + GVT_EDID_1024_768, + GVT_EDID_1920_1200, + GVT_EDID_NUM, +}; + void intel_gvt_emulate_vblank(struct intel_gvt *gvt); void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt); From d1a513be1f0a25f094e1577d059b9aebaa279bb2 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 24 Feb 2017 10:58:21 +0800 Subject: [PATCH 0348/1299] drm/i915/gvt: add resolution definition for vGPU type This assigns resolution definition for each vGPU type. For smaller resource type we should limit max resolution, so e.g limit to 1024x768 for 64M type, others are still default to 1920x1200. v2: Fix for actual 1920x1200 resolution Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 15 ++++++++++----- drivers/gpu/drm/i915/gvt/display.h | 14 +++++++++++++- drivers/gpu/drm/i915/gvt/gvt.h | 2 ++ drivers/gpu/drm/i915/gvt/kvmgt.c | 8 ++++---- drivers/gpu/drm/i915/gvt/vgpu.c | 18 +++++++++++------- 5 files changed, 40 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index d346e43656f7..43e02e038375 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -211,10 +211,13 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) } static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, - int type) + int type, unsigned int resolution) { struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); + if (WARN_ON(resolution >= GVT_EDID_NUM)) + return -EINVAL; + port->edid = kzalloc(sizeof(*(port->edid)), GFP_KERNEL); if (!port->edid) return -ENOMEM; @@ -225,7 +228,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, return -ENOMEM; } - memcpy(port->edid->edid_block, virtual_dp_monitor_edid[GVT_EDID_1920_1200], + memcpy(port->edid->edid_block, virtual_dp_monitor_edid[resolution], EDID_SIZE); port->edid->data_valid = true; @@ -358,16 +361,18 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu) * Zero on success, negative error code if failed. * */ -int intel_vgpu_init_display(struct intel_vgpu *vgpu) +int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; intel_vgpu_init_i2c_edid(vgpu); if (IS_SKYLAKE(dev_priv)) - return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D); + return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D, + resolution); else - return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B); + return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B, + resolution); } /** diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index 84f160bb7bc2..d73de22102e2 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -160,10 +160,22 @@ enum intel_vgpu_edid { GVT_EDID_NUM, }; +static inline char *vgpu_edid_str(enum intel_vgpu_edid id) +{ + switch (id) { + case GVT_EDID_1024_768: + return "1024x768"; + case GVT_EDID_1920_1200: + return "1920x1200"; + default: + return ""; + } +} + void intel_gvt_emulate_vblank(struct intel_gvt *gvt); void intel_gvt_check_vblank_emulation(struct intel_gvt *gvt); -int intel_vgpu_init_display(struct intel_vgpu *vgpu); +int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution); void intel_vgpu_reset_display(struct intel_vgpu *vgpu); void intel_vgpu_clean_display(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index faff044c6434..23791920ced1 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -216,6 +216,7 @@ struct intel_vgpu_type { unsigned int low_gm_size; unsigned int high_gm_size; unsigned int fence; + enum intel_vgpu_edid resolution; }; struct intel_gvt { @@ -318,6 +319,7 @@ struct intel_vgpu_creation_params { __u64 low_gm_sz; /* in MB */ __u64 high_gm_sz; /* in MB */ __u64 fence_sz; + __u64 resolution; __s32 primary; __u64 vgpu_id; }; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 10c3a4b95a92..182914c22ac5 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -295,10 +295,10 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev, return 0; return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n" - "fence: %d\n", - BYTES_TO_MB(type->low_gm_size), - BYTES_TO_MB(type->high_gm_size), - type->fence); + "fence: %d\nresolution: %s\n", + BYTES_TO_MB(type->low_gm_size), + BYTES_TO_MB(type->high_gm_size), + type->fence, vgpu_edid_str(type->resolution)); } static MDEV_TYPE_ATTR_RO(available_instance); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index c27c13c3cc48..41cfa5ccae84 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -68,13 +68,14 @@ static struct { unsigned int low_mm; unsigned int high_mm; unsigned int fence; + enum intel_vgpu_edid edid; char *name; } vgpu_types[] = { /* Fixed vGPU type table */ - { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, "8" }, - { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, "4" }, - { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, "2" }, - { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, "1" }, + { MB_TO_BYTES(64), MB_TO_BYTES(512), 4, GVT_EDID_1024_768, "8" }, + { MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" }, + { MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" }, + { MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" }, }; /** @@ -119,6 +120,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) gvt->types[i].low_gm_size = vgpu_types[i].low_mm; gvt->types[i].high_gm_size = vgpu_types[i].high_mm; gvt->types[i].fence = vgpu_types[i].fence; + gvt->types[i].resolution = vgpu_types[i].edid; gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm, high_avail / vgpu_types[i].high_mm); @@ -129,11 +131,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) sprintf(gvt->types[i].name, "GVTg_V5_%s", vgpu_types[i].name); - gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u\n", + gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u res %s\n", i, gvt->types[i].name, gvt->types[i].avail_instance, gvt->types[i].low_gm_size, - gvt->types[i].high_gm_size, gvt->types[i].fence); + gvt->types[i].high_gm_size, gvt->types[i].fence, + vgpu_edid_str(gvt->types[i].resolution)); } gvt->num_types = i; @@ -258,7 +261,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_detach_hypervisor_vgpu; - ret = intel_vgpu_init_display(vgpu); + ret = intel_vgpu_init_display(vgpu, param->resolution); if (ret) goto out_clean_gtt; @@ -322,6 +325,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt, param.low_gm_sz = type->low_gm_size; param.high_gm_sz = type->high_gm_size; param.fence_sz = type->fence; + param.resolution = type->resolution; /* XXX current param based on MB */ param.low_gm_sz = BYTES_TO_MB(param.low_gm_sz); From 8bcd7c188bc479bea866d286a7dc0af9734c3c64 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Fri, 24 Feb 2017 17:07:38 +0800 Subject: [PATCH 0349/1299] drm/i915/gvt: fix pcode mailbox write emulation of BDW Add pcode mailbox write emulation in gvt for BDW, reuse emulation code of Skylake. V2: refine comments, remove duplication defination of 0x138124, add IS_SKYLAKE() check for Skylake only pcode commands. Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 33 ++++++++++++++++------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index f89b183488e9..ebbe74072dc3 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1304,21 +1304,24 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, u32 *data0 = &vgpu_vreg(vgpu, GEN6_PCODE_DATA); switch (cmd) { - case 0x6: - /** - * "Read memory latency" command on gen9. - * Below memory latency values are read - * from skylake platform. - */ - if (!*data0) - *data0 = 0x1e1a1100; - else - *data0 = 0x61514b3d; + case GEN9_PCODE_READ_MEM_LATENCY: + if (IS_SKYLAKE(vgpu->gvt->dev_priv)) { + /** + * "Read memory latency" command on gen9. + * Below memory latency values are read + * from skylake platform. + */ + if (!*data0) + *data0 = 0x1e1a1100; + else + *data0 = 0x61514b3d; + } break; case SKL_PCODE_CDCLK_CONTROL: - *data0 = SKL_CDCLK_READY_FOR_CHANGE; + if (IS_SKYLAKE(vgpu->gvt->dev_priv)) + *data0 = SKL_CDCLK_READY_FOR_CHANGE; break; - case 0x5: + case GEN6_PCODE_READ_RC6VIDS: *data0 |= 0x1; break; } @@ -2202,7 +2205,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_F(0x4f000, 0x90, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_SKL); + MMIO_D(GEN6_PCODE_MAILBOX, D_PRE_BDW); MMIO_D(GEN6_PCODE_DATA, D_ALL); MMIO_D(0x13812c, D_ALL); MMIO_DH(GEN7_ERR_INT, D_ALL, NULL, NULL); @@ -2281,7 +2284,6 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x1a054, D_ALL); MMIO_D(0x44070, D_ALL); - MMIO_D(0x215c, D_HSW_PLUS); MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -2453,6 +2455,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN7_MISCCPCTL, D_BDW_PLUS); MMIO_D(0x1c054, D_BDW_PLUS); + MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write); + MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS); MMIO_D(GEN8_PRIVATE_PAT_HI, D_BDW_PLUS); @@ -2544,7 +2548,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(HSW_PWR_WELL_BIOS, D_SKL); MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL, NULL, skl_power_well_ctl_write); - MMIO_DH(GEN6_PCODE_MAILBOX, D_SKL, NULL, mailbox_write); MMIO_D(0xa210, D_SKL_PLUS); MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); From 41bfab35b3d0e0a85ed0c12d7bafd7484e96ca78 Mon Sep 17 00:00:00 2001 From: Pei Zhang Date: Fri, 24 Feb 2017 16:03:28 +0800 Subject: [PATCH 0350/1299] drm/i915/gvt: add some new MMIOs to cmd_access white list Guest is now acces some MMIOs (0x215c, RING_INSTPM) through command which is not originally in gvt's white list. This cause huge error log printed in gvt. This patch addes these MMIOs to the white list. V2. change the commit message content. V3. remove duplicate defination of 0x20c0. V4. refine code style. Signed-off-by: Pei Zhang Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index ebbe74072dc3..57b4d538f370 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1589,7 +1589,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) #undef RING_REG MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS, ring_timestamp_mmio_read, NULL); MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS, @@ -2284,7 +2285,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(0x1a054, D_ALL); MMIO_D(0x44070, D_ALL); - MMIO_D(0x215c, D_HSW_PLUS); + MMIO_DFH(0x215c, D_HSW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL); From 754a76591b12c88f57ad8b4ca533a5c9566a1922 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Feb 2017 11:43:06 +0000 Subject: [PATCH 0351/1299] drm/i915/fbdev: Stop repeating tile configuration on stagnation If we cease making progress in finding matching outputs for a tiled configuration, stop looping over the remaining unconfigured outputs. v2: Use conn_seq (instead of pass) to only apply tile configuration on first pass. Fixes: b0ee9e7fa5b4 ("drm/fb: add support for tiled monitor configurations. (v2)") Signed-off-by: Chris Wilson Cc: Tomasz Lis Cc: Dave Airlie Cc: Daniel Vetter Cc: Jani Nikula Cc: Sean Paul Cc: # v3.19+ Reviewed-by: Tomasz Lis Link: http://patchwork.freedesktop.org/patch/msgid/20170224114306.4400-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_fbdev.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index c404ab524ccc..82993a87654c 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -350,14 +350,13 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, bool *enabled, int width, int height) { struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); - unsigned long conn_configured, mask; + unsigned long conn_configured, conn_seq, mask; unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); int i, j; bool *save_enabled; bool fallback = true; int num_connectors_enabled = 0; int num_connectors_detected = 0; - int pass = 0; save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL); if (!save_enabled) @@ -367,6 +366,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, mask = GENMASK(count - 1, 0); conn_configured = 0; retry: + conn_seq = conn_configured; for (i = 0; i < count; i++) { struct drm_fb_helper_connector *fb_conn; struct drm_connector *connector; @@ -380,7 +380,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, if (conn_configured & BIT(i)) continue; - if (pass == 0 && !connector->has_tile) + if (conn_seq == 0 && !connector->has_tile) continue; if (connector->status == connector_status_connected) @@ -491,10 +491,8 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, conn_configured |= BIT(i); } - if ((conn_configured & mask) != mask) { - pass++; + if ((conn_configured & mask) != mask && conn_configured != conn_seq) goto retry; - } /* * If the BIOS didn't enable everything it could, fall back to have the From 7d1c42e679f920529df5cc11c6f827f17d4f8022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Feb 2017 19:35:05 +0200 Subject: [PATCH 0352/1299] drm/i915: Refactor code to select the DDI buf translation table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the code to select the correct translation table into DP, eDP and FDI specific helpers. This reduces the clutter in intel_prepare_dp_ddi_buffers(), and we'll have other uses for some of these new helper functions later on. v2: Fix typo in commit message (David) Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223173507.17600-1-ville.syrjala@linux.intel.com Reviewed-by: David Weinehall --- drivers/gpu/drm/i915/intel_ddi.c | 126 ++++++++++++++++++------------- 1 file changed, 73 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index cd6fedd229a0..5e4b0172810d 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -468,6 +468,59 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por return hdmi_level; } +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_dp(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_KABYLAKE(dev_priv)) { + return kbl_get_buf_trans_dp(dev_priv, n_entries); + } else if (IS_SKYLAKE(dev_priv)) { + return skl_get_buf_trans_dp(dev_priv, n_entries); + } else if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); + return bdw_ddi_translations_dp; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_edp(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_KABYLAKE(dev_priv) || IS_SKYLAKE(dev_priv)) { + return skl_get_buf_trans_edp(dev_priv, n_entries); + } else if (IS_BROADWELL(dev_priv)) { + return bdw_get_buf_trans_edp(dev_priv, n_entries); + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); + return hsw_ddi_translations_fdi; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); + return hsw_ddi_translations_fdi; + } + + *n_entries = 0; + return NULL; +} + /* * Starting with Haswell, DDI port buffers must be programmed with correct * values in advance. This function programs the correct values for @@ -477,45 +530,29 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int i, n_dp_entries, n_edp_entries, size; + int i, n_entries; enum port port = intel_ddi_get_encoder_port(encoder); - const struct ddi_buf_trans *ddi_translations_fdi; - const struct ddi_buf_trans *ddi_translations_dp; - const struct ddi_buf_trans *ddi_translations_edp; const struct ddi_buf_trans *ddi_translations; if (IS_GEN9_LP(dev_priv)) return; - if (IS_KABYLAKE(dev_priv)) { - ddi_translations_fdi = NULL; - ddi_translations_dp = - kbl_get_buf_trans_dp(dev_priv, &n_dp_entries); - ddi_translations_edp = - skl_get_buf_trans_edp(dev_priv, &n_edp_entries); - } else if (IS_SKYLAKE(dev_priv)) { - ddi_translations_fdi = NULL; - ddi_translations_dp = - skl_get_buf_trans_dp(dev_priv, &n_dp_entries); - ddi_translations_edp = - skl_get_buf_trans_edp(dev_priv, &n_edp_entries); - } else if (IS_BROADWELL(dev_priv)) { - ddi_translations_fdi = bdw_ddi_translations_fdi; - ddi_translations_dp = bdw_ddi_translations_dp; - ddi_translations_edp = bdw_get_buf_trans_edp(dev_priv, &n_edp_entries); - n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - } else if (IS_HASWELL(dev_priv)) { - ddi_translations_fdi = hsw_ddi_translations_fdi; - ddi_translations_dp = hsw_ddi_translations_dp; - ddi_translations_edp = hsw_ddi_translations_dp; - n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - } else { - WARN(1, "ddi translation table missing\n"); - ddi_translations_edp = bdw_ddi_translations_dp; - ddi_translations_fdi = bdw_ddi_translations_fdi; - ddi_translations_dp = bdw_ddi_translations_dp; - n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); - n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); + switch (encoder->type) { + case INTEL_OUTPUT_EDP: + ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, + &n_entries); + break; + case INTEL_OUTPUT_DP: + ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, + &n_entries); + break; + case INTEL_OUTPUT_ANALOG: + ddi_translations = intel_ddi_get_buf_trans_fdi(dev_priv, + &n_entries); + break; + default: + MISSING_CASE(encoder->type); + return; } if (IS_GEN9_BC(dev_priv)) { @@ -525,28 +562,11 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && port != PORT_A && port != PORT_E && - n_edp_entries > 9)) - n_edp_entries = 9; + n_entries > 9)) + n_entries = 9; } - switch (encoder->type) { - case INTEL_OUTPUT_EDP: - ddi_translations = ddi_translations_edp; - size = n_edp_entries; - break; - case INTEL_OUTPUT_DP: - ddi_translations = ddi_translations_dp; - size = n_dp_entries; - break; - case INTEL_OUTPUT_ANALOG: - ddi_translations = ddi_translations_fdi; - size = n_dp_entries; - break; - default: - BUG(); - } - - for (i = 0; i < size; i++) { + for (i = 0; i < n_entries; i++) { I915_WRITE(DDI_BUF_TRANS_LO(port, i), ddi_translations[i].trans1 | iboost_bit); I915_WRITE(DDI_BUF_TRANS_HI(port, i), From 97eeb872764ce524d87d0e0e9e25b1c9fae10b3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Feb 2017 19:35:06 +0200 Subject: [PATCH 0353/1299] drm/i915: Refactor translate_signal_level() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the big switch statement in translate_signal_level() into a neat table. The table also serves as documentation for the translation tables. We'll also have other uses for this table later on. v2: Remove superfluous space (David) Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223173507.17600-2-ville.syrjala@linux.intel.com Reviewed-by: David Weinehall --- drivers/gpu/drm/i915/intel_ddi.c | 60 +++++++++++--------------------- 1 file changed, 21 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 5e4b0172810d..7694f4fcce4e 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -34,6 +34,19 @@ struct ddi_buf_trans { u8 i_boost; /* SKL: I_boost; valid: 0x0, 0x1, 0x3, 0x7 */ }; +static const u8 index_to_dp_signal_levels[] = { + [0] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [1] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [2] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_2, + [3] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_3, + [4] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [5] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [6] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_2, + [7] = DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [8] = DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [9] = DP_TRAIN_VOLTAGE_SWING_LEVEL_3 | DP_TRAIN_PRE_EMPH_LEVEL_0, +}; + /* HDMI/DVI modes ignore everything but the last 2 items. So we share * them for both DP and FDI transports, allowing those ports to * automatically adapt to HDMI connections as well @@ -1604,48 +1617,17 @@ static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, static uint32_t translate_signal_level(int signal_levels) { - uint32_t level; + int i; - switch (signal_levels) { - default: - DRM_DEBUG_KMS("Unsupported voltage swing/pre-emphasis level: 0x%x\n", - signal_levels); - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 0; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 1; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_2: - level = 2; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_3: - level = 3; - break; - - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 4; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 5; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_2: - level = 6; - break; - - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 7; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 8; - break; - - case DP_TRAIN_VOLTAGE_SWING_LEVEL_3 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 9; - break; + for (i = 0; i < ARRAY_SIZE(index_to_dp_signal_levels); i++) { + if (index_to_dp_signal_levels[i] == signal_levels) + return i; } - return level; + WARN(1, "Unsupported voltage swing/pre-emphasis level: 0x%x\n", + signal_levels); + + return 0; } uint32_t ddi_signal_levels(struct intel_dp *intel_dp) From ffe5111e28e52212040db1617a75035d6d6f9447 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 23 Feb 2017 19:49:01 +0200 Subject: [PATCH 0354/1299] drm/i915: Introduce intel_ddi_dp_voltage_max() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than sprinkling ideas of how big the DDI buf translation tables are somewhere in intel_dp.c, let's concentrate it all in intel_ddi.c where the actual tables are defined. To that end we introduce intel_ddi_dp_voltage_max() which will actually look at the proper translation table to determine what is the maximum voltage swing level supported. v2: Mask out the preemphasis bits from the return value of intel_ddi_dp_voltage_max() Signed-off-by: Ville Syrjälä Reviewed-by: David Weinehall Link: http://patchwork.freedesktop.org/patch/msgid/20170223174901.26749-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 19 +++++++++++++++++++ drivers/gpu/drm/i915/intel_dp.c | 5 ++--- drivers/gpu/drm/i915/intel_drv.h | 2 ++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 7694f4fcce4e..e2947b7f5079 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1615,6 +1615,25 @@ static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, ddi_translations[level].deemphasis); } +u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + int n_entries; + + if (encoder->type == INTEL_OUTPUT_EDP) + intel_ddi_get_buf_trans_edp(dev_priv, &n_entries); + else + intel_ddi_get_buf_trans_dp(dev_priv, &n_entries); + + if (WARN_ON(n_entries < 1)) + n_entries = 1; + if (WARN_ON(n_entries > ARRAY_SIZE(index_to_dp_signal_levels))) + n_entries = ARRAY_SIZE(index_to_dp_signal_levels); + + return index_to_dp_signal_levels[n_entries - 1] & + DP_TRAIN_VOLTAGE_SWING_MASK; +} + static uint32_t translate_signal_level(int signal_levels) { int i; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 024798a9c016..e72c92a08c81 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3098,9 +3098,8 @@ intel_dp_voltage_max(struct intel_dp *intel_dp) if (IS_GEN9_LP(dev_priv)) return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; else if (INTEL_GEN(dev_priv) >= 9) { - if (dev_priv->vbt.edp.low_vswing && port == PORT_A) - return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; - return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + return intel_ddi_dp_voltage_max(encoder); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; else if (IS_GEN7(dev_priv) && port == PORT_A) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9b5fcc1423eb..c3c8ed726717 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1233,6 +1233,8 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); +u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); + unsigned int intel_fb_align_height(struct drm_i915_private *dev_priv, unsigned int height, uint32_t pixel_format, From fe9ae7a3bfdb1d5933210f53e9efa7655768d238 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 14:50:31 +0000 Subject: [PATCH 0355/1299] drm/i915/execlists: Detect an out-of-order context switch We require that the request is completed before the context is switched away. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170223145031.26210-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 39329d40da46..1c6c71673bfa 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -591,6 +591,7 @@ static void intel_lrc_irq_handler(unsigned long data) GEM_BUG_ON(port[0].count == 0); if (--port[0].count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); + GEM_BUG_ON(!i915_gem_request_completed(port[0].request)); execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_OUT); From e393d0d6c12527b2bd7faf0b5ca7edde2543c451 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 22 Feb 2017 17:10:52 +0200 Subject: [PATCH 0356/1299] drm/i915/lspcon: Switch back to PCON mode after output replug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At least a ParadTech PS175 LSPCON chip/firmware uses long instead of short pulses to signal output unplug/plug events. This is contrary to how branch devices normally work which use short HPD signaling. This chip will also switch to LS mode after an unplug event, which could be the consequence of the long HPD signaling semantics and an effort to save power automatically. Because of this we'll fail to do AUX and detect the output after a replug event. To fix this make sure we are in PCON mode during connector detection. v2: - Switch the mode in the proper spot. Cc: raptorteak@gmail.com Cc: Shashank Sharma Cc: Ville Syrjälä Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98912 Reported-and-tested-by: raptorteak@gmail.com Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1487776252-6288-1-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e72c92a08c81..6c661c64cff6 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4325,9 +4325,13 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) static enum drm_connector_status intel_dp_detect_dpcd(struct intel_dp *intel_dp) { + struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp); uint8_t *dpcd = intel_dp->dpcd; uint8_t type; + if (lspcon->active) + lspcon_resume(lspcon); + if (!intel_dp_get_dpcd(intel_dp)) return connector_status_disconnected; From ef0f411f51475f4eebf9fc1b19a85be698af19ff Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 15 Feb 2017 01:34:46 -0800 Subject: [PATCH 0357/1299] drm/i915: Drop support for I915_EXEC_CONSTANTS_* execbuf parameters. This patch makes the I915_PARAM_HAS_EXEC_CONSTANTS getparam return 0 (indicating the optional feature is not supported), and makes execbuf always return -EINVAL if the flags are used. Apparently, no userspace ever shipped which used this optional feature: I checked the git history of Mesa, xf86-video-intel, libva, and Beignet, and there were zero commits showing a use of these flags. Kernel commit 72bfa19c8deb4 apparently introduced the feature prematurely. According to Chris, the intention was to use this in cairo-drm, but "the use was broken for gen6", so I don't think it ever happened. 'relative_constants_mode' has always been tracked per-device, but this has actually been wrong ever since hardware contexts were introduced, as the INSTPM register is saved (and automatically restored) as part of the render ring context. The software per-device value could therefore get out of sync with the hardware per-context value. This meant that using them is actually unsafe: a client which tried to use them could damage the state of other clients, causing the GPU to interpret their BO offsets as absolute pointers, leading to bogus memory reads. These flags were also never ported to execlist mode, making them no-ops on Gen9+ (which requires execlists), and Gen8 in the default mode. On Gen8+, userspace can write these registers directly, achieving the same effect. On Gen6-7.5, it likely makes sense to extend the command parser to support them. I don't think anyone wants this on Gen4-5. Based on a patch by Dave Gordon. v3: Return -ENODEV for the getparam, as this is what we do for other obsolete features. Suggested by Chris Wilson. Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92448 Signed-off-by: Kenneth Graunke Reviewed-by: Joonas Lahtinen Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170215093446.21291-1-kenneth@whitecape.org Acked-by: Daniel Vetter Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 2 - drivers/gpu/drm/i915/i915_gem.c | 2 - drivers/gpu/drm/i915/i915_gem_execbuffer.c | 50 +--------------------- 4 files changed, 3 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 747cd5dfb910..51241dec3f83 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -249,6 +249,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_IRQ_ACTIVE: case I915_PARAM_ALLOW_BATCHBUFFER: case I915_PARAM_LAST_DISPATCH: + case I915_PARAM_HAS_EXEC_CONSTANTS: /* Reject all old ums/dri params. */ return -ENODEV; case I915_PARAM_CHIPSET_ID: @@ -275,9 +276,6 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_BSD2: value = !!dev_priv->engine[VCS2]; break; - case I915_PARAM_HAS_EXEC_CONSTANTS: - value = INTEL_GEN(dev_priv) >= 4; - break; case I915_PARAM_HAS_LLC: value = HAS_LLC(dev_priv); break; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e5b34eaf41ca..0a6fdb024b86 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2098,8 +2098,6 @@ struct drm_i915_private { const struct intel_device_info info; - int relative_constants_mode; - void __iomem *regs; struct intel_uncore uncore; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a8167003c10b..01dbba3813c7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4632,8 +4632,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); - dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; - init_waitqueue_head(&dev_priv->pending_flip_queue); dev_priv->mm.interruptible = true; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 35d2cb979452..aa75ea2d0578 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1412,10 +1412,7 @@ execbuf_submit(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas) { - struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; - int instp_mode; - u32 instp_mask, *cs; int ret; ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); @@ -1426,54 +1423,11 @@ execbuf_submit(struct i915_execbuffer_params *params, if (ret) return ret; - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; - instp_mask = I915_EXEC_CONSTANTS_MASK; - switch (instp_mode) { - case I915_EXEC_CONSTANTS_REL_GENERAL: - case I915_EXEC_CONSTANTS_ABSOLUTE: - case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && params->engine->id != RCS) { - DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); - return -EINVAL; - } - - if (instp_mode != dev_priv->relative_constants_mode) { - if (INTEL_INFO(dev_priv)->gen < 4) { - DRM_DEBUG("no rel constants on pre-gen4\n"); - return -EINVAL; - } - - if (INTEL_INFO(dev_priv)->gen > 5 && - instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { - DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); - return -EINVAL; - } - - /* The HW changed the meaning on this bit on gen6 */ - if (INTEL_INFO(dev_priv)->gen >= 6) - instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; - } - break; - default: - DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); + if (args->flags & I915_EXEC_CONSTANTS_MASK) { + DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n"); return -EINVAL; } - if (params->engine->id == RCS && - instp_mode != dev_priv->relative_constants_mode) { - cs = intel_ring_begin(params->request, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_NOOP; - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(INSTPM); - *cs++ = instp_mask << 16 | instp_mode; - intel_ring_advance(params->request, cs); - - dev_priv->relative_constants_mode = instp_mode; - } - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { ret = i915_reset_gen7_sol_offsets(params->request); if (ret) From 91e32157de148ce37422d8110f6f8a84d05da1ec Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Feb 2017 19:33:15 +0000 Subject: [PATCH 0358/1299] drm/i915: Timeout lowlevel_hole GTT selftest early Check for a timeout in the lowlevel_hole GTT before we allocate state for that pass, as our cleanup phase stops on the iteration before the timeout. References: https://bugs.freedesktop.org/show_bug.cgi?id=99947 Signed-off-by: Chris Wilson Cc: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170224193315.21072-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index e23753181720..6bac267914df 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -237,18 +237,19 @@ static int lowlevel_hole(struct drm_i915_private *i915, GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + if (igt_timeout(end_time, + "%s timed out before %d/%d\n", + __func__, n, count)) { + hole_end = hole_start; /* quit */ + break; + } + if (vm->allocate_va_range && vm->allocate_va_range(vm, addr, BIT_ULL(size))) break; vm->insert_entries(vm, obj->mm.pages, addr, I915_CACHE_NONE, 0); - if (igt_timeout(end_time, - "%s timed out after %d/%d\n", - __func__, n, count)) { - hole_end = hole_start; /* quit */ - break; - } } count = n; From b3bb82885feb74feadc101a700e64ed7ede84b04 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Feb 2017 18:11:19 +0000 Subject: [PATCH 0359/1299] drm/i915: Assert all sg are initialised in fake_dma_object for selftests Double check that we allocated the right amount of scatterlist elements for our obj->size. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170225181122.4788-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 6bac267914df..0f3fa34377c6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -62,12 +62,14 @@ fake_get_pages(struct drm_i915_gem_object *obj) for (sg = pages->sgl; sg; sg = sg_next(sg)) { unsigned long len = min_t(typeof(rem), rem, BIT(31)); + GEM_BUG_ON(!len); sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); sg_dma_address(sg) = page_to_phys(sg_page(sg)); sg_dma_len(sg) = len; rem -= len; } + GEM_BUG_ON(rem); obj->mm.madv = I915_MADV_DONTNEED; return pages; From 357480ce2fe4dc11ff0b77706d77e2319c96fe31 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Feb 2017 18:11:20 +0000 Subject: [PATCH 0360/1299] drm/i915: Assert we do not overflow 4lvl page directories Before looking up the page directory entry, check we are still within bounds. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170225181122.4788-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 24fa262c2b61..e6e0dfa761dd 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -830,6 +830,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, gen8_pte_t *vaddr; bool ret; + GEM_BUG_ON(pdpe >= I915_PDPES_PER_PDP(vm)); pd = pdp->page_directory[pdpe]; vaddr = kmap_atomic_px(pd->page_table[pde]); do { @@ -854,8 +855,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, break; } - GEM_BUG_ON(!i915_vm_is_48bit(&ppgtt->base) && - pdpe >= GEN8_LEGACY_PDPES); + GEM_BUG_ON(pdpe >= I915_PDPES_PER_PDP(vm)); pd = pdp->page_directory[pdpe]; pde = 0; } @@ -904,7 +904,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[pml4e++], &iter, start, cache_level)) - ; + GEM_BUG_ON(pml4e >= GEN8_PML4ES_PER_PML4); } static void gen8_free_page_tables(struct i915_address_space *vm, From aa149431279166025bc457007e5a1fdcb4d2db9b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Feb 2017 18:11:21 +0000 Subject: [PATCH 0361/1299] drm/i915: Sanity check the vma->node prior to binding into the GTT We rely on the VMA being allocated inside the drm_mm and for its allotted node being large enough to accommodate all the vma->pages. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170225181122.4788-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index c1abfe7b48ea..6e9eade304b8 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -241,7 +241,15 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 vma_flags; int ret; - if (WARN_ON(flags == 0)) + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->size > vma->node.size); + + if (GEM_WARN_ON(range_overflows(vma->node.start, + vma->node.size, + vma->vm->total))) + return -ENODEV; + + if (GEM_WARN_ON(!flags)) return -EINVAL; bind_flags = 0; @@ -258,11 +266,6 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (bind_flags == 0) return 0; - if (GEM_WARN_ON(range_overflows(vma->node.start, - vma->node.size, - vma->vm->total))) - return -ENODEV; - trace_i915_vma_bind(vma, bind_flags); ret = vma->vm->bind_vma(vma, cache_level, bind_flags); if (ret) From 9e89f9ee3b16cca56bed5fa45e63f422d3ac2c3a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Feb 2017 18:11:22 +0000 Subject: [PATCH 0362/1299] drm/i915: Advance start address on crossing PML (48b ppgtt) boundary When advancing onto the next 4th level page table entry, we need to reset our indices to 0. Currently we restart from the original address which means we start with an offset into the next PML table. Fixes: 894ccebee2b0 ("drm/i915: Micro-optimise gen8_ppgtt_insert_entries()") Reported-by: Matthew Auld Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99948 Testcase: igt/drv_selftest/live_gtt Signed-off-by: Chris Wilson Cc: Matthew Auld Tested-by: Matthew Auld Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170225181122.4788-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 63 +++++++++++++++++++---------- 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e6e0dfa761dd..6fdbd5ae4fcb 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -815,26 +815,41 @@ struct sgt_dma { dma_addr_t dma, max; }; +struct gen8_insert_pte { + u16 pml4e; + u16 pdpe; + u16 pde; + u16 pte; +}; + +static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start) +{ + return (struct gen8_insert_pte) { + gen8_pml4e_index(start), + gen8_pdpe_index(start), + gen8_pde_index(start), + gen8_pte_index(start), + }; +} + static __always_inline bool gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory_pointer *pdp, struct sgt_dma *iter, - u64 start, + struct gen8_insert_pte *idx, enum i915_cache_level cache_level) { - unsigned int pdpe = gen8_pdpe_index(start); - unsigned int pde = gen8_pde_index(start); - unsigned int pte = gen8_pte_index(start); struct i915_page_directory *pd; const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); gen8_pte_t *vaddr; bool ret; - GEM_BUG_ON(pdpe >= I915_PDPES_PER_PDP(vm)); - pd = pdp->page_directory[pdpe]; - vaddr = kmap_atomic_px(pd->page_table[pde]); + GEM_BUG_ON(idx->pdpe >= I915_PDPES_PER_PDP(vm)); + pd = pdp->page_directory[idx->pdpe]; + vaddr = kmap_atomic_px(pd->page_table[idx->pde]); do { - vaddr[pte] = pte_encode | iter->dma; + vaddr[idx->pte] = pte_encode | iter->dma; + iter->dma += PAGE_SIZE; if (iter->dma >= iter->max) { iter->sg = __sg_next(iter->sg); @@ -847,22 +862,25 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, iter->max = iter->dma + iter->sg->length; } - if (++pte == GEN8_PTES) { - if (++pde == I915_PDES) { + if (++idx->pte == GEN8_PTES) { + idx->pte = 0; + + if (++idx->pde == I915_PDES) { + idx->pde = 0; + /* Limited by sg length for 3lvl */ - if (++pdpe == GEN8_PML4ES_PER_PML4) { + if (++idx->pdpe == GEN8_PML4ES_PER_PML4) { + idx->pdpe = 0; ret = true; break; } - GEM_BUG_ON(pdpe >= I915_PDPES_PER_PDP(vm)); - pd = pdp->page_directory[pdpe]; - pde = 0; + GEM_BUG_ON(idx->pdpe >= I915_PDPES_PER_PDP(vm)); + pd = pdp->page_directory[idx->pdpe]; } kunmap_atomic(vaddr); - vaddr = kmap_atomic_px(pd->page_table[pde]); - pte = 0; + vaddr = kmap_atomic_px(pd->page_table[idx->pde]); } } while (1); kunmap_atomic(vaddr); @@ -882,9 +900,10 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, .dma = sg_dma_address(iter.sg), .max = iter.dma + iter.sg->length, }; + struct gen8_insert_pte idx = gen8_insert_pte(start); - gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, - start, cache_level); + gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, + cache_level); } static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, @@ -900,11 +919,11 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, .max = iter.dma + iter.sg->length, }; struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; - unsigned int pml4e = gen8_pml4e_index(start); + struct gen8_insert_pte idx = gen8_insert_pte(start); - while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[pml4e++], &iter, - start, cache_level)) - GEM_BUG_ON(pml4e >= GEN8_PML4ES_PER_PML4); + while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, + &idx, cache_level)) + GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); } static void gen8_free_page_tables(struct i915_address_space *vm, From 9fc5cf6e14fc1db40ea8a1c10061bb9586e78585 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:24 +0100 Subject: [PATCH 0363/1299] platform/x86: fujitsu-laptop: clearly denote backlight-related symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify naming for all backlight-related functions, structures, variables and constants by using a consistent "_bl"/"_BL" suffix/infix. Adjust indentation to make checkpatch happy. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 214 +++++++++++++------------- 1 file changed, 107 insertions(+), 107 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 2b218b1d13e5..e1737b9d9d95 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -78,10 +78,10 @@ #define FUJITSU_LCD_N_LEVELS 8 -#define ACPI_FUJITSU_CLASS "fujitsu" -#define ACPI_FUJITSU_HID "FUJ02B1" -#define ACPI_FUJITSU_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver" -#define ACPI_FUJITSU_DEVICE_NAME "Fujitsu FUJ02B1" +#define ACPI_FUJITSU_CLASS "fujitsu" +#define ACPI_FUJITSU_BL_HID "FUJ02B1" +#define ACPI_FUJITSU_BL_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver" +#define ACPI_FUJITSU_BL_DEVICE_NAME "Fujitsu FUJ02B1" #define ACPI_FUJITSU_HOTKEY_HID "FUJ02E3" #define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver" #define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3" @@ -136,7 +136,7 @@ #endif /* Device controlling the backlight and associated keys */ -struct fujitsu_t { +struct fujitsu_bl { acpi_handle acpi_handle; struct acpi_device *dev; struct input_dev *input; @@ -150,7 +150,7 @@ struct fujitsu_t { unsigned int brightness_level; }; -static struct fujitsu_t *fujitsu; +static struct fujitsu_bl *fujitsu_bl; static int use_alt_lcd_levels = -1; static int disable_brightness_adjust = -1; @@ -222,7 +222,7 @@ static struct led_classdev eco_led = { static u32 dbg_level = 0x03; #endif -static void acpi_fujitsu_notify(struct acpi_device *device, u32 event); +static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event); /* Fujitsu ACPI interface function */ @@ -373,10 +373,10 @@ static int set_lcd_level(int level) vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBLL [%d]\n", level); - if (level < 0 || level >= fujitsu->max_brightness) + if (level < 0 || level >= fujitsu_bl->max_brightness) return -EINVAL; - status = acpi_get_handle(fujitsu->acpi_handle, "SBLL", &handle); + status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBLL", &handle); if (ACPI_FAILURE(status)) { vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBLL not present\n"); return -ENODEV; @@ -398,10 +398,10 @@ static int set_lcd_level_alt(int level) vdbg_printk(FUJLAPTOP_DBG_TRACE, "set lcd level via SBL2 [%d]\n", level); - if (level < 0 || level >= fujitsu->max_brightness) + if (level < 0 || level >= fujitsu_bl->max_brightness) return -EINVAL; - status = acpi_get_handle(fujitsu->acpi_handle, "SBL2", &handle); + status = acpi_get_handle(fujitsu_bl->acpi_handle, "SBL2", &handle); if (ACPI_FAILURE(status)) { vdbg_printk(FUJLAPTOP_DBG_ERROR, "SBL2 not present\n"); return -ENODEV; @@ -421,19 +421,19 @@ static int get_lcd_level(void) vdbg_printk(FUJLAPTOP_DBG_TRACE, "get lcd level via GBLL\n"); - status = - acpi_evaluate_integer(fujitsu->acpi_handle, "GBLL", NULL, &state); + status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "GBLL", NULL, + &state); if (ACPI_FAILURE(status)) return 0; - fujitsu->brightness_level = state & 0x0fffffff; + fujitsu_bl->brightness_level = state & 0x0fffffff; if (state & 0x80000000) - fujitsu->brightness_changed = 1; + fujitsu_bl->brightness_changed = 1; else - fujitsu->brightness_changed = 0; + fujitsu_bl->brightness_changed = 0; - return fujitsu->brightness_level; + return fujitsu_bl->brightness_level; } static int get_max_brightness(void) @@ -443,14 +443,14 @@ static int get_max_brightness(void) vdbg_printk(FUJLAPTOP_DBG_TRACE, "get max lcd level via RBLL\n"); - status = - acpi_evaluate_integer(fujitsu->acpi_handle, "RBLL", NULL, &state); + status = acpi_evaluate_integer(fujitsu_bl->acpi_handle, "RBLL", NULL, + &state); if (ACPI_FAILURE(status)) return -1; - fujitsu->max_brightness = state; + fujitsu_bl->max_brightness = state; - return fujitsu->max_brightness; + return fujitsu_bl->max_brightness; } /* Backlight device stuff */ @@ -483,7 +483,7 @@ static int bl_update_status(struct backlight_device *b) return ret; } -static const struct backlight_ops fujitsubl_ops = { +static const struct backlight_ops fujitsu_bl_ops = { .get_brightness = bl_get_brightness, .update_status = bl_update_status, }; @@ -511,7 +511,7 @@ show_brightness_changed(struct device *dev, int ret; - ret = fujitsu->brightness_changed; + ret = fujitsu_bl->brightness_changed; if (ret < 0) return ret; @@ -539,7 +539,7 @@ static ssize_t store_lcd_level(struct device *dev, int level, ret; if (sscanf(buf, "%i", &level) != 1 - || (level < 0 || level >= fujitsu->max_brightness)) + || (level < 0 || level >= fujitsu_bl->max_brightness)) return -EINVAL; if (use_alt_lcd_levels) @@ -644,25 +644,25 @@ static void __init dmi_check_cb_common(const struct dmi_system_id *id) static int __init dmi_check_cb_s6410(const struct dmi_system_id *id) { dmi_check_cb_common(id); - fujitsu->keycode1 = KEY_SCREENLOCK; /* "Lock" */ - fujitsu->keycode2 = KEY_HELP; /* "Mobility Center" */ + fujitsu_bl->keycode1 = KEY_SCREENLOCK; /* "Lock" */ + fujitsu_bl->keycode2 = KEY_HELP; /* "Mobility Center" */ return 1; } static int __init dmi_check_cb_s6420(const struct dmi_system_id *id) { dmi_check_cb_common(id); - fujitsu->keycode1 = KEY_SCREENLOCK; /* "Lock" */ - fujitsu->keycode2 = KEY_HELP; /* "Mobility Center" */ + fujitsu_bl->keycode1 = KEY_SCREENLOCK; /* "Lock" */ + fujitsu_bl->keycode2 = KEY_HELP; /* "Mobility Center" */ return 1; } static int __init dmi_check_cb_p8010(const struct dmi_system_id *id) { dmi_check_cb_common(id); - fujitsu->keycode1 = KEY_HELP; /* "Support" */ - fujitsu->keycode3 = KEY_SWITCHVIDEOMODE; /* "Presentation" */ - fujitsu->keycode4 = KEY_WWW; /* "Internet" */ + fujitsu_bl->keycode1 = KEY_HELP; /* "Support" */ + fujitsu_bl->keycode3 = KEY_SWITCHVIDEOMODE; /* "Presentation" */ + fujitsu_bl->keycode4 = KEY_WWW; /* "Internet" */ return 1; } @@ -693,7 +693,7 @@ static const struct dmi_system_id fujitsu_dmi_table[] __initconst = { /* ACPI device for LCD brightness control */ -static int acpi_fujitsu_add(struct acpi_device *device) +static int acpi_fujitsu_bl_add(struct acpi_device *device) { int state = 0; struct input_dev *input; @@ -702,22 +702,22 @@ static int acpi_fujitsu_add(struct acpi_device *device) if (!device) return -EINVAL; - fujitsu->acpi_handle = device->handle; - sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_DEVICE_NAME); + fujitsu_bl->acpi_handle = device->handle; + sprintf(acpi_device_name(device), "%s", ACPI_FUJITSU_BL_DEVICE_NAME); sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS); - device->driver_data = fujitsu; + device->driver_data = fujitsu_bl; - fujitsu->input = input = input_allocate_device(); + fujitsu_bl->input = input = input_allocate_device(); if (!input) { error = -ENOMEM; goto err_stop; } - snprintf(fujitsu->phys, sizeof(fujitsu->phys), + snprintf(fujitsu_bl->phys, sizeof(fujitsu_bl->phys), "%s/video/input0", acpi_device_hid(device)); input->name = acpi_device_name(device); - input->phys = fujitsu->phys; + input->phys = fujitsu_bl->phys; input->id.bustype = BUS_HOST; input->id.product = 0x06; input->dev.parent = &device->dev; @@ -730,7 +730,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) if (error) goto err_free_input_dev; - error = acpi_bus_update_power(fujitsu->acpi_handle, &state); + error = acpi_bus_update_power(fujitsu_bl->acpi_handle, &state); if (error) { pr_err("Error reading power state\n"); goto err_unregister_input_dev; @@ -740,7 +740,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) acpi_device_name(device), acpi_device_bid(device), !device->power.state ? "on" : "off"); - fujitsu->dev = device; + fujitsu_bl->dev = device; if (acpi_has_method(device->handle, METHOD_NAME__INI)) { vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n"); @@ -758,7 +758,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) use_alt_lcd_levels, disable_brightness_adjust); if (get_max_brightness() <= 0) - fujitsu->max_brightness = FUJITSU_LCD_N_LEVELS; + fujitsu_bl->max_brightness = FUJITSU_LCD_N_LEVELS; get_lcd_level(); return 0; @@ -772,38 +772,38 @@ static int acpi_fujitsu_add(struct acpi_device *device) return error; } -static int acpi_fujitsu_remove(struct acpi_device *device) +static int acpi_fujitsu_bl_remove(struct acpi_device *device) { - struct fujitsu_t *fujitsu = acpi_driver_data(device); - struct input_dev *input = fujitsu->input; + struct fujitsu_bl *fujitsu_bl = acpi_driver_data(device); + struct input_dev *input = fujitsu_bl->input; input_unregister_device(input); - fujitsu->acpi_handle = NULL; + fujitsu_bl->acpi_handle = NULL; return 0; } /* Brightness notify */ -static void acpi_fujitsu_notify(struct acpi_device *device, u32 event) +static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event) { struct input_dev *input; int keycode; int oldb, newb; - input = fujitsu->input; + input = fujitsu_bl->input; switch (event) { case ACPI_FUJITSU_NOTIFY_CODE1: keycode = 0; - oldb = fujitsu->brightness_level; + oldb = fujitsu_bl->brightness_level; get_lcd_level(); - newb = fujitsu->brightness_level; + newb = fujitsu_bl->brightness_level; vdbg_printk(FUJLAPTOP_DBG_TRACE, "brightness button event [%i -> %i (%i)]\n", - oldb, newb, fujitsu->brightness_changed); + oldb, newb, fujitsu_bl->brightness_changed); if (oldb < newb) { if (disable_brightness_adjust != 1) { @@ -882,11 +882,11 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) input->dev.parent = &device->dev; set_bit(EV_KEY, input->evbit); - set_bit(fujitsu->keycode1, input->keybit); - set_bit(fujitsu->keycode2, input->keybit); - set_bit(fujitsu->keycode3, input->keybit); - set_bit(fujitsu->keycode4, input->keybit); - set_bit(fujitsu->keycode5, input->keybit); + set_bit(fujitsu_bl->keycode1, input->keybit); + set_bit(fujitsu_bl->keycode2, input->keybit); + set_bit(fujitsu_bl->keycode3, input->keybit); + set_bit(fujitsu_bl->keycode4, input->keybit); + set_bit(fujitsu_bl->keycode5, input->keybit); set_bit(KEY_TOUCHPAD_TOGGLE, input->keybit); set_bit(KEY_UNKNOWN, input->keybit); @@ -937,7 +937,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) #if IS_ENABLED(CONFIG_LEDS_CLASS) if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) { - result = led_classdev_register(&fujitsu->pf_device->dev, + result = led_classdev_register(&fujitsu_bl->pf_device->dev, &logolamp_led); if (result == 0) { fujitsu_hotkey->logolamp_registered = 1; @@ -949,7 +949,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & KEYBOARD_LAMPS) && (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) == 0x0)) { - result = led_classdev_register(&fujitsu->pf_device->dev, + result = led_classdev_register(&fujitsu_bl->pf_device->dev, &kblamps_led); if (result == 0) { fujitsu_hotkey->kblamps_registered = 1; @@ -966,7 +966,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) * that an RF LED is present. */ if (call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0) & BIT(24)) { - result = led_classdev_register(&fujitsu->pf_device->dev, + result = led_classdev_register(&fujitsu_bl->pf_device->dev, &radio_led); if (result == 0) { fujitsu_hotkey->radio_led_registered = 1; @@ -983,7 +983,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) */ if ((call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & BIT(14)) && (call_fext_func(FUNC_LEDS, 0x2, ECO_LED, 0x0) != UNSUPPORTED_CMD)) { - result = led_classdev_register(&fujitsu->pf_device->dev, + result = led_classdev_register(&fujitsu_bl->pf_device->dev, &eco_led); if (result == 0) { fujitsu_hotkey->eco_led_registered = 1; @@ -1103,19 +1103,19 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) && (i++) < MAX_HOTKEY_RINGBUFFER_SIZE) { switch (irb & 0x4ff) { case KEY1_CODE: - keycode = fujitsu->keycode1; + keycode = fujitsu_bl->keycode1; break; case KEY2_CODE: - keycode = fujitsu->keycode2; + keycode = fujitsu_bl->keycode2; break; case KEY3_CODE: - keycode = fujitsu->keycode3; + keycode = fujitsu_bl->keycode3; break; case KEY4_CODE: - keycode = fujitsu->keycode4; + keycode = fujitsu_bl->keycode4; break; case KEY5_CODE: - keycode = fujitsu->keycode5; + keycode = fujitsu_bl->keycode5; break; case 0: keycode = 0; @@ -1150,19 +1150,19 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) /* Initialization */ -static const struct acpi_device_id fujitsu_device_ids[] = { - {ACPI_FUJITSU_HID, 0}, +static const struct acpi_device_id fujitsu_bl_device_ids[] = { + {ACPI_FUJITSU_BL_HID, 0}, {"", 0}, }; -static struct acpi_driver acpi_fujitsu_driver = { - .name = ACPI_FUJITSU_DRIVER_NAME, +static struct acpi_driver acpi_fujitsu_bl_driver = { + .name = ACPI_FUJITSU_BL_DRIVER_NAME, .class = ACPI_FUJITSU_CLASS, - .ids = fujitsu_device_ids, + .ids = fujitsu_bl_device_ids, .ops = { - .add = acpi_fujitsu_add, - .remove = acpi_fujitsu_remove, - .notify = acpi_fujitsu_notify, + .add = acpi_fujitsu_bl_add, + .remove = acpi_fujitsu_bl_remove, + .notify = acpi_fujitsu_bl_notify, }, }; @@ -1183,7 +1183,7 @@ static struct acpi_driver acpi_fujitsu_hotkey_driver = { }; static const struct acpi_device_id fujitsu_ids[] __used = { - {ACPI_FUJITSU_HID, 0}, + {ACPI_FUJITSU_BL_HID, 0}, {ACPI_FUJITSU_HOTKEY_HID, 0}, {"", 0} }; @@ -1196,17 +1196,17 @@ static int __init fujitsu_init(void) if (acpi_disabled) return -ENODEV; - fujitsu = kzalloc(sizeof(struct fujitsu_t), GFP_KERNEL); - if (!fujitsu) + fujitsu_bl = kzalloc(sizeof(struct fujitsu_bl), GFP_KERNEL); + if (!fujitsu_bl) return -ENOMEM; - fujitsu->keycode1 = KEY_PROG1; - fujitsu->keycode2 = KEY_PROG2; - fujitsu->keycode3 = KEY_PROG3; - fujitsu->keycode4 = KEY_PROG4; - fujitsu->keycode5 = KEY_RFKILL; + fujitsu_bl->keycode1 = KEY_PROG1; + fujitsu_bl->keycode2 = KEY_PROG2; + fujitsu_bl->keycode3 = KEY_PROG3; + fujitsu_bl->keycode4 = KEY_PROG4; + fujitsu_bl->keycode5 = KEY_RFKILL; dmi_check_system(fujitsu_dmi_table); - result = acpi_bus_register_driver(&acpi_fujitsu_driver); + result = acpi_bus_register_driver(&acpi_fujitsu_bl_driver); if (result < 0) { ret = -ENODEV; goto fail_acpi; @@ -1214,18 +1214,18 @@ static int __init fujitsu_init(void) /* Register platform stuff */ - fujitsu->pf_device = platform_device_alloc("fujitsu-laptop", -1); - if (!fujitsu->pf_device) { + fujitsu_bl->pf_device = platform_device_alloc("fujitsu-laptop", -1); + if (!fujitsu_bl->pf_device) { ret = -ENOMEM; goto fail_platform_driver; } - ret = platform_device_add(fujitsu->pf_device); + ret = platform_device_add(fujitsu_bl->pf_device); if (ret) goto fail_platform_device1; ret = - sysfs_create_group(&fujitsu->pf_device->dev.kobj, + sysfs_create_group(&fujitsu_bl->pf_device->dev.kobj, &fujitsupf_attribute_group); if (ret) goto fail_platform_device2; @@ -1236,19 +1236,19 @@ static int __init fujitsu_init(void) struct backlight_properties props; memset(&props, 0, sizeof(struct backlight_properties)); - max_brightness = fujitsu->max_brightness; + max_brightness = fujitsu_bl->max_brightness; props.type = BACKLIGHT_PLATFORM; props.max_brightness = max_brightness - 1; - fujitsu->bl_device = backlight_device_register("fujitsu-laptop", - NULL, NULL, - &fujitsubl_ops, - &props); - if (IS_ERR(fujitsu->bl_device)) { - ret = PTR_ERR(fujitsu->bl_device); - fujitsu->bl_device = NULL; + fujitsu_bl->bl_device = backlight_device_register("fujitsu-laptop", + NULL, NULL, + &fujitsu_bl_ops, + &props); + if (IS_ERR(fujitsu_bl->bl_device)) { + ret = PTR_ERR(fujitsu_bl->bl_device); + fujitsu_bl->bl_device = NULL; goto fail_sysfs_group; } - fujitsu->bl_device->props.brightness = fujitsu->brightness_level; + fujitsu_bl->bl_device->props.brightness = fujitsu_bl->brightness_level; } ret = platform_driver_register(&fujitsupf_driver); @@ -1272,9 +1272,9 @@ static int __init fujitsu_init(void) /* Sync backlight power status (needs FUJ02E3 device, hence deferred) */ if (acpi_video_get_backlight_type() == acpi_backlight_vendor) { if (call_fext_func(FUNC_BACKLIGHT, 0x2, 0x4, 0x0) == 3) - fujitsu->bl_device->props.power = FB_BLANK_POWERDOWN; + fujitsu_bl->bl_device->props.power = FB_BLANK_POWERDOWN; else - fujitsu->bl_device->props.power = FB_BLANK_UNBLANK; + fujitsu_bl->bl_device->props.power = FB_BLANK_UNBLANK; } pr_info("driver " FUJITSU_DRIVER_VERSION " successfully loaded\n"); @@ -1286,18 +1286,18 @@ static int __init fujitsu_init(void) fail_hotkey: platform_driver_unregister(&fujitsupf_driver); fail_backlight: - backlight_device_unregister(fujitsu->bl_device); + backlight_device_unregister(fujitsu_bl->bl_device); fail_sysfs_group: - sysfs_remove_group(&fujitsu->pf_device->dev.kobj, + sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj, &fujitsupf_attribute_group); fail_platform_device2: - platform_device_del(fujitsu->pf_device); + platform_device_del(fujitsu_bl->pf_device); fail_platform_device1: - platform_device_put(fujitsu->pf_device); + platform_device_put(fujitsu_bl->pf_device); fail_platform_driver: - acpi_bus_unregister_driver(&acpi_fujitsu_driver); + acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver); fail_acpi: - kfree(fujitsu); + kfree(fujitsu_bl); return ret; } @@ -1310,16 +1310,16 @@ static void __exit fujitsu_cleanup(void) platform_driver_unregister(&fujitsupf_driver); - backlight_device_unregister(fujitsu->bl_device); + backlight_device_unregister(fujitsu_bl->bl_device); - sysfs_remove_group(&fujitsu->pf_device->dev.kobj, + sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj, &fujitsupf_attribute_group); - platform_device_unregister(fujitsu->pf_device); + platform_device_unregister(fujitsu_bl->pf_device); - acpi_bus_unregister_driver(&acpi_fujitsu_driver); + acpi_bus_unregister_driver(&acpi_fujitsu_bl_driver); - kfree(fujitsu); + kfree(fujitsu_bl); pr_info("driver unloaded\n"); } From 6942eabc140eac54d5c0db2695eed63768209c34 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:25 +0100 Subject: [PATCH 0364/1299] platform/x86: fujitsu-laptop: replace "hotkey" with "laptop" in symbol names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Functions, structures, variables and constants whose names currently contain the "hotkey" keyword are not only responsible for handling hotkeys, but also other laptop-related features (rfkill, lid, dock, LEDs). Fix their naming by using a consistent "_laptop"/"_LAPTOP" suffix/infix. Update comments so that they reflect this change. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 158 +++++++++++++------------- 1 file changed, 79 insertions(+), 79 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index e1737b9d9d95..70124004b346 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -82,9 +82,9 @@ #define ACPI_FUJITSU_BL_HID "FUJ02B1" #define ACPI_FUJITSU_BL_DRIVER_NAME "Fujitsu laptop FUJ02B1 ACPI brightness driver" #define ACPI_FUJITSU_BL_DEVICE_NAME "Fujitsu FUJ02B1" -#define ACPI_FUJITSU_HOTKEY_HID "FUJ02E3" -#define ACPI_FUJITSU_HOTKEY_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver" -#define ACPI_FUJITSU_HOTKEY_DEVICE_NAME "Fujitsu FUJ02E3" +#define ACPI_FUJITSU_LAPTOP_HID "FUJ02E3" +#define ACPI_FUJITSU_LAPTOP_DRIVER_NAME "Fujitsu laptop FUJ02E3 ACPI hotkeys driver" +#define ACPI_FUJITSU_LAPTOP_DEVICE_NAME "Fujitsu FUJ02E3" #define ACPI_FUJITSU_NOTIFY_CODE1 0x80 @@ -154,8 +154,8 @@ static struct fujitsu_bl *fujitsu_bl; static int use_alt_lcd_levels = -1; static int disable_brightness_adjust = -1; -/* Device used to access other hotkeys on the laptop */ -struct fujitsu_hotkey_t { +/* Device used to access hotkeys and other features on the laptop */ +struct fujitsu_laptop { acpi_handle acpi_handle; struct acpi_device *dev; struct input_dev *input; @@ -171,9 +171,9 @@ struct fujitsu_hotkey_t { int eco_led_registered; }; -static struct fujitsu_hotkey_t *fujitsu_hotkey; +static struct fujitsu_laptop *fujitsu_laptop; -static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event); +static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event); #if IS_ENABLED(CONFIG_LEDS_CLASS) static enum led_brightness logolamp_get(struct led_classdev *cdev); @@ -239,7 +239,7 @@ static int call_fext_func(int cmd, int arg0, int arg1, int arg2) unsigned long long value; acpi_handle handle = NULL; - status = acpi_get_handle(fujitsu_hotkey->acpi_handle, "FUNC", &handle); + status = acpi_get_handle(fujitsu_laptop->acpi_handle, "FUNC", &handle); if (ACPI_FAILURE(status)) { vdbg_printk(FUJLAPTOP_DBG_ERROR, "FUNC interface is not present\n"); @@ -567,9 +567,9 @@ static ssize_t show_lid_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_hotkey->rfkill_supported & 0x100)) + if (!(fujitsu_laptop->rfkill_supported & 0x100)) return sprintf(buf, "unknown\n"); - if (fujitsu_hotkey->rfkill_state & 0x100) + if (fujitsu_laptop->rfkill_state & 0x100) return sprintf(buf, "open\n"); else return sprintf(buf, "closed\n"); @@ -579,9 +579,9 @@ static ssize_t show_dock_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_hotkey->rfkill_supported & 0x200)) + if (!(fujitsu_laptop->rfkill_supported & 0x200)) return sprintf(buf, "unknown\n"); - if (fujitsu_hotkey->rfkill_state & 0x200) + if (fujitsu_laptop->rfkill_state & 0x200) return sprintf(buf, "docked\n"); else return sprintf(buf, "undocked\n"); @@ -591,9 +591,9 @@ static ssize_t show_radios_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_hotkey->rfkill_supported & 0x20)) + if (!(fujitsu_laptop->rfkill_supported & 0x20)) return sprintf(buf, "unknown\n"); - if (fujitsu_hotkey->rfkill_state & 0x20) + if (fujitsu_laptop->rfkill_state & 0x20) return sprintf(buf, "on\n"); else return sprintf(buf, "killed\n"); @@ -840,7 +840,7 @@ static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event) /* ACPI device for hotkey handling */ -static int acpi_fujitsu_hotkey_add(struct acpi_device *device) +static int acpi_fujitsu_laptop_add(struct acpi_device *device) { int result = 0; int state = 0; @@ -851,32 +851,32 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if (!device) return -EINVAL; - fujitsu_hotkey->acpi_handle = device->handle; + fujitsu_laptop->acpi_handle = device->handle; sprintf(acpi_device_name(device), "%s", - ACPI_FUJITSU_HOTKEY_DEVICE_NAME); + ACPI_FUJITSU_LAPTOP_DEVICE_NAME); sprintf(acpi_device_class(device), "%s", ACPI_FUJITSU_CLASS); - device->driver_data = fujitsu_hotkey; + device->driver_data = fujitsu_laptop; /* kfifo */ - spin_lock_init(&fujitsu_hotkey->fifo_lock); - error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int), + spin_lock_init(&fujitsu_laptop->fifo_lock); + error = kfifo_alloc(&fujitsu_laptop->fifo, RINGBUFFERSIZE * sizeof(int), GFP_KERNEL); if (error) { pr_err("kfifo_alloc failed\n"); goto err_stop; } - fujitsu_hotkey->input = input = input_allocate_device(); + fujitsu_laptop->input = input = input_allocate_device(); if (!input) { error = -ENOMEM; goto err_free_fifo; } - snprintf(fujitsu_hotkey->phys, sizeof(fujitsu_hotkey->phys), + snprintf(fujitsu_laptop->phys, sizeof(fujitsu_laptop->phys), "%s/video/input0", acpi_device_hid(device)); input->name = acpi_device_name(device); - input->phys = fujitsu_hotkey->phys; + input->phys = fujitsu_laptop->phys; input->id.bustype = BUS_HOST; input->id.product = 0x06; input->dev.parent = &device->dev; @@ -894,7 +894,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if (error) goto err_free_input_dev; - error = acpi_bus_update_power(fujitsu_hotkey->acpi_handle, &state); + error = acpi_bus_update_power(fujitsu_laptop->acpi_handle, &state); if (error) { pr_err("Error reading power state\n"); goto err_unregister_input_dev; @@ -904,7 +904,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) acpi_device_name(device), acpi_device_bid(device), !device->power.state ? "on" : "off"); - fujitsu_hotkey->dev = device; + fujitsu_laptop->dev = device; if (acpi_has_method(device->handle, METHOD_NAME__INI)) { vdbg_printk(FUJLAPTOP_DBG_INFO, "Invoking _INI\n"); @@ -920,16 +920,16 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) ; /* No action, result is discarded */ vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i); - fujitsu_hotkey->rfkill_supported = + fujitsu_laptop->rfkill_supported = call_fext_func(FUNC_RFKILL, 0x0, 0x0, 0x0); /* Make sure our bitmask of supported functions is cleared if the RFKILL function block is not implemented, like on the S7020. */ - if (fujitsu_hotkey->rfkill_supported == UNSUPPORTED_CMD) - fujitsu_hotkey->rfkill_supported = 0; + if (fujitsu_laptop->rfkill_supported == UNSUPPORTED_CMD) + fujitsu_laptop->rfkill_supported = 0; - if (fujitsu_hotkey->rfkill_supported) - fujitsu_hotkey->rfkill_state = + if (fujitsu_laptop->rfkill_supported) + fujitsu_laptop->rfkill_state = call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); /* Suspect this is a keymap of the application panel, print it */ @@ -940,7 +940,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = led_classdev_register(&fujitsu_bl->pf_device->dev, &logolamp_led); if (result == 0) { - fujitsu_hotkey->logolamp_registered = 1; + fujitsu_laptop->logolamp_registered = 1; } else { pr_err("Could not register LED handler for logo lamp, error %i\n", result); @@ -952,7 +952,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = led_classdev_register(&fujitsu_bl->pf_device->dev, &kblamps_led); if (result == 0) { - fujitsu_hotkey->kblamps_registered = 1; + fujitsu_laptop->kblamps_registered = 1; } else { pr_err("Could not register LED handler for keyboard lamps, error %i\n", result); @@ -969,7 +969,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = led_classdev_register(&fujitsu_bl->pf_device->dev, &radio_led); if (result == 0) { - fujitsu_hotkey->radio_led_registered = 1; + fujitsu_laptop->radio_led_registered = 1; } else { pr_err("Could not register LED handler for radio LED, error %i\n", result); @@ -986,7 +986,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = led_classdev_register(&fujitsu_bl->pf_device->dev, &eco_led); if (result == 0) { - fujitsu_hotkey->eco_led_registered = 1; + fujitsu_laptop->eco_led_registered = 1; } else { pr_err("Could not register LED handler for eco LED, error %i\n", result); @@ -1002,47 +1002,47 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) err_free_input_dev: input_free_device(input); err_free_fifo: - kfifo_free(&fujitsu_hotkey->fifo); + kfifo_free(&fujitsu_laptop->fifo); err_stop: return error; } -static int acpi_fujitsu_hotkey_remove(struct acpi_device *device) +static int acpi_fujitsu_laptop_remove(struct acpi_device *device) { - struct fujitsu_hotkey_t *fujitsu_hotkey = acpi_driver_data(device); - struct input_dev *input = fujitsu_hotkey->input; + struct fujitsu_laptop *fujitsu_laptop = acpi_driver_data(device); + struct input_dev *input = fujitsu_laptop->input; #if IS_ENABLED(CONFIG_LEDS_CLASS) - if (fujitsu_hotkey->logolamp_registered) + if (fujitsu_laptop->logolamp_registered) led_classdev_unregister(&logolamp_led); - if (fujitsu_hotkey->kblamps_registered) + if (fujitsu_laptop->kblamps_registered) led_classdev_unregister(&kblamps_led); - if (fujitsu_hotkey->radio_led_registered) + if (fujitsu_laptop->radio_led_registered) led_classdev_unregister(&radio_led); - if (fujitsu_hotkey->eco_led_registered) + if (fujitsu_laptop->eco_led_registered) led_classdev_unregister(&eco_led); #endif input_unregister_device(input); - kfifo_free(&fujitsu_hotkey->fifo); + kfifo_free(&fujitsu_laptop->fifo); - fujitsu_hotkey->acpi_handle = NULL; + fujitsu_laptop->acpi_handle = NULL; return 0; } -static void acpi_fujitsu_hotkey_press(int keycode) +static void acpi_fujitsu_laptop_press(int keycode) { - struct input_dev *input = fujitsu_hotkey->input; + struct input_dev *input = fujitsu_laptop->input; int status; - status = kfifo_in_locked(&fujitsu_hotkey->fifo, + status = kfifo_in_locked(&fujitsu_laptop->fifo, (unsigned char *)&keycode, sizeof(keycode), - &fujitsu_hotkey->fifo_lock); + &fujitsu_laptop->fifo_lock); if (status != sizeof(keycode)) { vdbg_printk(FUJLAPTOP_DBG_WARN, "Could not push keycode [0x%x]\n", keycode); @@ -1054,16 +1054,16 @@ static void acpi_fujitsu_hotkey_press(int keycode) "Push keycode into ringbuffer [%d]\n", keycode); } -static void acpi_fujitsu_hotkey_release(void) +static void acpi_fujitsu_laptop_release(void) { - struct input_dev *input = fujitsu_hotkey->input; + struct input_dev *input = fujitsu_laptop->input; int keycode, status; while (true) { - status = kfifo_out_locked(&fujitsu_hotkey->fifo, + status = kfifo_out_locked(&fujitsu_laptop->fifo, (unsigned char *)&keycode, sizeof(keycode), - &fujitsu_hotkey->fifo_lock); + &fujitsu_laptop->fifo_lock); if (status != sizeof(keycode)) return; input_report_key(input, keycode, 0); @@ -1073,14 +1073,14 @@ static void acpi_fujitsu_hotkey_release(void) } } -static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) +static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event) { struct input_dev *input; int keycode; unsigned int irb = 1; int i; - input = fujitsu_hotkey->input; + input = fujitsu_laptop->input; if (event != ACPI_FUJITSU_NOTIFY_CODE1) { keycode = KEY_UNKNOWN; @@ -1093,8 +1093,8 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) return; } - if (fujitsu_hotkey->rfkill_supported) - fujitsu_hotkey->rfkill_state = + if (fujitsu_laptop->rfkill_supported) + fujitsu_laptop->rfkill_state = call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); i = 0; @@ -1128,16 +1128,16 @@ static void acpi_fujitsu_hotkey_notify(struct acpi_device *device, u32 event) } if (keycode > 0) - acpi_fujitsu_hotkey_press(keycode); + acpi_fujitsu_laptop_press(keycode); else if (keycode == 0) - acpi_fujitsu_hotkey_release(); + acpi_fujitsu_laptop_release(); } /* On some models (first seen on the Skylake-based Lifebook * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is * handled in software; its state is queried using FUNC_RFKILL */ - if ((fujitsu_hotkey->rfkill_supported & BIT(26)) && + if ((fujitsu_laptop->rfkill_supported & BIT(26)) && (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) { keycode = KEY_TOUCHPAD_TOGGLE; input_report_key(input, keycode, 1); @@ -1166,25 +1166,25 @@ static struct acpi_driver acpi_fujitsu_bl_driver = { }, }; -static const struct acpi_device_id fujitsu_hotkey_device_ids[] = { - {ACPI_FUJITSU_HOTKEY_HID, 0}, +static const struct acpi_device_id fujitsu_laptop_device_ids[] = { + {ACPI_FUJITSU_LAPTOP_HID, 0}, {"", 0}, }; -static struct acpi_driver acpi_fujitsu_hotkey_driver = { - .name = ACPI_FUJITSU_HOTKEY_DRIVER_NAME, +static struct acpi_driver acpi_fujitsu_laptop_driver = { + .name = ACPI_FUJITSU_LAPTOP_DRIVER_NAME, .class = ACPI_FUJITSU_CLASS, - .ids = fujitsu_hotkey_device_ids, + .ids = fujitsu_laptop_device_ids, .ops = { - .add = acpi_fujitsu_hotkey_add, - .remove = acpi_fujitsu_hotkey_remove, - .notify = acpi_fujitsu_hotkey_notify, + .add = acpi_fujitsu_laptop_add, + .remove = acpi_fujitsu_laptop_remove, + .notify = acpi_fujitsu_laptop_notify, }, }; static const struct acpi_device_id fujitsu_ids[] __used = { {ACPI_FUJITSU_BL_HID, 0}, - {ACPI_FUJITSU_HOTKEY_HID, 0}, + {ACPI_FUJITSU_LAPTOP_HID, 0}, {"", 0} }; MODULE_DEVICE_TABLE(acpi, fujitsu_ids); @@ -1255,18 +1255,18 @@ static int __init fujitsu_init(void) if (ret) goto fail_backlight; - /* Register hotkey driver */ + /* Register laptop driver */ - fujitsu_hotkey = kzalloc(sizeof(struct fujitsu_hotkey_t), GFP_KERNEL); - if (!fujitsu_hotkey) { + fujitsu_laptop = kzalloc(sizeof(struct fujitsu_laptop), GFP_KERNEL); + if (!fujitsu_laptop) { ret = -ENOMEM; - goto fail_hotkey; + goto fail_laptop; } - result = acpi_bus_register_driver(&acpi_fujitsu_hotkey_driver); + result = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver); if (result < 0) { ret = -ENODEV; - goto fail_hotkey1; + goto fail_laptop1; } /* Sync backlight power status (needs FUJ02E3 device, hence deferred) */ @@ -1281,9 +1281,9 @@ static int __init fujitsu_init(void) return 0; -fail_hotkey1: - kfree(fujitsu_hotkey); -fail_hotkey: +fail_laptop1: + kfree(fujitsu_laptop); +fail_laptop: platform_driver_unregister(&fujitsupf_driver); fail_backlight: backlight_device_unregister(fujitsu_bl->bl_device); @@ -1304,9 +1304,9 @@ static int __init fujitsu_init(void) static void __exit fujitsu_cleanup(void) { - acpi_bus_unregister_driver(&acpi_fujitsu_hotkey_driver); + acpi_bus_unregister_driver(&acpi_fujitsu_laptop_driver); - kfree(fujitsu_hotkey); + kfree(fujitsu_laptop); platform_driver_unregister(&fujitsupf_driver); From 1650602691f4e8ea8f6e306452a72ac9a611932a Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:26 +0100 Subject: [PATCH 0365/1299] platform/x86: fujitsu-laptop: make platform-related variables match naming convention MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace "fujitsupf" with "fujitsu_pf" in all platform-related variable names to match the module-wide naming convention. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 70124004b346..6cdd1b334e8a 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -607,7 +607,7 @@ static DEVICE_ATTR(lid, 0444, show_lid_state, ignore_store); static DEVICE_ATTR(dock, 0444, show_dock_state, ignore_store); static DEVICE_ATTR(radios, 0444, show_radios_state, ignore_store); -static struct attribute *fujitsupf_attributes[] = { +static struct attribute *fujitsu_pf_attributes[] = { &dev_attr_brightness_changed.attr, &dev_attr_max_brightness.attr, &dev_attr_lcd_level.attr, @@ -617,11 +617,11 @@ static struct attribute *fujitsupf_attributes[] = { NULL }; -static struct attribute_group fujitsupf_attribute_group = { - .attrs = fujitsupf_attributes +static struct attribute_group fujitsu_pf_attribute_group = { + .attrs = fujitsu_pf_attributes }; -static struct platform_driver fujitsupf_driver = { +static struct platform_driver fujitsu_pf_driver = { .driver = { .name = "fujitsu-laptop", } @@ -1226,7 +1226,7 @@ static int __init fujitsu_init(void) ret = sysfs_create_group(&fujitsu_bl->pf_device->dev.kobj, - &fujitsupf_attribute_group); + &fujitsu_pf_attribute_group); if (ret) goto fail_platform_device2; @@ -1251,7 +1251,7 @@ static int __init fujitsu_init(void) fujitsu_bl->bl_device->props.brightness = fujitsu_bl->brightness_level; } - ret = platform_driver_register(&fujitsupf_driver); + ret = platform_driver_register(&fujitsu_pf_driver); if (ret) goto fail_backlight; @@ -1284,12 +1284,12 @@ static int __init fujitsu_init(void) fail_laptop1: kfree(fujitsu_laptop); fail_laptop: - platform_driver_unregister(&fujitsupf_driver); + platform_driver_unregister(&fujitsu_pf_driver); fail_backlight: backlight_device_unregister(fujitsu_bl->bl_device); fail_sysfs_group: sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj, - &fujitsupf_attribute_group); + &fujitsu_pf_attribute_group); fail_platform_device2: platform_device_del(fujitsu_bl->pf_device); fail_platform_device1: @@ -1308,12 +1308,12 @@ static void __exit fujitsu_cleanup(void) kfree(fujitsu_laptop); - platform_driver_unregister(&fujitsupf_driver); + platform_driver_unregister(&fujitsu_pf_driver); backlight_device_unregister(fujitsu_bl->bl_device); sysfs_remove_group(&fujitsu_bl->pf_device->dev.kobj, - &fujitsupf_attribute_group); + &fujitsu_pf_attribute_group); platform_device_unregister(fujitsu_bl->pf_device); From 8ef27bd3410c4e5856bac2315ef51224926020e0 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:27 +0100 Subject: [PATCH 0366/1299] platform/x86: fujitsu-laptop: rename FUNC_RFKILL to FUNC_FLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FUNC subfunction 0x1000 is currently referred to as FUNC_RFKILL, which is misleading, because it handles more than just radio devices (also lid, dock, LEDs). Rename the FUNC_RFKILL constant to FUNC_FLAGS. Replace "rfkill" with "flags" in the names of its associated fields inside struct fujitsu_laptop. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 50 +++++++++++++-------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 6cdd1b334e8a..55d696262301 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -89,7 +89,7 @@ #define ACPI_FUJITSU_NOTIFY_CODE1 0x80 /* FUNC interface - command values */ -#define FUNC_RFKILL 0x1000 +#define FUNC_FLAGS 0x1000 #define FUNC_LEDS 0x1001 #define FUNC_BUTTONS 0x1002 #define FUNC_BACKLIGHT 0x1004 @@ -163,8 +163,8 @@ struct fujitsu_laptop { struct platform_device *pf_device; struct kfifo fifo; spinlock_t fifo_lock; - int rfkill_supported; - int rfkill_state; + int flags_supported; + int flags_state; int logolamp_registered; int kblamps_registered; int radio_led_registered; @@ -300,9 +300,9 @@ static int radio_led_set(struct led_classdev *cdev, enum led_brightness brightness) { if (brightness >= LED_FULL) - return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, RADIO_LED_ON); + return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, RADIO_LED_ON); else - return call_fext_func(FUNC_RFKILL, 0x5, RADIO_LED_ON, 0x0); + return call_fext_func(FUNC_FLAGS, 0x5, RADIO_LED_ON, 0x0); } static int eco_led_set(struct led_classdev *cdev, @@ -346,7 +346,7 @@ static enum led_brightness radio_led_get(struct led_classdev *cdev) { enum led_brightness brightness = LED_OFF; - if (call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0) & RADIO_LED_ON) + if (call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0) & RADIO_LED_ON) brightness = LED_FULL; return brightness; @@ -567,9 +567,9 @@ static ssize_t show_lid_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->rfkill_supported & 0x100)) + if (!(fujitsu_laptop->flags_supported & 0x100)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->rfkill_state & 0x100) + if (fujitsu_laptop->flags_state & 0x100) return sprintf(buf, "open\n"); else return sprintf(buf, "closed\n"); @@ -579,9 +579,9 @@ static ssize_t show_dock_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->rfkill_supported & 0x200)) + if (!(fujitsu_laptop->flags_supported & 0x200)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->rfkill_state & 0x200) + if (fujitsu_laptop->flags_state & 0x200) return sprintf(buf, "docked\n"); else return sprintf(buf, "undocked\n"); @@ -591,9 +591,9 @@ static ssize_t show_radios_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->rfkill_supported & 0x20)) + if (!(fujitsu_laptop->flags_supported & 0x20)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->rfkill_state & 0x20) + if (fujitsu_laptop->flags_state & 0x20) return sprintf(buf, "on\n"); else return sprintf(buf, "killed\n"); @@ -920,17 +920,17 @@ static int acpi_fujitsu_laptop_add(struct acpi_device *device) ; /* No action, result is discarded */ vdbg_printk(FUJLAPTOP_DBG_INFO, "Discarded %i ringbuffer entries\n", i); - fujitsu_laptop->rfkill_supported = - call_fext_func(FUNC_RFKILL, 0x0, 0x0, 0x0); + fujitsu_laptop->flags_supported = + call_fext_func(FUNC_FLAGS, 0x0, 0x0, 0x0); /* Make sure our bitmask of supported functions is cleared if the RFKILL function block is not implemented, like on the S7020. */ - if (fujitsu_laptop->rfkill_supported == UNSUPPORTED_CMD) - fujitsu_laptop->rfkill_supported = 0; + if (fujitsu_laptop->flags_supported == UNSUPPORTED_CMD) + fujitsu_laptop->flags_supported = 0; - if (fujitsu_laptop->rfkill_supported) - fujitsu_laptop->rfkill_state = - call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); + if (fujitsu_laptop->flags_supported) + fujitsu_laptop->flags_state = + call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0); /* Suspect this is a keymap of the application panel, print it */ pr_info("BTNI: [0x%x]\n", call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0)); @@ -1093,9 +1093,9 @@ static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event) return; } - if (fujitsu_laptop->rfkill_supported) - fujitsu_laptop->rfkill_state = - call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); + if (fujitsu_laptop->flags_supported) + fujitsu_laptop->flags_state = + call_fext_func(FUNC_FLAGS, 0x4, 0x0, 0x0); i = 0; while ((irb = @@ -1135,10 +1135,10 @@ static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event) /* On some models (first seen on the Skylake-based Lifebook * E736/E746/E756), the touchpad toggle hotkey (Fn+F4) is - * handled in software; its state is queried using FUNC_RFKILL + * handled in software; its state is queried using FUNC_FLAGS */ - if ((fujitsu_laptop->rfkill_supported & BIT(26)) && - (call_fext_func(FUNC_RFKILL, 0x1, 0x0, 0x0) & BIT(26))) { + if ((fujitsu_laptop->flags_supported & BIT(26)) && + (call_fext_func(FUNC_FLAGS, 0x1, 0x0, 0x0) & BIT(26))) { keycode = KEY_TOUCHPAD_TOGGLE; input_report_key(input, keycode, 1); input_sync(input); From d3dd4480f8a911198d85b3cdb97f22db6539d2d1 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:28 +0100 Subject: [PATCH 0367/1299] platform/x86: fujitsu-laptop: replace numeric values with constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace three repeating numeric values with constants to improve code clarity. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 55d696262301..deef40a03b6d 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -97,6 +97,11 @@ /* FUNC interface - responses */ #define UNSUPPORTED_CMD 0x80000000 +/* FUNC interface - status flags */ +#define FLAG_RFKILL 0x020 +#define FLAG_LID 0x100 +#define FLAG_DOCK 0x200 + #if IS_ENABLED(CONFIG_LEDS_CLASS) /* FUNC interface - LED control */ #define FUNC_LED_OFF 0x1 @@ -567,9 +572,9 @@ static ssize_t show_lid_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->flags_supported & 0x100)) + if (!(fujitsu_laptop->flags_supported & FLAG_LID)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->flags_state & 0x100) + if (fujitsu_laptop->flags_state & FLAG_LID) return sprintf(buf, "open\n"); else return sprintf(buf, "closed\n"); @@ -579,9 +584,9 @@ static ssize_t show_dock_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->flags_supported & 0x200)) + if (!(fujitsu_laptop->flags_supported & FLAG_DOCK)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->flags_state & 0x200) + if (fujitsu_laptop->flags_state & FLAG_DOCK) return sprintf(buf, "docked\n"); else return sprintf(buf, "undocked\n"); @@ -591,9 +596,9 @@ static ssize_t show_radios_state(struct device *dev, struct device_attribute *attr, char *buf) { - if (!(fujitsu_laptop->flags_supported & 0x20)) + if (!(fujitsu_laptop->flags_supported & FLAG_RFKILL)) return sprintf(buf, "unknown\n"); - if (fujitsu_laptop->flags_state & 0x20) + if (fujitsu_laptop->flags_state & FLAG_RFKILL) return sprintf(buf, "on\n"); else return sprintf(buf, "killed\n"); From 8c590e339f37022dff209ef16cf699531df1a0ca Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:29 +0100 Subject: [PATCH 0368/1299] platform/x86: fujitsu-laptop: remove redundant forward declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both acpi_fujitsu_bl_notify() and acpi_fujitsu_laptop_notify() are defined before they are first used, so remove their forward declarations as they are redundant. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index deef40a03b6d..ba0c0c211251 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -178,8 +178,6 @@ struct fujitsu_laptop { static struct fujitsu_laptop *fujitsu_laptop; -static void acpi_fujitsu_laptop_notify(struct acpi_device *device, u32 event); - #if IS_ENABLED(CONFIG_LEDS_CLASS) static enum led_brightness logolamp_get(struct led_classdev *cdev); static int logolamp_set(struct led_classdev *cdev, @@ -227,8 +225,6 @@ static struct led_classdev eco_led = { static u32 dbg_level = 0x03; #endif -static void acpi_fujitsu_bl_notify(struct acpi_device *device, u32 event); - /* Fujitsu ACPI interface function */ static int call_fext_func(int cmd, int arg0, int arg1, int arg2) From c1d1e8a051761fb808308dab32b9351e1d1fbb9b Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:30 +0100 Subject: [PATCH 0369/1299] platform/x86: fujitsu-laptop: simplify acpi_bus_register_driver() error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A separate variable is not needed to handle error codes returned by acpi_bus_register_driver(). If the latter fails, just use the value it returned as the value returned by fujitsu_init(). Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index ba0c0c211251..a5478a011b90 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -1192,7 +1192,7 @@ MODULE_DEVICE_TABLE(acpi, fujitsu_ids); static int __init fujitsu_init(void) { - int ret, result, max_brightness; + int ret, max_brightness; if (acpi_disabled) return -ENODEV; @@ -1207,11 +1207,9 @@ static int __init fujitsu_init(void) fujitsu_bl->keycode5 = KEY_RFKILL; dmi_check_system(fujitsu_dmi_table); - result = acpi_bus_register_driver(&acpi_fujitsu_bl_driver); - if (result < 0) { - ret = -ENODEV; + ret = acpi_bus_register_driver(&acpi_fujitsu_bl_driver); + if (ret) goto fail_acpi; - } /* Register platform stuff */ @@ -1264,11 +1262,9 @@ static int __init fujitsu_init(void) goto fail_laptop; } - result = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver); - if (result < 0) { - ret = -ENODEV; + ret = acpi_bus_register_driver(&acpi_fujitsu_laptop_driver); + if (ret) goto fail_laptop1; - } /* Sync backlight power status (needs FUJ02E3 device, hence deferred) */ if (acpi_video_get_backlight_type() == acpi_backlight_vendor) { From 5296a73613814a15e54ebddc2843ec0f84951d60 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:32 +0100 Subject: [PATCH 0370/1299] platform/x86: fujitsu-laptop: autodetect LCD interface on all models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Presence of ACPI method SBL2 should be checked on all models rather than just the ones with predefined hotkey keycode overrides. Move most of dmi_check_cb_common() to acpi_fujitsu_bl_add(). Adjust indentation to make checkpatch happy. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index a5478a011b90..56804c4db33f 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -631,15 +631,6 @@ static struct platform_driver fujitsu_pf_driver = { static void __init dmi_check_cb_common(const struct dmi_system_id *id) { pr_info("Identified laptop model '%s'\n", id->ident); - if (use_alt_lcd_levels == -1) { - if (acpi_has_method(NULL, - "\\_SB.PCI0.LPCB.FJEX.SBL2")) - use_alt_lcd_levels = 1; - else - use_alt_lcd_levels = 0; - vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as " - "%i\n", use_alt_lcd_levels); - } } static int __init dmi_check_cb_s6410(const struct dmi_system_id *id) @@ -751,6 +742,15 @@ static int acpi_fujitsu_bl_add(struct acpi_device *device) pr_err("_INI Method failed\n"); } + if (use_alt_lcd_levels == -1) { + if (acpi_has_method(NULL, "\\_SB.PCI0.LPCB.FJEX.SBL2")) + use_alt_lcd_levels = 1; + else + use_alt_lcd_levels = 0; + vdbg_printk(FUJLAPTOP_DBG_TRACE, "auto-detected usealt as %i\n", + use_alt_lcd_levels); + } + /* do config (detect defaults) */ use_alt_lcd_levels = use_alt_lcd_levels == 1 ? 1 : 0; disable_brightness_adjust = disable_brightness_adjust == 1 ? 1 : 0; From 5802d0bc3fe9f598f0ff3f5fd7fd8c5c935a1b5d Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 8 Feb 2017 14:46:33 +0100 Subject: [PATCH 0371/1299] platform/x86: fujitsu-laptop: remove redundant MODULE_ALIAS entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MODULE_DEVICE_TABLE is all that is needed for fujitsu-laptop to be properly autoloaded based on presence of its associated ACPI devices, so remove redundant MODULE_ALIAS entries. Signed-off-by: Alan Jenkins [kempniu: rebase patch, rewrite commit message] Signed-off-by: Michał Kępień Signed-off-by: Andy Shevchenko Reviewed-by: Jonathan Woithe --- drivers/platform/x86/fujitsu-laptop.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 56804c4db33f..e12cc3504d48 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -1338,7 +1338,3 @@ MODULE_AUTHOR("Jonathan Woithe, Peter Gruber, Tony Vroon"); MODULE_DESCRIPTION("Fujitsu laptop extras support"); MODULE_VERSION(FUJITSU_DRIVER_VERSION); MODULE_LICENSE("GPL"); - -MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1D3:*:cvrS6410:*"); -MODULE_ALIAS("dmi:*:svnFUJITSUSIEMENS:*:pvr:rvnFUJITSU:rnFJNB1E6:*:cvrS6420:*"); -MODULE_ALIAS("dmi:*:svnFUJITSU:*:pvr:rvnFUJITSU:rnFJNB19C:*:cvrS7020:*"); From 71050ae7bf83e4d71a859257d11adc5de517073e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= Date: Mon, 20 Feb 2017 14:50:22 -0500 Subject: [PATCH 0372/1299] platform/x86: asus-wmi: Detect quirk_no_rfkill from the DSDT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some Asus laptops that have an airplane-mode indicator LED, also have the WMI WLAN user bit set, and the following bits in their DSDT: Scope (_SB) { (...) Device (ATKD) { (...) Method (WMNB, 3, Serialized) { (...) If (LEqual (IIA0, 0x00010002)) { OWGD (IIA1) Return (One) } } } } So when asus-wmi uses ASUS_WMI_DEVID_WLAN_LED (0x00010002) to store the wlan state, it drives the airplane-mode indicator LED (through the call to OWGD) in an inverted fashion: the LED is ON when airplane mode is OFF (since wlan is ON), and vice-versa. This commit skips registering RFKill switches at all for these laptops, to allow the asus-wireless driver to drive the airplane mode LED correctly through the ASHS ACPI device. Relying on the presence of ASHS and ASUS_WMI_DSTS_USER_BIT avoids adding DMI-based quirks for at least 21 different laptops. Signed-off-by: João Paulo Rechi Vita Signed-off-by: Andy Shevchenko --- drivers/platform/x86/asus-wmi.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 43cb680adbb4..8499d3ae4257 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -159,6 +159,8 @@ MODULE_LICENSE("GPL"); #define USB_INTEL_XUSB2PR 0xD0 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 +static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; + struct bios_args { u32 arg0; u32 arg1; @@ -2051,6 +2053,16 @@ static int asus_wmi_fan_init(struct asus_wmi *asus) return 0; } +static bool ashs_present(void) +{ + int i = 0; + while (ashs_ids[i]) { + if (acpi_dev_found(ashs_ids[i++])) + return true; + } + return false; +} + /* * WMI Driver */ @@ -2095,6 +2107,13 @@ static int asus_wmi_add(struct platform_device *pdev) if (err) goto fail_leds; + asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result); + if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) + asus->driver->wlan_ctrl_by_user = 1; + + if (asus->driver->wlan_ctrl_by_user && ashs_present()) + asus->driver->quirks->no_rfkill = 1; + if (!asus->driver->quirks->no_rfkill) { err = asus_wmi_rfkill_init(asus); if (err) @@ -2134,10 +2153,6 @@ static int asus_wmi_add(struct platform_device *pdev) if (err) goto fail_debugfs; - asus_wmi_get_devstate(asus, ASUS_WMI_DEVID_WLAN, &result); - if (result & (ASUS_WMI_DSTS_PRESENCE_BIT | ASUS_WMI_DSTS_USER_BIT)) - asus->driver->wlan_ctrl_by_user = 1; - return 0; fail_debugfs: From 9260a0409794a59079393e843511414d5f08c14b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Feb 2017 11:27:51 +0100 Subject: [PATCH 0373/1299] x86/platform/intel/iosf_mbi: Add a mutex for P-Unit access One some systems the P-Unit accesses the PMIC to change various voltages through the same bus as other kernel drivers use for e.g. battery monitoring. If a driver sends requests to the P-Unit which require the P-Unit to access the PMIC bus while another driver is also accessing the PMIC bus various bad things happen. This commit adds a mutex to protect the P-Unit against simultaneous accesses and 2 functions to lock / unlock this mutex. Note on these systems the i2c-bus driver will request a sempahore from the P-Unit for exclusive access to the PMIC bus when i2c drivers are accessing it, but this does not appear to be sufficient, we still need to avoid making certain P-Unit requests during the access window to avoid problems. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=155241 Signed-off-by: Hans de Goede Tested-by: tagorereddy Reviewed-by: Andy Shevchenko Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170210102802.20898-2-hdegoede@redhat.com --- arch/x86/include/asm/iosf_mbi.h | 31 ++++++++++++++++++++++++++++++ arch/x86/platform/intel/iosf_mbi.c | 13 +++++++++++++ 2 files changed, 44 insertions(+) diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h index b41ee164930a..f6119d0e8cba 100644 --- a/arch/x86/include/asm/iosf_mbi.h +++ b/arch/x86/include/asm/iosf_mbi.h @@ -88,6 +88,33 @@ int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr); */ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask); +/** + * iosf_mbi_punit_acquire() - Acquire access to the P-Unit + * + * One some systems the P-Unit accesses the PMIC to change various voltages + * through the same bus as other kernel drivers use for e.g. battery monitoring. + * + * If a driver sends requests to the P-Unit which require the P-Unit to access + * the PMIC bus while another driver is also accessing the PMIC bus various bad + * things happen. + * + * To avoid these problems this function must be called before accessing the + * P-Unit or the PMIC, be it through iosf_mbi* functions or through other means. + * + * Note on these systems the i2c-bus driver will request a sempahore from the + * P-Unit for exclusive access to the PMIC bus when i2c drivers are accessing + * it, but this does not appear to be sufficient, we still need to avoid making + * certain P-Unit requests during the access window to avoid problems. + * + * This function locks a mutex, as such it may sleep. + */ +void iosf_mbi_punit_acquire(void); + +/** + * iosf_mbi_punit_release() - Release access to the P-Unit + */ +void iosf_mbi_punit_release(void); + #else /* CONFIG_IOSF_MBI is not enabled */ static inline bool iosf_mbi_available(void) @@ -115,6 +142,10 @@ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask) WARN(1, "IOSF_MBI driver not available"); return -EPERM; } + +static inline void iosf_mbi_punit_acquire(void) {} +static inline void iosf_mbi_punit_release(void) {} + #endif /* CONFIG_IOSF_MBI */ #endif /* IOSF_MBI_SYMS_H */ diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c index edf2c54bf131..ed24fa9fb65d 100644 --- a/arch/x86/platform/intel/iosf_mbi.c +++ b/arch/x86/platform/intel/iosf_mbi.c @@ -34,6 +34,7 @@ static struct pci_dev *mbi_pdev; static DEFINE_SPINLOCK(iosf_mbi_lock); +static DEFINE_MUTEX(iosf_mbi_punit_mutex); static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset) { @@ -190,6 +191,18 @@ bool iosf_mbi_available(void) } EXPORT_SYMBOL(iosf_mbi_available); +void iosf_mbi_punit_acquire(void) +{ + mutex_lock(&iosf_mbi_punit_mutex); +} +EXPORT_SYMBOL(iosf_mbi_punit_acquire); + +void iosf_mbi_punit_release(void) +{ + mutex_unlock(&iosf_mbi_punit_mutex); +} +EXPORT_SYMBOL(iosf_mbi_punit_release); + #ifdef CONFIG_IOSF_MBI_DEBUG static u32 dbg_mdr; static u32 dbg_mcr; From 5432fcaff3cee045ddbd94f29802ee3ac4f50283 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 22 Feb 2017 08:34:26 +0200 Subject: [PATCH 0374/1299] drm/i915: Store aux power domain in intel_dp The aux power domain only makes sense in the DP code. Storing it in struct intel_dp avoids some indirection. v2: Rebase Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170222063431.10060-2-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 50 ----------------------- drivers/gpu/drm/i915/intel_dp.c | 61 +++++++++++----------------- drivers/gpu/drm/i915/intel_drv.h | 3 +- 3 files changed, 24 insertions(+), 90 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5a91f6741fed..e9fb2edb9dd7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5587,26 +5587,6 @@ static enum intel_display_power_domain port_to_power_domain(enum port port) } } -static enum intel_display_power_domain port_to_aux_power_domain(enum port port) -{ - switch (port) { - case PORT_A: - return POWER_DOMAIN_AUX_A; - case PORT_B: - return POWER_DOMAIN_AUX_B; - case PORT_C: - return POWER_DOMAIN_AUX_C; - case PORT_D: - return POWER_DOMAIN_AUX_D; - case PORT_E: - /* FIXME: Check VBT for actual wiring of PORT E */ - return POWER_DOMAIN_AUX_D; - default: - MISSING_CASE(port); - return POWER_DOMAIN_AUX_A; - } -} - enum intel_display_power_domain intel_display_port_power_domain(struct intel_encoder *intel_encoder) { @@ -5634,36 +5614,6 @@ intel_display_port_power_domain(struct intel_encoder *intel_encoder) } } -enum intel_display_power_domain -intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder) -{ - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - struct intel_digital_port *intel_dig_port; - - switch (intel_encoder->type) { - case INTEL_OUTPUT_UNKNOWN: - case INTEL_OUTPUT_HDMI: - /* - * Only DDI platforms should ever use these output types. - * We can get here after the HDMI detect code has already set - * the type of the shared encoder. Since we can't be sure - * what's the status of the given connectors, play safe and - * run the DP detection too. - */ - WARN_ON_ONCE(!HAS_DDI(dev_priv)); - case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_EDP: - intel_dig_port = enc_to_dig_port(&intel_encoder->base); - return port_to_aux_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_DP_MST: - intel_dig_port = enc_to_mst(&intel_encoder->base)->primary; - return port_to_aux_power_domain(intel_dig_port->port); - default: - MISSING_CASE(intel_encoder->type); - return POWER_DOMAIN_AUX_A; - } -} - static u64 get_crtc_power_domains(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state) { diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 6c661c64cff6..aa0c79087bbf 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -396,14 +396,12 @@ static void pps_lock(struct intel_dp *intel_dp) struct intel_encoder *encoder = &intel_dig_port->base; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; /* * See vlv_power_sequencer_reset() why we need * a power domain reference here. */ - power_domain = intel_display_port_aux_power_domain(encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); mutex_lock(&dev_priv->pps_mutex); } @@ -414,12 +412,10 @@ static void pps_unlock(struct intel_dp *intel_dp) struct intel_encoder *encoder = &intel_dig_port->base; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; mutex_unlock(&dev_priv->pps_mutex); - power_domain = intel_display_port_aux_power_domain(encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } static void @@ -2005,9 +2001,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_stat_reg, pp_ctrl_reg; bool need_to_disable = !intel_dp->want_panel_vdd; @@ -2023,8 +2017,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) if (edp_have_panel_vdd(intel_dp)) return need_to_disable; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); DRM_DEBUG_KMS("Turning eDP port %c VDD on\n", port_name(intel_dig_port->port)); @@ -2082,8 +2075,6 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_stat_reg, pp_ctrl_reg; @@ -2113,8 +2104,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) if ((pp & PANEL_POWER_ON) == 0) intel_dp->panel_power_off_time = ktime_get_boottime(); - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } static void edp_panel_vdd_work(struct work_struct *__work) @@ -2227,11 +2217,8 @@ void intel_edp_panel_on(struct intel_dp *intel_dp) static void edp_panel_off(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_dp_to_dev(intel_dp); struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_ctrl_reg; @@ -2263,8 +2250,7 @@ static void edp_panel_off(struct intel_dp *intel_dp) wait_panel_off(intel_dp); /* We got a reference when we enabled the VDD. */ - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } void intel_edp_panel_off(struct intel_dp *intel_dp) @@ -4590,11 +4576,9 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = connector->dev; enum drm_connector_status status; - enum intel_display_power_domain power_domain; u8 sink_irq_vector = 0; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(to_i915(dev), power_domain); + intel_display_power_get(to_i915(dev), intel_dp->aux_power_domain); /* Can't disconnect eDP, but you can close the lid... */ if (is_edp(intel_dp)) @@ -4695,7 +4679,7 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); - intel_display_power_put(to_i915(dev), power_domain); + intel_display_power_put(to_i915(dev), intel_dp->aux_power_domain); return status; } @@ -4723,7 +4707,6 @@ intel_dp_force(struct drm_connector *connector) struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - enum intel_display_power_domain power_domain; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); @@ -4732,12 +4715,11 @@ intel_dp_force(struct drm_connector *connector) if (connector->status != connector_status_connected) return; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); intel_dp_set_edid(intel_dp); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); if (intel_encoder->type != INTEL_OUTPUT_EDP) intel_encoder->type = INTEL_OUTPUT_DP; @@ -4972,7 +4954,6 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; lockdep_assert_held(&dev_priv->pps_mutex); @@ -4986,8 +4967,7 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) * indefinitely. */ DRM_DEBUG_KMS("VDD left on by BIOS, adjusting state tracking\n"); - power_domain = intel_display_port_aux_power_domain(&intel_dig_port->base); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); edp_panel_vdd_schedule_off(intel_dp); } @@ -5061,10 +5041,8 @@ enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) { struct intel_dp *intel_dp = &intel_dig_port->dp; - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; enum irqreturn ret = IRQ_NONE; if (intel_dig_port->base.type != INTEL_OUTPUT_EDP && @@ -5093,8 +5071,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) return IRQ_NONE; } - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); if (intel_dp->is_mst) { if (intel_dp_check_mst_status(intel_dp) == -EINVAL) { @@ -5122,7 +5099,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) ret = IRQ_HANDLED; put_power: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); return ret; } @@ -5913,27 +5890,35 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, return false; } +/* Set up the hotplug pin and aux power domain. */ static void intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) { struct intel_encoder *encoder = &intel_dig_port->base; + struct intel_dp *intel_dp = &intel_dig_port->dp; - /* Set up the hotplug pin. */ switch (intel_dig_port->port) { case PORT_A: encoder->hpd_pin = HPD_PORT_A; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_A; break; case PORT_B: encoder->hpd_pin = HPD_PORT_B; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_B; break; case PORT_C: encoder->hpd_pin = HPD_PORT_C; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_C; break; case PORT_D: encoder->hpd_pin = HPD_PORT_D; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; break; case PORT_E: encoder->hpd_pin = HPD_PORT_E; + + /* FIXME: Check VBT for actual wiring of PORT E */ + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; break; default: MISSING_CASE(intel_dig_port->port); @@ -6014,6 +5999,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, connector->interlace_allowed = true; connector->doublescan_allowed = 0; + intel_dp_init_connector_port_info(intel_dig_port); + intel_dp_aux_init(intel_dp); INIT_DELAYED_WORK(&intel_dp->panel_vdd_work, @@ -6026,8 +6013,6 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, else intel_connector->get_hw_state = intel_connector_get_hw_state; - intel_dp_init_connector_port_info(intel_dig_port); - /* init MST on ports that can support it */ if (HAS_DP_MST(dev_priv) && !is_edp(intel_dp) && (port == PORT_B || port == PORT_C || port == PORT_D)) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index c3c8ed726717..f2c717a42f30 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -951,6 +951,7 @@ struct intel_dp { /* sink or branch descriptor */ struct intel_dp_desc desc; struct drm_dp_aux aux; + enum intel_display_power_domain aux_power_domain; uint8_t train_set[4]; int panel_power_up_delay; int panel_power_down_delay; @@ -1421,8 +1422,6 @@ void hsw_enable_ips(struct intel_crtc *crtc); void hsw_disable_ips(struct intel_crtc *crtc); enum intel_display_power_domain intel_display_port_power_domain(struct intel_encoder *intel_encoder); -enum intel_display_power_domain -intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_state *pipe_config); From 79f255a0c99cf5d871161959d91bd7ba711f949f Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 22 Feb 2017 08:34:27 +0200 Subject: [PATCH 0375/1299] drm/i915: Store encoder power domain in struct intel_encoder The encoder power domain is obviously tied to the encoder, so store it in struct intel_encoder. This avoids some indirection. v2: Rebase Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170222063431.10060-3-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_crt.c | 21 ++++++++----------- drivers/gpu/drm/i915/intel_ddi.c | 15 +++++++------- drivers/gpu/drm/i915/intel_display.c | 31 ++-------------------------- drivers/gpu/drm/i915/intel_dp.c | 8 +++---- drivers/gpu/drm/i915/intel_dp_mst.c | 1 + drivers/gpu/drm/i915/intel_drv.h | 4 ++-- drivers/gpu/drm/i915/intel_dsi.c | 9 ++++---- drivers/gpu/drm/i915/intel_dvo.c | 1 + drivers/gpu/drm/i915/intel_hdmi.c | 8 +++---- drivers/gpu/drm/i915/intel_lvds.c | 8 +++---- drivers/gpu/drm/i915/intel_sdvo.c | 1 + drivers/gpu/drm/i915/intel_tv.c | 1 + 12 files changed, 41 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 2bf5aca6e37c..8c82607294c6 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -69,12 +69,11 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crt *crt = intel_encoder_to_crt(encoder); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -91,7 +90,7 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -676,7 +675,6 @@ intel_crt_detect(struct drm_connector *connector, bool force) struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; - enum intel_display_power_domain power_domain; enum drm_connector_status status; struct intel_load_detect_pipe tmp; struct drm_modeset_acquire_ctx ctx; @@ -689,8 +687,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) if (dmi_check_system(intel_spurious_crt_detect)) return connector_status_disconnected; - power_domain = intel_display_port_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_encoder->power_domain); if (I915_HAS_HOTPLUG(dev_priv)) { /* We can not rely on the HPD pin always being correctly wired @@ -745,7 +742,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) drm_modeset_acquire_fini(&ctx); out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return status; } @@ -761,12 +758,10 @@ static int intel_crt_get_modes(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; - enum intel_display_power_domain power_domain; int ret; struct i2c_adapter *i2c; - power_domain = intel_display_port_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_encoder->power_domain); i2c = intel_gmbus_get_adapter(dev_priv, dev_priv->vbt.crt_ddc_pin); ret = intel_crt_ddc_get_modes(connector, i2c); @@ -778,7 +773,7 @@ static int intel_crt_get_modes(struct drm_connector *connector) ret = intel_crt_ddc_get_modes(connector, i2c); out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return ret; } @@ -904,6 +899,8 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->adpa_reg = adpa_reg; + crt->base.power_domain = POWER_DOMAIN_PORT_CRT; + crt->base.compute_config = intel_crt_compute_config; if (HAS_PCH_SPLIT(dev_priv)) { crt->base.disable = pch_disable_crt; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index e2947b7f5079..2cd1cc30b11b 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1349,12 +1349,11 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) enum port port = intel_ddi_get_encoder_port(intel_encoder); enum pipe pipe = 0; enum transcoder cpu_transcoder; - enum intel_display_power_domain power_domain; uint32_t tmp; bool ret; - power_domain = intel_display_port_power_domain(intel_encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + intel_encoder->power_domain)) return false; if (!intel_encoder->get_hw_state(intel_encoder, &pipe)) { @@ -1396,7 +1395,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) } out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return ret; } @@ -1407,13 +1406,12 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum port port = intel_ddi_get_encoder_port(encoder); - enum intel_display_power_domain power_domain; u32 tmp; int i; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -1470,7 +1468,7 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, "(PHY_CTL %08x)\n", port_name(port), tmp); } - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -2237,6 +2235,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_dig_port->max_lanes = max_lanes; intel_encoder->type = INTEL_OUTPUT_UNKNOWN; + intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); intel_encoder->cloneable = 0; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e9fb2edb9dd7..bad4a692d36f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5568,7 +5568,7 @@ static void i9xx_pfit_enable(struct intel_crtc *crtc) I915_WRITE(BCLRPAT(crtc->pipe), 0); } -static enum intel_display_power_domain port_to_power_domain(enum port port) +enum intel_display_power_domain intel_port_to_power_domain(enum port port) { switch (port) { case PORT_A: @@ -5587,33 +5587,6 @@ static enum intel_display_power_domain port_to_power_domain(enum port port) } } -enum intel_display_power_domain -intel_display_port_power_domain(struct intel_encoder *intel_encoder) -{ - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - struct intel_digital_port *intel_dig_port; - - switch (intel_encoder->type) { - case INTEL_OUTPUT_UNKNOWN: - /* Only DDI platforms should ever use this output type */ - WARN_ON_ONCE(!HAS_DDI(dev_priv)); - case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_HDMI: - case INTEL_OUTPUT_EDP: - intel_dig_port = enc_to_dig_port(&intel_encoder->base); - return port_to_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_DP_MST: - intel_dig_port = enc_to_mst(&intel_encoder->base)->primary; - return port_to_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_ANALOG: - return POWER_DOMAIN_PORT_CRT; - case INTEL_OUTPUT_DSI: - return POWER_DOMAIN_PORT_DSI; - default: - return POWER_DOMAIN_PORT_OTHER; - } -} - static u64 get_crtc_power_domains(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state) { @@ -5637,7 +5610,7 @@ static u64 get_crtc_power_domains(struct drm_crtc *crtc, drm_for_each_encoder_mask(encoder, dev, crtc_state->base.encoder_mask) { struct intel_encoder *intel_encoder = to_intel_encoder(encoder); - mask |= BIT_ULL(intel_display_port_power_domain(intel_encoder)); + mask |= BIT_ULL(intel_encoder->power_domain); } if (HAS_DDI(dev_priv) && crtc_state->has_audio) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index aa0c79087bbf..fd96a6cf7326 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2496,12 +2496,11 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, enum port port = dp_to_dig_port(intel_dp)->port; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -2537,7 +2536,7 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -6097,6 +6096,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_dig_port->max_lanes = 4; intel_encoder->type = INTEL_OUTPUT_DP; + intel_encoder->power_domain = intel_port_to_power_domain(port); if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) intel_encoder->crtc_mask = 1 << 2; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 6a85d388f936..d94fd4b80963 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -548,6 +548,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum DRM_MODE_ENCODER_DPMST, "DP-MST %c", pipe_name(pipe)); intel_encoder->type = INTEL_OUTPUT_DP_MST; + intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port = intel_dig_port->port; intel_encoder->crtc_mask = 0x7; intel_encoder->cloneable = 0; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f2c717a42f30..82f8f828539b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -249,6 +249,7 @@ struct intel_encoder { void (*suspend)(struct intel_encoder *); int crtc_mask; enum hpd_pin hpd_pin; + enum intel_display_power_domain power_domain; /* for communication with audio component; protected by av_mutex */ const struct drm_connector *audio_connector; }; @@ -1420,8 +1421,7 @@ int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); bool intel_crtc_active(struct intel_crtc *crtc); void hsw_enable_ips(struct intel_crtc *crtc); void hsw_disable_ips(struct intel_crtc *crtc); -enum intel_display_power_domain -intel_display_port_power_domain(struct intel_encoder *intel_encoder); +enum intel_display_power_domain intel_port_to_power_domain(enum port port); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_state *pipe_config); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index c26fe4fc0819..d7ffb9ac3c8a 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -775,14 +775,13 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum intel_display_power_domain power_domain; enum port port; bool active = false; DRM_DEBUG_KMS("\n"); - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; /* @@ -838,7 +837,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, } out_put_power: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return active; } @@ -1516,6 +1515,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) intel_connector->get_hw_state = intel_connector_get_hw_state; intel_encoder->port = port; + /* * On BYT/CHV, pipe A maps to MIPI DSI port A, pipe B maps to MIPI DSI * port C. BXT isn't limited like this. @@ -1603,6 +1603,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) } intel_encoder->type = INTEL_OUTPUT_DSI; + intel_encoder->power_domain = POWER_DOMAIN_PORT_DSI; intel_encoder->cloneable = 0; drm_connector_init(dev, connector, &intel_dsi_connector_funcs, DRM_MODE_CONNECTOR_DSI); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 50da89dcb92b..6025839ed3b7 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -515,6 +515,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) "DVO %c", port_name(port)); intel_encoder->type = INTEL_OUTPUT_DVO; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; intel_encoder->crtc_mask = (1 << 0) | (1 << 1); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 6c83442e2152..048f76d329bb 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -901,12 +901,11 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -926,7 +925,7 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -1980,6 +1979,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, } intel_encoder->type = INTEL_OUTPUT_HDMI; + intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 9ca4dc4d2378..8b942ef2b3ec 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -91,12 +91,11 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -114,7 +113,7 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -1066,6 +1065,7 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) intel_connector_attach_encoder(intel_connector, intel_encoder); intel_encoder->type = INTEL_OUTPUT_LVDS; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; intel_encoder->cloneable = 0; if (HAS_PCH_SPLIT(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 2ad13903a054..816a6f5a3fd9 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2981,6 +2981,7 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv, /* encoder type will be decided later */ intel_encoder = &intel_sdvo->base; intel_encoder->type = INTEL_OUTPUT_SDVO; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; drm_encoder_init(&dev_priv->drm, &intel_encoder->base, &intel_sdvo_enc_funcs, 0, diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index eb692e4ffe01..6ed1a3ce47b7 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1621,6 +1621,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_connector_attach_encoder(intel_connector, intel_encoder); intel_encoder->type = INTEL_OUTPUT_TVOUT; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; intel_encoder->crtc_mask = (1 << 0) | (1 << 1); intel_encoder->cloneable = 0; From 9a5da00b7ce0dec87fc0707c5cd17fed03e183cd Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 24 Feb 2017 16:18:45 +0200 Subject: [PATCH 0376/1299] drm/i915: Check encoder type in enc_to_dig_port() Don't allow conversion from arbitraty encoder types to a digital port. Calling enc_to_dig_port() with the wrong encoder may seem far fetched, but certain paths of the ddi code may be called with hasell's analog encoder and the conversion is wrong for DP mst encoders too, so safe guard against it. v2: Warn if encoder type is unknown and device is not DDI. (Imre) v3: Remove stray hunk from rebase error. (Ander) Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170224141845.5836-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_drv.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 82f8f828539b..7f35e3fa68db 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1121,7 +1121,19 @@ intel_attached_encoder(struct drm_connector *connector) static inline struct intel_digital_port * enc_to_dig_port(struct drm_encoder *encoder) { - return container_of(encoder, struct intel_digital_port, base.base); + struct intel_encoder *intel_encoder = to_intel_encoder(encoder); + + switch (intel_encoder->type) { + case INTEL_OUTPUT_UNKNOWN: + WARN_ON(!HAS_DDI(to_i915(encoder->dev))); + case INTEL_OUTPUT_DP: + case INTEL_OUTPUT_EDP: + case INTEL_OUTPUT_HDMI: + return container_of(encoder, struct intel_digital_port, + base.base); + default: + return NULL; + } } static inline struct intel_dp_mst_encoder * From f4f4b59be52b28823642f82312e15369d783d858 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 22 Feb 2017 08:34:29 +0200 Subject: [PATCH 0377/1299] drm/i915/glk: Implement WaDDIIOTimeout Implement WaDDIIOTimeout to avoid a timeout when enabling the DDI IO power domains. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170222063431.10060-5-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ drivers/gpu/drm/i915/i915_reg.h | 5 +++++ drivers/gpu/drm/i915/intel_pm.c | 10 ++++++++++ 3 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0a6fdb024b86..c2472ea762da 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2787,6 +2787,12 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_KBL_REVID(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) && IS_REVID(dev_priv, since, until)) +#define GLK_REVID_A0 0x0 +#define GLK_REVID_A1 0x1 + +#define IS_GLK_REVID(dev_priv, since, until) \ + (IS_GEMINILAKE(dev_priv) && IS_REVID(dev_priv, since, until)) + /* * The genX designation typically refers to the render engine, so render * capability related checks should use IS_GEN, while display and other checks diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ad666933a88d..bdce90084d43 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6479,6 +6479,11 @@ enum { #define CHICKEN_PAR2_1 _MMIO(0x42090) #define KVM_CONFIG_CHANGE_NOTIFICATION_SELECT (1 << 14) +#define CHICKEN_MISC_2 _MMIO(0x42084) +#define GLK_CL0_PWR_DOWN (1 << 10) +#define GLK_CL1_PWR_DOWN (1 << 11) +#define GLK_CL2_PWR_DOWN (1 << 12) + #define _CHICKEN_PIPESL_1_A 0x420b0 #define _CHICKEN_PIPESL_1_B 0x420b4 #define HSW_FBCQ_DIS (1 << 22) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 169c4908ad5b..b38707efb620 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -114,6 +114,16 @@ static void glk_init_clock_gating(struct drm_i915_private *dev_priv) */ I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | PWM1_GATING_DIS | PWM2_GATING_DIS); + + /* WaDDIIOTimeout:glk */ + if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) { + u32 val = I915_READ(CHICKEN_MISC_2); + val &= ~(GLK_CL0_PWR_DOWN | + GLK_CL1_PWR_DOWN | + GLK_CL2_PWR_DOWN); + I915_WRITE(CHICKEN_MISC_2, val); + } + } static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv) From 71cc22e5db8994ff69c947a296235e8547039188 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 22 Feb 2017 08:34:30 +0200 Subject: [PATCH 0378/1299] drm/i915/glk: Don't enable DDI IO power domains during init In Geminilake, the DDI IO power domains can't be enabled before a DPLL is running and mapped to the appropriate DDI. At least on Geminilake, attempting to enable those during init will lead to a timeout. The failure to enable the power domain also causes issues with the state verifier during resume from suspend. After all the init power domains are enabled, the call to intel_power_domains_sync_hw() from the resume path will cause the hw_enabled field on the respective power wells to be false while the usage count remains above zero. Further attempts to enable the power domain caused by a modeset will simply update the usage count without doing anything else. When the state verifier attempts to read the state of a DDI encoder, intel_display_power_get_if_enabled() returns false, leading to the following WARN: WARNING: CPU: 3 PID: 1743 at drivers/gpu/drm/i915/intel_display.c:7001 verify_connector_state.isra.80+0x26c/0x2b0 [i915] attached crtc is active, but connector isn't Modules linked in: i915(E) tun ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ebtable_broute bridge stp llc ebtable_nat ip6table_mangle ip6table_security ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_raw iptable_mangle iptable_security iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables x86_pkg_temp_thermal coretemp kvm_intel kvm i2c_algo_bit drm_kms_helper irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel drm shpchp tpm_tis tpm_tis_core tpm nfsd auth_rpcgss nfs_acl lockd grace sunrpc crc32c_intel serio_raw [last unloaded: i915] CPU: 3 PID: 1743 Comm: kworker/u8:22 Tainted: G W E 4.10.0-rc3ander+ #300 Hardware name: Intel Corp. Geminilake/GLK RVP1 DDR4 (05), BIOS GELKRVPA.X64.0023.B40.1611302145 11/30/2016 Workqueue: events_unbound async_run_entry_fn Call Trace: dump_stack+0x86/0xc3 __warn+0xcb/0xf0 warn_slowpath_fmt+0x5f/0x80 verify_connector_state.isra.80+0x26c/0x2b0 [i915] intel_atomic_commit_tail+0x520/0x1000 [i915] ? remove_wait_queue+0x70/0x70 intel_atomic_commit+0x3f8/0x520 [i915] ? intel_runtime_pm_put+0x6e/0xa0 [i915] drm_atomic_commit+0x4b/0x50 [drm] __intel_display_resume+0x72/0xc0 [i915] intel_display_resume+0x107/0x150 [i915] i915_drm_resume+0xe0/0x180 [i915] i915_pm_restore+0x1e/0x30 [i915] i915_pm_resume+0xe/0x10 [i915] pci_pm_resume+0x64/0xa0 dpm_run_callback+0xa1/0x2a0 ? pci_pm_thaw+0x90/0x90 device_resume+0xe3/0x200 async_resume+0x1d/0x50 async_run_entry_fn+0x39/0x170 process_one_work+0x212/0x670 ? process_one_work+0x197/0x670 worker_thread+0x4e/0x490 kthread+0x101/0x140 ? process_one_work+0x670/0x670 ? kthread_create_on_node+0x60/0x60 ret_from_fork+0x2a/0x40 Cc: David Weinehall Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: David Weinehall Link: http://patchwork.freedesktop.org/patch/msgid/20170222063431.10060-6-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 6b52258152b7..514ef56f562d 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -452,14 +452,11 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DDI_A_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT_ULL(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES)) #define GLK_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT_ULL(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES)) #define GLK_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT_ULL(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES)) #define GLK_DPIO_CMN_A_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ From 62b695662a2413286a25df418b0af665bf2899c5 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 24 Feb 2017 16:19:59 +0200 Subject: [PATCH 0379/1299] drm/i915: Only enable DDI IO power domains after enabling DPLL According to bspec, the DDI IO power domains should be enabled after enabling the DPLL and mapping it to the DDI. The current order doesn't seem to create problems with Skylake and Kabylake, but causes enable timeouts in Geminilake. v2: Rebase. - Take power domain references before sanitizing encoders. (Imre) - Add comment to get_encoder_power_domains() defition. (Ander) v3: Don't put the domain if called with HSW/BDW's analog encoder. (CI) v4: Put IO power domain before unmapping DPLL. (Imre) - Change return type of intel_ddi_get_power_domains() to u64. (Imre) Cc: David Weinehall Cc: Imre Deak Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: David Weinehall # v1 Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170224141959.5955-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 5 ++ drivers/gpu/drm/i915/intel_ddi.c | 49 ++++++++++++++++++ drivers/gpu/drm/i915/intel_display.c | 20 ++++++++ drivers/gpu/drm/i915/intel_drv.h | 4 ++ drivers/gpu/drm/i915/intel_runtime_pm.c | 68 ++++++++++++++----------- 5 files changed, 117 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c2472ea762da..933874ddea75 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -344,6 +344,11 @@ enum intel_display_power_domain { POWER_DOMAIN_PORT_DDI_C_LANES, POWER_DOMAIN_PORT_DDI_D_LANES, POWER_DOMAIN_PORT_DDI_E_LANES, + POWER_DOMAIN_PORT_DDI_A_IO, + POWER_DOMAIN_PORT_DDI_B_IO, + POWER_DOMAIN_PORT_DDI_C_IO, + POWER_DOMAIN_PORT_DDI_D_IO, + POWER_DOMAIN_PORT_DDI_E_IO, POWER_DOMAIN_PORT_DSI, POWER_DOMAIN_PORT_CRT, POWER_DOMAIN_PORT_OTHER, diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 2cd1cc30b11b..a7c08d7027fa 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1473,6 +1473,17 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, return ret; } +static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder) +{ + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + enum pipe pipe; + + if (intel_ddi_get_hw_state(encoder, &pipe)) + return BIT_ULL(dig_port->ddi_io_power_domain); + + return 0; +} + void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc) { struct drm_crtc *crtc = &intel_crtc->base; @@ -1703,6 +1714,7 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_ddi_get_encoder_port(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); intel_dp_set_link_params(intel_dp, link_rate, lane_count, link_mst); @@ -1710,6 +1722,9 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_edp_panel_on(intel_dp); intel_ddi_clk_select(encoder, pll); + + intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); + intel_prepare_dp_ddi_buffers(encoder); intel_ddi_init_dp_buf_reg(encoder); intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); @@ -1729,9 +1744,13 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, struct drm_encoder *drm_encoder = &encoder->base; enum port port = intel_ddi_get_encoder_port(encoder); int level = intel_ddi_hdmi_level(dev_priv, port); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); intel_ddi_clk_select(encoder, pll); + + intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); + intel_prepare_hdmi_ddi_buffers(encoder); if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level); @@ -1775,6 +1794,7 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, struct drm_encoder *encoder = &intel_encoder->base; struct drm_i915_private *dev_priv = to_i915(encoder->dev); enum port port = intel_ddi_get_encoder_port(intel_encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); int type = intel_encoder->type; uint32_t val; bool wait = false; @@ -1803,6 +1823,9 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, intel_edp_panel_off(intel_dp); } + if (dig_port) + intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + if (IS_GEN9_BC(dev_priv)) I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) | DPLL_CTRL2_DDI_CLK_OFF(port))); @@ -2211,12 +2234,38 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->get_hw_state = intel_ddi_get_hw_state; intel_encoder->get_config = intel_ddi_get_config; intel_encoder->suspend = intel_dp_encoder_suspend; + intel_encoder->get_power_domains = intel_ddi_get_power_domains; intel_dig_port->port = port; intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES); + switch (port) { + case PORT_A: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_A_IO; + break; + case PORT_B: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_B_IO; + break; + case PORT_C: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_C_IO; + break; + case PORT_D: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_D_IO; + break; + case PORT_E: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_E_IO; + break; + default: + MISSING_CASE(port); + } + /* * Bspec says that DDI_A_4_LANES is the only supported configuration * for Broxton. Yet some BIOS fail to set this bit on port A if eDP diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bad4a692d36f..93371c2377b2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15412,6 +15412,24 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) } } +static void +get_encoder_power_domains(struct drm_i915_private *dev_priv) +{ + struct intel_encoder *encoder; + + for_each_intel_encoder(&dev_priv->drm, encoder) { + u64 get_domains; + enum intel_display_power_domain domain; + + if (!encoder->get_power_domains) + continue; + + get_domains = encoder->get_power_domains(encoder); + for_each_power_domain(domain, get_domains) + intel_display_power_get(dev_priv, domain); + } +} + /* Scan out the current hw modeset state, * and sanitizes it to the current state */ @@ -15427,6 +15445,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev) intel_modeset_readout_hw_state(dev); /* HW state is read out, now we need to sanitize this mess. */ + get_encoder_power_domains(dev_priv); + for_each_intel_encoder(dev, encoder) { intel_sanitize_encoder(encoder); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7f35e3fa68db..741beba0c9c0 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -241,6 +241,9 @@ struct intel_encoder { * be set correctly before calling this function. */ void (*get_config)(struct intel_encoder *, struct intel_crtc_state *pipe_config); + /* Returns a mask of power domains that need to be referenced as part + * of the hardware state readout code. */ + u64 (*get_power_domains)(struct intel_encoder *encoder); /* * Called during system suspend after all pending requests for the * encoder are flushed (for example for DP AUX transactions) and @@ -1027,6 +1030,7 @@ struct intel_digital_port { enum irqreturn (*hpd_pulse)(struct intel_digital_port *, bool); bool release_cl2_override; uint8_t max_lanes; + enum intel_display_power_domain ddi_io_power_domain; }; struct intel_dp_mst_encoder { diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 514ef56f562d..012bc358a33a 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -93,6 +93,16 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "PORT_DDI_D_LANES"; case POWER_DOMAIN_PORT_DDI_E_LANES: return "PORT_DDI_E_LANES"; + case POWER_DOMAIN_PORT_DDI_A_IO: + return "PORT_DDI_A_IO"; + case POWER_DOMAIN_PORT_DDI_B_IO: + return "PORT_DDI_B_IO"; + case POWER_DOMAIN_PORT_DDI_C_IO: + return "PORT_DDI_C_IO"; + case POWER_DOMAIN_PORT_DDI_D_IO: + return "PORT_DDI_D_IO"; + case POWER_DOMAIN_PORT_DDI_E_IO: + return "PORT_DDI_E_IO"; case POWER_DOMAIN_PORT_DSI: return "PORT_DSI"; case POWER_DOMAIN_PORT_CRT: @@ -385,18 +395,18 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUDIO) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_A_E_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ +#define SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ +#define SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ +#define SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_D_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ +#define SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ @@ -451,12 +461,12 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUDIO) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define GLK_DISPLAY_DDI_A_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES)) -#define GLK_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES)) -#define GLK_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES)) +#define GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO)) +#define GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO)) +#define GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO)) #define GLK_DPIO_CMN_A_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ @@ -2114,26 +2124,26 @@ static struct i915_power_well skl_power_wells[] = { .id = SKL_DISP_PW_2, }, { - .name = "DDI A/E power well", - .domains = SKL_DISPLAY_DDI_A_E_POWER_DOMAINS, + .name = "DDI A/E IO power well", + .domains = SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_A_E, }, { - .name = "DDI B power well", - .domains = SKL_DISPLAY_DDI_B_POWER_DOMAINS, + .name = "DDI B IO power well", + .domains = SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_B, }, { - .name = "DDI C power well", - .domains = SKL_DISPLAY_DDI_C_POWER_DOMAINS, + .name = "DDI C IO power well", + .domains = SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_C, }, { - .name = "DDI D power well", - .domains = SKL_DISPLAY_DDI_D_POWER_DOMAINS, + .name = "DDI D IO power well", + .domains = SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_D, }, @@ -2246,20 +2256,20 @@ static struct i915_power_well glk_power_wells[] = { .id = GLK_DISP_PW_AUX_C, }, { - .name = "DDI A power well", - .domains = GLK_DISPLAY_DDI_A_POWER_DOMAINS, + .name = "DDI A IO power well", + .domains = GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = GLK_DISP_PW_DDI_A, }, { - .name = "DDI B power well", - .domains = GLK_DISPLAY_DDI_B_POWER_DOMAINS, + .name = "DDI B IO power well", + .domains = GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_B, }, { - .name = "DDI C power well", - .domains = GLK_DISPLAY_DDI_C_POWER_DOMAINS, + .name = "DDI C IO power well", + .domains = GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_C, }, From ca7a45ba6fb9e7ceca56d10b91db29c2f3451a2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Winiarski?= Date: Mon, 27 Feb 2017 12:22:56 +0100 Subject: [PATCH 0380/1299] drm/i915/skl: Add missing SKL ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Used by production device: Intel(R) Iris(TM) Graphics P555 Cc: Cc: Mika Kuoppala Cc: Rodrigo Vivi Signed-off-by: Michał Winiarski Reviewed-by: Rodrigo Vivi Reviewed-by: Mika Kuoppala Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170227112256.20060-1-michal.winiarski@intel.com --- include/drm/i915_pciids.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index a1dd21d6b723..466c71592a6f 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -265,7 +265,8 @@ INTEL_VGA_DEVICE(0x1923, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x1927, info), /* ULT GT3 */ \ - INTEL_VGA_DEVICE(0x192B, info) /* Halo GT3 */ \ + INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \ + INTEL_VGA_DEVICE(0x192D, info) /* SRV GT3 */ #define INTEL_SKL_GT4_IDS(info) \ INTEL_VGA_DEVICE(0x1932, info), /* DT GT4 */ \ From 69060d96440b83da151989d83fd180eb05d84780 Mon Sep 17 00:00:00 2001 From: Kelvin Gardiner Date: Fri, 24 Feb 2017 11:15:24 -0800 Subject: [PATCH 0381/1299] drm/i915/bdw: Do not write the replay bit of the ring mode register The replay bit of the ring mode register is not a valid bit for Gen8+. Do not write to this bit. Signed-off-by: Kelvin Gardiner Cc: Joonas Lahtinen Cc: Ceraolo Spurio, Daniele Reviewed-by: Daniele Ceraolo Spurio [Joonas: Fixed commit message line to be under 72 chars] Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1487963724-4824-1-git-send-email-kelvin.gardiner@intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1c6c71673bfa..f9a8545474bc 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1178,7 +1178,6 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); I915_WRITE(RING_MODE_GEN7(engine), - _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); I915_WRITE(RING_HWS_PGA(engine->mmio_base), engine->status_page.ggtt_offset); From bf75d59eff679d2e2b7af5c6958a088f8a458f7a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 12:26:52 +0000 Subject: [PATCH 0382/1299] drm/i915: Only unwind the local pgtable layer if empty Only if we allocated the layer and the lower level failed should we remove this layer when unwinding. Otherwise we ignore the overlapping entries by overwriting the old layer with scratch. Fixes: c5d092a4293f ("drm/i915: Remove bitmap tracking for used-pml4") Fixes: e2b763caa6eb ("drm/i915: Remove bitmap tracking for used-pdpes") Reported-by: Matthew Auld Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99947 Testcase: igt/drv_selftest/live_gtt Signed-off-by: Chris Wilson Cc: Matthew Auld Tested-by: Matthew Auld Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170227122654.27651-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 37 ++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 6fdbd5ae4fcb..c3a121ab8914 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -716,10 +716,13 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, u32 pde; gen8_for_each_pde(pt, pd, start, length, pde) { + GEM_BUG_ON(pt == vm->scratch_pt); + if (!gen8_ppgtt_clear_pt(vm, pt, start, length)) continue; gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde); + GEM_BUG_ON(!pd->used_pdes); pd->used_pdes--; free_pt(vm, pt); @@ -755,10 +758,13 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, unsigned int pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { + GEM_BUG_ON(pd == vm->scratch_pd); + if (!gen8_ppgtt_clear_pd(vm, pd, start, length)) continue; gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + GEM_BUG_ON(!pdp->used_pdpes); pdp->used_pdpes--; free_pd(vm, pd); @@ -801,6 +807,8 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { + GEM_BUG_ON(pdp == vm->scratch_pdp); + if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length)) continue; @@ -1089,6 +1097,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, gen8_ppgtt_set_pde(vm, pd, pt, pde); pd->used_pdes++; + GEM_BUG_ON(pd->used_pdes > I915_PDES); } pt->used_ptes += gen8_pte_count(start, length); @@ -1118,21 +1127,25 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, gen8_initialize_pd(vm, pd); gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); pdp->used_pdpes++; + GEM_BUG_ON(pdp->used_pdpes > I915_PDPES_PER_PDP(vm)); mark_tlbs_dirty(i915_vm_to_ppgtt(vm)); } ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); - if (unlikely(ret)) { - gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); - pdp->used_pdpes--; - free_pd(vm, pd); - goto unwind; - } + if (unlikely(ret)) + goto unwind_pd; } return 0; +unwind_pd: + if (!pd->used_pdes) { + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + GEM_BUG_ON(!pdp->used_pdpes); + pdp->used_pdpes--; + free_pd(vm, pd); + } unwind: gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); return -ENOMEM; @@ -1166,15 +1179,17 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, } ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); - if (unlikely(ret)) { - gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); - free_pdp(vm, pdp); - goto unwind; - } + if (unlikely(ret)) + goto unwind_pdp; } return 0; +unwind_pdp: + if (!pdp->used_pdpes) { + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + free_pdp(vm, pdp); + } unwind: gen8_ppgtt_clear_4lvl(vm, from, start - from); return -ENOMEM; From 2f7399af94a2d6f6459373969d266708e6a7fc38 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 12:26:53 +0000 Subject: [PATCH 0383/1299] drm/i915: Unwind vma->pages allocation upon failure If we fail to allocate the ppgtt range after allocating the pages for the vma, we should unwind the local allocation before reporting back the failure. Fixes: ff685975d97f ("drm/i915: Move allocate_va_range to GTT") Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170227122654.27651-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c3a121ab8914..875a48b9d05a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2312,7 +2312,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, vma->node.start, vma->node.size); if (ret) - return ret; + goto err_pages; } appgtt->base.insert_entries(&appgtt->base, @@ -2329,6 +2329,17 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, } return 0; + +err_pages: + if (!(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND))) { + if (vma->pages != vma->obj->mm.pages) { + GEM_BUG_ON(!vma->pages); + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; + } + return ret; } static void aliasing_gtt_unbind_vma(struct i915_vma *vma) From 31c7effa39f21f0fea1b3250ae9ff32b9c7e1ae5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 12:26:54 +0000 Subject: [PATCH 0384/1299] drm/i915: Remove the vma from the drm_mm if binding fails As we track whether a vma has been inserted into the drm_mm using the vma->flags, if we fail to bind the vma into the GTT we do not update those bits and will attempt to reinsert the vma into the drm_mm on future passes. To prevent that, we want to unwind i915_vma_insert() if we fail in our attempt to bind. Fixes: 59bfa1248e22 ("drm/i915: Start passing around i915_vma from execbuffer") Testcase: igt/drv_selftest/live_gtt Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: # v4.9+ Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170227122654.27651-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 57 +++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 6e9eade304b8..1aba47024656 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -509,10 +509,36 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) return ret; } +static void +i915_vma_remove(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + + /* Since the unbound list is global, only move to that list if + * no more VMAs exist. + */ + if (--obj->bind_count == 0) + list_move_tail(&obj->global_link, + &to_i915(obj->base.dev)->mm.unbound_list); + + /* And finally now the object is completely decoupled from this vma, + * we can drop its hold on the backing storage and allow it to be + * reaped by the shrinker. + */ + i915_gem_object_unpin_pages(obj); + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); +} + int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - unsigned int bound = vma->flags; + const unsigned int bound = vma->flags; int ret; lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); @@ -521,18 +547,18 @@ int __i915_vma_do_pin(struct i915_vma *vma, if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { ret = -EBUSY; - goto err; + goto err_unpin; } if ((bound & I915_VMA_BIND_MASK) == 0) { ret = i915_vma_insert(vma, size, alignment, flags); if (ret) - goto err; + goto err_unpin; } ret = i915_vma_bind(vma, vma->obj->cache_level, flags); if (ret) - goto err; + goto err_remove; if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); @@ -541,7 +567,12 @@ int __i915_vma_do_pin(struct i915_vma *vma, GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; -err: +err_remove: + if ((bound & I915_VMA_BIND_MASK) == 0) { + GEM_BUG_ON(vma->pages); + i915_vma_remove(vma); + } +err_unpin: __i915_vma_unpin(vma); return ret; } @@ -654,9 +685,6 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - if (vma->pages != obj->mm.pages) { GEM_BUG_ON(!vma->pages); sg_free_table(vma->pages); @@ -664,18 +692,7 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->pages = NULL; - /* Since the unbound list is global, only move to that list if - * no more VMAs exist. */ - if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); - - /* And finally now the object is completely decoupled from this vma, - * we can drop its hold on the backing storage and allow it to be - * reaped by the shrinker. - */ - i915_gem_object_unpin_pages(obj); - GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); + i915_vma_remove(vma); destroy: if (unlikely(i915_vma_is_closed(vma))) From d825adb48cf9bf9e3f5cb1d927e2827f8c2abee4 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sun, 26 Feb 2017 16:15:21 +0900 Subject: [PATCH 0385/1299] xenbus: Remove duplicate inclusion of linux/init.h This patch remove duplicate inclusion of linux/init.h in xenbus_dev_frontend.c. Confirm successfully compile after remove the line. Signed-off-by: Masanari Iida Reviewed-by: Juergen Gross --- drivers/xen/xenbus/xenbus_dev_frontend.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 4d343eed08f5..1f4733b80c87 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -55,7 +55,6 @@ #include #include #include -#include #include #include From 6067a27d1f0184596d51decbac1c1fdc4acb012f Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 15 Feb 2017 15:52:59 +0200 Subject: [PATCH 0386/1299] drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain Baytrails, namely the 4 cpu core variants, have been plaqued by spurious system hangs, mostly occurring with light loads. Multiple bisects by various people point to a commit which changes the reclocking strategy for Baytrail to follow its bigger brethen: commit 8fb55197e64d ("drm/i915: Agressive downclocking on Baytrail") There is also a review comment attached to this commit from Deepak S on avoiding punit access on Cherryview and thus it was excluded on common reclocking path. By taking the same approach and omitting the punit access by not tweaking the thresholds when the hardware has been asked to move into different frequency, considerable gains in stability have been observed. With J1900 box, light render/video load would end up in system hang in usually less than 12 hours. With this patch applied, the cumulative uptime has now been 34 days without issues. To provoke system hang, light loads on both render and bsd engines in parallel have been used: glxgears >/dev/null 2>/dev/null & mpv --vo=vaapi --hwdec=vaapi --loop=inf vid.mp4 So far, author has not witnessed system hang with above load and this patch applied. Reports from the tenacious people at kernel bugzilla are also promising. Considering that the punit access frequency with this patch is considerably less, there is a possibility that this will push the, still unknown, root cause past the triggering point on most loads. But as we now can reliably reproduce the hang independently, we can reduce the pain that users are having and use a static thresholds until a root cause is found. v3: don't break debugfs and simplification (Chris Wilson) References: https://bugzilla.kernel.org/show_bug.cgi?id=109051 Cc: Chris Wilson Cc: Ville Syrjälä Cc: Len Brown Cc: Daniel Vetter Cc: Jani Nikula Cc: fritsch@xbmc.org Cc: miku@iki.fi Cc: Ezequiel Garcia CC: Michal Feix Cc: Hans de Goede Cc: Deepak S Cc: Jarkko Nikula Cc: # v4.2+ Acked-by: Daniel Vetter Acked-by: Chris Wilson Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1487166779-26945-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b38707efb620..ac0cd82f61e5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4879,6 +4879,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) break; } + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. + */ + if (IS_VALLEYVIEW(dev_priv)) + goto skip_hw_write; + I915_WRITE(GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(dev_priv, ei_up)); I915_WRITE(GEN6_RP_UP_THRESHOLD, @@ -4899,6 +4905,7 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); +skip_hw_write: dev_priv->rps.power = new_power; dev_priv->rps.up_threshold = threshold_up; dev_priv->rps.down_threshold = threshold_down; From 20fe17aa52dcafd3bdeb9e37281d027b0c1c5a15 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 13:59:11 +0000 Subject: [PATCH 0387/1299] drm/i915: Remove redundant TLB invalidate on switching contexts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are required to reload the TLBs around context switches (MI_SET_CONTEXT specifically) and the recommendation is do that before the MI_SET_CONTEXT so that it is serialised with the switch and not forgotten: [DevSNB] If Flush TLB invalidation Mode is enabled it’s the driver’s responsibility to invalidate the TLBs at least once after the previous context switch after any GTT mappings changed (including new GTT entries). This can be done by a pipeline PIPE_CONTROL with TLB inv bit set immediately before MI_SET_CONTEXT. However, we already do an unconditional TLB invalidate before every batch so this condition is satifisfied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170227135913.8056-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 99c46f4dbde6..521e6f4705b1 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -607,17 +607,6 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) 0; int len; - /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB - * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value - * explicitly, so we rely on the value at ring init, stored in - * itlb_before_ctx_switch. - */ - if (IS_GEN6(dev_priv)) { - int ret = engine->emit_flush(req, EMIT_INVALIDATE); - if (ret) - return ret; - } - /* These flags are for resource streamer on HSW+ */ if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8) flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN); From 12946eceeb97a36a7087995646e077e9872ac99b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 13:59:12 +0000 Subject: [PATCH 0388/1299] drm/i915: Remove redundant TLB invalidate on switching ppgtt We are required to reload the TLBs around ppgtt switches. However, we already do an unconditional TLB invalidate before every batch and a flush afterwards, so this condition is already satisfied without extra flushes around the LRI instructions. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170227135913.8056-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 875a48b9d05a..e0c9542a90c1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1464,13 +1464,8 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, { struct intel_engine_cs *engine = req->engine; u32 *cs; - int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); - if (ret) - return ret; - cs = intel_ring_begin(req, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1491,13 +1486,8 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, { struct intel_engine_cs *engine = req->engine; u32 *cs; - int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); - if (ret) - return ret; - cs = intel_ring_begin(req, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1510,13 +1500,6 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, *cs++ = MI_NOOP; intel_ring_advance(req, cs); - /* XXX: RCS is the only one to auto invalidate the TLBs? */ - if (engine->id != RCS) { - ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); - if (ret) - return ret; - } - return 0; } From afeddf50811253ee512fb9192cfb443891929776 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 13:59:13 +0000 Subject: [PATCH 0389/1299] drm/i915: Reduce context alignment No hardware was ever shipped that needed more than 4096 byte alignment and future hardware will not use this legacy path. So reduce the alignment to make it easier and quicker to launch workloads. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170227135913.8056-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 17 ----------------- drivers/gpu/drm/i915/i915_gem_context.h | 2 -- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ++- 3 files changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 521e6f4705b1..baceca14f5e0 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -92,21 +92,6 @@ #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -/* This is a HW constraint. The value below is the largest known requirement - * I've seen in a spec to date, and that was a workaround for a non-shipping - * part. It should be safe to decrease this, but it's more future proof as is. - */ -#define GEN6_CONTEXT_ALIGN (64<<10) -#define GEN7_CONTEXT_ALIGN I915_GTT_MIN_ALIGNMENT - -static size_t get_context_alignment(struct drm_i915_private *dev_priv) -{ - if (IS_GEN6(dev_priv)) - return GEN6_CONTEXT_ALIGN; - - return GEN7_CONTEXT_ALIGN; -} - static int get_context_size(struct drm_i915_private *dev_priv) { int ret; @@ -281,8 +266,6 @@ __create_hw_context(struct drm_i915_private *dev_priv, list_add_tail(&ctx->link, &dev_priv->context_list); ctx->i915 = dev_priv; - ctx->ggtt_alignment = get_context_alignment(dev_priv); - if (dev_priv->hw_context_size) { struct drm_i915_gem_object *obj; struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 0ac750b90f3d..81268c9770a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -140,8 +140,6 @@ struct i915_gem_context { */ int priority; - /** ggtt_alignment: alignment restriction for context objects */ - u32 ggtt_alignment; /** ggtt_offset_bias: placement restriction for context objects */ u32 ggtt_offset_bias; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f62afffef682..4a864f8c9387 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1431,7 +1431,8 @@ static int context_pin(struct i915_gem_context *ctx) return ret; } - return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | PIN_HIGH); + return i915_vma_pin(vma, 0, I915_GTT_MIN_ALIGNMENT, + PIN_GLOBAL | PIN_HIGH); } static int intel_ring_context_pin(struct intel_engine_cs *engine, From 3a150df945b7408c27cad2c01a1638e8b14ac562 Mon Sep 17 00:00:00 2001 From: Chunyu Hu Date: Wed, 22 Feb 2017 08:29:26 +0800 Subject: [PATCH 0390/1299] tracing: Fix code comment for ftrace_ops_get_func() There is no function 'ftrace_ops_recurs_func' existing in the current code, it was renamed to ftrace_ops_assist_func() in commit c68c0fa29341 ("ftrace: Have ftrace_ops_get_func() handle RCU and PER_CPU flags too"). Update the comment to the correct function name. Link: http://lkml.kernel.org/r/1487723366-14463-1-git-send-email-chuhu@redhat.com Signed-off-by: Chunyu Hu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0c0609326391..fd84f2e30b6d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5487,7 +5487,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, * Normally the mcount trampoline will call the ops->func, but there * are times that it should not. For example, if the ops does not * have its own recursion protection, then it should call the - * ftrace_ops_recurs_func() instead. + * ftrace_ops_assist_func() instead. * * Returns the function that the trampoline should call for @ops. */ From b0734f77b3d1ae00603bf478611662d5bf6c9b54 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 14:10:20 +0000 Subject: [PATCH 0391/1299] drm/i915: Distinguish between timeout and error in sideband transactions After initiating a sideband transaction, we only want to wait for the transaction to become idle. If, as we are, we wait for both the busy and error flag to clear, if an error is raised we just spin until the timeout. Once the hw is idle, we can then check to see if the hw flagged an error, and report it distinctly. Signed-off-by: Chris Wilson Cc: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170223141020.13250-1-chris@chris-wilson.co.uk Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/intel_sideband.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 9f782b5eb6e6..7d971cb56116 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -216,6 +216,7 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, } I915_WRITE(SBI_ADDR, (reg << 16)); + I915_WRITE(SBI_DATA, 0); if (destination == SBI_ICLK) value = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRRD; @@ -225,10 +226,15 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, if (intel_wait_for_register(dev_priv, SBI_CTL_STAT, - SBI_BUSY | SBI_RESPONSE_FAIL, + SBI_BUSY, 0, 100)) { - DRM_ERROR("timeout waiting for SBI to complete read transaction\n"); + DRM_ERROR("timeout waiting for SBI to complete read\n"); + return 0; + } + + if (I915_READ(SBI_CTL_STAT) & SBI_RESPONSE_FAIL) { + DRM_ERROR("error during SBI read of reg %x\n", reg); return 0; } @@ -260,10 +266,16 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, if (intel_wait_for_register(dev_priv, SBI_CTL_STAT, - SBI_BUSY | SBI_RESPONSE_FAIL, + SBI_BUSY, 0, 100)) { - DRM_ERROR("timeout waiting for SBI to complete write transaction\n"); + DRM_ERROR("timeout waiting for SBI to complete write\n"); + return; + } + + if (I915_READ(SBI_CTL_STAT) & SBI_RESPONSE_FAIL) { + DRM_ERROR("error during SBI write of %x to reg %x\n", + value, reg); return; } } From 8d769ea7bc16c34c9dc5143be021e943014c4cd1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 20:58:47 +0000 Subject: [PATCH 0392/1299] drm/i915: Report both waiters and success from intel_engine_wakeup() The two users of the return value from intel_engine_wakeup() are expecting different results. In the breadcrumbs hangcheck, we are using it to determine whether wake_up_process() detected the waiter was currently running (and if so we presume that it hasn't yet missed the interrupt). However, in the fake_irq path, we are using the return value as a check as to whether there are any waiters, and so we may incorrectly stop the fake-irq if that waiter was currently running. To handle the two different needs, return both bits of information! We uninline it from the irq path in preparation for the next patch which makes the irq hotpath special and relegates intel_engine_wakeup() to the slow fixup paths. v2: s/ret/result/ Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170227205850.2828-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 28 +++++++++++++++++++++++- drivers/gpu/drm/i915/intel_ringbuffer.h | 26 +++------------------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 027c93e34c97..c8361f350350 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -26,6 +26,32 @@ #include "i915_drv.h" +unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +{ + unsigned int result = 0; + + /* Note that for this not to dangerously chase a dangling pointer, + * we must hold the rcu_read_lock here. + * + * Also note that tsk is likely to be in !TASK_RUNNING state so an + * early test for tsk->state != TASK_RUNNING before wake_up_process() + * is unlikely to be beneficial. + */ + if (intel_engine_has_waiter(engine)) { + struct task_struct *tsk; + + result = ENGINE_WAKEUP_WAITER; + + rcu_read_lock(); + tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); + if (tsk && !wake_up_process(tsk)) + result |= ENGINE_WAKEUP_ACTIVE; + rcu_read_unlock(); + } + + return result; +} + static unsigned long wait_timeout(void) { return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); @@ -49,7 +75,7 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) * to process the pending interrupt (e.g, low priority task on a loaded * system) and wait until it sleeps before declaring a missed interrupt. */ - if (!intel_engine_wakeup(engine)) { + if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ACTIVE) { mod_timer(&b->hangcheck, wait_timeout()); return; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0f29e07a9581..7d753dc1b89d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -642,29 +642,9 @@ static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh); } -static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) -{ - bool wakeup = false; - - /* Note that for this not to dangerously chase a dangling pointer, - * we must hold the rcu_read_lock here. - * - * Also note that tsk is likely to be in !TASK_RUNNING state so an - * early test for tsk->state != TASK_RUNNING before wake_up_process() - * is unlikely to be beneficial. - */ - if (intel_engine_has_waiter(engine)) { - struct task_struct *tsk; - - rcu_read_lock(); - tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); - if (tsk) - wakeup = wake_up_process(tsk); - rcu_read_unlock(); - } - - return wakeup; -} +unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); +#define ENGINE_WAKEUP_WAITER BIT(0) +#define ENGINE_WAKEUP_ACTIVE BIT(1) void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); From 56299fb7d9047cc1d25362827073b2ac0984ed21 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 20:58:48 +0000 Subject: [PATCH 0393/1299] drm/i915: Signal first fence from irq handler if complete As execlists and other non-semaphore multi-engine devices coordinate between engines using interrupts, we can shave off a few 10s of microsecond of scheduling latency by doing the fence signaling from the interrupt as opposed to a RT kthread. (Realistically the delay adds about 1% to an individual cross-engine workload.) We only signal the first fence in order to limit the amount of work we move into the interrupt handler. We also have to remember that our breadcrumbs may be unordered with respect to the interrupt and so we still require the waiter process to perform some heavyweight coherency fixups, as well as traversing the tree of waiters. v2: No need for early exit in irq handler - it breaks the flow between patches and prevents the tracepoint v3: Restore rcu hold across irq signaling of request Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170227205850.2828-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 13 ++++----- drivers/gpu/drm/i915/i915_gem_request.c | 2 +- drivers/gpu/drm/i915/i915_gem_request.h | 2 ++ drivers/gpu/drm/i915/i915_irq.c | 36 ++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_breadcrumbs.c | 32 ++++++--------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 8 +++--- 6 files changed, 54 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 933874ddea75..71152656ebbf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4060,9 +4060,9 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * is woken. */ if (engine->irq_seqno_barrier && - rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current && test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) { - struct task_struct *tsk; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; /* The ordering of irq_posted versus applying the barrier * is crucial. The clearing of the current irq_posted must @@ -4084,17 +4084,16 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * the seqno before we believe it coherent since they see * irq_posted == false but we are still running). */ - rcu_read_lock(); - tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); - if (tsk && tsk != current) + spin_lock_irqsave(&b->lock, flags); + if (b->first_wait && b->first_wait->tsk != current) /* Note that if the bottom-half is changed as we * are sending the wake-up, the new bottom-half will * be woken by whomever made the change. We only have * to worry about when we steal the irq-posted for * ourself. */ - wake_up_process(tsk); - rcu_read_unlock(); + wake_up_process(b->first_wait->tsk); + spin_unlock_irqrestore(&b->lock, flags); if (__i915_gem_request_completed(req, seqno)) return true; diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index fbfeb5f8d069..77c3ee7a3fd0 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1083,7 +1083,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (flags & I915_WAIT_LOCKED) add_wait_queue(errq, &reset); - intel_wait_init(&wait); + intel_wait_init(&wait, req); restart: do { diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 5f73d8c0a38a..0efee879df23 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -32,10 +32,12 @@ struct drm_file; struct drm_i915_gem_object; +struct drm_i915_gem_request; struct intel_wait { struct rb_node node; struct task_struct *tsk; + struct drm_i915_gem_request *request; u32 seqno; }; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 312d30e96dc2..e06e6eb99801 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1033,12 +1033,42 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { - bool waiters; + struct drm_i915_gem_request *rq = NULL; + struct intel_wait *wait; atomic_inc(&engine->irq_count); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - waiters = intel_engine_wakeup(engine); - trace_intel_engine_notify(engine, waiters); + + rcu_read_lock(); + + spin_lock(&engine->breadcrumbs.lock); + wait = engine->breadcrumbs.first_wait; + if (wait) { + /* We use a callback from the dma-fence to submit + * requests after waiting on our own requests. To + * ensure minimum delay in queuing the next request to + * hardware, signal the fence now rather than wait for + * the signaler to be woken up. We still wake up the + * waiter in order to handle the irq-seqno coherency + * issues (we may receive the interrupt before the + * seqno is written, see __i915_request_irq_complete()) + * and to handle coalescing of multiple seqno updates + * and many waiters. + */ + if (i915_seqno_passed(intel_engine_get_seqno(engine), + wait->seqno)) + rq = wait->request; + + wake_up_process(wait->tsk); + } + spin_unlock(&engine->breadcrumbs.lock); + + if (rq) + dma_fence_signal(&rq->fence); + + rcu_read_unlock(); + + trace_intel_engine_notify(engine, wait); } static void vlv_c0_read(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index c8361f350350..e7f4a4c923a2 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -28,26 +28,18 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) { + struct intel_wait *wait; + unsigned long flags; unsigned int result = 0; - /* Note that for this not to dangerously chase a dangling pointer, - * we must hold the rcu_read_lock here. - * - * Also note that tsk is likely to be in !TASK_RUNNING state so an - * early test for tsk->state != TASK_RUNNING before wake_up_process() - * is unlikely to be beneficial. - */ - if (intel_engine_has_waiter(engine)) { - struct task_struct *tsk; - + spin_lock_irqsave(&engine->breadcrumbs.lock, flags); + wait = engine->breadcrumbs.first_wait; + if (wait) { result = ENGINE_WAKEUP_WAITER; - - rcu_read_lock(); - tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); - if (tsk && !wake_up_process(tsk)) + if (!wake_up_process(wait->tsk)) result |= ENGINE_WAKEUP_ACTIVE; - rcu_read_unlock(); } + spin_unlock_irqrestore(&engine->breadcrumbs.lock, flags); return result; } @@ -301,7 +293,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, } rb_link_node(&wait->node, parent, p); rb_insert_color(&wait->node, &b->waiters); - GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh)); if (completed) { struct rb_node *next = rb_next(completed); @@ -310,7 +301,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, if (next && next != &wait->node) { GEM_BUG_ON(first); b->first_wait = to_wait(next); - rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); /* As there is a delay between reading the current * seqno, processing the completed tasks and selecting * the next waiter, we may have missed the interrupt @@ -337,7 +327,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, if (first) { GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); b->first_wait = wait; - rcu_assign_pointer(b->irq_seqno_bh, wait->tsk); /* After assigning ourselves as the new bottom-half, we must * perform a cursory check to prevent a missed interrupt. * Either we miss the interrupt whilst programming the hardware, @@ -348,7 +337,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, */ __intel_breadcrumbs_enable_irq(b); } - GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh)); GEM_BUG_ON(!b->first_wait); GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node); @@ -396,8 +384,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, const int priority = wakeup_priority(b, wait->tsk); struct rb_node *next; - GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk); - /* We are the current bottom-half. Find the next candidate, * the first waiter in the queue on the remaining oldest * request. As multiple seqnos may complete in the time it @@ -439,13 +425,11 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, * exception rather than a seqno completion. */ b->first_wait = to_wait(next); - rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); if (b->first_wait->seqno != wait->seqno) __intel_breadcrumbs_enable_irq(b); wake_up_process(b->first_wait->tsk); } else { b->first_wait = NULL; - rcu_assign_pointer(b->irq_seqno_bh, NULL); __intel_breadcrumbs_disable_irq(b); } } else { @@ -459,7 +443,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, GEM_BUG_ON(b->first_wait == wait); GEM_BUG_ON(rb_first(&b->waiters) != (b->first_wait ? &b->first_wait->node : NULL)); - GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters)); } void intel_engine_remove_wait(struct intel_engine_cs *engine, @@ -621,6 +604,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) return; request->signaling.wait.tsk = b->signaler; + request->signaling.wait.request = request; request->signaling.wait.seqno = seqno; i915_gem_request_get(request); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 7d753dc1b89d..974ec11d2b1d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -235,8 +235,6 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { - struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */ - spinlock_t lock; /* protects the lists of requests; irqsafe */ struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root signals; /* sorted by retirement */ @@ -582,9 +580,11 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -static inline void intel_wait_init(struct intel_wait *wait) +static inline void intel_wait_init(struct intel_wait *wait, + struct drm_i915_gem_request *rq) { wait->tsk = current; + wait->request = rq; } static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) @@ -639,7 +639,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) { - return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh); + return READ_ONCE(engine->breadcrumbs.first_wait); } unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); From 19d0a5727123509f0fd8b84551d4715e86937df9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 20:58:49 +0000 Subject: [PATCH 0394/1299] drm/i915: Defer enabling hangcheck to the first fake breadcrumb interrupt By deferring hangcheck to the fake breadcrumb interrupt, we can simply the enabling procedure slightly - as by enabling the fake, we then enable the hangcheck. By always enabling the hangcheck from each fake interrupt (it will be a no-op for an already queued hangcheck), it will make restoring the breadcrumbs after a reset simpler in the next patch. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170227205850.2828-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 36 ++++++++++++------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index e7f4a4c923a2..3855c8c39b35 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -75,17 +75,6 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); - - /* Ensure that even if the GPU hangs, we get woken up. - * - * However, note that if no one is waiting, we never notice - * a gpu hang. Eventually, we will have to wait for a resource - * held by the GPU and so trigger a hangcheck. In the most - * pathological case, this will be upon memory starvation! To - * prevent this, we also queue the hangcheck from the retire - * worker. - */ - i915_queue_hangcheck(engine->i915); } static void intel_breadcrumbs_fake_irq(unsigned long data) @@ -99,8 +88,21 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) * every jiffie in order to kick the oldest waiter to do the * coherent seqno check. */ - if (intel_engine_wakeup(engine)) - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + if (!intel_engine_wakeup(engine)) + return; + + mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + + /* Ensure that even if the GPU hangs, we get woken up. + * + * However, note that if no one is waiting, we never notice + * a gpu hang. Eventually, we will have to wait for a resource + * held by the GPU and so trigger a hangcheck. In the most + * pathological case, this will be upon memory starvation! To + * prevent this, we also queue the hangcheck from the retire + * worker. + */ + i915_queue_hangcheck(engine->i915); } static void irq_enable(struct intel_engine_cs *engine) @@ -179,13 +181,11 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) b->irq_enabled = true; } - if (!b->irq_enabled || use_fake_irq(b)) { + /* Ensure we never sleep indefinitely */ + if (!b->irq_enabled || use_fake_irq(b)) mod_timer(&b->fake_irq, jiffies + 1); - i915_queue_hangcheck(i915); - } else { - /* Ensure we never sleep indefinitely */ + else mod_timer(&b->hangcheck, wait_timeout()); - } } static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) From 67b807a892302d184f6987b0c3cc2dc2505e4a2d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 20:58:50 +0000 Subject: [PATCH 0395/1299] drm/i915: Delay disabling the user interrupt for breadcrumbs A significant cost in setting up a wait is the overhead of enabling the interrupt. As we disable the interrupt whenever the queue of waiters is empty, if we are frequently waiting on alternating batches, we end up re-enabling the interrupt on a frequent basis. We do want to disable the interrupt during normal operations as under high load it may add several thousand interrupts/s - we have been known in the past to occupy whole cores with our interrupt handler after accidentally leaving user interrupts enabled. As a compromise, leave the interrupt enabled until the next IRQ, or the system is idle. This gives a small window for a waiter to keep the interrupt active and not be delayed by having to re-enable the interrupt. v2: Restore hangcheck/missed-irq detection for continuations v3: Be more careful restoring the hangcheck timer after reset v4: Be more careful restoring the fake irq after reset (if required!) v5: Redo changes to intel_engine_wakeup() v6: Factor out __intel_engine_wakeup() v7: Improve commentary for declaring a missed wakeup Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170227205850.2828-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 4 +- drivers/gpu/drm/i915/i915_irq.c | 2 + drivers/gpu/drm/i915/intel_breadcrumbs.c | 176 +++++++++++++++-------- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +- 4 files changed, 125 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 01dbba3813c7..0ad080984877 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2988,8 +2988,10 @@ i915_gem_idle_work_handler(struct work_struct *work) if (wait_for(intel_execlists_idle(dev_priv), 10)) DRM_ERROR("Timeout waiting for engines to idle\n"); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + intel_engine_disarm_breadcrumbs(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + } GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e06e6eb99801..19892854707f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1060,6 +1060,8 @@ static void notify_ring(struct intel_engine_cs *engine) rq = wait->request; wake_up_process(wait->tsk); + } else { + __intel_engine_disarm_breadcrumbs(engine); } spin_unlock(&engine->breadcrumbs.lock); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 3855c8c39b35..7c7867d65a39 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -26,20 +26,30 @@ #include "i915_drv.h" -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) { struct intel_wait *wait; - unsigned long flags; unsigned int result = 0; - spin_lock_irqsave(&engine->breadcrumbs.lock, flags); - wait = engine->breadcrumbs.first_wait; + wait = b->first_wait; if (wait) { result = ENGINE_WAKEUP_WAITER; - if (!wake_up_process(wait->tsk)) - result |= ENGINE_WAKEUP_ACTIVE; + if (wake_up_process(wait->tsk)) + result |= ENGINE_WAKEUP_ASLEEP; } - spin_unlock_irqrestore(&engine->breadcrumbs.lock, flags); + + return result; +} + +unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; + unsigned int result; + + spin_lock_irqsave(&b->lock, flags); + result = __intel_breadcrumbs_wakeup(b); + spin_unlock_irqrestore(&b->lock, flags); return result; } @@ -54,7 +64,7 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) struct intel_engine_cs *engine = (struct intel_engine_cs *)data; struct intel_breadcrumbs *b = &engine->breadcrumbs; - if (!b->irq_enabled) + if (!b->irq_armed) return; if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) { @@ -63,23 +73,32 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) return; } - /* If the waiter was currently running, assume it hasn't had a chance + /* We keep the hangcheck time alive until we disarm the irq, even + * if there are no waiters at present. + * + * If the waiter was currently running, assume it hasn't had a chance * to process the pending interrupt (e.g, low priority task on a loaded * system) and wait until it sleeps before declaring a missed interrupt. + * + * If the waiter was asleep (and not even pending a wakeup), then we + * must have missed an interrupt as the GPU has stopped advancing + * but we still have a waiter. Assuming all batches complete within + * DRM_I915_HANGCHECK_JIFFIES [1.5s]! */ - if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ACTIVE) { + if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { + DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); + set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + } else { mod_timer(&b->hangcheck, wait_timeout()); - return; } - - DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); - set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); } static void intel_breadcrumbs_fake_irq(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; /* * The timer persists in case we cannot enable interrupts, @@ -88,10 +107,15 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) * every jiffie in order to kick the oldest waiter to do the * coherent seqno check. */ - if (!intel_engine_wakeup(engine)) + + spin_lock_irqsave(&b->lock, flags); + if (!__intel_breadcrumbs_wakeup(b)) + __intel_engine_disarm_breadcrumbs(engine); + spin_unlock_irqrestore(&b->lock, flags); + if (!b->irq_armed) return; - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + mod_timer(&b->fake_irq, jiffies + 1); /* Ensure that even if the GPU hangs, we get woken up. * @@ -127,6 +151,42 @@ static void irq_disable(struct intel_engine_cs *engine) spin_unlock(&engine->i915->irq_lock); } +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + assert_spin_locked(&b->lock); + + if (b->irq_enabled) { + irq_disable(engine); + b->irq_enabled = false; + } + + b->irq_armed = false; +} + +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; + + if (!b->irq_armed) + return; + + spin_lock_irqsave(&b->lock, flags); + + /* We only disarm the irq when we are idle (all requests completed), + * so if there remains a sleeping waiter, it missed the request + * completion. + */ + if (__intel_breadcrumbs_wakeup(b) & ENGINE_WAKEUP_ASLEEP) + set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + + __intel_engine_disarm_breadcrumbs(engine); + + spin_unlock_irqrestore(&b->lock, flags); +} + static bool use_fake_irq(const struct intel_breadcrumbs *b) { const struct intel_engine_cs *engine = @@ -144,6 +204,15 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b) return atomic_read(&engine->irq_count) == b->hangcheck_interrupts; } +static void enable_fake_irq(struct intel_breadcrumbs *b) +{ + /* Ensure we never sleep indefinitely */ + if (!b->irq_enabled || use_fake_irq(b)) + mod_timer(&b->fake_irq, jiffies + 1); + else + mod_timer(&b->hangcheck, wait_timeout()); +} + static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = @@ -151,9 +220,17 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) struct drm_i915_private *i915 = engine->i915; assert_spin_locked(&b->lock); - if (b->rpm_wakelock) + if (b->irq_armed) return; + /* The breadcrumb irq will be disarmed on the interrupt after the + * waiters are signaled. This gives us a single interrupt window in + * which we can add a new waiter and avoid the cost of re-enabling + * the irq. + */ + b->irq_armed = true; + GEM_BUG_ON(b->irq_enabled); + if (I915_SELFTEST_ONLY(b->mock)) { /* For our mock objects we want to avoid interaction * with the real hardware (which is not set up). So @@ -162,17 +239,15 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) * implementation to call intel_engine_wakeup() * itself when it wants to simulate a user interrupt, */ - b->rpm_wakelock = true; return; } /* Since we are waiting on a request, the GPU should be busy - * and should have its own rpm reference. For completeness, - * record an rpm reference for ourselves to cover the - * interrupt we unmask. + * and should have its own rpm reference. This is tracked + * by i915->gt.awake, we can forgo holding our own wakref + * for the interrupt as before i915->gt.awake is released (when + * the driver is idle) we disarm the breadcrumbs. */ - intel_runtime_pm_get_noresume(i915); - b->rpm_wakelock = true; /* No interrupts? Kick the waiter every jiffie! */ if (intel_irqs_enabled(i915)) { @@ -181,34 +256,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) b->irq_enabled = true; } - /* Ensure we never sleep indefinitely */ - if (!b->irq_enabled || use_fake_irq(b)) - mod_timer(&b->fake_irq, jiffies + 1); - else - mod_timer(&b->hangcheck, wait_timeout()); -} - -static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) -{ - struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); - - assert_spin_locked(&b->lock); - if (!b->rpm_wakelock) - return; - - if (I915_SELFTEST_ONLY(b->mock)) { - b->rpm_wakelock = false; - return; - } - - if (b->irq_enabled) { - irq_disable(engine); - b->irq_enabled = false; - } - - intel_runtime_pm_put(engine->i915); - b->rpm_wakelock = false; + enable_fake_irq(b); } static inline struct intel_wait *to_wait(struct rb_node *node) @@ -430,7 +478,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, wake_up_process(b->first_wait->tsk); } else { b->first_wait = NULL; - __intel_breadcrumbs_disable_irq(b); } } else { GEM_BUG_ON(rb_first(&b->waiters) == &wait->node); @@ -722,15 +769,22 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) cancel_fake_irq(engine); spin_lock_irq(&b->lock); - __intel_breadcrumbs_disable_irq(b); - if (intel_engine_has_waiter(engine)) { - __intel_breadcrumbs_enable_irq(b); - if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) - wake_up_process(b->first_wait->tsk); - } else { - /* sanitize the IMR and unmask any auxiliary interrupts */ + if (b->irq_enabled) + irq_enable(engine); + else irq_disable(engine); - } + + /* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the + * GPU is active and may have already executed the MI_USER_INTERRUPT + * before the CPU is ready to receive. However, the engine is currently + * idle (we haven't started it yet), there is no possibility for a + * missed interrupt as we enabled the irq and so we can clear the + * immediate wakeup (until a real interrupt arrives for the waiter). + */ + clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); + + if (b->irq_armed) + enable_fake_irq(b); spin_unlock_irq(&b->lock); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 974ec11d2b1d..3dd6eee4a08b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -246,8 +246,8 @@ struct intel_engine_cs { unsigned int hangcheck_interrupts; + bool irq_armed : 1; bool irq_enabled : 1; - bool rpm_wakelock : 1; I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; @@ -644,7 +644,10 @@ static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); #define ENGINE_WAKEUP_WAITER BIT(0) -#define ENGINE_WAKEUP_ACTIVE BIT(1) +#define ENGINE_WAKEUP_ASLEEP BIT(1) + +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); From dcc235279a52d49023d4f1af7c3ed468a97015ae Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Feb 2017 21:29:03 +0100 Subject: [PATCH 0396/1299] gcc-plugins: fix sancov_plugin for gcc-5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The name of the local variable was inadvertantly changed from sancov_plugin_pass_info to sancov_pass_info: scripts/gcc-plugins/sancov_plugin.c: In function ‘int plugin_init(plugin_name_args*, plugin_gcc_version*)’: scripts/gcc-plugins/sancov_plugin.c:136:67: error: ‘sancov_plugin_pass_info’ was not declared in this scope This changes the conditional reference to this variable as well. Fixes: 5a45a4c5c3f5 ("gcc-plugins: consolidate on PASS_INFO macro") Signed-off-by: Arnd Bergmann Signed-off-by: Kees Cook --- scripts/gcc-plugins/sancov_plugin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gcc-plugins/sancov_plugin.c b/scripts/gcc-plugins/sancov_plugin.c index 9b0b5cbc5b89..0f98634c20a0 100644 --- a/scripts/gcc-plugins/sancov_plugin.c +++ b/scripts/gcc-plugins/sancov_plugin.c @@ -133,7 +133,7 @@ __visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gc #if BUILDING_GCC_VERSION < 6000 register_callback(plugin_name, PLUGIN_START_UNIT, &sancov_start_unit, NULL); register_callback(plugin_name, PLUGIN_REGISTER_GGC_ROOTS, NULL, (void *)>_ggc_r_gt_sancov); - register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &sancov_plugin_pass_info); + register_callback(plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL, &sancov_pass_info); #endif return 0; From 39299838eea1a310b1309b538b80cd00603698fd Mon Sep 17 00:00:00 2001 From: Deepak M Date: Fri, 17 Feb 2017 18:13:29 +0530 Subject: [PATCH 0397/1299] drm/i915/glk: Program dphy param reg for GLK For GEMINILAKE, dphy param reg values are programmed in terms of HS byte clock count while for older platforms in terms of HS ddr clk count. v2: Added comments to clarify ddr clock count calculation v3: Use multiplier variable instead of IS_GEMINILAKE() check everywhere (Jani) Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487335415-14766-2-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 31 +++++++++++----------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 84b3683c1f46..405a0f74f455 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -571,6 +571,7 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) u32 tclk_prepare_clkzero, ths_prepare_hszero; u32 lp_to_hs_switch, hs_to_lp_switch; u32 pclk, computed_ddr; + u32 mul; u16 burst_mode_ratio; enum port port; @@ -674,11 +675,6 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) break; } - /* - * ui(s) = 1/f [f in hz] - * ui(ns) = 10^9 / (f*10^6) [f in Mhz] -> 10^3/f(Mhz) - */ - /* in Kbps */ ui_num = NS_KHZ_RATIO; ui_den = bitrate; @@ -692,21 +688,26 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) */ intel_dsi->lp_byte_clk = DIV_ROUND_UP(tlpx_ns * ui_den, 8 * ui_num); - /* count values in UI = (ns value) * (bitrate / (2 * 10^6)) + /* DDR clock period = 2 * UI + * UI(sec) = 1/(bitrate * 10^3) (bitrate is in KHZ) + * UI(nsec) = 10^6 / bitrate + * DDR clock period (nsec) = 2 * UI = (2 * 10^6)/ bitrate + * DDR clock count = ns_value / DDR clock period * - * Since txddrclkhs_i is 2xUI, all the count values programmed in - * DPHY param register are divided by 2 - * - * prepare count + * For GEMINILAKE dphy_param_reg will be programmed in terms of + * HS byte clock count for other platform in HS ddr clock count */ + mul = IS_GEMINILAKE(dev_priv) ? 8 : 2; ths_prepare_ns = max(mipi_config->ths_prepare, mipi_config->tclk_prepare); - prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * 2); + + /* prepare count */ + prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * mul); /* exit zero count */ exit_zero_cnt = DIV_ROUND_UP( (ths_prepare_hszero - ths_prepare_ns) * ui_den, - ui_num * 2 + ui_num * mul ); /* @@ -720,12 +721,12 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) /* clk zero count */ clk_zero_cnt = DIV_ROUND_UP( - (tclk_prepare_clkzero - ths_prepare_ns) - * ui_den, 2 * ui_num); + (tclk_prepare_clkzero - ths_prepare_ns) + * ui_den, ui_num * mul); /* trail count */ tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail); - trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, 2 * ui_num); + trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, ui_num * mul); if (prepare_cnt > PREPARE_CNT_MAX || exit_zero_cnt > EXIT_ZERO_CNT_MAX || From b426f985158d9a723ab195258748c0c5e0793a52 Mon Sep 17 00:00:00 2001 From: Deepak M Date: Fri, 17 Feb 2017 18:13:30 +0530 Subject: [PATCH 0398/1299] drm/i915/glk: Program new MIPI DSI PHY registers for GLK Program the clk lane and tlpx time count registers to configure DSI PHY. v2: Addressed Jani's Review comments(renamed bit field macros) v3: Program clk lane timing reg same as dphy param reg. v4: Removed "line over 80 character" warning Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487335415-14766-3-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 8 ++++++++ drivers/gpu/drm/i915/intel_dsi.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index bdce90084d43..31e38ce9730b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8625,6 +8625,14 @@ enum { #define LP_BYTECLK_SHIFT 0 #define LP_BYTECLK_MASK (0xffff << 0) +#define _MIPIA_TLPX_TIME_COUNT (dev_priv->mipi_mmio_base + 0xb0a4) +#define _MIPIC_TLPX_TIME_COUNT (dev_priv->mipi_mmio_base + 0xb8a4) +#define MIPI_TLPX_TIME_COUNT(port) _MMIO_MIPI(port, _MIPIA_TLPX_TIME_COUNT, _MIPIC_TLPX_TIME_COUNT) + +#define _MIPIA_CLK_LANE_TIMING (dev_priv->mipi_mmio_base + 0xb098) +#define _MIPIC_CLK_LANE_TIMING (dev_priv->mipi_mmio_base + 0xb898) +#define MIPI_CLK_LANE_TIMING(port) _MMIO_MIPI(port, _MIPIA_CLK_LANE_TIMING, _MIPIC_CLK_LANE_TIMING) + /* bits 31:0 */ #define _MIPIA_LP_GEN_DATA (dev_priv->mipi_mmio_base + 0xb064) #define _MIPIC_LP_GEN_DATA (dev_priv->mipi_mmio_base + 0xb864) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index d7ffb9ac3c8a..f00b4e22f95b 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1309,6 +1309,14 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, */ I915_WRITE(MIPI_LP_BYTECLK(port), intel_dsi->lp_byte_clk); + if (IS_GEMINILAKE(dev_priv)) { + I915_WRITE(MIPI_TLPX_TIME_COUNT(port), + intel_dsi->lp_byte_clk); + /* Shadow of DPHY reg */ + I915_WRITE(MIPI_CLK_LANE_TIMING(port), + intel_dsi->dphy_reg); + } + /* the bw essential for transmitting 16 long packets containing * 252 bytes meant for dcs write memory command is programmed in * this register in terms of byte clocks. based on dsi transfer From f340c2ff5ebdd213682fe8b9a14838878cd0ff01 Mon Sep 17 00:00:00 2001 From: Deepak M Date: Fri, 17 Feb 2017 18:13:32 +0530 Subject: [PATCH 0399/1299] drm/i915/glk: Add DSI PLL divider range for glk PLL divider range for GLK is different than that of BXT, hence adding the GLK range check in this patch. v2: Code restructure using min and max ratio variables (Ander) v3: Code changes to avoid "maybe-uninitialized" warning (Jani) Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487335415-14766-5-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 4 ++++ drivers/gpu/drm/i915/intel_dsi_pll.c | 24 +++++++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 31e38ce9730b..89093efc9b42 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8337,10 +8337,12 @@ enum { #define BXT_DSI_PLL_PVD_RATIO_SHIFT 16 #define BXT_DSI_PLL_PVD_RATIO_MASK (3 << BXT_DSI_PLL_PVD_RATIO_SHIFT) #define BXT_DSI_PLL_PVD_RATIO_1 (1 << BXT_DSI_PLL_PVD_RATIO_SHIFT) +#define BXT_DSIC_16X_BY1 (0 << 10) #define BXT_DSIC_16X_BY2 (1 << 10) #define BXT_DSIC_16X_BY3 (2 << 10) #define BXT_DSIC_16X_BY4 (3 << 10) #define BXT_DSIC_16X_MASK (3 << 10) +#define BXT_DSIA_16X_BY1 (0 << 8) #define BXT_DSIA_16X_BY2 (1 << 8) #define BXT_DSIA_16X_BY3 (2 << 8) #define BXT_DSIA_16X_BY4 (3 << 8) @@ -8350,6 +8352,8 @@ enum { #define BXT_DSI_PLL_RATIO_MAX 0x7D #define BXT_DSI_PLL_RATIO_MIN 0x22 +#define GLK_DSI_PLL_RATIO_MAX 0x6F +#define GLK_DSI_PLL_RATIO_MIN 0x22 #define BXT_DSI_PLL_RATIO_MASK 0xFF #define BXT_REF_CLOCK_KHZ 19200 diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 3a7308681360..3edfba862227 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -426,11 +426,12 @@ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); } -static int bxt_compute_dsi_pll(struct intel_encoder *encoder, +static int gen9lp_compute_dsi_pll(struct intel_encoder *encoder, struct intel_crtc_state *config) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - u8 dsi_ratio; + u8 dsi_ratio, dsi_ratio_min, dsi_ratio_max; u32 dsi_clk; dsi_clk = dsi_clk_from_pclk(intel_dsi->pclk, intel_dsi->pixel_format, @@ -442,11 +443,20 @@ static int bxt_compute_dsi_pll(struct intel_encoder *encoder, * round 'up' the result */ dsi_ratio = DIV_ROUND_UP(dsi_clk * 2, BXT_REF_CLOCK_KHZ); - if (dsi_ratio < BXT_DSI_PLL_RATIO_MIN || - dsi_ratio > BXT_DSI_PLL_RATIO_MAX) { + + if (IS_BROXTON(dev_priv)) { + dsi_ratio_min = BXT_DSI_PLL_RATIO_MIN; + dsi_ratio_max = BXT_DSI_PLL_RATIO_MAX; + } else { + dsi_ratio_min = GLK_DSI_PLL_RATIO_MIN; + dsi_ratio_max = GLK_DSI_PLL_RATIO_MAX; + } + + if (dsi_ratio < dsi_ratio_min || dsi_ratio > dsi_ratio_max) { DRM_ERROR("Cant get a suitable ratio from DSI PLL ratios\n"); return -ECHRNG; - } + } else + DRM_DEBUG_KMS("DSI PLL calculation is Done!!\n"); /* * Program DSI ratio and Select MIPIC and MIPIA PLL output as 8x @@ -458,7 +468,7 @@ static int bxt_compute_dsi_pll(struct intel_encoder *encoder, /* As per recommendation from hardware team, * Prog PVD ratio =1 if dsi ratio <= 50 */ - if (dsi_ratio <= 50) + if (IS_BROXTON(dev_priv) && dsi_ratio <= 50) config->dsi_pll.ctrl |= BXT_DSI_PLL_PVD_RATIO_1; return 0; @@ -518,7 +528,7 @@ int intel_compute_dsi_pll(struct intel_encoder *encoder, if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) return vlv_compute_dsi_pll(encoder, config); else if (IS_GEN9_LP(dev_priv)) - return bxt_compute_dsi_pll(encoder, config); + return gen9lp_compute_dsi_pll(encoder, config); return -ENODEV; } From 09a568e7ac2c053a2f51653deb11744d133d41a4 Mon Sep 17 00:00:00 2001 From: Deepak M Date: Fri, 17 Feb 2017 18:13:33 +0530 Subject: [PATCH 0400/1299] drm/i915i/glk: Program MIPI_CLOCK_CTRL only for BXT Register MIPI_CLOCK_CTRL is applicable only for BXT platform. Future platform have other registers to program the escape clock dividers. Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487335415-14766-6-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi_pll.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 3edfba862227..0a9a5c431a04 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -489,8 +489,10 @@ static void bxt_enable_dsi_pll(struct intel_encoder *encoder, POSTING_READ(BXT_DSI_PLL_CTL); /* Program TX, RX, Dphy clocks */ - for_each_dsi_port(port, intel_dsi->ports) - bxt_dsi_program_clocks(encoder->base.dev, port, config); + if (IS_BROXTON(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) + bxt_dsi_program_clocks(encoder->base.dev, port, config); + } /* Enable DSI PLL */ val = I915_READ(BXT_DSI_PLL_ENABLE); @@ -554,19 +556,22 @@ void intel_disable_dsi_pll(struct intel_encoder *encoder) bxt_disable_dsi_pll(encoder); } -static void bxt_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) +static void gen9lp_dsi_reset_clocks(struct intel_encoder *encoder, + enum port port) { u32 tmp; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); /* Clear old configurations */ - tmp = I915_READ(BXT_MIPI_CLOCK_CTL); - tmp &= ~(BXT_MIPI_TX_ESCLK_FIXDIV_MASK(port)); - tmp &= ~(BXT_MIPI_RX_ESCLK_UPPER_FIXDIV_MASK(port)); - tmp &= ~(BXT_MIPI_8X_BY3_DIVIDER_MASK(port)); - tmp &= ~(BXT_MIPI_RX_ESCLK_LOWER_FIXDIV_MASK(port)); - I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); + if (IS_BROXTON(dev_priv)) { + tmp = I915_READ(BXT_MIPI_CLOCK_CTL); + tmp &= ~(BXT_MIPI_TX_ESCLK_FIXDIV_MASK(port)); + tmp &= ~(BXT_MIPI_RX_ESCLK_UPPER_FIXDIV_MASK(port)); + tmp &= ~(BXT_MIPI_8X_BY3_DIVIDER_MASK(port)); + tmp &= ~(BXT_MIPI_RX_ESCLK_LOWER_FIXDIV_MASK(port)); + I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); + } I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); } @@ -575,7 +580,7 @@ void intel_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); if (IS_GEN9_LP(dev_priv)) - bxt_dsi_reset_clocks(encoder, port); + gen9lp_dsi_reset_clocks(encoder, port); else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_dsi_reset_clocks(encoder, port); } From bcc65700484115dc4f80817de91dc86eeeb8b361 Mon Sep 17 00:00:00 2001 From: Deepak M Date: Fri, 17 Feb 2017 18:13:34 +0530 Subject: [PATCH 0401/1299] drm/i915/glk: Program txesc clock divider for GLK v2: Addressed Jani's Review comments(renamed bit field macros) Txesc clock divider is calculated and programmed for geminilake platform. Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487335415-14766-7-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 5 +++ drivers/gpu/drm/i915/intel_dsi_pll.c | 61 +++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 89093efc9b42..1343170d8f5c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8251,6 +8251,11 @@ enum { #define _MIPI_PORT(port, a, c) _PORT3(port, a, 0, c) /* ports A and C only */ #define _MMIO_MIPI(port, a, c) _MMIO(_MIPI_PORT(port, a, c)) +#define MIPIO_TXESC_CLK_DIV1 _MMIO(0x160004) +#define GLK_TX_ESC_CLK_DIV1_MASK 0x3FF +#define MIPIO_TXESC_CLK_DIV2 _MMIO(0x160008) +#define GLK_TX_ESC_CLK_DIV2_MASK 0x3FF + /* BXT MIPI clock controls */ #define BXT_MAX_VAR_OUTPUT_KHZ 39500 diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 0a9a5c431a04..7a7617b24cb2 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -372,6 +372,53 @@ static void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) ESCAPE_CLOCK_DIVIDER_SHIFT); } +static void glk_dsi_program_esc_clock(struct drm_device *dev, + const struct intel_crtc_state *config) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + u32 dsi_rate = 0; + u32 pll_ratio = 0; + u32 ddr_clk = 0; + u32 div1_value = 0; + u32 div2_value = 0; + u32 txesc1_div = 0; + u32 txesc2_div = 0; + + pll_ratio = config->dsi_pll.ctrl & BXT_DSI_PLL_RATIO_MASK; + + dsi_rate = (BXT_REF_CLOCK_KHZ * pll_ratio) / 2; + + ddr_clk = dsi_rate / 2; + + /* Variable divider value */ + div1_value = DIV_ROUND_CLOSEST(ddr_clk, 20000); + + /* Calculate TXESC1 divider */ + if (div1_value <= 10) + txesc1_div = div1_value; + else if ((div1_value > 10) && (div1_value <= 20)) + txesc1_div = DIV_ROUND_UP(div1_value, 2); + else if ((div1_value > 20) && (div1_value <= 30)) + txesc1_div = DIV_ROUND_UP(div1_value, 4); + else if ((div1_value > 30) && (div1_value <= 40)) + txesc1_div = DIV_ROUND_UP(div1_value, 6); + else if ((div1_value > 40) && (div1_value <= 50)) + txesc1_div = DIV_ROUND_UP(div1_value, 8); + else + txesc1_div = 10; + + /* Calculate TXESC2 divider */ + div2_value = DIV_ROUND_UP(div1_value, txesc1_div); + + if (div2_value < 10) + txesc2_div = div2_value; + else + txesc2_div = 10; + + I915_WRITE(MIPIO_TXESC_CLK_DIV1, txesc1_div & GLK_TX_ESC_CLK_DIV1_MASK); + I915_WRITE(MIPIO_TXESC_CLK_DIV2, txesc2_div & GLK_TX_ESC_CLK_DIV2_MASK); +} + /* Program BXT Mipi clocks and dividers */ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, const struct intel_crtc_state *config) @@ -474,7 +521,7 @@ static int gen9lp_compute_dsi_pll(struct intel_encoder *encoder, return 0; } -static void bxt_enable_dsi_pll(struct intel_encoder *encoder, +static void gen9lp_enable_dsi_pll(struct intel_encoder *encoder, const struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -492,6 +539,8 @@ static void bxt_enable_dsi_pll(struct intel_encoder *encoder, if (IS_BROXTON(dev_priv)) { for_each_dsi_port(port, intel_dsi->ports) bxt_dsi_program_clocks(encoder->base.dev, port, config); + } else { + glk_dsi_program_esc_clock(encoder->base.dev, config); } /* Enable DSI PLL */ @@ -543,7 +592,7 @@ void intel_enable_dsi_pll(struct intel_encoder *encoder, if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_enable_dsi_pll(encoder, config); else if (IS_GEN9_LP(dev_priv)) - bxt_enable_dsi_pll(encoder, config); + gen9lp_enable_dsi_pll(encoder, config); } void intel_disable_dsi_pll(struct intel_encoder *encoder) @@ -571,6 +620,14 @@ static void gen9lp_dsi_reset_clocks(struct intel_encoder *encoder, tmp &= ~(BXT_MIPI_8X_BY3_DIVIDER_MASK(port)); tmp &= ~(BXT_MIPI_RX_ESCLK_LOWER_FIXDIV_MASK(port)); I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); + } else { + tmp = I915_READ(MIPIO_TXESC_CLK_DIV1); + tmp &= ~GLK_TX_ESC_CLK_DIV1_MASK; + I915_WRITE(MIPIO_TXESC_CLK_DIV1, tmp); + + tmp = I915_READ(MIPIO_TXESC_CLK_DIV2); + tmp &= ~GLK_TX_ESC_CLK_DIV2_MASK; + I915_WRITE(MIPIO_TXESC_CLK_DIV2, tmp); } I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); } From ebeac38025ff36b76b0f6fa0aeb52729e96bb6bc Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Fri, 17 Feb 2017 18:13:35 +0530 Subject: [PATCH 0402/1299] drm/i915/glk: Validate only DSI PORT A PLL divider As per BSPEC, GLK supports MIPI DSI 8X clk only on PORT A. Therefore only for PORT A PLL divider value should be validated. Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487335415-14766-8-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi_pll.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 7a7617b24cb2..2ff2ee7f3b78 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -206,17 +206,24 @@ static bool bxt_dsi_pll_is_enabled(struct drm_i915_private *dev_priv) return false; /* - * Both dividers must be programmed with valid values even if only one - * of the PLL is used, see BSpec/Broxton Clocks. Check this here for + * Dividers must be programmed with valid values. As per BSEPC, for + * GEMINLAKE only PORT A divider values are checked while for BXT + * both divider values are validated. Check this here for * paranoia, since BIOS is known to misconfigure PLLs in this way at * times, and since accessing DSI registers with invalid dividers * causes a system hang. */ val = I915_READ(BXT_DSI_PLL_CTL); - if (!(val & BXT_DSIA_16X_MASK) || !(val & BXT_DSIC_16X_MASK)) { - DRM_DEBUG_DRIVER("PLL is enabled with invalid divider settings (%08x)\n", - val); - enabled = false; + if (IS_GEMINILAKE(dev_priv)) { + if (!(val & BXT_DSIA_16X_MASK)) { + DRM_DEBUG_DRIVER("Invalid PLL divider (%08x)\n", val); + enabled = false; + } + } else { + if (!(val & BXT_DSIA_16X_MASK) || !(val & BXT_DSIC_16X_MASK)) { + DRM_DEBUG_DRIVER("Invalid PLL divider (%08x)\n", val); + enabled = false; + } } return enabled; From 80166e406e4b88e49b5091cec1ac6b03b6f372ed Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Feb 2017 08:50:18 +0000 Subject: [PATCH 0403/1299] drm/i915: Consolidate reporting of "missed breadcrumbs" Move the setting of gpu_error->missed_irq_ring bit to a common function so that we can get the debug logging for either path. v2: Add %pF caller Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170228085018.3225-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 7c7867d65a39..5752f23fd289 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -59,6 +59,16 @@ static unsigned long wait_timeout(void) return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); } +static noinline void missed_breadcrumb(struct intel_engine_cs *engine) +{ + DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s\n", + engine->name, __builtin_return_address(0), + yesno(test_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted))); + + set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); +} + static void intel_breadcrumbs_hangcheck(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; @@ -86,8 +96,7 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) * DRM_I915_HANGCHECK_JIFFIES [1.5s]! */ if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { - DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); - set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + missed_breadcrumb(engine); mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); } else { mod_timer(&b->hangcheck, wait_timeout()); @@ -180,7 +189,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) * completion. */ if (__intel_breadcrumbs_wakeup(b) & ENGINE_WAKEUP_ASLEEP) - set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + missed_breadcrumb(engine); __intel_engine_disarm_breadcrumbs(engine); From 3870b89a810bc7a3e26ec469ee86077c1de6f9b2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Feb 2017 11:26:16 +0200 Subject: [PATCH 0404/1299] drm/i915/dsi: Move calling of wait_for_dsi_fifo_empty to mipi_exec_send_packet Instead of calling wait_for_dsi_fifo_empty on all dsi ports after calling a drm_panel_foo helper which calls VBT sequences, move it to the VBT mipi_exec_send_packet helper, which is the one VBT instruction which actually puts data in the fifo. This results in a nice cleanup making it clearer what all the steps on intel_dsi_enable / disable are and this also makes the VBT code properly wait till a command has actually been send before executing the next steps (typically a delay) in the VBT sequence. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Acked-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/289977b5699e252fea5c211d1d1645f9e79cca79.1488273823.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 12 +----------- drivers/gpu/drm/i915/intel_dsi.h | 2 ++ drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 2 ++ 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index f00b4e22f95b..598ac658e361 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -80,7 +80,7 @@ enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt) } } -static void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port) +void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port) { struct drm_encoder *encoder = &intel_dsi->base.base; struct drm_device *dev = encoder->dev; @@ -525,9 +525,6 @@ static void intel_dsi_enable(struct intel_encoder *encoder) drm_panel_enable(intel_dsi->panel); - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); - intel_dsi_port_enable(encoder); } @@ -543,7 +540,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; u32 val; DRM_DEBUG_KMS("\n"); @@ -588,9 +584,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, drm_panel_prepare(intel_dsi->panel); - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); - /* Enable port in pre-enable phase itself because as per hw team * recommendation, port should be enabled befor plane & pipe */ intel_dsi_enable(encoder); @@ -672,9 +665,6 @@ static void intel_dsi_disable(struct intel_encoder *encoder) /* if disable packets are sent before sending shutdown packet then in * some next enable sequence send turn on packet error is observed */ drm_panel_disable(intel_dsi->panel); - - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); } static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index 5967ea6d6045..d56782396f60 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -130,6 +130,8 @@ static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder) return container_of(encoder, struct intel_dsi, base.base); } +void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port); + bool intel_dsi_pll_is_enabled(struct drm_i915_private *dev_priv); int intel_compute_dsi_pll(struct intel_encoder *encoder, struct intel_crtc_state *config); diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 405a0f74f455..7daaa58289e3 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -192,6 +192,8 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi, break; } + wait_for_dsi_fifo_empty(intel_dsi, port); + out: data += len; From 5a2e65e742cac8939ab411f38ef1a36ef927181c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Feb 2017 11:26:17 +0200 Subject: [PATCH 0405/1299] drm/i915/dsi: Merge intel_dsi_disable/enable into their respective callers intel_dsi_disable/enable only have one caller, merge them into their respective callers. Change msleep(2) into usleep_range(2000, 5000) to make checkpatch happy, otherwise no functional changes. The main advantage of this change is that it makes it easier to follow all the steps of the panel enable / disable sequence when reading the code. Signed-off-by: Hans de Goede Acked-by: Jani Nikula Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/d7249612e6d2e9639ecd1d8d106ca37d5794f2a4.1488273823.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 108 +++++++++++++------------------ 1 file changed, 44 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 598ac658e361..859d4d71f8e1 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -505,32 +505,6 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder) } } -static void intel_dsi_enable(struct intel_encoder *encoder) -{ - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; - - DRM_DEBUG_KMS("\n"); - - if (is_cmd_mode(intel_dsi)) { - for_each_dsi_port(port, intel_dsi->ports) - I915_WRITE(MIPI_MAX_RETURN_PKT_SIZE(port), 8 * 4); - } else { - msleep(20); /* XXX */ - for_each_dsi_port(port, intel_dsi->ports) - dpi_send_cmd(intel_dsi, TURN_ON, false, port); - msleep(100); - - drm_panel_enable(intel_dsi->panel); - - intel_dsi_port_enable(encoder); - } - - intel_panel_enable_backlight(intel_dsi->attached_connector); -} - static void intel_dsi_prepare(struct intel_encoder *intel_encoder, struct intel_crtc_state *pipe_config); @@ -540,6 +514,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; u32 val; DRM_DEBUG_KMS("\n"); @@ -586,7 +561,21 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, /* Enable port in pre-enable phase itself because as per hw team * recommendation, port should be enabled befor plane & pipe */ - intel_dsi_enable(encoder); + if (is_cmd_mode(intel_dsi)) { + for_each_dsi_port(port, intel_dsi->ports) + I915_WRITE(MIPI_MAX_RETURN_PKT_SIZE(port), 8 * 4); + } else { + msleep(20); /* XXX */ + for_each_dsi_port(port, intel_dsi->ports) + dpi_send_cmd(intel_dsi, TURN_ON, false, port); + msleep(100); + + drm_panel_enable(intel_dsi->panel); + + intel_dsi_port_enable(encoder); + } + + intel_panel_enable_backlight(intel_dsi->attached_connector); } static void intel_dsi_enable_nop(struct intel_encoder *encoder, @@ -631,42 +620,6 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, } } -static void intel_dsi_disable(struct intel_encoder *encoder) -{ - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; - u32 temp; - - DRM_DEBUG_KMS("\n"); - - if (is_vid_mode(intel_dsi)) { - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); - - intel_dsi_port_disable(encoder); - msleep(2); - } - - for_each_dsi_port(port, intel_dsi->ports) { - /* Panel commands can be sent when clock is in LP11 */ - I915_WRITE(MIPI_DEVICE_READY(port), 0x0); - - intel_dsi_reset_clocks(encoder, port); - I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); - - temp = I915_READ(MIPI_DSI_FUNC_PRG(port)); - temp &= ~VID_MODE_FORMAT_MASK; - I915_WRITE(MIPI_DSI_FUNC_PRG(port), temp); - - I915_WRITE(MIPI_DEVICE_READY(port), 0x1); - } - /* if disable packets are sent before sending shutdown packet then in - * some next enable sequence send turn on packet error is observed */ - drm_panel_disable(intel_dsi->panel); -} - static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -716,11 +669,38 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; u32 val; DRM_DEBUG_KMS("\n"); - intel_dsi_disable(encoder); + if (is_vid_mode(intel_dsi)) { + for_each_dsi_port(port, intel_dsi->ports) + wait_for_dsi_fifo_empty(intel_dsi, port); + + intel_dsi_port_disable(encoder); + usleep_range(2000, 5000); + } + + for_each_dsi_port(port, intel_dsi->ports) { + /* Panel commands can be sent when clock is in LP11 */ + I915_WRITE(MIPI_DEVICE_READY(port), 0x0); + + intel_dsi_reset_clocks(encoder, port); + I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); + + val = I915_READ(MIPI_DSI_FUNC_PRG(port)); + val &= ~VID_MODE_FORMAT_MASK; + I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); + + I915_WRITE(MIPI_DEVICE_READY(port), 0x1); + } + + /* + * if disable packets are sent before sending shutdown packet then in + * some next enable sequence send turn on packet error is observed + */ + drm_panel_disable(intel_dsi->panel); intel_dsi_clear_device_ready(encoder); From c7991ecad6a99664506cf290118ecec985854011 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Feb 2017 11:26:18 +0200 Subject: [PATCH 0406/1299] drm/i915/dsi: Add intel_dsi_unprepare() helper The enable path has an intel_dsi_prepare() helper which prepares various registers for the mode-set. Move the code undoing this to a new intel_dsi_unprepare() helper function for better symmetry between the enable and disable paths. No functional changes. Signed-off-by: Hans de Goede Acked-by: Jani Nikula Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/cc0baaf04ea74a20031b4b5bb128591dcfa78406.1488273823.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 37 +++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 859d4d71f8e1..1f839a837e9c 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -507,6 +507,7 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder) static void intel_dsi_prepare(struct intel_encoder *intel_encoder, struct intel_crtc_state *pipe_config); +static void intel_dsi_unprepare(struct intel_encoder *encoder); static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, @@ -682,19 +683,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, usleep_range(2000, 5000); } - for_each_dsi_port(port, intel_dsi->ports) { - /* Panel commands can be sent when clock is in LP11 */ - I915_WRITE(MIPI_DEVICE_READY(port), 0x0); - - intel_dsi_reset_clocks(encoder, port); - I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); - - val = I915_READ(MIPI_DSI_FUNC_PRG(port)); - val &= ~VID_MODE_FORMAT_MASK; - I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); - - I915_WRITE(MIPI_DEVICE_READY(port), 0x1); - } + intel_dsi_unprepare(encoder); /* * if disable packets are sent before sending shutdown packet then in @@ -1310,6 +1299,28 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, } } +static void intel_dsi_unprepare(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 val; + + for_each_dsi_port(port, intel_dsi->ports) { + /* Panel commands can be sent when clock is in LP11 */ + I915_WRITE(MIPI_DEVICE_READY(port), 0x0); + + intel_dsi_reset_clocks(encoder, port); + I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); + + val = I915_READ(MIPI_DSI_FUNC_PRG(port)); + val &= ~VID_MODE_FORMAT_MASK; + I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); + + I915_WRITE(MIPI_DEVICE_READY(port), 0x1); + } +} + static int intel_dsi_get_modes(struct drm_connector *connector) { struct intel_connector *intel_connector = to_intel_connector(connector); From 14be7a5c29c2bf3b9567da1e41a514c4bbcdacdf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Feb 2017 11:26:19 +0200 Subject: [PATCH 0407/1299] drm/i915/dsi: Move intel_dsi_clear_device_ready() Move the intel_dsi_clear_device_ready() function to higher up in intel_dsi.c this pairs it with intel_dsi_device_ready(); and pairs intel_dsi_*enable* with intel_dsi_*disable without intel_dsi_clear_device_ready() sitting in the middle of them. This commit purely moves code around, it does not make any changes what-so-ever. Signed-off-by: Hans de Goede Acked-by: Jani Nikula Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/f971d18ea6d350890447860aeb541dba072a6e47.1488273823.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 86 ++++++++++++++++---------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 1f839a837e9c..4cca31061a6f 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -433,6 +433,49 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder) bxt_dsi_device_ready(encoder); } +static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + + DRM_DEBUG_KMS("\n"); + for_each_dsi_port(port, intel_dsi->ports) { + /* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */ + i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? + BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A); + u32 val; + + I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | + ULPS_STATE_ENTER); + usleep_range(2000, 2500); + + I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | + ULPS_STATE_EXIT); + usleep_range(2000, 2500); + + I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | + ULPS_STATE_ENTER); + usleep_range(2000, 2500); + + /* Wait till Clock lanes are in LP-00 state for MIPI Port A + * only. MIPI Port C has no similar bit for checking + */ + if (intel_wait_for_register(dev_priv, + port_ctrl, AFE_LATCHOUT, 0, + 30)) + DRM_ERROR("DSI LP not going Low\n"); + + /* Disable MIPI PHY transparent latch */ + val = I915_READ(port_ctrl); + I915_WRITE(port_ctrl, val & ~LP_OUTPUT_HOLD); + usleep_range(1000, 1500); + + I915_WRITE(MIPI_DEVICE_READY(port), 0x00); + usleep_range(2000, 2500); + } +} + static void intel_dsi_port_enable(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; @@ -621,49 +664,6 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, } } -static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; - - DRM_DEBUG_KMS("\n"); - for_each_dsi_port(port, intel_dsi->ports) { - /* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */ - i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? - BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A); - u32 val; - - I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | - ULPS_STATE_ENTER); - usleep_range(2000, 2500); - - I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | - ULPS_STATE_EXIT); - usleep_range(2000, 2500); - - I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | - ULPS_STATE_ENTER); - usleep_range(2000, 2500); - - /* Wait till Clock lanes are in LP-00 state for MIPI Port A - * only. MIPI Port C has no similar bit for checking - */ - if (intel_wait_for_register(dev_priv, - port_ctrl, AFE_LATCHOUT, 0, - 30)) - DRM_ERROR("DSI LP not going Low\n"); - - /* Disable MIPI PHY transparent latch */ - val = I915_READ(port_ctrl); - I915_WRITE(port_ctrl, val & ~LP_OUTPUT_HOLD); - usleep_range(1000, 1500); - - I915_WRITE(MIPI_DEVICE_READY(port), 0x00); - usleep_range(2000, 2500); - } -} - static void intel_dsi_post_disable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) From 18a00095a5f3b73cabab513119db9275afb62321 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Feb 2017 11:26:20 +0200 Subject: [PATCH 0408/1299] drm/i915/dsi: Make intel_dsi_enable/disable directly exec VBT sequences The drm_panel_enable/disable and drm_panel_prepare/unprepare calls are not fine grained enough to abstract all the different steps we need to take (and VBT sequences we need to exec) properly. So simply remove the panel _enable/disable and prepare/unprepare callbacks and instead export intel_dsi_exec_vbt_sequence() from intel_dsi_panel_vbt.c and call that from intel_dsi_enable/disable(). No functional changes. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/b4ca5185d4788d92df2ed60837a24b8962a8e8ba.1488273823.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 14 +++++-- drivers/gpu/drm/i915/intel_dsi.h | 3 ++ drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 43 +--------------------- 3 files changed, 15 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 4cca31061a6f..a4ff1e64c030 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -601,7 +601,10 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, /* put device in ready state */ intel_dsi_device_ready(encoder); - drm_panel_prepare(intel_dsi->panel); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); /* Enable port in pre-enable phase itself because as per hw team * recommendation, port should be enabled befor plane & pipe */ @@ -614,7 +617,8 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, dpi_send_cmd(intel_dsi, TURN_ON, false, port); msleep(100); - drm_panel_enable(intel_dsi->panel); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); intel_dsi_port_enable(encoder); } @@ -689,7 +693,8 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, * if disable packets are sent before sending shutdown packet then in * some next enable sequence send turn on packet error is observed */ - drm_panel_disable(intel_dsi->panel); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF); intel_dsi_clear_device_ready(encoder); @@ -714,7 +719,8 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, I915_WRITE(DSPCLK_GATE_D, val); } - drm_panel_unprepare(intel_dsi->panel); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_OFF); msleep(intel_dsi->panel_off_delay); diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index d56782396f60..548649158abd 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -132,6 +132,9 @@ static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder) void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port); +void intel_dsi_exec_vbt_sequence(struct intel_dsi *intel_dsi, + enum mipi_seq seq_id); + bool intel_dsi_pll_is_enabled(struct drm_i915_private *dev_priv); int intel_compute_dsi_pll(struct intel_encoder *encoder, struct intel_crtc_state *config); diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 7daaa58289e3..ab922b6eb36a 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -426,10 +426,9 @@ static const char *sequence_name(enum mipi_seq seq_id) return "(unknown)"; } -static void generic_exec_sequence(struct drm_panel *panel, enum mipi_seq seq_id) +void intel_dsi_exec_vbt_sequence(struct intel_dsi *intel_dsi, + enum mipi_seq seq_id) { - struct vbt_panel *vbt_panel = to_vbt_panel(panel); - struct intel_dsi *intel_dsi = vbt_panel->intel_dsi; struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); const u8 *data; fn_mipi_elem_exec mipi_elem_exec; @@ -493,40 +492,6 @@ static void generic_exec_sequence(struct drm_panel *panel, enum mipi_seq seq_id) } } -static int vbt_panel_prepare(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_ASSERT_RESET); - generic_exec_sequence(panel, MIPI_SEQ_POWER_ON); - generic_exec_sequence(panel, MIPI_SEQ_DEASSERT_RESET); - generic_exec_sequence(panel, MIPI_SEQ_INIT_OTP); - - return 0; -} - -static int vbt_panel_unprepare(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_ASSERT_RESET); - generic_exec_sequence(panel, MIPI_SEQ_POWER_OFF); - - return 0; -} - -static int vbt_panel_enable(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_DISPLAY_ON); - generic_exec_sequence(panel, MIPI_SEQ_BACKLIGHT_ON); - - return 0; -} - -static int vbt_panel_disable(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_BACKLIGHT_OFF); - generic_exec_sequence(panel, MIPI_SEQ_DISPLAY_OFF); - - return 0; -} - static int vbt_panel_get_modes(struct drm_panel *panel) { struct vbt_panel *vbt_panel = to_vbt_panel(panel); @@ -550,10 +515,6 @@ static int vbt_panel_get_modes(struct drm_panel *panel) } static const struct drm_panel_funcs vbt_panel_funcs = { - .disable = vbt_panel_disable, - .unprepare = vbt_panel_unprepare, - .prepare = vbt_panel_prepare, - .enable = vbt_panel_enable, .get_modes = vbt_panel_get_modes, }; From 1e08a260b178a6cb5547a28153f88661970474e5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Feb 2017 11:26:21 +0200 Subject: [PATCH 0409/1299] drm/i915/dsi: VLV/CHT Only wait for LP00 on MIPI PORT A On some devices only MIPI PORT C is used, in this case checking the MIPI PORT A CTRL AFE_LATCHOUT bit (there is no such bit for PORT C on VLV/CHT) will result in false positive "DSI LP not going Low" errors as this checks the PORT A clk status. In case both ports are used we have already checked the AFE_LATCHOUT bit when going through the for_each_dsi_port() loop for PORT A and checking the same bit again for PORT C is a no-op. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97061 Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/242e4438bf29ebffc66eaa182f22b9d60d304bc2.1488273823.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index a4ff1e64c030..20ed799714de 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -458,10 +458,12 @@ static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) ULPS_STATE_ENTER); usleep_range(2000, 2500); - /* Wait till Clock lanes are in LP-00 state for MIPI Port A - * only. MIPI Port C has no similar bit for checking + /* + * On VLV/CHV, wait till Clock lanes are in LP-00 state for MIPI + * Port A only. MIPI Port C has no similar bit for checking. */ - if (intel_wait_for_register(dev_priv, + if ((IS_GEN9_LP(dev_priv) || port == PORT_A) && + intel_wait_for_register(dev_priv, port_ctrl, AFE_LATCHOUT, 0, 30)) DRM_ERROR("DSI LP not going Low\n"); From 349ab9192cc32e578ce981936c6ab99ae3a96165 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Feb 2017 11:28:02 +0000 Subject: [PATCH 0410/1299] drm/i915/guc: Make wq_lock irq-safe Following the use of dma_fence_signal() from within our interrupt handler, we need to make guc->wq_lock also irq-safe. This was done previously as part of the guc scheduler patch (which also started mixing our fences with the interrupt handler), but is now required to fix the current guc submission backend. v4: Document that __i915_guc_submit is always under an irq disabled section v5: Move wq_rsvd adjustment to its own function Fixes: 67b807a89230 ("drm/i915: Delay disabling the user interrupt for breadcrumbs") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Michal Wajdeczko Link: http://patchwork.freedesktop.org/patch/msgid/20170228112803.11646-2-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 28 +++++++++++++++------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index beec88a30347..d6a6cf2540a1 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -348,7 +348,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request) u32 freespace; int ret; - spin_lock(&client->wq_lock); + spin_lock_irq(&client->wq_lock); freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size); freespace -= client->wq_rsvd; if (likely(freespace >= wqi_size)) { @@ -358,21 +358,27 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request) client->no_wq_space++; ret = -EAGAIN; } - spin_unlock(&client->wq_lock); + spin_unlock_irq(&client->wq_lock); return ret; } +static void guc_client_update_wq_rsvd(struct i915_guc_client *client, int size) +{ + unsigned long flags; + + spin_lock_irqsave(&client->wq_lock, flags); + client->wq_rsvd += size; + spin_unlock_irqrestore(&client->wq_lock, flags); +} + void i915_guc_wq_unreserve(struct drm_i915_gem_request *request) { - const size_t wqi_size = sizeof(struct guc_wq_item); + const int wqi_size = sizeof(struct guc_wq_item); struct i915_guc_client *client = request->i915->guc.execbuf_client; GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size); - - spin_lock(&client->wq_lock); - client->wq_rsvd -= wqi_size; - spin_unlock(&client->wq_lock); + guc_client_update_wq_rsvd(client, -wqi_size); } /* Construct a Work Item and append it to the GuC's Work Queue */ @@ -511,6 +517,9 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) struct i915_guc_client *client = guc->execbuf_client; int b_ret; + /* We are always called with irqs disabled */ + GEM_BUG_ON(!irqs_disabled()); + spin_lock(&client->wq_lock); guc_wq_item_append(client, rq); @@ -945,16 +954,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) /* Take over from manual control of ELSP (execlists) */ for_each_engine(engine, dev_priv, id) { + const int wqi_size = sizeof(struct guc_wq_item); struct drm_i915_gem_request *rq; engine->submit_request = i915_guc_submit; engine->schedule = NULL; /* Replay the current set of previously submitted requests */ + spin_lock_irq(&engine->timeline->lock); list_for_each_entry(rq, &engine->timeline->requests, link) { - client->wq_rsvd += sizeof(struct guc_wq_item); + guc_client_update_wq_rsvd(client, wqi_size); __i915_guc_submit(rq); } + spin_unlock_irq(&engine->timeline->lock); } return 0; From 0c33518db7d394d4eef1ce13f9dce32713f14beb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Feb 2017 11:28:03 +0000 Subject: [PATCH 0411/1299] drm/i915/guc: Reorder __i915_guc_submit to reduce spinlock holdtime A couple of operations, the flushes and the tracepoint, do not require serialisation by client->wq_lock, so move them before we take it. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170228112803.11646-3-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index d6a6cf2540a1..7b535a32fc27 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -517,18 +517,18 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) struct i915_guc_client *client = guc->execbuf_client; int b_ret; - /* We are always called with irqs disabled */ - GEM_BUG_ON(!irqs_disabled()); - - spin_lock(&client->wq_lock); - guc_wq_item_append(client, rq); - /* WA to flush out the pending GMADR writes to ring buffer. */ if (i915_vma_is_map_and_fenceable(rq->ring->vma)) POSTING_READ_FW(GUC_STATUS); trace_i915_gem_request_in(rq, 0); + /* We are always called with irqs disabled */ + GEM_BUG_ON(!irqs_disabled()); + + spin_lock(&client->wq_lock); + + guc_wq_item_append(client, rq); b_ret = guc_ring_doorbell(client); client->submissions[engine_id] += 1; @@ -538,6 +538,7 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) guc->submissions[engine_id] += 1; guc->last_seqno[engine_id] = rq->global_seqno; + spin_unlock(&client->wq_lock); } From 0aa5277c3a6dbe9d5991c490ef23fe900254d931 Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 28 Feb 2017 15:39:25 +0800 Subject: [PATCH 0412/1299] drm/i915/gvt: have more registers with F_CMD_ACCESS flags set those registers are render registers, should have F_CMD_ACCESS flag set v4: rebase to lastest code base v3: per zhenyu's comments, move newly added registers to a separate patch v2: per Kevin's comments, move newly added registers to the tails of lists. Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 217 +++++++++++++++------------- 1 file changed, 120 insertions(+), 97 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 57b4d538f370..37956eee342c 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1523,6 +1523,9 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, #define MMIO_GM(reg, d, r, w) \ MMIO_F(reg, 4, F_GMADR, 0xFFFFF000, 0, d, r, w) +#define MMIO_GM_RDR(reg, d, r, w) \ + MMIO_F(reg, 4, F_GMADR | F_CMD_ACCESS, 0xFFFFF000, 0, d, r, w) + #define MMIO_RO(reg, d, f, rm, r, w) \ MMIO_F(reg, 4, F_RO | f, 0, rm, d, r, w) @@ -1542,6 +1545,9 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, #define MMIO_RING_GM(prefix, d, r, w) \ MMIO_RING_F(prefix, 4, F_GMADR, 0xFFFF0000, 0, d, r, w) +#define MMIO_RING_GM_RDR(prefix, d, r, w) \ + MMIO_RING_F(prefix, 4, F_GMADR | F_CMD_ACCESS, 0xFFFF0000, 0, d, r, w) + #define MMIO_RING_RO(prefix, d, f, rm, r, w) \ MMIO_RING_F(prefix, 4, F_RO | f, 0, rm, d, r, w) @@ -1550,45 +1556,48 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; - MMIO_RING_DFH(RING_IMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler); + MMIO_RING_DFH(RING_IMR, D_ALL, F_CMD_ACCESS, NULL, + intel_vgpu_reg_imr_handler); MMIO_DFH(SDEIMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler); MMIO_DFH(SDEIER, D_ALL, 0, NULL, intel_vgpu_reg_ier_handler); MMIO_DFH(SDEIIR, D_ALL, 0, NULL, intel_vgpu_reg_iir_handler); MMIO_D(SDEISR, D_ALL); - MMIO_RING_D(RING_HWSTAM, D_ALL); + MMIO_RING_DFH(RING_HWSTAM, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_GM(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL); - MMIO_GM(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL); - MMIO_GM(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL); - MMIO_GM(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(RENDER_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL); + MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL); #define RING_REG(base) (base + 0x28) - MMIO_RING_D(RING_REG, D_ALL); + MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x134) - MMIO_RING_D(RING_REG, D_ALL); + MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG - MMIO_GM(0x2148, D_ALL, NULL, NULL); - MMIO_GM(CCID, D_ALL, NULL, NULL); - MMIO_GM(0x12198, D_ALL, NULL, NULL); + MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL); + MMIO_GM_RDR(CCID, D_ALL, NULL, NULL); + MMIO_GM_RDR(0x12198, D_ALL, NULL, NULL); MMIO_D(GEN7_CXT_SIZE, D_ALL); - MMIO_RING_D(RING_TAIL, D_ALL); - MMIO_RING_D(RING_HEAD, D_ALL); - MMIO_RING_D(RING_CTL, D_ALL); - MMIO_RING_D(RING_ACTHD, D_ALL); - MMIO_RING_GM(RING_START, D_ALL, NULL, NULL); + MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_HEAD, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_CTL, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_ACTHD, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL); /* RING MODE */ #define RING_REG(base) (base + 0x29c) - MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK, NULL, ring_mode_mmio_write); + MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, + ring_mode_mmio_write); #undef RING_REG - MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS, @@ -1596,28 +1605,30 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS, ring_timestamp_mmio_read, NULL); - MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x2124, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2124, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x2088, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x2470, D_ALL, F_MODE_MASK, NULL, NULL); - MMIO_D(GAM_ECOCHK, D_ALL); - MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2088, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2470, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x9030, D_ALL); - MMIO_D(0x20a0, D_ALL); - MMIO_D(0x2420, D_ALL); - MMIO_D(0x2430, D_ALL); - MMIO_D(0x2434, D_ALL); - MMIO_D(0x2438, D_ALL); - MMIO_D(0x243c, D_ALL); - MMIO_DFH(0x7018, D_ALL, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2430, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2434, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2438, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x243c, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x7018, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2148,8 +2159,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(FORCEWAKE_ACK, D_ALL); MMIO_D(GEN6_GT_CORE_STATUS, D_ALL); MMIO_D(GEN6_GT_THREAD_STATUS_REG, D_ALL); - MMIO_D(GTFIFODBG, D_ALL); - MMIO_D(GTFIFOCTL, D_ALL); + MMIO_DFH(GTFIFODBG, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GTFIFOCTL, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DH(FORCEWAKE_MT, D_PRE_SKL, NULL, mul_force_wake_write); MMIO_DH(FORCEWAKE_ACK_HSW, D_HSW | D_BDW, NULL, NULL); MMIO_D(ECOBUS, D_ALL); @@ -2291,29 +2302,29 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x1217c, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_F(0x2290, 8, 0, 0, 0, D_HSW_PLUS, NULL, NULL); - MMIO_D(GEN7_OACONTROL, D_HSW); + MMIO_F(0x2290, 8, F_CMD_ACCESS, 0, 0, D_HSW_PLUS, NULL, NULL); + MMIO_DFH(GEN7_OACONTROL, D_HSW, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x2b00, D_BDW_PLUS); MMIO_D(0x2360, D_BDW_PLUS); - MMIO_F(0x5200, 32, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x5240, 32, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x5280, 16, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(0x5200, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(0x5240, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(0x5280, 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_DFH(0x1c17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0x1c178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(BCS_SWCTRL, D_ALL); + MMIO_DFH(BCS_SWCTRL, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_F(HS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(DS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(IA_VERTICES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(IA_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(VS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(GS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(GS_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(CL_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(CL_PRIMITIVES_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(PS_INVOCATION_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(PS_DEPTH_COUNT, 8, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(HS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(DS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(IA_VERTICES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(IA_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(VS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(GS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(GS_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(CL_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(CL_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(PS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(PS_DEPTH_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_DH(0x4260, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); MMIO_DH(0x4264, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); MMIO_DH(0x4268, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); @@ -2329,7 +2340,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) struct drm_i915_private *dev_priv = gvt->dev_priv; int ret; - MMIO_DH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, + MMIO_DFH(RING_IMR(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, NULL, intel_vgpu_reg_imr_handler); MMIO_DH(GEN8_GT_IMR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler); @@ -2394,24 +2405,31 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL, intel_vgpu_reg_master_irq_handler); - MMIO_D(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(0x1c134, D_BDW_PLUS); + MMIO_DFH(RING_HWSTAM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1c134, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(RING_HEAD(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_GM(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); - MMIO_D(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_D(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS); - MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK, NULL, ring_mode_mmio_write); - MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK, - NULL, NULL); - MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_MODE_MASK, - NULL, NULL); + MMIO_DFH(RING_TAIL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(RING_HEAD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_GM_RDR(RING_START(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); + MMIO_DFH(RING_CTL(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(RING_ACTHD(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(RING_ACTHD_UDW(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1c29c, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, + ring_mode_mmio_write); + MMIO_DFH(RING_MI_MODE(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(RING_INSTPM(GEN8_BSD2_RING_BASE), D_BDW_PLUS, + F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(RING_TIMESTAMP(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, ring_timestamp_mmio_read, NULL); - MMIO_RING_D(RING_ACTHD_UDW, D_BDW_PLUS); + MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); #define RING_REG(base) (base + 0xd0) MMIO_RING_F(RING_REG, 4, F_RO, 0, @@ -2428,13 +2446,16 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) #undef RING_REG #define RING_REG(base) (base + 0x234) - MMIO_RING_F(RING_REG, 8, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL); - MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO, 0, ~0LL, D_BDW_PLUS, NULL, NULL); + MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS, + NULL, NULL); + MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 4, F_RO | F_CMD_ACCESS, 0, + ~0LL, D_BDW_PLUS, NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x244) - MMIO_RING_D(RING_REG, D_BDW_PLUS); - MMIO_D(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS); + MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, F_CMD_ACCESS, + NULL, NULL); #undef RING_REG #define RING_REG(base) (base + 0x370) @@ -2468,8 +2489,8 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_F(RING_REG(GEN8_BSD2_RING_BASE), 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG - MMIO_RING_GM(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); - MMIO_GM(0x1c080, D_BDW_PLUS, NULL, NULL); + MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL); + MMIO_GM_RDR(RING_HWS_PGA(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL); MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); @@ -2490,15 +2511,17 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS); MMIO_D(0xfdc, D_BDW_PLUS); - MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(GEN7_ROW_CHICKEN2, D_BDW_PLUS); - MMIO_D(GEN8_UCGCTL6, D_BDW_PLUS); + MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(GEN7_ROW_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); + MMIO_DFH(GEN8_UCGCTL6, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0xb1f0, D_BDW); - MMIO_D(0xb1c0, D_BDW); + MMIO_DFH(0xb1f0, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xb1c0, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN8_L3SQCREG4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0xb100, D_BDW); - MMIO_D(0xb10c, D_BDW); + MMIO_DFH(0xb100, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xb10c, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(0xb110, D_BDW); MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, @@ -2508,10 +2531,10 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(0x44484, D_BDW_PLUS); MMIO_D(0x4448c, D_BDW_PLUS); - MMIO_D(0x83a4, D_BDW); + MMIO_DFH(0x83a4, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS); - MMIO_D(0x8430, D_BDW); + MMIO_DFH(0x8430, D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x110000, D_BDW_PLUS); @@ -2523,9 +2546,9 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK, NULL, NULL); + MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x2248, D_BDW); + MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2707,15 +2730,15 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0xd08, D_SKL); MMIO_D(0x20e0, D_SKL); - MMIO_D(0x20ec, D_SKL); + MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* TRTT */ - MMIO_D(0x4de0, D_SKL); - MMIO_D(0x4de4, D_SKL); - MMIO_D(0x4de8, D_SKL); - MMIO_D(0x4dec, D_SKL); - MMIO_D(0x4df0, D_SKL); - MMIO_DH(0x4df4, D_SKL, NULL, gen9_trtte_write); + MMIO_DFH(0x4de0, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4de4, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4de8, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4dec, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4df0, D_SKL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x4df4, D_SKL, F_CMD_ACCESS, NULL, gen9_trtte_write); MMIO_DH(0x4dfc, D_SKL, NULL, gen9_trtt_chicken_write); MMIO_D(0x45008, D_SKL); @@ -2739,7 +2762,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0x65f08, D_SKL); MMIO_D(0x320f0, D_SKL); - MMIO_D(_REG_VCS2_EXCC, D_SKL); + MMIO_DFH(_REG_VCS2_EXCC, D_SKL, F_CMD_ACCESS, NULL, NULL); MMIO_D(0x70034, D_SKL); MMIO_D(0x71034, D_SKL); MMIO_D(0x72034, D_SKL); @@ -2752,7 +2775,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL); MMIO_D(0x44500, D_SKL); - MMIO_D(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS); + MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } From 9112caafdf01439a6e43f4d8c09ceed7dc613d4a Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 28 Feb 2017 15:40:10 +0800 Subject: [PATCH 0413/1299] drm/i915/gvt: add more registers into handlers list those registers are render registers with F_CMD_ACCESS flag set Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 37956eee342c..ef17c38e00c4 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2332,6 +2332,17 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(0x4270, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); MMIO_DFH(0x4094, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(ARB_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_RING_GM_RDR(RING_BBADDR, D_ALL, NULL, NULL); + MMIO_DFH(0x2220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x12220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x22220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_SYNC_1, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_RING_DFH(RING_SYNC_0, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x22178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1a178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x1a17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0x2217c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2550,6 +2561,15 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe220, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe230, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe240, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe260, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe270, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe280, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe2a0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe2b0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(0xe2c0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2776,6 +2796,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_D(0x44500, D_SKL); MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL, F_MODE_MASK | F_CMD_ACCESS, + NULL, NULL); return 0; } From 1f58af304cce9e4a25b62b3619e69c586203c8ca Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Tue, 28 Feb 2017 15:41:03 +0800 Subject: [PATCH 0414/1299] drm/i915/gvt: fix an error for one register register 0x20e0 should be mode register v2: rebased to latest code base Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index ef17c38e00c4..548aedfbd402 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2749,7 +2749,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL); MMIO_D(0xd08, D_SKL); - MMIO_D(0x20e0, D_SKL); + MMIO_DFH(0x20e0, D_SKL, F_MODE_MASK, NULL, NULL); MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* TRTT */ From 858b0f571d30916bd69c45922045f24f26d6bfc9 Mon Sep 17 00:00:00 2001 From: Bing Niu Date: Tue, 28 Feb 2017 11:39:48 -0500 Subject: [PATCH 0415/1299] drm/i915/gvt: set SFUSE_STRAP properly for vitual monitor detection update the correct virtual montior connection status to vreg v2: address yulei's comment on commit message Signed-off-by: Bing Niu Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/display.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 43e02e038375..5419ae6ec633 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -176,14 +176,20 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | SDE_PORTE_HOTPLUG_SPT); - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; + vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED; + } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT; + vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED; + } - if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) + if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) { vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; + vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; + } if (IS_SKYLAKE(dev_priv) && intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) { @@ -196,6 +202,8 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) GEN8_PORT_DP_A_HOTPLUG; else vgpu_vreg(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT; + + vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED; } } From d1a9ccc4b1374a5a7762031fd8e4e398c68549e6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 28 Feb 2017 11:02:48 +0000 Subject: [PATCH 0416/1299] scsi: qedi: fix missing return error code check on call to qedi_setup_int The call to qedi_setup_int is not updating the return code rc yet rc is being checked for an error. Fix this by assigning rc to the return code from the call to qedi_setup_int. Signed-off-by: Colin Ian King Acked-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 5eda21d903e9..8e3d92807cb8 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1805,7 +1805,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) */ qedi_ops->common->update_pf_params(qedi->cdev, &qedi->pf_params); - qedi_setup_int(qedi); + rc = qedi_setup_int(qedi); if (rc) goto stop_iscsi_func; From 6f8830f5bbab16e54f261de187f3df4644a5b977 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Mon, 27 Feb 2017 16:58:36 -0800 Subject: [PATCH 0417/1299] scsi: libiscsi: add lock around task lists to fix list corruption regression There's a rather long standing regression from the commit "libiscsi: Reduce locking contention in fast path" Depending on iSCSI target behavior, it's possible to hit the case in iscsi_complete_task where the task is still on a pending list (!list_empty(&task->running)). When that happens the task is removed from the list while holding the session back_lock, but other task list modification occur under the frwd_lock. That leads to linked list corruption and eventually a panicked system. Rather than back out the session lock split entirely, in order to try and keep some of the performance gains this patch adds another lock to maintain the task lists integrity. Major enterprise supported kernels have been backing out the lock split for while now, thanks to the efforts at IBM where a lab setup has the most reliable reproducer I've seen on this issue. This patch has been tested there successfully. Signed-off-by: Chris Leech Fixes: 659743b02c41 ("[SCSI] libiscsi: Reduce locking contention in fast path") Reported-by: Prashantha Subbarao Reviewed-by: Guilherme G. Piccoli Cc: # v3.15+ Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 26 +++++++++++++++++++++++++- include/scsi/libiscsi.h | 1 + 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 834d1212b6d5..3fca34a675af 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -560,8 +560,12 @@ static void iscsi_complete_task(struct iscsi_task *task, int state) WARN_ON_ONCE(task->state == ISCSI_TASK_FREE); task->state = state; - if (!list_empty(&task->running)) + spin_lock_bh(&conn->taskqueuelock); + if (!list_empty(&task->running)) { + pr_debug_once("%s while task on list", __func__); list_del_init(&task->running); + } + spin_unlock_bh(&conn->taskqueuelock); if (conn->task == task) conn->task = NULL; @@ -783,7 +787,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, if (session->tt->xmit_task(task)) goto free_task; } else { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->mgmtqueue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -1474,8 +1480,10 @@ void iscsi_requeue_task(struct iscsi_task *task) * this may be on the requeue list already if the xmit_task callout * is handling the r2ts while we are adding new ones */ + spin_lock_bh(&conn->taskqueuelock); if (list_empty(&task->running)) list_add_tail(&task->running, &conn->requeue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } EXPORT_SYMBOL_GPL(iscsi_requeue_task); @@ -1512,22 +1520,26 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) * only have one nop-out as a ping from us and targets should not * overflow us with nop-ins */ + spin_lock_bh(&conn->taskqueuelock); check_mgmt: while (!list_empty(&conn->mgmtqueue)) { conn->task = list_entry(conn->mgmtqueue.next, struct iscsi_task, running); list_del_init(&conn->task->running); + spin_unlock_bh(&conn->taskqueuelock); if (iscsi_prep_mgmt_task(conn, conn->task)) { /* regular RX path uses back_lock */ spin_lock_bh(&conn->session->back_lock); __iscsi_put_task(conn->task); spin_unlock_bh(&conn->session->back_lock); conn->task = NULL; + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_xmit_task(conn); if (rc) goto done; + spin_lock_bh(&conn->taskqueuelock); } /* process pending command queue */ @@ -1535,19 +1547,24 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task, running); list_del_init(&conn->task->running); + spin_unlock_bh(&conn->taskqueuelock); if (conn->session->state == ISCSI_STATE_LOGGING_OUT) { fail_scsi_task(conn->task, DID_IMM_RETRY); + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_prep_scsi_cmd_pdu(conn->task); if (rc) { if (rc == -ENOMEM || rc == -EACCES) { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&conn->task->running, &conn->cmdqueue); conn->task = NULL; + spin_unlock_bh(&conn->taskqueuelock); goto done; } else fail_scsi_task(conn->task, DID_ABORT); + spin_lock_bh(&conn->taskqueuelock); continue; } rc = iscsi_xmit_task(conn); @@ -1558,6 +1575,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) * we need to check the mgmt queue for nops that need to * be sent to aviod starvation */ + spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } @@ -1577,12 +1595,15 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) conn->task = task; list_del_init(&conn->task->running); conn->task->state = ISCSI_TASK_RUNNING; + spin_unlock_bh(&conn->taskqueuelock); rc = iscsi_xmit_task(conn); if (rc) goto done; + spin_lock_bh(&conn->taskqueuelock); if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } + spin_unlock_bh(&conn->taskqueuelock); spin_unlock_bh(&conn->session->frwd_lock); return -ENODATA; @@ -1738,7 +1759,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) goto prepd_reject; } } else { + spin_lock_bh(&conn->taskqueuelock); list_add_tail(&task->running, &conn->cmdqueue); + spin_unlock_bh(&conn->taskqueuelock); iscsi_conn_queue_work(conn); } @@ -2896,6 +2919,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size, INIT_LIST_HEAD(&conn->mgmtqueue); INIT_LIST_HEAD(&conn->cmdqueue); INIT_LIST_HEAD(&conn->requeue); + spin_lock_init(&conn->taskqueuelock); INIT_WORK(&conn->xmitwork, iscsi_xmitworker); /* allocate login_task used for the login/text sequences */ diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index b0e275de6dec..583875ea136a 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -196,6 +196,7 @@ struct iscsi_conn { struct iscsi_task *task; /* xmit task in progress */ /* xmit */ + spinlock_t taskqueuelock; /* protects the next three lists */ struct list_head mgmtqueue; /* mgmt (control) xmit queue */ struct list_head cmdqueue; /* data-path cmd queue */ struct list_head requeue; /* tasks needing another run */ From b6b6fbc8310e1b12b05717da9df6953b138f812b Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Wed, 1 Mar 2017 14:34:52 +0800 Subject: [PATCH 0418/1299] drm/i915/gvt: use pfn_valid for better checking Before get the page from pfn, use pfn_valid to check if pfn is able to translate to page structure. Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/kvmgt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 182914c22ac5..241354890603 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -96,10 +96,10 @@ static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn, struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev; dma_addr_t daddr; - page = pfn_to_page(pfn); - if (is_error_page(page)) + if (unlikely(!pfn_valid(pfn))) return -EFAULT; + page = pfn_to_page(pfn); daddr = dma_map_page(dev, page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, daddr)) From 9dcfe2c75b51f454f39c2de4756e841228865b47 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Mar 2017 09:25:55 +0100 Subject: [PATCH 0419/1299] locking/refcounts: Change WARN() to WARN_ONCE() Linus noticed that the new refcount.h APIs used WARN(), which would turn into a dmesg DoS if it triggers frequently on some buggy driver. So make sure we only warn once. These warnings are never supposed to happen, so it's typically not a problem to lose subsequent warnings. Suggested-by: Linus Torvalds Cc: Peter Zijlstra (Intel) Cc: Elena Reshetova Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/CA+55aFzbYUTZ=oqZ2YgDjY0C2_n6ODhTfqj6V+m5xWmDxsuB0w@mail.gmail.com Signed-off-by: Ingo Molnar --- lib/refcount.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/refcount.c b/lib/refcount.c index 1d33366189d1..aa09ad3c30b0 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -58,7 +58,7 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r) val = old; } - WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); return true; } @@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(refcount_add_not_zero); void refcount_add(unsigned int i, refcount_t *r) { - WARN(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); + WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); } EXPORT_SYMBOL_GPL(refcount_add); @@ -97,7 +97,7 @@ bool refcount_inc_not_zero(refcount_t *r) val = old; } - WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); + WARN_ONCE(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n"); return true; } @@ -111,7 +111,7 @@ EXPORT_SYMBOL_GPL(refcount_inc_not_zero); */ void refcount_inc(refcount_t *r) { - WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); + WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); } EXPORT_SYMBOL_GPL(refcount_inc); @@ -125,7 +125,7 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r) new = val - i; if (new > val) { - WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n"); return false; } @@ -164,7 +164,7 @@ EXPORT_SYMBOL_GPL(refcount_dec_and_test); void refcount_dec(refcount_t *r) { - WARN(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); + WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); } EXPORT_SYMBOL_GPL(refcount_dec); @@ -204,7 +204,7 @@ bool refcount_dec_not_one(refcount_t *r) new = val - 1; if (new > val) { - WARN(new > val, "refcount_t: underflow; use-after-free.\n"); + WARN_ONCE(new > val, "refcount_t: underflow; use-after-free.\n"); return true; } From 75013fb16f8484898eaa8d0b08fed942d790f029 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 1 Mar 2017 01:23:24 +0900 Subject: [PATCH 0420/1299] kprobes/x86: Fix kernel panic when certain exception-handling addresses are probed Fix to the exception table entry check by using probed address instead of the address of copied instruction. This bug may cause unexpected kernel panic if user probe an address where an exception can happen which should be fixup by __ex_table (e.g. copy_from_user.) Unless user puts a kprobe on such address, this doesn't cause any problem. This bug has been introduced years ago, by commit: 464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently"). Signed-off-by: Masami Hiramatsu Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently") Link: http://lkml.kernel.org/r/148829899399.28855.12581062400757221722.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/common.h | 2 +- arch/x86/kernel/kprobes/core.c | 6 +++--- arch/x86/kernel/kprobes/opt.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index c6ee63f927ab..d688826e5736 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -67,7 +67,7 @@ #endif /* Ensure if the instruction can be boostable */ -extern int can_boost(kprobe_opcode_t *instruction); +extern int can_boost(kprobe_opcode_t *instruction, void *addr); /* Recover instruction if given address is probed */ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 520b8dfe1640..88b3c942473d 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -166,12 +166,12 @@ NOKPROBE_SYMBOL(skip_prefixes); * Returns non-zero if opcode is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -int can_boost(kprobe_opcode_t *opcodes) +int can_boost(kprobe_opcode_t *opcodes, void *addr) { kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; - if (search_exception_tables((unsigned long)opcodes)) + if (search_exception_tables((unsigned long)addr)) return 0; /* Page fault may occur on this address. */ retry: @@ -416,7 +416,7 @@ static int arch_copy_kprobe(struct kprobe *p) * __copy_instruction can modify the displacement of the instruction, * but it doesn't affect boostable check. */ - if (can_boost(p->ainsn.insn)) + if (can_boost(p->ainsn.insn, p->addr)) p->ainsn.boostable = 0; else p->ainsn.boostable = -1; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 3d1bee9d6a72..3e7c6e5a08ff 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -178,7 +178,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src) while (len < RELATIVEJUMP_SIZE) { ret = __copy_instruction(dest + len, src + len); - if (!ret || !can_boost(dest + len)) + if (!ret || !can_boost(dest + len, src + len)) return -EINVAL; len += ret; } From 10bce8410607a18eb3adf5d2739db8c8593e110d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 27 Feb 2017 23:50:58 +0100 Subject: [PATCH 0421/1299] x86/kdebugfs: Move boot params hierarchy under (debugfs)/x86/ ... since this is all x86-specific data and it makes sense to have it under x86/ logically instead in the toplevel debugfs dir. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170227225058.27289-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/kdebugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index bdb83e431d89..38b64587b31b 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -167,7 +167,7 @@ static int __init boot_params_kdebugfs_init(void) struct dentry *dbp, *version, *data; int error = -ENOMEM; - dbp = debugfs_create_dir("boot_params", NULL); + dbp = debugfs_create_dir("boot_params", arch_debugfs_dir); if (!dbp) return -ENOMEM; From e7c95effcd3a7a0c9535c809141ca499fede2c31 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 28 Feb 2017 07:05:59 +0100 Subject: [PATCH 0422/1299] s390/crypt: fix missing unlock in ctr_paes_crypt on error path The ctr mode of protected key aes uses the ctrblk page if the ctrblk_lock could be acquired. If the protected key has to be reestablished and this operation fails the unlock for the ctrblk_lock is missing. Add it. Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/paes_s390.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index d69ea495c4d7..716b17238599 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -474,8 +474,11 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier, ret = blkcipher_walk_done(desc, walk, nbytes - n); } if (k < n) { - if (__ctr_paes_set_key(ctx) != 0) + if (__ctr_paes_set_key(ctx) != 0) { + if (locked) + spin_unlock(&ctrblk_lock); return blkcipher_walk_done(desc, walk, -EIO); + } } } if (locked) From d9fcf2a1cbd1ff65d0109b1b400938808007fcd5 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 28 Feb 2017 07:42:01 +0100 Subject: [PATCH 0423/1299] s390: fix in-kernel program checks A program check inside the kernel takes a slightly different path in entry.S compare to a normal user fault. A recent change moved the store of the breaking event address into the path taken for in-kernel program checks as well, but %r14 has not been setup to point to the correct location. A wild store is the consequence. Move the store of the breaking event address to the code path for user space faults. Fixes: 34525e1f7e8d ("s390: store breaking event address only for program checks") Reported-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index dff2152350a7..6a7d737d514c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -490,7 +490,7 @@ ENTRY(pgm_check_handler) jnz .Lpgm_svcper # -> single stepped svc 1: CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) - j 3f + j 4f 2: UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER lg %r15,__LC_KERNEL_STACK lgr %r14,%r12 @@ -499,8 +499,8 @@ ENTRY(pgm_check_handler) tm __LC_PGM_ILC+2,0x02 # check for transaction abort jz 3f mvc __THREAD_trap_tdb(256,%r14),0(%r13) -3: la %r11,STACK_FRAME_OVERHEAD(%r15) - stg %r10,__THREAD_last_break(%r14) +3: stg %r10,__THREAD_last_break(%r14) +4: la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC stmg %r8,%r9,__PT_PSW(%r11) @@ -509,14 +509,14 @@ ENTRY(pgm_check_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception - jz 4f + jz 5f tmhh %r8,0x0001 # kernel per event ? jz .Lpgm_kprobe oi __PT_FLAGS+7(%r11),_PIF_PER_TRAP mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS mvc __THREAD_per_cause(2,%r14),__LC_PER_CODE mvc __THREAD_per_paid(1,%r14),__LC_PER_ACCESS_ID -4: REENABLE_IRQS +5: REENABLE_IRQS xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) larl %r1,pgm_check_table llgh %r10,__PT_INT_CODE+2(%r11) From e69ca822ce0ed3ba006ce384d7d205c81d92373f Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:16:03 +0100 Subject: [PATCH 0424/1299] s390/cputime: remove last traces of cputime_t The cputime_t type is a thing of the past, replace the last occurences of the type in the s390 code with a simple u64. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cputime.h | 14 ++------------ arch/s390/kernel/vtime.c | 2 +- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index d1c407ddf703..e5672d6276a6 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -8,32 +8,22 @@ #define _S390_CPUTIME_H #include -#include #define CPUTIME_PER_USEC 4096ULL #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC) /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ -typedef unsigned long long __nocast cputime_t; -typedef unsigned long long __nocast cputime64_t; - #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new) -static inline unsigned long __div(unsigned long long n, unsigned long base) -{ - return n / base; -} - /* * Convert cputime to microseconds and back. */ -static inline unsigned int cputime_to_usecs(const cputime_t cputime) +static inline u64 cputime_to_usecs(const u64 cputime) { - return (__force unsigned long long) cputime >> 12; + return cputime >> 12; } - u64 arch_cpu_idle_time(int cpu); #define arch_idle_time(cpu) arch_cpu_idle_time(cpu) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 31bd96e81167..8f5f59a151b4 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -111,7 +111,7 @@ static inline u64 scale_vtime(u64 vtime) } static void account_system_index_scaled(struct task_struct *p, - cputime_t cputime, cputime_t scaled, + u64 cputime, u64 scaled, enum cpu_usage_stat index) { p->stimescaled += cputime_to_nsecs(scaled); From 3c915bdc1775acfa214195da1ffb39dabdd1a389 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:18:34 +0100 Subject: [PATCH 0425/1299] s390/cputime: reset all accounting fields on fork copy_thread has to reset all cputime related field in the task struct, not only user_timer and system_timer. Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/process.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 54281660582c..249deafaa6ee 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -121,7 +121,10 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp, clear_tsk_thread_flag(p, TIF_SINGLE_STEP); /* Initialize per thread user and system timer values */ p->thread.user_timer = 0; + p->thread.guest_timer = 0; p->thread.system_timer = 0; + p->thread.hardirq_timer = 0; + p->thread.softirq_timer = 0; frame->sf.back_chain = 0; /* new return point is ret_from_fork */ From e53051e757d6cd66741955b93581e54415e48a70 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:21:10 +0100 Subject: [PATCH 0426/1299] s390/cputime: provide archicture specific cputime_to_nsecs The generic cputime_to_nsecs function first converts the cputime to micro-seconds and then multiplies the result with 1000. This looses some bits of accuracy, provide our own version of cputime_to_nsecs that does not loose precision. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cputime.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index e5672d6276a6..9072bf63a846 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -8,6 +8,7 @@ #define _S390_CPUTIME_H #include +#include #define CPUTIME_PER_USEC 4096ULL #define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC) @@ -17,13 +18,18 @@ #define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new) /* - * Convert cputime to microseconds and back. + * Convert cputime to microseconds. */ static inline u64 cputime_to_usecs(const u64 cputime) { return cputime >> 12; } +/* + * Convert cputime to nanoseconds. + */ +#define cputime_to_nsecs(cputime) tod_to_ns(cputime) + u64 arch_cpu_idle_time(int cpu); #define arch_idle_time(cpu) arch_cpu_idle_time(cpu) From d03bd0454b101adb94d0b5a9cc11396182943cb4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 1 Mar 2017 09:47:57 +0100 Subject: [PATCH 0427/1299] s390/timex: micro optimization for tod_to_ns The conversion of a TOD value to nano-seconds currently uses a 32/32 bit split with the calculation for "nsecs = (TOD * 125) >> 9". Using a 55/9 bit split saves an instruction. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/timex.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 354344dcc198..118535123f34 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -206,20 +206,16 @@ static inline unsigned long long get_tod_clock_monotonic(void) * ns = (todval * 125) >> 9; * * In order to avoid an overflow with the multiplication we can rewrite this. - * With a split todval == 2^32 * th + tl (th upper 32 bits, tl lower 32 bits) + * With a split todval == 2^9 * th + tl (th upper 55 bits, tl lower 9 bits) * we end up with * - * ns = ((2^32 * th + tl) * 125 ) >> 9; - * -> ns = (2^23 * th * 125) + ((tl * 125) >> 9); + * ns = ((2^9 * th + tl) * 125 ) >> 9; + * -> ns = (th * 125) + ((tl * 125) >> 9); * */ static inline unsigned long long tod_to_ns(unsigned long long todval) { - unsigned long long ns; - - ns = ((todval >> 32) << 23) * 125; - ns += ((todval & 0xffffffff) * 125) >> 9; - return ns; + return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9); } #endif From b28ace12661fbcfd90959c1e84ff5a85113a82a1 Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Thu, 23 Feb 2017 10:48:55 +0100 Subject: [PATCH 0428/1299] irqchip/crossbar: Fix incorrect type of local variables The max and entry variables are unsigned according to the dt-bindings. Fix following 3 sparse issues (-Wtypesign): drivers/irqchip/irq-crossbar.c:222:52: warning: incorrect type in argument 3 (different signedness) drivers/irqchip/irq-crossbar.c:222:52: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:222:52: got int * drivers/irqchip/irq-crossbar.c:245:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:245:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:245:56: got int * drivers/irqchip/irq-crossbar.c:263:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:263:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:263:56: got int * Signed-off-by: Franck Demathieu Cc: marc.zyngier@arm.com Cc: jason@lakedaemon.net Link: http://lkml.kernel.org/r/20170223094855.6546-1-fdemathieu@gmail.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-crossbar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 1eef56a89b1f..05bbf171df37 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -198,7 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { - int i, size, max = 0, reserved = 0, entry; + int i, size, reserved = 0; + u32 max = 0, entry; const __be32 *irqsr; int ret = -ENOMEM; From bb3f0a52630c84807fca9bdd76ac2f5dcec82689 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Tue, 28 Feb 2017 13:50:52 +0800 Subject: [PATCH 0429/1299] x86/apic: Fix a warning message in logical CPU IDs allocation The current warning message in allocate_logical_cpuid() is somewhat confusing: Only 1 processors supported.Processor 2/0x2 and the rest are ignored. As it might imply that there's only one CPU in the system - while what we ran into here is a kernel limitation. Fix the warning message to clarify all that: APIC: NR_CPUS/possible_cpus limit of 2 reached. Processor 2/0x2 and the rest are ignored. ( Also update the error return from -1 to -EINVAL, which is the more canonical return value. ) Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: nicstange@gmail.com Cc: wanpeng.li@hotmail.com Link: http://lkml.kernel.org/r/1488261052-25753-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4261b3282ad9..11088b86e5c7 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2062,10 +2062,10 @@ static int allocate_logical_cpuid(int apicid) /* Allocate a new cpuid. */ if (nr_logical_cpuids >= nr_cpu_ids) { - WARN_ONCE(1, "Only %d processors supported." + WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. " "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids - 1, nr_logical_cpuids, apicid); - return -1; + nr_cpu_ids, nr_logical_cpuids, apicid); + return -EINVAL; } cpuid_to_apicid[nr_logical_cpuids] = apicid; From 11277aabcbbe13916151af897d29a5e9f71ca73f Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 23 Feb 2017 17:16:41 +0800 Subject: [PATCH 0430/1299] x86/apic: Simplify enable_IR_x2apic(), remove try_to_enable_IR() The following commit: 2e63ad4bd5dd ("x86/apic: Do not init irq remapping if ioapic is disabled") ... added a check for skipped IO-APIC setup to enable_IR_x2apic(), but this check is also duplicated in try_to_enable_IR() - and it will never succeed in calling irq_remapping_enable(). Remove the whole irq_remapping_enable() complication: if the IO-APIC is disabled we cannot enable IRQ remapping. Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: nicstange@gmail.com Cc: wanpeng.li@hotmail.com Link: http://lkml.kernel.org/r/1487841401-1543-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 11088b86e5c7..aee7deddabd0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1610,24 +1610,15 @@ static inline void try_to_enable_x2apic(int remap_mode) { } static inline void __x2apic_enable(void) { } #endif /* !CONFIG_X86_X2APIC */ -static int __init try_to_enable_IR(void) -{ -#ifdef CONFIG_X86_IO_APIC - if (!x2apic_enabled() && skip_ioapic_setup) { - pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); - return -1; - } -#endif - return irq_remapping_enable(); -} - void __init enable_IR_x2apic(void) { unsigned long flags; int ret, ir_stat; - if (skip_ioapic_setup) + if (skip_ioapic_setup) { + pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); return; + } ir_stat = irq_remapping_prepare(); if (ir_stat < 0 && !x2apic_supported()) @@ -1645,7 +1636,7 @@ void __init enable_IR_x2apic(void) /* If irq_remapping_prepare() succeeded, try to enable it */ if (ir_stat >= 0) - ir_stat = try_to_enable_IR(); + ir_stat = irq_remapping_enable(); /* ir_stat contains the remap mode or an error code */ try_to_enable_x2apic(ir_stat); From 2a4d0c627f5374f365a873dea4e10ae0bb437680 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 13 Feb 2017 13:13:36 +0300 Subject: [PATCH 0431/1299] x86/selftests: Add clobbers for int80 on x86_64 Kernel erases R8..R11 registers prior returning to userspace from int80: https://lkml.org/lkml/2009/10/1/164 GCC can reuse these registers and doesn't expect them to change during syscall invocation. I met this kind of bug in CRIU once GCC 6.1 and CLANG stored local variables in those registers and the kernel zerofied them during syscall: https://github.com/xemul/criu/commit/990d33f1a1cdd17bca6c2eb059ab3be2564f7fa2 By that reason I suggest to add those registers to clobbers in selftests. Also, as noted by Andy - removed unneeded clobber for flags in INT $0x80 inline asm. Signed-off-by: Dmitry Safonov Acked-by: Andy Lutomirski Cc: 0x7f454c46@gmail.com Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Shuah Khan Cc: Thomas Gleixner Cc: linux-kselftest@vger.kernel.org Link: http://lkml.kernel.org/r/20170213101336.20486-1-dsafonov@virtuozzo.com Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/fsgsbase.c | 2 +- tools/testing/selftests/x86/ldt_gdt.c | 16 +++++++++++----- tools/testing/selftests/x86/ptrace_syscall.c | 3 ++- .../testing/selftests/x86/single_step_syscall.c | 5 ++++- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 5b2b4b3c634c..b4967d875236 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -245,7 +245,7 @@ void do_unexpected_base(void) long ret; asm volatile ("int $0x80" : "=a" (ret) : "a" (243), "b" (low_desc) - : "flags"); + : "r8", "r9", "r10", "r11"); memcpy(&desc, low_desc, sizeof(desc)); munmap(low_desc, sizeof(desc)); diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 4af47079cf04..f6121612e769 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -45,6 +45,12 @@ #define AR_DB (1 << 22) #define AR_G (1 << 23) +#ifdef __x86_64__ +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" +#else +# define INT80_CLOBBERS +#endif + static int nerrs; /* Points to an array of 1024 ints, each holding its own index. */ @@ -588,7 +594,7 @@ static int invoke_set_thread_area(void) asm volatile ("int $0x80" : "=a" (ret), "+m" (low_user_desc) : "a" (243), "b" (low_user_desc) - : "flags"); + : INT80_CLOBBERS); return ret; } @@ -657,7 +663,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -688,7 +694,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); if (sel != 0) { result = "FAIL"; @@ -721,7 +727,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base); @@ -774,7 +780,7 @@ static void test_gdt_invalidation(void) "+a" (eax) : "m" (low_user_desc_clear), [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear) - : "flags"); + : INT80_CLOBBERS); #ifdef __x86_64__ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base); diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c index b037ce9cf116..eaea92439708 100644 --- a/tools/testing/selftests/x86/ptrace_syscall.c +++ b/tools/testing/selftests/x86/ptrace_syscall.c @@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args) asm volatile ("int $0x80" : "+a" (args->nr), "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2), - "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)); + "+S" (args->arg3), "+D" (args->arg4), "+r" (bp) + : : "r8", "r9", "r10", "r11"); args->arg5 = bp; #else sys32_helper(args, int80_and_ret); diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index 50c26358e8b7..a48da95c18fd 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps; #ifdef __x86_64__ # define REG_IP REG_RIP # define WIDTH "q" +# define INT80_CLOBBERS "r8", "r9", "r10", "r11" #else # define REG_IP REG_EIP # define WIDTH "l" +# define INT80_CLOBBERS #endif static unsigned long get_eflags(void) @@ -140,7 +142,8 @@ int main() printf("[RUN]\tSet TF and check int80\n"); set_eflags(get_eflags() | X86_EFLAGS_TF); - asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)); + asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid) + : INT80_CLOBBERS); check_result(); /* From 6b0b7551428e4caae1e2c023a529465a9a9ae2d4 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 16 Feb 2017 17:00:50 +1100 Subject: [PATCH 0432/1299] perf/core: Rename CONFIG_[UK]PROBE_EVENT to CONFIG_[UK]PROBE_EVENTS We have uses of CONFIG_UPROBE_EVENT and CONFIG_KPROBE_EVENT as well as CONFIG_UPROBE_EVENTS and CONFIG_KPROBE_EVENTS. Consistently use the plurals. Signed-off-by: Anton Blanchard Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: alexander.shishkin@linux.intel.com Cc: davem@davemloft.net Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/20170216060050.20866-1-anton@ozlabs.org Signed-off-by: Ingo Molnar --- Documentation/trace/kprobetrace.txt | 2 +- Documentation/trace/uprobetracer.txt | 2 +- arch/powerpc/configs/85xx/kmp204x_defconfig | 2 +- arch/s390/configs/default_defconfig | 2 +- arch/s390/configs/gcov_defconfig | 2 +- arch/s390/configs/performance_defconfig | 2 +- arch/s390/defconfig | 2 +- kernel/trace/Kconfig | 6 +++--- kernel/trace/Makefile | 4 ++-- kernel/trace/trace.c | 10 +++++----- kernel/trace/trace_probe.h | 4 ++-- 11 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt index e4991fb1eedc..41ef9d8efe95 100644 --- a/Documentation/trace/kprobetrace.txt +++ b/Documentation/trace/kprobetrace.txt @@ -12,7 +12,7 @@ kprobes can probe (this means, all functions body except for __kprobes functions). Unlike the Tracepoint based event, this can be added and removed dynamically, on the fly. -To enable this feature, build your kernel with CONFIG_KPROBE_EVENT=y. +To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y. Similar to the events tracer, this doesn't need to be activated via current_tracer. Instead of that, add probe points via diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt index fa7b680ee8a0..bf526a7c5559 100644 --- a/Documentation/trace/uprobetracer.txt +++ b/Documentation/trace/uprobetracer.txt @@ -7,7 +7,7 @@ Overview -------- Uprobe based trace events are similar to kprobe based trace events. -To enable this feature, build your kernel with CONFIG_UPROBE_EVENT=y. +To enable this feature, build your kernel with CONFIG_UPROBE_EVENTS=y. Similar to the kprobe-event tracer, this doesn't need to be activated via current_tracer. Instead of that, add probe points via diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig index aaaaa609cd24..34a4da23f000 100644 --- a/arch/powerpc/configs/85xx/kmp204x_defconfig +++ b/arch/powerpc/configs/85xx/kmp204x_defconfig @@ -210,7 +210,7 @@ CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_SCHEDSTATS=y CONFIG_RCU_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=y diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 143b1e00b818..4b176fe83da4 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -609,7 +609,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index f05d2d6e1087..0de46cc397f6 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -560,7 +560,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 2358bf33c5ef..e167557b434c 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -558,7 +558,7 @@ CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y CONFIG_TRACE_ENUM_MAP_FILE=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 68bfd09f1b02..97189dbaf34b 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -179,7 +179,7 @@ CONFIG_FTRACE_SYSCALLS=y CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_UPROBE_EVENT=y +CONFIG_UPROBE_EVENTS=y CONFIG_FUNCTION_PROFILER=y CONFIG_TRACE_ENUM_MAP_FILE=y CONFIG_KPROBES_SANITY_TEST=y diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d5038005eb5d..d4a06e714645 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -429,7 +429,7 @@ config BLK_DEV_IO_TRACE If unsure, say N. -config KPROBE_EVENT +config KPROBE_EVENTS depends on KPROBES depends on HAVE_REGS_AND_STACK_ACCESS_API bool "Enable kprobes-based dynamic events" @@ -447,7 +447,7 @@ config KPROBE_EVENT This option is also required by perf-probe subcommand of perf tools. If you want to use perf tools, this option is strongly recommended. -config UPROBE_EVENT +config UPROBE_EVENTS bool "Enable uprobes-based dynamic events" depends on ARCH_SUPPORTS_UPROBES depends on MMU @@ -466,7 +466,7 @@ config UPROBE_EVENT config BPF_EVENTS depends on BPF_SYSCALL - depends on (KPROBE_EVENT || UPROBE_EVENT) && PERF_EVENTS + depends on (KPROBE_EVENTS || UPROBE_EVENTS) && PERF_EVENTS bool default y help diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index e57980845549..90f2701d92a7 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o -obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o +obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o obj-$(CONFIG_TRACEPOINTS) += power-traces.o ifeq ($(CONFIG_PM),y) obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o @@ -66,7 +66,7 @@ ifeq ($(CONFIG_TRACING),y) obj-$(CONFIG_KGDB_KDB) += trace_kdb.o endif obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o -obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o +obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 707445ceb7ef..f35109514a01 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4341,22 +4341,22 @@ static const char readme_msg[] = "\t\t\t traces\n" #endif #endif /* CONFIG_STACK_TRACER */ -#ifdef CONFIG_KPROBE_EVENT +#ifdef CONFIG_KPROBE_EVENTS " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif -#ifdef CONFIG_UPROBE_EVENT +#ifdef CONFIG_UPROBE_EVENTS " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n" "\t\t\t Write into this file to define/undefine new trace events.\n" #endif -#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT) +#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) "\t accepts: event-definitions (one definition per line)\n" "\t Format: p|r[:[/]] []\n" "\t -:[/]\n" -#ifdef CONFIG_KPROBE_EVENT +#ifdef CONFIG_KPROBE_EVENTS "\t place: [:][+]|\n" #endif -#ifdef CONFIG_UPROBE_EVENT +#ifdef CONFIG_UPROBE_EVENTS "\t place: :\n" #endif "\t args: =fetcharg[:type]\n" diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 0c0ae54d44c6..903273c93e61 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -248,7 +248,7 @@ ASSIGN_FETCH_FUNC(file_offset, ftype), \ #define FETCH_TYPE_STRING 0 #define FETCH_TYPE_STRSIZE 1 -#ifdef CONFIG_KPROBE_EVENT +#ifdef CONFIG_KPROBE_EVENTS struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); @@ -278,7 +278,7 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } -#endif /* CONFIG_KPROBE_EVENT */ +#endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg { struct fetch_param fetch; From 1b17c6df852851b40c3c27c66b8fa2fd99cf25d8 Mon Sep 17 00:00:00 2001 From: Andrew Banman Date: Fri, 17 Feb 2017 11:07:49 -0600 Subject: [PATCH 0433/1299] x86/platform/uv/BAU: Fix HUB errors by remove initial write to sw-ack register Writing to the software acknowledge clear register when there are no pending messages causes a HUB error to assert. The original intent of this write was to clear the pending bits before start of operation, but this is an incorrect method and has been determined to be unnecessary. Signed-off-by: Andrew Banman Acked-by: Mike Travis Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: rja@hpe.com Cc: sivanich@hpe.com Link: http://lkml.kernel.org/r/1487351269-181133-1-git-send-email-abanman@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/tlb_uv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 766d4d3529a1..f25982cdff90 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1847,7 +1847,6 @@ static void pq_init(int node, int pnode) ops.write_payload_first(pnode, first); ops.write_payload_last(pnode, last); - ops.write_g_sw_ack(pnode, 0xffffUL); /* in effect, all msg_type's are set to MSG_NOOP */ memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); From 90b28ded88dda8bea82b4a86923e73ba0746d884 Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Sun, 19 Feb 2017 19:32:48 +0100 Subject: [PATCH 0434/1299] x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk Without the parameter reboot=a, ASUS EeeBook X205TA will hang when it should reboot. This adds the appropriate quirk, thus fixing the problem. Signed-off-by: Matjaz Hegedic Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e244c19a2451..01c9cda3e1b7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -223,6 +223,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "P4S800"), }, }, + { /* Handle problems with rebooting on ASUS EeeBook X205TA */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + }, + }, /* Certec */ { /* Handle problems with rebooting on Certec BPC600 */ From f4c3a88e5f044e0200cd4b03a4d86cd6b6306ec4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Feb 2017 14:55:19 +0000 Subject: [PATCH 0435/1299] drm/i915: Tighten mmio arrays for MIPI_PORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/i915/intel_dsi.c: In function ‘intel_dsi_prepare’: drivers/gpu/drm/i915/intel_dsi.c:1308:1: error: the frame size of 2488 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] which is caused by the compiling expanding every _MIPI_PORT into an on-stack array of u32[3] at every callsite. Not sure why only one machine/compiler appears susceptible, but with a minor tweak to _MIPI_PORT we can defer the error until later. This is a partial revert of commit ce64645d86ac ("drm/i915: use variadic macros and arrays to choose port/pipe based registers") for a particular bad offender. Signed-off-by: Chris Wilson Cc: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170228145519.18012-1-chris@chris-wilson.co.uk Acked-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1343170d8f5c..0d9ae27fddff 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8248,7 +8248,7 @@ enum { /* MIPI DSI registers */ -#define _MIPI_PORT(port, a, c) _PORT3(port, a, 0, c) /* ports A and C only */ +#define _MIPI_PORT(port, a, c) ((port) ? c : a) /* ports A and C only */ #define _MMIO_MIPI(port, a, c) _MMIO(_MIPI_PORT(port, a, c)) #define MIPIO_TXESC_CLK_DIV1 _MMIO(0x160004) From 73667e31a153a66da97feb1584726222504924f8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2017 22:17:17 +0100 Subject: [PATCH 0436/1299] x86/hyperv: Hide unused label This new 32-bit warning just showed up: arch/x86/hyperv/hv_init.c: In function 'hyperv_init': arch/x86/hyperv/hv_init.c:167:1: error: label 'register_msr_cs' defined but not used [-Werror=unused-label] The easiest solution is to move the label up into the existing #ifdef that has the goto. Fixes: dee863b571b0 ("hv: export current Hyper-V clocksource") Signed-off-by: Arnd Bergmann Acked-by: Stephen Hemminger Cc: Greg Kroah-Hartman Cc: Haiyang Zhang Cc: devel@linuxdriverproject.org Cc: Vitaly Kuznetsov Cc: "K. Y. Srinivasan" Link: http://lkml.kernel.org/r/20170214211736.2641241-1-arnd@arndb.de Signed-off-by: Thomas Gleixner --- arch/x86/hyperv/hv_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index db64baf0e500..8bef70e7f3cc 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -158,13 +158,13 @@ void hyperv_init(void) clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); return; } +register_msr_cs: #endif /* * For 32 bit guests just use the MSR based mechanism for reading * the partition counter. */ -register_msr_cs: hyperv_cs = &hyperv_cs_msr; if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); From aa5ec3f715d576353c7d8f4f8085634bd845b73f Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 27 Feb 2017 21:29:22 +0900 Subject: [PATCH 0437/1299] x86/vmware: Remove duplicate inclusion of asm/timer.h Signed-off-by: Masanari Iida Cc: akataria@vmware.com Cc: virtualization@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20170227122922.26230-1-standby24x7@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/vmware.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 891f4dad7b2c..22403a28caf5 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -30,7 +30,6 @@ #include #include #include -#include #undef pr_fmt #define pr_fmt(fmt) "vmware: " fmt From e86a2d2d34e20289ae7d46692e16d43c3a785db9 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 27 Feb 2017 22:07:03 +0900 Subject: [PATCH 0438/1299] x86/intel_rdt: Remove duplicate inclusion of linux/cpu.h Signed-off-by: Masanari Iida Cc: fenghua.yu@intel.com Link: http://lkml.kernel.org/r/20170227130703.26968-1-standby24x7@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 8af04afdfcb9..759577d9d166 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include From 153654dbe595a68845ba14d5b0bfe299fa6a7e99 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Tue, 28 Feb 2017 21:34:28 +0800 Subject: [PATCH 0439/1299] x86/PCI: Implement pcibios_release_device to release IRQ from IOAPIC The revert of 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()") causes a problem for IOAPIC hotplug. The problem is that IRQs are allocated and freed in pci_enable_device() and pci_disable_device(). But there are some drivers which don't call pci_disable_device(), and they have good reasons not calling it, so if they're using IOAPIC their IRQs won't have a chance to be released from the IOAPIC. When this happens IOAPIC hot-removal fails with a kernel stack dump and an error message like this: [149335.697989] pin16 on IOAPIC2 is still in use. It turns out that we can fix it in a different way without moving IRQ allocation into pcibios_alloc_irq(), thus avoiding the regression of 991de2e59090. We can keep the allocation and freeing of IRQs as is within pci_enable_device()/pci_disable_device(), without breaking any previous assumption of the rest of the system, keeping compatibility with both the legacy and the modern drivers. We can accomplish this by implementing the existing __weak hook of pcibios_release_device() thus when a pci device is about to be deleted we get notified in the hook and take the chance to release its IRQ, if any, from the IOAPIC. Implement pcibios_release_device() for x86 to release any IRQ not released by the driver. Signed-off-by: Rui Wang Cc: tony.luck@intel.com Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: fengguang.wu@intel.com Cc: helgaas@kernel.org Cc: kbuild-all@01.org Cc: bhelgaas@google.com Link: http://lkml.kernel.org/r/1488288869-31290-2-git-send-email-rui.y.wang@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/pci/common.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 0cb52ae0a8f0..190e718694b1 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -735,6 +735,15 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +void pcibios_release_device(struct pci_dev *dev) +{ + if (atomic_dec_return(&dev->enable_cnt) >= 0) + pcibios_disable_device(dev); + +} +#endif + int pci_ext_cfg_avail(void) { if (raw_pci_ext_ops) From f2ae5da726172fcf82f7be801489dd585f6a38eb Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Tue, 28 Feb 2017 21:34:29 +0800 Subject: [PATCH 0440/1299] x86/ioapic: Split IOAPIC hot-removal into two steps The hot removal of IOAPIC is handling PCI and ACPI removal in one go. That only works when the PCI drivers released the interrupt resources, but breaks when a IOAPIC interrupt is still associated to a PCI device. The new pcibios_release_device() callback allows to solve that problem by splitting the removal into two steps: 1) PCI removal: Release all PCI resources including eventually not yet released IOAPIC interrupts via the new pcibios_release_device() callback. 2) ACPI removal: After release of all PCI resources the ACPI resources can be released without issue. [ tglx: Rewrote changelog ] Signed-off-by: Rui Wang Cc: tony.luck@intel.com Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: fengguang.wu@intel.com Cc: helgaas@kernel.org Cc: kbuild-all@01.org Cc: bhelgaas@google.com Link: http://lkml.kernel.org/r/1488288869-31290-3-git-send-email-rui.y.wang@intel.com Signed-off-by: Thomas Gleixner --- drivers/acpi/internal.h | 2 ++ drivers/acpi/ioapic.c | 24 +++++++++++++++++------- drivers/acpi/pci_root.c | 4 ++-- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 219b90bc0922..f15900132912 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -41,8 +41,10 @@ void acpi_gpe_apply_masked_gpes(void); void acpi_container_init(void); void acpi_memory_hotplug_init(void); #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +void pci_ioapic_remove(struct acpi_pci_root *root); int acpi_ioapic_remove(struct acpi_pci_root *root); #else +static inline void pci_ioapic_remove(struct acpi_pci_root *root) { return; } static inline int acpi_ioapic_remove(struct acpi_pci_root *root) { return 0; } #endif #ifdef CONFIG_ACPI_DOCK diff --git a/drivers/acpi/ioapic.c b/drivers/acpi/ioapic.c index 6d7ce6e12aaa..1120dfd625b8 100644 --- a/drivers/acpi/ioapic.c +++ b/drivers/acpi/ioapic.c @@ -206,6 +206,23 @@ int acpi_ioapic_add(acpi_handle root_handle) return ACPI_SUCCESS(status) && ACPI_SUCCESS(retval) ? 0 : -ENODEV; } +void pci_ioapic_remove(struct acpi_pci_root *root) +{ + struct acpi_pci_ioapic *ioapic, *tmp; + + mutex_lock(&ioapic_list_lock); + list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) { + if (root->device->handle != ioapic->root_handle) + continue; + if (ioapic->pdev) { + pci_release_region(ioapic->pdev, 0); + pci_disable_device(ioapic->pdev); + pci_dev_put(ioapic->pdev); + } + } + mutex_unlock(&ioapic_list_lock); +} + int acpi_ioapic_remove(struct acpi_pci_root *root) { int retval = 0; @@ -215,15 +232,8 @@ int acpi_ioapic_remove(struct acpi_pci_root *root) list_for_each_entry_safe(ioapic, tmp, &ioapic_list, list) { if (root->device->handle != ioapic->root_handle) continue; - if (acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base)) retval = -EBUSY; - - if (ioapic->pdev) { - pci_release_region(ioapic->pdev, 0); - pci_disable_device(ioapic->pdev); - pci_dev_put(ioapic->pdev); - } if (ioapic->res.flags && ioapic->res.parent) release_resource(&ioapic->res); list_del(&ioapic->list); diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index bf601d4df8cf..919be0aa2578 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -648,12 +648,12 @@ static void acpi_pci_root_remove(struct acpi_device *device) pci_stop_root_bus(root->bus); - WARN_ON(acpi_ioapic_remove(root)); - + pci_ioapic_remove(root); device_set_run_wake(root->bus->bridge, false); pci_acpi_remove_bus_pm_notifier(device); pci_remove_root_bus(root->bus); + WARN_ON(acpi_ioapic_remove(root)); dmar_device_remove(device->handle); From 58ab9a088ddac4efe823471275859d64f735577e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 23 Feb 2017 14:26:03 -0800 Subject: [PATCH 0441/1299] x86/pkeys: Check against max pkey to avoid overflows Kirill reported a warning from UBSAN about undefined behavior when using protection keys. He is running on hardware that actually has support for it, which is not widely available. The warning triggers because of very large shifts of integers when doing a pkey_free() of a large, invalid value. This happens because we never check that the pkey "fits" into the mm_pkey_allocation_map(). I do not believe there is any danger here of anything bad happening other than some aliasing issues where somebody could do: pkey_free(35); and the kernel would effectively execute: pkey_free(8); While this might be confusing to an app that was doing something stupid, it has to do something stupid and the effects are limited to the app shooting itself in the foot. Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Cc: kirill.shutemov@linux.intel.com Link: http://lkml.kernel.org/r/20170223222603.A022ED65@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pkeys.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 34684adb6899..b3b09b98896d 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { + /* + * "Allocated" pkeys are those that have been returned + * from pkey_alloc(). pkey 0 is special, and never + * returned from pkey_alloc(). + */ + if (pkey <= 0) + return false; + if (pkey >= arch_max_pkey()) + return false; return mm_pkey_allocation_map(mm) & (1U << pkey); } @@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm) static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - /* - * pkey 0 is special, always allocated and can never - * be freed. - */ - if (!pkey) - return -EINVAL; if (!mm_pkey_is_allocated(mm, pkey)) return -EINVAL; From 940b2f2fd963c043418ce8af64605783b2b19140 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Feb 2017 12:31:40 +0100 Subject: [PATCH 0442/1299] x86/events: Remove last remnants of old filenames Update to the new file paths, remove them from introductory comments. Signed-off-by: Borislav Petkov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170218113140.8051-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/events/amd/core.c | 2 +- arch/x86/events/intel/cstate.c | 2 +- arch/x86/events/intel/rapl.c | 2 +- arch/x86/events/intel/uncore.h | 6 +++--- tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index afb222b63cae..c84584bb9402 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -604,7 +604,7 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, return &amd_f15_PMC20; } case AMD_EVENT_NB: - /* moved to perf_event_amd_uncore.c */ + /* moved to uncore.c */ return &emptyconstraint; default: return &emptyconstraint; diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index aff4b5b69d40..238ae3248ba5 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_cstate.c: support cstate residency counters + * Support cstate residency counters * * Copyright (C) 2015, Intel Corp. * Author: Kan Liang (kan.liang@intel.com) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 22054ca49026..9d05c7e67f60 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters + * Support Intel RAPL energy consumption counters * Copyright (C) 2013 Google, Inc., Stephane Eranian * * Intel RAPL interface is specified in the IA-32 Manual Vol3b diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index ad986c1e29bc..df5989f27b1b 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -360,7 +360,7 @@ extern struct list_head pci2phy_map_head; extern struct pci_extra_dev *uncore_extra_pci_dev; extern struct event_constraint uncore_constraint_empty; -/* perf_event_intel_uncore_snb.c */ +/* uncore_snb.c */ int snb_uncore_pci_init(void); int ivb_uncore_pci_init(void); int hsw_uncore_pci_init(void); @@ -371,7 +371,7 @@ void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); -/* perf_event_intel_uncore_snbep.c */ +/* uncore_snbep.c */ int snbep_uncore_pci_init(void); void snbep_uncore_cpu_init(void); int ivbep_uncore_pci_init(void); @@ -385,5 +385,5 @@ void knl_uncore_cpu_init(void); int skx_uncore_pci_init(void); void skx_uncore_cpu_init(void); -/* perf_event_intel_uncore_nhmex.c */ +/* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 7913363bde5c..4f3c758d875d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -31,7 +31,7 @@ #error Instruction buffer size too small #endif -/* Based on branch_type() from perf_event_intel_lbr.c */ +/* Based on branch_type() from arch/x86/events/intel/lbr.c */ static void intel_pt_insn_decoder(struct insn *insn, struct intel_pt_insn *intel_pt_insn) { From 72042a8c7b01048a36ece216aaf206b7d60ca661 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Mon, 20 Feb 2017 10:12:35 +1100 Subject: [PATCH 0443/1299] x86/purgatory: Make functions and variables static Sparse emits several 'symbol not declared' warnings for various functions and variables. Add static keyword to functions and variables which have file scope only. Signed-off-by: Tobin C. Harding Link: http://lkml.kernel.org/r/1487545956-2547-2-git-send-email-me@tobin.cc Signed-off-by: Thomas Gleixner --- arch/x86/purgatory/purgatory.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 25e068ba3382..2a5f4373c797 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -18,11 +18,11 @@ struct sha_region { unsigned long len; }; -unsigned long backup_dest = 0; -unsigned long backup_src = 0; -unsigned long backup_sz = 0; +static unsigned long backup_dest; +static unsigned long backup_src; +static unsigned long backup_sz; -u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; +static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; struct sha_region sha_regions[16] = {}; @@ -39,7 +39,7 @@ static int copy_backup_region(void) return 0; } -int verify_sha256_digest(void) +static int verify_sha256_digest(void) { struct sha_region *ptr, *end; u8 digest[SHA256_DIGEST_SIZE]; From e98fe5127b9cc8ab33d1094b81c19deb1f9082bf Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Mon, 20 Feb 2017 10:12:36 +1100 Subject: [PATCH 0444/1299] x86/purgatory: Fix sparse warning, symbol not declared Sparse emits warning, 'symbol not declared' for a function that has neither file scope nor a forward declaration. The functions only call site is an ASM file. Add a header file with the function declaration. Include the header file in the C source file defining the function in order to fix the sparse warning. Include the header file in ASM file containing the call site to document the usage. Signed-off-by: Tobin C. Harding Link: http://lkml.kernel.org/r/1487545956-2547-3-git-send-email-me@tobin.cc Signed-off-by: Thomas Gleixner --- arch/x86/purgatory/purgatory.c | 1 + arch/x86/purgatory/purgatory.h | 8 ++++++++ arch/x86/purgatory/setup-x86_64.S | 1 + 3 files changed, 10 insertions(+) create mode 100644 arch/x86/purgatory/purgatory.h diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 2a5f4373c797..b6d5c8946e66 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -11,6 +11,7 @@ */ #include "sha256.h" +#include "purgatory.h" #include "../boot/string.h" struct sha_region { diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h new file mode 100644 index 000000000000..e2e365a6c192 --- /dev/null +++ b/arch/x86/purgatory/purgatory.h @@ -0,0 +1,8 @@ +#ifndef PURGATORY_H +#define PURGATORY_H + +#ifndef __ASSEMBLY__ +extern void purgatory(void); +#endif /* __ASSEMBLY__ */ + +#endif /* PURGATORY_H */ diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S index fe3c91ba1bd0..f90e9dfa90bb 100644 --- a/arch/x86/purgatory/setup-x86_64.S +++ b/arch/x86/purgatory/setup-x86_64.S @@ -9,6 +9,7 @@ * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */ +#include "purgatory.h" .text .globl purgatory_start From 8392f16d38bb5222c03073a3906b7fd272386faf Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 21 Feb 2017 19:36:39 +0100 Subject: [PATCH 0445/1299] x86/boot: Correct setup_header.start_sys name It is called start_sys_seg elsewhere so rename it to that. It is an obsolete field so we could just as well directly call it __u16 __pad... No functional change. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170221183639.16554-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/uapi/asm/bootparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 5138dacf8bb8..07244ea16765 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -58,7 +58,7 @@ struct setup_header { __u32 header; __u16 version; __u32 realmode_swtch; - __u16 start_sys; + __u16 start_sys_seg; __u16 kernel_version; __u8 type_of_loader; __u8 loadflags; From 4644848369c0c06352e9ba3bad830c9dd7281380 Mon Sep 17 00:00:00 2001 From: Deepak M Date: Wed, 1 Mar 2017 12:51:33 +0530 Subject: [PATCH 0446/1299] drm/i915/glk: Add MIPIIO Enable/disable sequence v2: Addressed Jani's Review comments(renamed bit field macros) v3: Jani's Review comment for aligning code to platforms and added wrapper functions. v4: Corrected enable/disable seuqence as per BSPEC v5: Corrected waiting twice for same bit (Review comments: Jani) v6: Rebased to Han's patches(dsi restructuring code) Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488352893-29916-2-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 206 +++++++++++++++++++++++++++++-- 1 file changed, 195 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 20ed799714de..8e97bae378f1 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -357,6 +357,109 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder, return true; } +static void glk_dsi_device_ready(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 tmp, val; + + /* Set the MIPI mode + * If MIPI_Mode is off, then writing to LP_Wake bit is not reflecting. + * Power ON MIPI IO first and then write into IO reset and LP wake bits + */ + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(MIPI_CTRL(port)); + I915_WRITE(MIPI_CTRL(port), tmp | GLK_MIPIIO_ENABLE); + } + + /* Put the IO into reset */ + tmp = I915_READ(MIPI_CTRL(PORT_A)); + tmp &= ~GLK_MIPIIO_RESET_RELEASED; + I915_WRITE(MIPI_CTRL(PORT_A), tmp); + + /* Program LP Wake */ + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(MIPI_CTRL(port)); + tmp |= GLK_LP_WAKE; + I915_WRITE(MIPI_CTRL(port), tmp); + } + + /* Wait for Pwr ACK */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_MIPIIO_PORT_POWERED, + GLK_MIPIIO_PORT_POWERED, 20)) + DRM_ERROR("MIPIO port is powergated\n"); + } + + /* Wait for MIPI PHY status bit to set */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_PHY_STATUS_PORT_READY, + GLK_PHY_STATUS_PORT_READY, 20)) + DRM_ERROR("PHY is not ON\n"); + } + + /* Get IO out of reset */ + tmp = I915_READ(MIPI_CTRL(PORT_A)); + I915_WRITE(MIPI_CTRL(PORT_A), tmp | GLK_MIPIIO_RESET_RELEASED); + + /* Get IO out of Low power state*/ + for_each_dsi_port(port, intel_dsi->ports) { + if (!(I915_READ(MIPI_DEVICE_READY(port)) & DEVICE_READY)) { + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= DEVICE_READY; + I915_WRITE(MIPI_DEVICE_READY(port), val); + usleep_range(10, 15); + } + + /* Enter ULPS */ + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_ENTER | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); + + /* Wait for ULPS Not active */ + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, + GLK_ULPS_NOT_ACTIVE, 20)) + + /* Exit ULPS */ + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_EXIT | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); + + /* Enter Normal Mode */ + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_NORMAL_OPERATION | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); + + tmp = I915_READ(MIPI_CTRL(port)); + tmp &= ~GLK_LP_WAKE; + I915_WRITE(MIPI_CTRL(port), tmp); + } + + /* Wait for Stop state */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_DATA_LANE_STOP_STATE, + GLK_DATA_LANE_STOP_STATE, 20)) + DRM_ERROR("Date lane not in STOP state\n"); + } + + /* Wait for AFE LATCH */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + BXT_MIPI_PORT_CTRL(port), AFE_LATCHOUT, + AFE_LATCHOUT, 20)) + DRM_ERROR("D-PHY not entering LP-11 state\n"); + } +} + static void bxt_dsi_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -429,11 +532,79 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder) if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_dsi_device_ready(encoder); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_BROXTON(dev_priv)) bxt_dsi_device_ready(encoder); + else if (IS_GEMINILAKE(dev_priv)) + glk_dsi_device_ready(encoder); } -static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) +static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 val; + + /* Enter ULPS */ + for_each_dsi_port(port, intel_dsi->ports) { + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_ENTER | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); + } + + /* Wait for MIPI PHY status bit to unset */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), + GLK_PHY_STATUS_PORT_READY, 0, 20)) + DRM_ERROR("PHY is not turning OFF\n"); + } + + /* Wait for Pwr ACK bit to unset */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), + GLK_MIPIIO_PORT_POWERED, 0, 20)) + DRM_ERROR("MIPI IO Port is not powergated\n"); + } +} + +static void glk_dsi_disable_mipi_io(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 tmp; + + /* Put the IO into reset */ + tmp = I915_READ(MIPI_CTRL(PORT_A)); + tmp &= ~GLK_MIPIIO_RESET_RELEASED; + I915_WRITE(MIPI_CTRL(PORT_A), tmp); + + /* Wait for MIPI PHY status bit to unset */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), + GLK_PHY_STATUS_PORT_READY, 0, 20)) + DRM_ERROR("PHY is not turning OFF\n"); + } + + /* Clear MIPI mode */ + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(MIPI_CTRL(port)); + tmp &= ~GLK_MIPIIO_ENABLE; + I915_WRITE(MIPI_CTRL(port), tmp); + } +} + +static void glk_dsi_clear_device_ready(struct intel_encoder *encoder) +{ + glk_dsi_enter_low_power_mode(encoder); + glk_dsi_disable_mipi_io(encoder); +} + +static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); @@ -670,6 +841,17 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, } } +static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv) || + IS_BROXTON(dev_priv)) + vlv_dsi_clear_device_ready(encoder); + else if (IS_GEMINILAKE(dev_priv)) + glk_dsi_clear_device_ready(encoder); +} + static void intel_dsi_post_disable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -1314,18 +1496,20 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder) enum port port; u32 val; - for_each_dsi_port(port, intel_dsi->ports) { - /* Panel commands can be sent when clock is in LP11 */ - I915_WRITE(MIPI_DEVICE_READY(port), 0x0); + if (!IS_GEMINILAKE(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) { + /* Panel commands can be sent when clock is in LP11 */ + I915_WRITE(MIPI_DEVICE_READY(port), 0x0); - intel_dsi_reset_clocks(encoder, port); - I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); + intel_dsi_reset_clocks(encoder, port); + I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); - val = I915_READ(MIPI_DSI_FUNC_PRG(port)); - val &= ~VID_MODE_FORMAT_MASK; - I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); + val = I915_READ(MIPI_DSI_FUNC_PRG(port)); + val &= ~VID_MODE_FORMAT_MASK; + I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); - I915_WRITE(MIPI_DEVICE_READY(port), 0x1); + I915_WRITE(MIPI_DEVICE_READY(port), 0x1); + } } } From 0129936ddda26afd5d9d207c4e86b2425952579f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 24 Feb 2017 16:32:10 +0200 Subject: [PATCH 0447/1299] drm/i915/gen9: Increase PCODE request timeout to 50ms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 2c7d0602c815277f7cb7c932b091288710d8aba7 Author: Imre Deak Date: Mon Dec 5 18:27:37 2016 +0200 drm/i915/gen9: Fix PCODE polling during CDCLK change notification there is still one report of the CDCLK-change request timing out on a KBL machine, see the Reference link. On that machine the maximum time the request took to succeed was 34ms, so increase the timeout to 50ms. v2: - Change timeout from 100 to 50 ms to maintain the current 50 ms limit for atomic waits in the driver. (Chris, Tvrtko) Reference: https://bugs.freedesktop.org/show_bug.cgi?id=99345 Cc: Ville Syrjälä Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Signed-off-by: Imre Deak Acked-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1487946730-17162-1-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ac0cd82f61e5..c6938350a6c4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7926,10 +7926,10 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, * @timeout_base_ms: timeout for polling with preemption enabled * * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE - * reports an error or an overall timeout of @timeout_base_ms+10 ms expires. + * reports an error or an overall timeout of @timeout_base_ms+50 ms expires. * The request is acknowledged once the PCODE reply dword equals @reply after * applying @reply_mask. Polling is first attempted with preemption enabled - * for @timeout_base_ms and if this times out for another 10 ms with + * for @timeout_base_ms and if this times out for another 50 ms with * preemption disabled. * * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some @@ -7965,14 +7965,15 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, * worst case) _and_ PCODE was busy for some reason even after a * (queued) request and @timeout_base_ms delay. As a workaround retry * the poll with preemption disabled to maximize the number of - * requests. Increase the timeout from @timeout_base_ms to 10ms to + * requests. Increase the timeout from @timeout_base_ms to 50ms to * account for interrupts that could reduce the number of these - * requests. + * requests, and for any quirks of the PCODE firmware that delays + * the request completion. */ DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n"); WARN_ON_ONCE(timeout_base_ms > 3); preempt_disable(); - ret = wait_for_atomic(COND, 10); + ret = wait_for_atomic(COND, 50); preempt_enable(); out: From 9160095c0e89a4fd58b74b64645b0ceaf11d119c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 28 Feb 2017 13:11:43 +0200 Subject: [PATCH 0448/1299] drm/i915: use BUILD_BUG_ON to ensure platform name has been set up Leave the runtime check in place in case the platform variable itself comes from bogus sources. Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488280303-9323-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_device_info.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 71152656ebbf..48b34a937792 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -889,6 +889,7 @@ enum intel_platform { INTEL_BROXTON, INTEL_KABYLAKE, INTEL_GEMINILAKE, + INTEL_MAX_PLATFORMS }; struct intel_device_info { diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 2e1fd857e625..9fc6ab783008 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -56,6 +56,8 @@ static const char * const platform_names[] = { const char *intel_platform_name(enum intel_platform platform) { + BUILD_BUG_ON(ARRAY_SIZE(platform_names) != INTEL_MAX_PLATFORMS); + if (WARN_ON_ONCE(platform >= ARRAY_SIZE(platform_names) || platform_names[platform] == NULL)) return ""; From 25b68a8f0ab13a98de02650208ec927796659898 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 17 Feb 2017 10:13:59 -0500 Subject: [PATCH 0449/1299] timerfd: Only check CAP_WAKE_ALARM when it is needed timerfd_create() and do_timerfd_settime() evaluate capable(CAP_WAKE_ALARM) unconditionally although CAP_WAKE_ALARM is only required for CLOCK_REALTIME_ALARM and CLOCK_BOOTTIME_ALARM. This can cause extraneous audit messages when using a LSM such as SELinux, incorrectly causes PF_SUPERPRIV to be set even when no privilege was exercised, and is inefficient. Flip the order of the tests in both functions so that we only call capable() if the capability is truly required for the operation. Signed-off-by: Stephen Smalley Cc: linux-security-module@vger.kernel.org Cc: selinux@tycho.nsa.gov Link: http://lkml.kernel.org/r/1487344439-22293-1-git-send-email-sds@tycho.nsa.gov Signed-off-by: Thomas Gleixner --- fs/timerfd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/timerfd.c b/fs/timerfd.c index 384fa759a563..c543cdb5f8ed 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -400,9 +400,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; - if (!capable(CAP_WAKE_ALARM) && - (clockid == CLOCK_REALTIME_ALARM || - clockid == CLOCK_BOOTTIME_ALARM)) + if ((clockid == CLOCK_REALTIME_ALARM || + clockid == CLOCK_BOOTTIME_ALARM) && + !capable(CAP_WAKE_ALARM)) return -EPERM; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -449,7 +449,7 @@ static int do_timerfd_settime(int ufd, int flags, return ret; ctx = f.file->private_data; - if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) { + if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) { fdput(f); return -EPERM; } From 249f6962353117de4f98654b416f581f4216013c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:14:57 +0200 Subject: [PATCH 0450/1299] drm/i915/dsi: Document the panel enable / disable sequences from the spec Document the DSI panel enable / disable sequences from the spec, for easy comparison between the code and the spec. Signed-off-by: Hans de Goede Acked-by: Jani Nikula Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-2-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 37 ++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 8e97bae378f1..234592c62141 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -725,6 +725,43 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, struct intel_crtc_state *pipe_config); static void intel_dsi_unprepare(struct intel_encoder *encoder); +/* + * Panel enable/disable sequences from the VBT spec. + * + * Note the spec has AssertReset / DeassertReset swapped from their + * usual naming. We use the normal names to avoid confusion (so below + * they are swapped compared to the spec). + * + * Steps starting with MIPI refer to VBT sequences, note that for v2 + * VBTs several steps which have a VBT in v2 are expected to be handled + * directly by the driver, by directly driving gpios for example. + * + * v2 video mode seq v3 video mode seq command mode seq + * - power on - MIPIPanelPowerOn - power on + * - wait t1+t2 - wait t1+t2 + * - MIPIDeassertResetPin - MIPIDeassertResetPin - MIPIDeassertResetPin + * - io lines to lp-11 - io lines to lp-11 - io lines to lp-11 + * - MIPISendInitialDcsCmds - MIPISendInitialDcsCmds - MIPISendInitialDcsCmds + * - MIPITearOn + * - MIPIDisplayOn + * - turn on DPI - turn on DPI - set pipe to dsr mode + * - MIPIDisplayOn - MIPIDisplayOn + * - wait t5 - wait t5 + * - backlight on - MIPIBacklightOn - backlight on + * ... ... ... issue mem cmds ... + * - backlight off - MIPIBacklightOff - backlight off + * - wait t6 - wait t6 + * - MIPIDisplayOff + * - turn off DPI - turn off DPI - disable pipe dsr mode + * - MIPITearOff + * - MIPIDisplayOff - MIPIDisplayOff + * - io lines to lp-00 - io lines to lp-00 - io lines to lp-00 + * - MIPIAssertResetPin - MIPIAssertResetPin - MIPIAssertResetPin + * - wait t3 - wait t3 + * - power off - MIPIPanelPowerOff - power off + * - wait t4 - wait t4 + */ + static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) From 19c17df3cb5e6a7f4cc2b8df63a9409c12c5a610 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:14:58 +0200 Subject: [PATCH 0451/1299] drm/i915/dsi: Drop bogus MIPI_SEQ_ASSERT_RESET before POWER_ON intel_dsi_post_disable(), which does the MIPI_SEQ_ASSERT_RESET, will always be called at some point before intel_dsi_pre_enable() making the MIPI_SEQ_ASSERT_RESET in intel_dsi_pre_enable() redundant. In addition, calling MIPI_SEQ_ASSERT_RESET in the enable path goes against the VBT spec. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-3-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 234592c62141..f9b0767ef073 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -811,7 +811,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, /* put device in ready state */ intel_dsi_device_ready(encoder); - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); From c7dc5275bcbda8bff2acfad98a08964017644186 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:14:59 +0200 Subject: [PATCH 0452/1299] drm/i915/dsi: Move MIPI_SEQ_POWER_ON/OFF calls together with pmic gpio calls Now that we are no longer bound to the drm_panel_ callbacks, call MIPI_SEQ_POWER_ON/OFF at the proper place. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-4-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index f9b0767ef073..ab905474b9dd 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -793,10 +793,10 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, intel_dsi_prepare(encoder, pipe_config); - /* Panel Enable over CRC PMIC */ + /* Power on, try both CRC pmic gpio and VBT */ if (intel_dsi->gpio_panel) gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); - + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); msleep(intel_dsi->panel_on_delay); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { @@ -811,7 +811,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, /* put device in ready state */ intel_dsi_device_ready(encoder); - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); @@ -940,11 +939,10 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, } intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_OFF); + /* Power off, try both CRC pmic gpio and VBT */ msleep(intel_dsi->panel_off_delay); - - /* Panel Disable over CRC PMIC */ + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_OFF); if (intel_dsi->gpio_panel) gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0); From deae2006a3a8d71ebf2b07a9f8621cfda0b3b6e7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:00 +0200 Subject: [PATCH 0453/1299] drm/i915/dsi: Group DPOunit clock gate workaround with PLL enable Move the DPOunit clock gate workaround to directly after the PLL enable. The exact location of the workaround does not matter and there are 2 reasons to group it with the PLL enable: 1) This moves it out of the middle of the init sequence from the spec, making it easier to follow the init sequence / compare it to the spec 2) It is grouped with the pll disable call in intel_dsi_post_disable, so for consistency it should be grouped with the pll enable in intel_dsi_pre_enable Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-5-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index ab905474b9dd..da798a579e11 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -791,14 +791,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, I915_WRITE(BXT_P_DSI_REGULATOR_TX_CTRL, 0); } - intel_dsi_prepare(encoder, pipe_config); - - /* Power on, try both CRC pmic gpio and VBT */ - if (intel_dsi->gpio_panel) - gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); - msleep(intel_dsi->panel_on_delay); - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { u32 val; @@ -808,6 +800,14 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, I915_WRITE(DSPCLK_GATE_D, val); } + intel_dsi_prepare(encoder, pipe_config); + + /* Power on, try both CRC pmic gpio and VBT */ + if (intel_dsi->gpio_panel) + gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); + msleep(intel_dsi->panel_on_delay); + /* put device in ready state */ intel_dsi_device_ready(encoder); From 3e40fa8a3168da060e3835a17cf1ada5a837ddd6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:01 +0200 Subject: [PATCH 0454/1299] drm/i915/dsi: Execute MIPI_SEQ_DEASSERT_RESET before calling device_ready() Execute MIPI_SEQ_DEASSERT_RESET before putting the device in ready state (LP-11), this is the sequence in which things should be done according to the spec. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-6-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index da798a579e11..b7b590d78b31 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -808,10 +808,13 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); msleep(intel_dsi->panel_on_delay); - /* put device in ready state */ + /* Deassert reset */ + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + + /* Put device in ready state (LP-11) */ intel_dsi_device_ready(encoder); - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + /* Send initialization commands in LP mode */ intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); /* Enable port in pre-enable phase itself because as per hw team @@ -915,6 +918,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF); + /* Transition to LP-00 */ intel_dsi_clear_device_ready(encoder); if (IS_BROXTON(dev_priv)) { @@ -938,6 +942,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, I915_WRITE(DSPCLK_GATE_D, val); } + /* Assert reset */ intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); /* Power off, try both CRC pmic gpio and VBT */ From f5bce6df8868668d547c5757d9da70471ea40e50 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:02 +0200 Subject: [PATCH 0455/1299] drm/i915/dsi: Group MIPI_SEQ_BACKLIGHT_ON/OFF with panel_[en|dis]able_backlight Execute the MIPI_SEQ_BACKLIGHT_ON/OFF VBT sequences at the same time as we call intel_panel_enable_backlight() / intel_panel_disable_backlight(). Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-7-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index b7b590d78b31..b132841587cc 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -829,12 +829,12 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, msleep(100); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); intel_dsi_port_enable(encoder); } intel_panel_enable_backlight(intel_dsi->attached_connector); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); } static void intel_dsi_enable_nop(struct intel_encoder *encoder, @@ -860,6 +860,7 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, DRM_DEBUG_KMS("\n"); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); intel_panel_disable_backlight(intel_dsi->attached_connector); /* @@ -915,7 +916,6 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, * if disable packets are sent before sending shutdown packet then in * some next enable sequence send turn on packet error is observed */ - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF); /* Transition to LP-00 */ From 398314516446ad12959e29556a3bafb6534a1065 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:03 +0200 Subject: [PATCH 0456/1299] drm/i915/dsi: Document always using v3 SHUTDOWN / MIPI_SEQ_DISPLAY_OFF order According to the spec for v2 VBTs we should call MIPI_SEQ_DISPLAY_OFF before sending SHUTDOWN, where as for v3 VBTs we should send SHUTDOWN first. Since the v2 order has known issues, we use the v3 order everywhere, add a comment documenting this. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-8-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index b132841587cc..5a50645c3a37 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -872,6 +872,11 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, I915_WRITE(MIPI_DEVICE_READY(port), 0); } + /* + * According to the spec we should send SHUTDOWN before + * MIPI_SEQ_DISPLAY_OFF only for v3+ VBTs, but field testing + * has shown that the v3 sequence works for v2 VBTs too + */ if (is_vid_mode(intel_dsi)) { /* Send Shutdown command to the panel in LP mode */ for_each_dsi_port(port, intel_dsi->ports) From 7108b436c2b53cf5d9c3a90139973bdee95898c8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:04 +0200 Subject: [PATCH 0457/1299] drm/i915/dsi: Execute MIPI_SEQ_TEAR_OFF from intel_dsi_post_disable For v3+ VBTs we should call MIPI_SEQ_TEAR_OFF before MIPI_SEQ_DISPLAY_OFF, v2 VBTs do not have MIPI_SEQ_TEAR_OFF so there this is a nop. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-9-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 5a50645c3a37..7dbef67a5fa0 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -921,6 +921,8 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, * if disable packets are sent before sending shutdown packet then in * some next enable sequence send turn on packet error is observed */ + if (is_cmd_mode(intel_dsi)) + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_TEAR_OFF); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF); /* Transition to LP-00 */ From 38dec5c0892a4c475d9ac89fea25f8a84e17a464 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:05 +0200 Subject: [PATCH 0458/1299] drm/i915/dsi: Call MIPI_SEQ_TEAR_ON and DISPLAY_ON for cmd-mode (untested) According to the spec we should call MIPI_SEQ_TEAR_ON and DISPLAY_ON on enable for cmd-mode, just like we already call their counterparts on disable. Note: untested, my panel is a vid-mode panel. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-10-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 7dbef67a5fa0..7d5fb9d3fa64 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -822,6 +822,8 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, if (is_cmd_mode(intel_dsi)) { for_each_dsi_port(port, intel_dsi->ports) I915_WRITE(MIPI_MAX_RETURN_PKT_SIZE(port), 8 * 4); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_TEAR_ON); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); } else { msleep(20); /* XXX */ for_each_dsi_port(port, intel_dsi->ports) From 25b4620ee822d5f3c2deb3f1358e82763578788f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:06 +0200 Subject: [PATCH 0459/1299] drm/i915/dsi: Skip delays for v3 VBTs in vid-mode For v3 VBTs in vid-mode the delays are part of the VBT sequences, so we should not also delay ourselves otherwise we get double delays. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-11-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 7d5fb9d3fa64..dd38b6835528 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -725,6 +725,17 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, struct intel_crtc_state *pipe_config); static void intel_dsi_unprepare(struct intel_encoder *encoder); +static void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec) +{ + struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); + + /* For v3 VBTs in vid-mode the delays are part of the VBT sequences */ + if (is_vid_mode(intel_dsi) && dev_priv->vbt.dsi.seq_version >= 3) + return; + + msleep(msec); +} + /* * Panel enable/disable sequences from the VBT spec. * @@ -806,7 +817,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, if (intel_dsi->gpio_panel) gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); - msleep(intel_dsi->panel_on_delay); + intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); /* Deassert reset */ intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); @@ -828,7 +839,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, msleep(20); /* XXX */ for_each_dsi_port(port, intel_dsi->ports) dpi_send_cmd(intel_dsi, TURN_ON, false, port); - msleep(100); + intel_dsi_msleep(intel_dsi, 100); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); @@ -955,7 +966,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); /* Power off, try both CRC pmic gpio and VBT */ - msleep(intel_dsi->panel_off_delay); + intel_dsi_msleep(intel_dsi, intel_dsi->panel_off_delay); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_OFF); if (intel_dsi->gpio_panel) gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0); @@ -964,7 +975,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, * FIXME As we do with eDP, just make a note of the time here * and perform the wait before the next panel power on. */ - msleep(intel_dsi->panel_pwr_cycle_delay); + intel_dsi_msleep(intel_dsi, intel_dsi->panel_pwr_cycle_delay); } static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, From 9aceb5c15d84d4b960f5f80fba846c753554d092 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Mar 2017 15:41:27 +0000 Subject: [PATCH 0460/1299] drm/i915: Fix all intel_framebuffer_init failures to take the error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No more direct return -EINVAL as we have to unwind the obj->framebuffer_references. Fixes: 24dbf51a5517 ("drm/i915: struct_mutex is not required for allocating the framebuffer") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170301154128.2841-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_display.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 93371c2377b2..6d2597a4a45f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14313,7 +14313,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (INTEL_INFO(dev_priv)->gen < 4 && tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { DRM_DEBUG("tiling_mode must match fb modifier exactly on gen2/3\n"); - return -EINVAL; + goto err; } stride_alignment = intel_fb_stride_alignment(dev_priv, @@ -14358,7 +14358,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (INTEL_GEN(dev_priv) > 3) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; case DRM_FORMAT_ABGR8888: @@ -14366,7 +14366,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, INTEL_GEN(dev_priv) < 9) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; case DRM_FORMAT_XBGR8888: @@ -14375,14 +14375,14 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (INTEL_GEN(dev_priv) < 4) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; case DRM_FORMAT_ABGR2101010: if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; case DRM_FORMAT_YUYV: @@ -14392,13 +14392,13 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (INTEL_GEN(dev_priv) < 5) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; default: DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } /* FIXME need to adjust LINOFF/TILEOFF accordingly. */ @@ -14411,7 +14411,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, ret = intel_fill_fb_info(dev_priv, &intel_fb->base); if (ret) - return ret; + goto err; ret = drm_framebuffer_init(obj->base.dev, &intel_fb->base, From dd689287b977597a46adf8c3de19d40212f0ea9f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Mar 2017 15:41:28 +0000 Subject: [PATCH 0461/1299] drm/i915: Prevent concurrent tiling/framebuffer modifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reintroduce a lock around tiling vs framebuffer creation to prevent modification of the obj->tiling_and_stride whilst the framebuffer is being created. Rather than use struct_mutex once again, use the per-object lock - this will also be required in future to prevent changing the tiling whilst submitting rendering. Reported-by: Ville Syrjälä Fixes: 24dbf51a5517 ("drm/i915: struct_mutex is not required for allocating the framebuffer") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170301154128.2841-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_object.h | 18 ++++++++++++++++- drivers/gpu/drm/i915/i915_gem_shrinker.c | 2 +- drivers/gpu/drm/i915/i915_gem_tiling.c | 9 ++++++++- drivers/gpu/drm/i915/intel_display.c | 25 +++++++++++++++--------- 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 2fb30a5eb510..1495eeb03382 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -165,7 +165,7 @@ struct drm_i915_gem_object { struct reservation_object *resv; /** References from framebuffers, locks out tiling changes. */ - atomic_t framebuffer_references; + unsigned int framebuffer_references; /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; @@ -260,6 +260,16 @@ extern void drm_gem_object_unreference(struct drm_gem_object *); __deprecated extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); +static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) +{ + reservation_object_lock(obj->resv, NULL); +} + +static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) +{ + reservation_object_unlock(obj->resv); +} + static inline bool i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) { @@ -306,6 +316,12 @@ i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj) void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj); +static inline bool +i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj) +{ + return READ_ONCE(obj->framebuffer_references); +} + static inline unsigned int i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 8bc515e8b2a2..006a8b908f77 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -207,7 +207,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!(flags & I915_SHRINK_ACTIVE) && (i915_gem_object_is_active(obj) || - atomic_read(&obj->framebuffer_references))) + i915_gem_object_is_framebuffer(obj))) continue; if (!can_release_pages(obj)) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 46ade36dcee6..a0d6d4317a49 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -238,7 +238,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, if ((tiling | stride) == obj->tiling_and_stride) return 0; - if (atomic_read(&obj->framebuffer_references)) + if (i915_gem_object_is_framebuffer(obj)) return -EBUSY; /* We need to rebind the object if its current allocation @@ -258,6 +258,12 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, if (err) return err; + i915_gem_object_lock(obj); + if (i915_gem_object_is_framebuffer(obj)) { + i915_gem_object_unlock(obj); + return -EBUSY; + } + /* If the memory has unknown (i.e. varying) swizzling, we pin the * pages to prevent them being swapped out and causing corruption * due to the change in swizzling. @@ -294,6 +300,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, } obj->tiling_and_stride = tiling | stride; + i915_gem_object_unlock(obj); /* Force the fence to be reacquired for GTT access */ i915_gem_release_mmap(obj); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6d2597a4a45f..9d8c08c4b105 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14185,7 +14185,10 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) drm_framebuffer_cleanup(fb); - WARN_ON(atomic_dec_return(&intel_fb->obj->framebuffer_references) < 0); + i915_gem_object_lock(intel_fb->obj); + WARN_ON(!intel_fb->obj->framebuffer_references--); + i915_gem_object_unlock(intel_fb->obj); + i915_gem_object_put(intel_fb->obj); kfree(intel_fb); @@ -14262,12 +14265,16 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - unsigned int tiling = i915_gem_object_get_tiling(obj); - u32 pitch_limit, stride_alignment; struct drm_format_name_buf format_name; + u32 pitch_limit, stride_alignment; + unsigned int tiling, stride; int ret = -EINVAL; - atomic_inc(&obj->framebuffer_references); + i915_gem_object_lock(obj); + obj->framebuffer_references++; + tiling = i915_gem_object_get_tiling(obj); + stride = i915_gem_object_get_stride(obj); + i915_gem_object_unlock(obj); if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { /* @@ -14339,11 +14346,9 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, * If there's a fence, enforce that * the fb pitch and fence stride match. */ - if (tiling != I915_TILING_NONE && - mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) { + if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) { DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", - mode_cmd->pitches[0], - i915_gem_object_get_stride(obj)); + mode_cmd->pitches[0], stride); goto err; } @@ -14424,7 +14429,9 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, return 0; err: - atomic_dec(&obj->framebuffer_references); + i915_gem_object_lock(obj); + obj->framebuffer_references--; + i915_gem_object_unlock(obj); return ret; } From 02e012f1727f9c8fe24872c74d91019e35e5897b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Mar 2017 12:11:31 +0000 Subject: [PATCH 0462/1299] drm/i915: Move w/a LRI debug message from context-init to driver load The spam of every context initialisation saying the same thing is annoying me! Move the information to the setup of the engine. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170301121131.11588-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_engine_cs.c | 38 +++++++++++++------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index c4d4698f98e7..c58339b22f6a 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1009,30 +1009,32 @@ static int glk_init_workarounds(struct intel_engine_cs *engine) int init_workarounds_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + int err; WARN_ON(engine->id != RCS); dev_priv->workarounds.count = 0; - dev_priv->workarounds.hw_whitelist_count[RCS] = 0; + dev_priv->workarounds.hw_whitelist_count[engine->id] = 0; if (IS_BROADWELL(dev_priv)) - return bdw_init_workarounds(engine); - - if (IS_CHERRYVIEW(dev_priv)) - return chv_init_workarounds(engine); - - if (IS_SKYLAKE(dev_priv)) - return skl_init_workarounds(engine); - - if (IS_BROXTON(dev_priv)) - return bxt_init_workarounds(engine); - - if (IS_KABYLAKE(dev_priv)) - return kbl_init_workarounds(engine); - - if (IS_GEMINILAKE(dev_priv)) - return glk_init_workarounds(engine); + err = bdw_init_workarounds(engine); + else if (IS_CHERRYVIEW(dev_priv)) + err = chv_init_workarounds(engine); + else if (IS_SKYLAKE(dev_priv)) + err = skl_init_workarounds(engine); + else if (IS_BROXTON(dev_priv)) + err = bxt_init_workarounds(engine); + else if (IS_KABYLAKE(dev_priv)) + err = kbl_init_workarounds(engine); + else if (IS_GEMINILAKE(dev_priv)) + err = glk_init_workarounds(engine); + else + err = 0; + if (err) + return err; + DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n", + engine->name, dev_priv->workarounds.count); return 0; } @@ -1066,8 +1068,6 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); - return 0; } From a4b0e8a4e92b1baa860e744847fbdb84a50a5071 Mon Sep 17 00:00:00 2001 From: "Potomski, MichalX" Date: Thu, 23 Feb 2017 09:05:30 +0000 Subject: [PATCH 0463/1299] scsi: ufs: Factor out ufshcd_read_desc_param Since in UFS 2.1 specification some of the descriptor lengths differs from 2.0 specification and some devices, which are reporting spec version 2.0 have different descriptor lengths we can not rely on hardcoded values taken from 2.0 specification. This patch introduces reading these lengths per each device from descriptor headers at probe time to ensure their correctness. Signed-off-by: Michal' Potomski Reviewed-by: Subhash Jadavani Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufs.h | 22 ++-- drivers/scsi/ufs/ufshcd.c | 231 ++++++++++++++++++++++++++++---------- drivers/scsi/ufs/ufshcd.h | 15 +++ 3 files changed, 196 insertions(+), 72 deletions(-) diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index 318e4a1f76c9..54deeb754db5 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -146,7 +146,7 @@ enum attr_idn { /* Descriptor idn for Query requests */ enum desc_idn { QUERY_DESC_IDN_DEVICE = 0x0, - QUERY_DESC_IDN_CONFIGURAION = 0x1, + QUERY_DESC_IDN_CONFIGURATION = 0x1, QUERY_DESC_IDN_UNIT = 0x2, QUERY_DESC_IDN_RFU_0 = 0x3, QUERY_DESC_IDN_INTERCONNECT = 0x4, @@ -162,19 +162,13 @@ enum desc_header_offset { QUERY_DESC_DESC_TYPE_OFFSET = 0x01, }; -enum ufs_desc_max_size { - QUERY_DESC_DEVICE_MAX_SIZE = 0x40, - QUERY_DESC_CONFIGURAION_MAX_SIZE = 0x90, - QUERY_DESC_UNIT_MAX_SIZE = 0x23, - QUERY_DESC_INTERCONNECT_MAX_SIZE = 0x06, - /* - * Max. 126 UNICODE characters (2 bytes per character) plus 2 bytes - * of descriptor header. - */ - QUERY_DESC_STRING_MAX_SIZE = 0xFE, - QUERY_DESC_GEOMETRY_MAX_SIZE = 0x44, - QUERY_DESC_POWER_MAX_SIZE = 0x62, - QUERY_DESC_RFU_MAX_SIZE = 0x00, +enum ufs_desc_def_size { + QUERY_DESC_DEVICE_DEF_SIZE = 0x40, + QUERY_DESC_CONFIGURATION_DEF_SIZE = 0x90, + QUERY_DESC_UNIT_DEF_SIZE = 0x23, + QUERY_DESC_INTERCONNECT_DEF_SIZE = 0x06, + QUERY_DESC_GEOMETRY_DEF_SIZE = 0x44, + QUERY_DESC_POWER_DEF_SIZE = 0x62, }; /* Unit descriptor parameters offsets in bytes*/ diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index dc6efbd1be8e..1359913bf840 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -100,19 +100,6 @@ #define ufshcd_hex_dump(prefix_str, buf, len) \ print_hex_dump(KERN_ERR, prefix_str, DUMP_PREFIX_OFFSET, 16, 4, buf, len, false) -static u32 ufs_query_desc_max_size[] = { - QUERY_DESC_DEVICE_MAX_SIZE, - QUERY_DESC_CONFIGURAION_MAX_SIZE, - QUERY_DESC_UNIT_MAX_SIZE, - QUERY_DESC_RFU_MAX_SIZE, - QUERY_DESC_INTERCONNECT_MAX_SIZE, - QUERY_DESC_STRING_MAX_SIZE, - QUERY_DESC_RFU_MAX_SIZE, - QUERY_DESC_GEOMETRY_MAX_SIZE, - QUERY_DESC_POWER_MAX_SIZE, - QUERY_DESC_RFU_MAX_SIZE, -}; - enum { UFSHCD_MAX_CHANNEL = 0, UFSHCD_MAX_ID = 1, @@ -2857,7 +2844,7 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba, goto out; } - if (*buf_len <= QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) { + if (*buf_len < QUERY_DESC_MIN_SIZE || *buf_len > QUERY_DESC_MAX_SIZE) { dev_err(hba->dev, "%s: descriptor buffer size (%d) is out of range\n", __func__, *buf_len); err = -EINVAL; @@ -2937,6 +2924,92 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba, return err; } +/** + * ufshcd_read_desc_length - read the specified descriptor length from header + * @hba: Pointer to adapter instance + * @desc_id: descriptor idn value + * @desc_index: descriptor index + * @desc_length: pointer to variable to read the length of descriptor + * + * Return 0 in case of success, non-zero otherwise + */ +static int ufshcd_read_desc_length(struct ufs_hba *hba, + enum desc_idn desc_id, + int desc_index, + int *desc_length) +{ + int ret; + u8 header[QUERY_DESC_HDR_SIZE]; + int header_len = QUERY_DESC_HDR_SIZE; + + if (desc_id >= QUERY_DESC_IDN_MAX) + return -EINVAL; + + ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC, + desc_id, desc_index, 0, header, + &header_len); + + if (ret) { + dev_err(hba->dev, "%s: Failed to get descriptor header id %d", + __func__, desc_id); + return ret; + } else if (desc_id != header[QUERY_DESC_DESC_TYPE_OFFSET]) { + dev_warn(hba->dev, "%s: descriptor header id %d and desc_id %d mismatch", + __func__, header[QUERY_DESC_DESC_TYPE_OFFSET], + desc_id); + ret = -EINVAL; + } + + *desc_length = header[QUERY_DESC_LENGTH_OFFSET]; + return ret; + +} + +/** + * ufshcd_map_desc_id_to_length - map descriptor IDN to its length + * @hba: Pointer to adapter instance + * @desc_id: descriptor idn value + * @desc_len: mapped desc length (out) + * + * Return 0 in case of success, non-zero otherwise + */ +int ufshcd_map_desc_id_to_length(struct ufs_hba *hba, + enum desc_idn desc_id, int *desc_len) +{ + switch (desc_id) { + case QUERY_DESC_IDN_DEVICE: + *desc_len = hba->desc_size.dev_desc; + break; + case QUERY_DESC_IDN_POWER: + *desc_len = hba->desc_size.pwr_desc; + break; + case QUERY_DESC_IDN_GEOMETRY: + *desc_len = hba->desc_size.geom_desc; + break; + case QUERY_DESC_IDN_CONFIGURATION: + *desc_len = hba->desc_size.conf_desc; + break; + case QUERY_DESC_IDN_UNIT: + *desc_len = hba->desc_size.unit_desc; + break; + case QUERY_DESC_IDN_INTERCONNECT: + *desc_len = hba->desc_size.interc_desc; + break; + case QUERY_DESC_IDN_STRING: + *desc_len = QUERY_DESC_MAX_SIZE; + break; + case QUERY_DESC_IDN_RFU_0: + case QUERY_DESC_IDN_RFU_1: + *desc_len = 0; + break; + default: + *desc_len = 0; + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(ufshcd_map_desc_id_to_length); + /** * ufshcd_read_desc_param - read the specified descriptor parameter * @hba: Pointer to adapter instance @@ -2951,42 +3024,49 @@ static int ufshcd_query_descriptor_retry(struct ufs_hba *hba, static int ufshcd_read_desc_param(struct ufs_hba *hba, enum desc_idn desc_id, int desc_index, - u32 param_offset, + u8 param_offset, u8 *param_read_buf, - u32 param_size) + u8 param_size) { int ret; u8 *desc_buf; - u32 buff_len; + int buff_len; bool is_kmalloc = true; - /* safety checks */ - if (desc_id >= QUERY_DESC_IDN_MAX) + /* Safety check */ + if (desc_id >= QUERY_DESC_IDN_MAX || !param_size) return -EINVAL; - buff_len = ufs_query_desc_max_size[desc_id]; - if ((param_offset + param_size) > buff_len) - return -EINVAL; + /* Get the max length of descriptor from structure filled up at probe + * time. + */ + ret = ufshcd_map_desc_id_to_length(hba, desc_id, &buff_len); - if (!param_offset && (param_size == buff_len)) { - /* memory space already available to hold full descriptor */ - desc_buf = param_read_buf; - is_kmalloc = false; - } else { - /* allocate memory to hold full descriptor */ + /* Sanity checks */ + if (ret || !buff_len) { + dev_err(hba->dev, "%s: Failed to get full descriptor length", + __func__); + return ret; + } + + /* Check whether we need temp memory */ + if (param_offset != 0 || param_size < buff_len) { desc_buf = kmalloc(buff_len, GFP_KERNEL); if (!desc_buf) return -ENOMEM; + } else { + desc_buf = param_read_buf; + is_kmalloc = false; } + /* Request for full descriptor */ ret = ufshcd_query_descriptor_retry(hba, UPIU_QUERY_OPCODE_READ_DESC, - desc_id, desc_index, 0, desc_buf, - &buff_len); + desc_id, desc_index, 0, + desc_buf, &buff_len); if (ret) { dev_err(hba->dev, "%s: Failed reading descriptor. desc_id %d, desc_index %d, param_offset %d, ret %d", __func__, desc_id, desc_index, param_offset, ret); - goto out; } @@ -2998,25 +3078,9 @@ static int ufshcd_read_desc_param(struct ufs_hba *hba, goto out; } - /* - * While reading variable size descriptors (like string descriptor), - * some UFS devices may report the "LENGTH" (field in "Transaction - * Specific fields" of Query Response UPIU) same as what was requested - * in Query Request UPIU instead of reporting the actual size of the - * variable size descriptor. - * Although it's safe to ignore the "LENGTH" field for variable size - * descriptors as we can always derive the length of the descriptor from - * the descriptor header fields. Hence this change impose the length - * match check only for fixed size descriptors (for which we always - * request the correct size as part of Query Request UPIU). - */ - if ((desc_id != QUERY_DESC_IDN_STRING) && - (buff_len != desc_buf[QUERY_DESC_LENGTH_OFFSET])) { - dev_err(hba->dev, "%s: desc_buf length mismatch: buff_len %d, buff_len(desc_header) %d", - __func__, buff_len, desc_buf[QUERY_DESC_LENGTH_OFFSET]); - ret = -EINVAL; - goto out; - } + /* Check wherher we will not copy more data, than available */ + if (is_kmalloc && param_size > buff_len) + param_size = buff_len; if (is_kmalloc) memcpy(param_read_buf, &desc_buf[param_offset], param_size); @@ -5919,8 +5983,8 @@ static int ufshcd_set_icc_levels_attr(struct ufs_hba *hba, u32 icc_level) static void ufshcd_init_icc_levels(struct ufs_hba *hba) { int ret; - int buff_len = QUERY_DESC_POWER_MAX_SIZE; - u8 desc_buf[QUERY_DESC_POWER_MAX_SIZE]; + int buff_len = hba->desc_size.pwr_desc; + u8 desc_buf[hba->desc_size.pwr_desc]; ret = ufshcd_read_power_desc(hba, desc_buf, buff_len); if (ret) { @@ -6017,11 +6081,10 @@ static int ufs_get_device_desc(struct ufs_hba *hba, { int err; u8 model_index; - u8 str_desc_buf[QUERY_DESC_STRING_MAX_SIZE + 1] = {0}; - u8 desc_buf[QUERY_DESC_DEVICE_MAX_SIZE]; + u8 str_desc_buf[QUERY_DESC_MAX_SIZE + 1] = {0}; + u8 desc_buf[hba->desc_size.dev_desc]; - err = ufshcd_read_device_desc(hba, desc_buf, - QUERY_DESC_DEVICE_MAX_SIZE); + err = ufshcd_read_device_desc(hba, desc_buf, hba->desc_size.dev_desc); if (err) { dev_err(hba->dev, "%s: Failed reading Device Desc. err = %d\n", __func__, err); @@ -6038,14 +6101,14 @@ static int ufs_get_device_desc(struct ufs_hba *hba, model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME]; err = ufshcd_read_string_desc(hba, model_index, str_desc_buf, - QUERY_DESC_STRING_MAX_SIZE, ASCII_STD); + QUERY_DESC_MAX_SIZE, ASCII_STD); if (err) { dev_err(hba->dev, "%s: Failed reading Product Name. err = %d\n", __func__, err); goto out; } - str_desc_buf[QUERY_DESC_STRING_MAX_SIZE] = '\0'; + str_desc_buf[QUERY_DESC_MAX_SIZE] = '\0'; strlcpy(dev_desc->model, (str_desc_buf + QUERY_DESC_HDR_SIZE), min_t(u8, str_desc_buf[QUERY_DESC_LENGTH_OFFSET], MAX_MODEL_LEN)); @@ -6251,6 +6314,51 @@ static void ufshcd_clear_dbg_ufs_stats(struct ufs_hba *hba) hba->req_abort_count = 0; } +static void ufshcd_init_desc_sizes(struct ufs_hba *hba) +{ + int err; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_DEVICE, 0, + &hba->desc_size.dev_desc); + if (err) + hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_POWER, 0, + &hba->desc_size.pwr_desc); + if (err) + hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_INTERCONNECT, 0, + &hba->desc_size.interc_desc); + if (err) + hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_CONFIGURATION, 0, + &hba->desc_size.conf_desc); + if (err) + hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_UNIT, 0, + &hba->desc_size.unit_desc); + if (err) + hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE; + + err = ufshcd_read_desc_length(hba, QUERY_DESC_IDN_GEOMETRY, 0, + &hba->desc_size.geom_desc); + if (err) + hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE; +} + +static void ufshcd_def_desc_sizes(struct ufs_hba *hba) +{ + hba->desc_size.dev_desc = QUERY_DESC_DEVICE_DEF_SIZE; + hba->desc_size.pwr_desc = QUERY_DESC_POWER_DEF_SIZE; + hba->desc_size.interc_desc = QUERY_DESC_INTERCONNECT_DEF_SIZE; + hba->desc_size.conf_desc = QUERY_DESC_CONFIGURATION_DEF_SIZE; + hba->desc_size.unit_desc = QUERY_DESC_UNIT_DEF_SIZE; + hba->desc_size.geom_desc = QUERY_DESC_GEOMETRY_DEF_SIZE; +} + /** * ufshcd_probe_hba - probe hba to detect device and initialize * @hba: per-adapter instance @@ -6285,6 +6393,9 @@ static int ufshcd_probe_hba(struct ufs_hba *hba) if (ret) goto out; + /* Init check for device descriptor sizes */ + ufshcd_init_desc_sizes(hba); + ret = ufs_get_device_desc(hba, &card); if (ret) { dev_err(hba->dev, "%s: Failed getting device info. err = %d\n", @@ -6320,6 +6431,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba) /* set the state as operational after switching to desired gear */ hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL; + /* * If we are in error handling context or in power management callbacks * context, no need to scan the host @@ -7774,6 +7886,9 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) hba->mmio_base = mmio_base; hba->irq = irq; + /* Set descriptor lengths to specification defaults */ + ufshcd_def_desc_sizes(hba); + err = ufshcd_hba_init(hba); if (err) goto out_error; diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 7630600217a2..cdc8bd05f7df 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -220,6 +220,15 @@ struct ufs_dev_cmd { struct ufs_query query; }; +struct ufs_desc_size { + int dev_desc; + int pwr_desc; + int geom_desc; + int interc_desc; + int unit_desc; + int conf_desc; +}; + /** * struct ufs_clk_info - UFS clock related info * @list: list headed by hba->clk_list_head @@ -483,6 +492,7 @@ struct ufs_stats { * @clk_list_head: UFS host controller clocks list node head * @pwr_info: holds current power mode * @max_pwr_info: keeps the device max valid pwm + * @desc_size: descriptor sizes reported by device * @urgent_bkops_lvl: keeps track of urgent bkops level for device * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for * device is known or not. @@ -666,6 +676,7 @@ struct ufs_hba { bool is_urgent_bkops_lvl_checked; struct rw_semaphore clk_scaling_lock; + struct ufs_desc_size desc_size; }; /* Returns true if clocks can be gated. Otherwise false */ @@ -832,6 +843,10 @@ int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, enum flag_idn idn, bool *flag_res); int ufshcd_hold(struct ufs_hba *hba, bool async); void ufshcd_release(struct ufs_hba *hba); + +int ufshcd_map_desc_id_to_length(struct ufs_hba *hba, enum desc_idn desc_id, + int *desc_length); + u32 ufshcd_get_local_unipro_ver(struct ufs_hba *hba); /* Wrapper functions for safely calling variant operations */ From c46f09175dabd5dd6a1507f36250bfa734a0156e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 1 Mar 2017 17:27:00 +0900 Subject: [PATCH 0464/1299] scsi: sd: Check for unaligned partial completion Commit ("mpt3sas: Force request partial completion alignment") was not considering the case of commands not operating on logical block size units (e.g. REQ_OP_ZONE_REPORT and its 64B aligned partial replies). In this case, forcing alignment of resid to the device logical block size can break the command result, e.g. in the case of REQ_OP_ZONE_REPORT, the exact number of zone reported by the device. Move the partial completion alignement check of mpt3sas to a generic implementation in sd_done(). The check is added within the default section of the initial req_op() switch case so that the report and reset zone commands are ignored. In addition, as sd_done() is not called for passthrough requests, resid corrections are not done as intended by the initial mpt3sas patch. Fixes: f2e767bb5d6e ("mpt3sas: Force request partial completion alignment") Cc: # v4.10 Signed-off-by: Damien Le Moal Acked-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 15 --------------- drivers/scsi/sd.c | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 46e866c36c8a..69c29c560575 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -4677,7 +4677,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) struct MPT3SAS_DEVICE *sas_device_priv_data; u32 response_code = 0; unsigned long flags; - unsigned int sector_sz; mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply); @@ -4742,20 +4741,6 @@ _scsih_io_done(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, u32 reply) } xfer_cnt = le32_to_cpu(mpi_reply->TransferCount); - - /* In case of bogus fw or device, we could end up having - * unaligned partial completion. We can force alignment here, - * then scsi-ml does not need to handle this misbehavior. - */ - sector_sz = scmd->device->sector_size; - if (unlikely(!blk_rq_is_passthrough(scmd->request) && sector_sz && - xfer_cnt % sector_sz)) { - sdev_printk(KERN_INFO, scmd->device, - "unaligned partial completion avoided (xfer_cnt=%u, sector_sz=%u)\n", - xfer_cnt, sector_sz); - xfer_cnt = round_down(xfer_cnt, sector_sz); - } - scsi_set_resid(scmd, scsi_bufflen(scmd) - xfer_cnt); if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) log_info = le32_to_cpu(mpi_reply->IOCLogInfo); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c7839f6c35cc..fb9b4d29af0b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1783,6 +1783,8 @@ static int sd_done(struct scsi_cmnd *SCpnt) { int result = SCpnt->result; unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt); + unsigned int sector_size = SCpnt->device->sector_size; + unsigned int resid; struct scsi_sense_hdr sshdr; struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk); struct request *req = SCpnt->request; @@ -1813,6 +1815,21 @@ static int sd_done(struct scsi_cmnd *SCpnt) scsi_set_resid(SCpnt, blk_rq_bytes(req)); } break; + default: + /* + * In case of bogus fw or device, we could end up having + * an unaligned partial completion. Check this here and force + * alignment. + */ + resid = scsi_get_resid(SCpnt); + if (resid & (sector_size - 1)) { + sd_printk(KERN_INFO, sdkp, + "Unaligned partial completion (resid=%u, sector_sz=%u)\n", + resid, sector_size); + resid = min(scsi_bufflen(SCpnt), + round_up(resid, sector_size)); + scsi_set_resid(SCpnt, resid); + } } if (result) { From 8893cf6cb1cf56334c05120e23092dbfc9423ebb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 1 Mar 2017 09:00:36 -0800 Subject: [PATCH 0465/1299] scsi: mpt3sas: Avoid sleeping in interrupt context Commit 669f044170d8 ("scsi: srp_transport: Move queuecommand() wait code to SCSI core") can make scsi_internal_device_block() sleep. However, the mpt3sas driver can call this function from an interrupt handler. Hence add a second argument to scsi_internal_device_block() that restores the old behavior of this function for the mpt3sas handler. The call chain that triggered an "IRQ handler enabled interrupts" complaint is as follows: _base_interrupt() -> _base_async_event() -> mpt3sas_scsih_event_callback() -> _scsih_check_topo_delete_events() -> _scsih_block_io_to_children_attached_directly() -> _scsih_block_io_device() -> _scsih_internal_device_block() -> scsi_internal_device_block() Reported-by: Omar Sandoval Signed-off-by: Bart Van Assche Cc: Omar Sandoval Cc: Hannes Reinecke Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Sathya Prakash Cc: Chaitra P B Cc: Suganath Prabu Subramani Cc: Sreekanth Reddy Cc: # v4.10+ Tested-by: Omar Sandoval Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.h | 3 --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 ++-- drivers/scsi/scsi_lib.c | 14 ++++++++++---- drivers/scsi/scsi_priv.h | 3 --- include/scsi/scsi_device.h | 4 ++++ 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 7fe7e6ed595b..8981806fb13f 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -1442,9 +1442,6 @@ void mpt3sas_transport_update_links(struct MPT3SAS_ADAPTER *ioc, u64 sas_address, u16 handle, u8 phy_number, u8 link_rate); extern struct sas_function_template mpt3sas_transport_functions; extern struct scsi_transport_template *mpt3sas_transport_template; -extern int scsi_internal_device_block(struct scsi_device *sdev); -extern int scsi_internal_device_unblock(struct scsi_device *sdev, - enum scsi_device_state new_state); /* trigger data externs */ void mpt3sas_send_trigger_data_event(struct MPT3SAS_ADAPTER *ioc, struct SL_WH_TRIGGERS_EVENT_DATA_T *event_data); diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 69c29c560575..919ba2bb15f1 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2859,7 +2859,7 @@ _scsih_internal_device_block(struct scsi_device *sdev, sas_device_priv_data->sas_target->handle); sas_device_priv_data->block = 1; - r = scsi_internal_device_block(sdev); + r = scsi_internal_device_block(sdev, false); if (r == -EINVAL) sdev_printk(KERN_WARNING, sdev, "device_block failed with return(%d) for handle(0x%04x)\n", @@ -2895,7 +2895,7 @@ _scsih_internal_device_unblock(struct scsi_device *sdev, "performing a block followed by an unblock\n", r, sas_device_priv_data->sas_target->handle); sas_device_priv_data->block = 1; - r = scsi_internal_device_block(sdev); + r = scsi_internal_device_block(sdev, false); if (r) sdev_printk(KERN_WARNING, sdev, "retried device_block " "failed with return(%d) for handle(0x%04x)\n", diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index f5e45a252485..f41e6b84a1bd 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2932,6 +2932,8 @@ EXPORT_SYMBOL(scsi_target_resume); /** * scsi_internal_device_block - internal function to put a device temporarily into the SDEV_BLOCK state * @sdev: device to block + * @wait: Whether or not to wait until ongoing .queuecommand() / + * .queue_rq() calls have finished. * * Block request made by scsi lld's to temporarily stop all * scsi commands on the specified device. May sleep. @@ -2949,7 +2951,7 @@ EXPORT_SYMBOL(scsi_target_resume); * remove the rport mutex lock and unlock calls from srp_queuecommand(). */ int -scsi_internal_device_block(struct scsi_device *sdev) +scsi_internal_device_block(struct scsi_device *sdev, bool wait) { struct request_queue *q = sdev->request_queue; unsigned long flags; @@ -2969,12 +2971,16 @@ scsi_internal_device_block(struct scsi_device *sdev) * request queue. */ if (q->mq_ops) { - blk_mq_quiesce_queue(q); + if (wait) + blk_mq_quiesce_queue(q); + else + blk_mq_stop_hw_queues(q); } else { spin_lock_irqsave(q->queue_lock, flags); blk_stop_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); - scsi_wait_for_queuecommand(sdev); + if (wait) + scsi_wait_for_queuecommand(sdev); } return 0; @@ -3036,7 +3042,7 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_unblock); static void device_block(struct scsi_device *sdev, void *data) { - scsi_internal_device_block(sdev); + scsi_internal_device_block(sdev, true); } static int diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 99bfc985e190..f11bd102d6d5 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -188,8 +188,5 @@ static inline void scsi_dh_remove_device(struct scsi_device *sdev) { } */ #define SCSI_DEVICE_BLOCK_MAX_TIMEOUT 600 /* units in seconds */ -extern int scsi_internal_device_block(struct scsi_device *sdev); -extern int scsi_internal_device_unblock(struct scsi_device *sdev, - enum scsi_device_state new_state); #endif /* _SCSI_PRIV_H */ diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 6f22b39f1b0c..080c7ce9bae8 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -472,6 +472,10 @@ static inline int scsi_device_created(struct scsi_device *sdev) sdev->sdev_state == SDEV_CREATED_BLOCK; } +int scsi_internal_device_block(struct scsi_device *sdev, bool wait); +int scsi_internal_device_unblock(struct scsi_device *sdev, + enum scsi_device_state new_state); + /* accessor functions for the SCSI parameters */ static inline int scsi_device_sync(struct scsi_device *sdev) { From 52e51f16407b7b34e26affb500a21e250d9fce0b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Mar 2017 19:04:35 +0000 Subject: [PATCH 0466/1299] efi/libstub: Treat missing SecureBoot variable as Secure Boot disabled The newly refactored code that infers the firmware's Secure Boot state prints the following error when the EFI variable 'SecureBoot' does not exist: EFI stub: ERROR: Could not determine UEFI Secure Boot status. However, this variable is only guaranteed to be defined on a system that is Secure Boot capable to begin with, and so it is not an error if it is missing. So report Secure Boot as being disabled in this case, without printing any error messages. Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1488395076-29712-2-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/secureboot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c index 6def402bf569..5da36e56b36a 100644 --- a/drivers/firmware/efi/libstub/secureboot.c +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -45,6 +45,8 @@ enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg) size = sizeof(secboot); status = get_efi_var(efi_SecureBoot_name, &efi_variable_guid, NULL, &size, &secboot); + if (status == EFI_NOT_FOUND) + return efi_secureboot_mode_disabled; if (status != EFI_SUCCESS) goto out_efi_err; @@ -78,7 +80,5 @@ enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg) out_efi_err: pr_efi_err(sys_table_arg, "Could not determine UEFI Secure Boot status.\n"); - if (status == EFI_NOT_FOUND) - return efi_secureboot_mode_disabled; return efi_secureboot_mode_unknown; } From d1eb98143c56f24fef125f5bbed49ae0b52fb7d6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Mar 2017 19:05:54 +0000 Subject: [PATCH 0467/1299] efi/arm: Fix boot crash with CONFIG_CPUMASK_OFFSTACK=y On ARM and arm64, we use a dedicated mm_struct to map the UEFI Runtime Services regions, which allows us to map those regions on demand, and in a way that is guaranteed to be compatible with incoming kernels across kexec. As it turns out, we don't fully initialize the mm_struct in the same way as process mm_structs are initialized on fork(), which results in the following crash on ARM if CONFIG_CPUMASK_OFFSTACK=y is enabled: ... EFI Variables Facility v0.08 2004-May-17 Unable to handle kernel NULL pointer dereference at virtual address 00000000 [...] Process swapper/0 (pid: 1) ... __memzero() check_and_switch_context() virt_efi_get_next_variable() efivar_init() efivars_sysfs_init() do_one_initcall() ... This is due to a missing call to mm_init_cpumask(), so add it. Signed-off-by: Ard Biesheuvel Cc: # v4.5+ Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1488395154-29786-1-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/arm-runtime.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c index 349dc3e1e52e..974c5a31a005 100644 --- a/drivers/firmware/efi/arm-runtime.c +++ b/drivers/firmware/efi/arm-runtime.c @@ -65,6 +65,7 @@ static bool __init efi_virtmap_init(void) bool systab_found; efi_mm.pgd = pgd_alloc(&efi_mm); + mm_init_cpumask(&efi_mm); init_new_context(NULL, &efi_mm); systab_found = false; From 4ec3dd89052a437304e1451733c989b8cec681af Mon Sep 17 00:00:00 2001 From: Zhao Yan Date: Thu, 2 Mar 2017 15:12:47 +0800 Subject: [PATCH 0468/1299] drm/i915/gvt: fix an error for F_RO flag the ro_mask is not stored into each mmio entry Fixes: 12d14cc43b34 ("drm/i915/gvt: Introduce a framework for tracking HW registers.") Signed-off-by: Zhao Yan Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 548aedfbd402..8e43395c748a 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -121,6 +121,7 @@ static int new_mmio_info(struct intel_gvt *gvt, info->size = size; info->length = (i + 4) < end ? 4 : (end - i); info->addr_mask = addr_mask; + info->ro_mask = ro_mask; info->device = device; info->read = read ? read : intel_vgpu_default_mmio_read; info->write = write ? write : intel_vgpu_default_mmio_write; From 4c77b18cf8b7ab37c7d5737b4609010d2ceec5f0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 11:24:35 +0100 Subject: [PATCH 0469/1299] sched/fair: Make select_idle_cpu() more aggressive Kitsunyan reported desktop latency issues on his Celeron 887 because of commit: 1b568f0aabf2 ("sched/core: Optimize SCHED_SMT") ... even though his CPU doesn't do SMT. The effect of running the SMT code on a !SMT part is basically a more aggressive select_idle_cpu(). Removing the avg condition fixed things for him. I also know FB likes this test gone, even though other workloads like having it. For now, take it out by default, until we get a better idea. Reported-by: kitsunyan Signed-off-by: Peter Zijlstra (Intel) Cc: Chris Mason Cc: Linus Torvalds Cc: Mike Galbraith Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- kernel/sched/features.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 274c747a01ce..b3ee10dd3e85 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5797,7 +5797,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t * Due to large variance we need a large fuzz factor; hackbench in * particularly is sensitive here. */ - if ((avg_idle / 512) < avg_cost) + if (sched_feat(SIS_AVG_CPU) && (avg_idle / 512) < avg_cost) return -1; time = local_clock(); diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 69631fa46c2f..1b3c8189b286 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -51,6 +51,11 @@ SCHED_FEAT(NONTASK_CAPACITY, true) */ SCHED_FEAT(TTWU_QUEUE, true) +/* + * When doing wakeups, attempt to limit superfluous scans of the LLC domain. + */ +SCHED_FEAT(SIS_AVG_CPU, false) + #ifdef HAVE_RT_PUSH_IPI /* * In order to avoid a thundering herd attack of CPUs that are From 0ba87bb27d66b78e278167ac7e20c66520b8a612 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 10:51:47 +0100 Subject: [PATCH 0470/1299] sched/core: Fix pick_next_task() for RT,DL Pavan noticed that the following commit: 49ee576809d8 ("sched/core: Optimize pick_next_task() for idle_sched_class") ... broke RT,DL balancing by robbing them of the opportinty to do new-'idle' balancing when their last runnable task (on that runqueue) goes away. Reported-by: Pavan Kondeti Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: 49ee576809d8 ("sched/core: Optimize pick_next_task() for idle_sched_class") Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bbfb917a9b49..6699d43a8843 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3273,10 +3273,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) struct task_struct *p; /* - * Optimization: we know that if all tasks are in - * the fair class we can call that function directly: + * Optimization: we know that if all tasks are in the fair class we can + * call that function directly, but only if the @prev task wasn't of a + * higher scheduling class, because otherwise those loose the + * opportunity to pull in more work from other CPUs. */ - if (likely(rq->nr_running == rq->cfs.h_nr_running)) { + if (likely((prev->sched_class == &idle_sched_class || + prev->sched_class == &fair_sched_class) && + rq->nr_running == rq->cfs.h_nr_running)) { + p = fair_sched_class.pick_next_task(rq, prev, rf); if (unlikely(p == RETRY_TASK)) goto again; From f94c8d116997597fc00f0812b0ab9256e7b0c58f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 15:53:38 +0100 Subject: [PATCH 0471/1299] sched/clock, x86/tsc: Rework the x86 'unstable' sched_clock() interface Wanpeng Li reported that since the following commit: acb04058de49 ("sched/clock: Fix hotplug crash") ... KVM always runs with unstable sched-clock even though KVM's kvm_clock _is_ stable. The problem is that we've tied clear_sched_clock_stable() to the TSC state, and overlooked that sched_clock() is a paravirt function. Solve this by doing two things: - tie the sched_clock() stable state more clearly to the TSC stable state for the normal (!paravirt) case. - only call clear_sched_clock_stable() when we mark TSC unstable when we use native_sched_clock(). The first means we can actually run with stable sched_clock in more situations then before, which is good. And since commit: 12907fbb1a69 ("sched/clock, clocksource: Add optional cs::mark_unstable() method") ... this should be reliable. Since any detection of TSC fail now results in marking the TSC unstable. Reported-by: Wanpeng Li Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: acb04058de49 ("sched/clock: Fix hotplug crash") Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 ---- arch/x86/kernel/cpu/centaur.c | 2 -- arch/x86/kernel/cpu/common.c | 3 --- arch/x86/kernel/cpu/cyrix.c | 1 - arch/x86/kernel/cpu/intel.c | 4 ---- arch/x86/kernel/cpu/transmeta.c | 2 -- arch/x86/kernel/tsc.c | 35 ++++++++++++++++++++++----------- 7 files changed, 23 insertions(+), 28 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4e95b2e0d95f..30d924ae5c34 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -555,10 +555,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 2c234a6d94c4..bad8ff078a21 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -104,8 +104,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif - - clear_sched_clock_stable(); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c64ca5929cb5..9d98e2e15d54 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -86,7 +86,6 @@ static void default_init(struct cpuinfo_x86 *c) strcpy(c->x86_model_id, "386"); } #endif - clear_sched_clock_stable(); } static const struct cpu_dev default_cpu = { @@ -1075,8 +1074,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) */ if (this_cpu->c_init) this_cpu->c_init(c); - else - clear_sched_clock_stable(); /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 47416f959a48..31e679238e8d 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -184,7 +184,6 @@ static void early_init_cyrix(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; } - clear_sched_clock_stable(); } static void init_cyrix(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 017ecd3bb553..2388bafe5c37 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -161,10 +161,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index c1ea5b999839..6a9ad73a2c54 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -15,8 +15,6 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) if (xlvl >= 0x80860001) c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } - - clear_sched_clock_stable(); } static void init_transmeta(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2724dc82f992..911129fda2f9 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -326,9 +326,16 @@ unsigned long long sched_clock(void) { return paravirt_sched_clock(); } + +static inline bool using_native_sched_clock(void) +{ + return pv_time_ops.sched_clock == native_sched_clock; +} #else unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); + +static inline bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) @@ -1111,8 +1118,10 @@ static void tsc_cs_mark_unstable(struct clocksource *cs) { if (tsc_unstable) return; + tsc_unstable = 1; - clear_sched_clock_stable(); + if (using_native_sched_clock()) + clear_sched_clock_stable(); disable_sched_clock_irqtime(); pr_info("Marking TSC unstable due to clocksource watchdog\n"); } @@ -1134,18 +1143,20 @@ static struct clocksource clocksource_tsc = { void mark_tsc_unstable(char *reason) { - if (!tsc_unstable) { - tsc_unstable = 1; + if (tsc_unstable) + return; + + tsc_unstable = 1; + if (using_native_sched_clock()) clear_sched_clock_stable(); - disable_sched_clock_irqtime(); - pr_info("Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) - clocksource_mark_unstable(&clocksource_tsc); - else { - clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; - clocksource_tsc.rating = 0; - } + disable_sched_clock_irqtime(); + pr_info("Marking TSC unstable due to %s\n", reason); + /* Change only the rating, when not registered */ + if (clocksource_tsc.mult) { + clocksource_mark_unstable(&clocksource_tsc); + } else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; + clocksource_tsc.rating = 0; } } From 2b232e0c3b3a09f3e33750aa20e314f1b80e5361 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Feb 2017 09:40:11 +0000 Subject: [PATCH 0472/1299] locking/ww_mutex: Replace cpu_relax() with cond_resched() for tests When busy-spinning on a ww_mutex_trylock(), we depend upon the other thread advancing and releasing the lock. This can not happen on a single CPU unless we relinquish it: [ ] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [kworker/0:1:18] ... [ ] Call Trace: [ ] mutex_trylock() [ ] test_mutex_work+0x31/0x56 [ ] process_one_work+0x1b4/0x2f9 [ ] worker_thread+0x1b0/0x27c [ ] kthread+0xd1/0xd3 [ ] ret_from_fork+0x19/0x30 Reported-by: Fengguang Wu Signed-off-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: f2a5fec17395 ("locking/ww_mutex: Begin kselftests for ww_mutex") Link: http://lkml.kernel.org/r/20170228094011.2595-1-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- kernel/locking/test-ww_mutex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index da6c9a34f62f..3eb39c588397 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -50,7 +50,7 @@ static void test_mutex_work(struct work_struct *work) if (mtx->flags & TEST_MTX_TRY) { while (!ww_mutex_trylock(&mtx->mutex)) - cpu_relax(); + cond_resched(); } else { ww_mutex_lock(&mtx->mutex, NULL); } @@ -88,7 +88,7 @@ static int __test_mutex(unsigned int flags) ret = -EINVAL; break; } - cpu_relax(); + cond_resched(); } while (time_before(jiffies, timeout)); } else { ret = wait_for_completion_timeout(&mtx.done, TIMEOUT); From 7fb4a2cea6b18dab56d609530d077f168169ed6b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 16:23:30 +0100 Subject: [PATCH 0473/1299] locking/lockdep: Add nest_lock integrity test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Boqun reported that hlock->references can overflow. Add a debug test for that to generate a clear error when this happens. Without this, lockdep is likely to report a mysterious failure on unlock. Reported-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Linus Torvalds Cc: Nicolai Hähnle Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 9812e5dd409e..c0ee8607c11e 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3260,10 +3260,17 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (depth) { hlock = curr->held_locks + depth - 1; if (hlock->class_idx == class_idx && nest_lock) { - if (hlock->references) + if (hlock->references) { + /* + * Check: unsigned int references:12, overflow. + */ + if (DEBUG_LOCKS_WARN_ON(hlock->references == (1 << 12)-1)) + return 0; + hlock->references++; - else + } else { hlock->references = 2; + } return 1; } From 857811a37129f5d2ba162d7be3986eff44724014 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 1 Mar 2017 23:01:38 +0800 Subject: [PATCH 0474/1299] locking/ww_mutex: Adjust the lock number for stress test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because there are only 12 bits in held_lock::references, so we only support 4095 nested lock held in the same time, adjust the lock number for ww_mutex stress test to kill one lockdep splat: [ ] [ BUG: bad unlock balance detected! ] [ ] kworker/u2:0/5 is trying to release lock (ww_class_mutex) at: [ ] ww_mutex_unlock() [ ] but there are no more locks to release! ... Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Chris Wilson Cc: Fengguang Wu Cc: Linus Torvalds Cc: Nicolai Hähnle Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170301150138.hdixnmafzfsox7nn@tardis.cn.ibm.com Signed-off-by: Ingo Molnar --- kernel/locking/test-ww_mutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 3eb39c588397..6b7abb334ca6 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -627,7 +627,7 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - ret = stress(4096, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); + ret = stress(4095, hweight32(STRESS_ALL)*ncpus, 1<<12, STRESS_ALL); if (ret) return ret; From bb1a2c26165640ba2cbcfe06c81e9f9d6db4e643 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 1 Mar 2017 21:10:17 +0100 Subject: [PATCH 0475/1299] x86/hpet: Prevent might sleep splat on resume Sergey reported a might sleep warning triggered from the hpet resume path. It's caused by the call to disable_irq() from interrupt disabled context. The problem with the low level resume code is that it is not accounted as a special system_state like we do during the boot process. Calling the same code during system boot would not trigger the warning. That's inconsistent at best. In this particular case it's trivial to replace the disable_irq() with disable_hardirq() because this particular code path is solely used from system resume and the involved hpet interrupts can never be force threaded. Reported-and-tested-by: Sergey Senozhatsky Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Sergey Senozhatsky Cc: Borislav Petkov Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703012108460.3684@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dc6ba5bda9fc..89ff7af2de50 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_irq(hdev->irq); + disable_hardirq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } From a746095c2db08d765222bf0058e76fb329d69e0e Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 1 Mar 2017 16:13:11 +0200 Subject: [PATCH 0476/1299] drm/i915: Enable DDI IO power domains in the DP MST path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 62b695662a24 ("drm/i915: Only enable DDI IO power domains after enabling DPLL") changed how the DDI IO power domains get enabled, but neglected the need to enable those domains when enabling a DP connector with MST enabled, leading to Kernel panic - not syncing: Timeout: Not all CPUs entered broadcast exception handler Fixes: 62b695662a24 ("drm/i915: Only enable DDI IO power domains after enabling DPLL") Cc: Imre Deak Cc: Ander Conselvan de Oliveira Cc: David Weinehall Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: Ville Syrjälä Reported-by: Ville Syrjälä Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170301141318.3607-2-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_dp_mst.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index d94fd4b80963..a8334e18c4fc 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -163,6 +163,9 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, intel_ddi_clk_select(&intel_dig_port->base, pipe_config->shared_dpll); + intel_display_power_get(dev_priv, + intel_dig_port->ddi_io_power_domain); + intel_prepare_dp_ddi_buffers(&intel_dig_port->base); intel_dp_set_link_params(intel_dp, pipe_config->port_clock, From c998e8a0f43a9e73c65f6f3e969805090cf4ac93 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 08:30:29 +0000 Subject: [PATCH 0477/1299] drm/i915: Hold rpm during GEM suspend in driver unload/suspend i915_gem_suspend() tries to access the device to ensure it is idle and all writes from the device are flushed to memory. It assumed is already held the runtime pm wakeref, but we should explicitly acquire it for our access to be safe. [ 619.926287] WARNING: CPU: 3 PID: 9353 at drivers/gpu/drm/i915/intel_drv.h:1750 gen6_write32+0x23e/0x2a0 [i915] [ 619.926300] RPM wakelock ref not held during HW access [ 619.926311] Modules linked in: vgem x86_pkg_temp_thermal intel_powerclamp snd_hda_codec_hdmi snd_hda_codec_generic snd_hda_codec coretemp snd_hwdep crct10dif_pclmul snd_hda_core crc32_pclmul snd_pcm mei_me mei lpc_ich ghash_clmulni_intel i915(-) sdhci_pci sdhci mmc_core e1000e ptp pps_core prime_numbers [last unloaded: snd_hda_intel] [ 619.926578] CPU: 3 PID: 9353 Comm: drv_module_relo Tainted: G U 4.10.0-CI-Trybot_609+ #1 [ 619.926585] Hardware name: LENOVO 42962WU/42962WU, BIOS 8DET56WW (1.26 ) 12/01/2011 [ 619.926592] Call Trace: [ 619.926609] dump_stack+0x67/0x92 [ 619.926625] __warn+0xc6/0xe0 [ 619.926640] warn_slowpath_fmt+0x4a/0x50 [ 619.926726] gen6_write32+0x23e/0x2a0 [i915] [ 619.926801] gen6_mm_switch+0x38/0x70 [i915] [ 619.926871] i915_switch_context+0xec/0xa10 [i915] [ 619.926942] i915_gem_switch_to_kernel_context+0x13c/0x2b0 [i915] [ 619.927019] i915_gem_suspend+0x2b/0x180 [i915] [ 619.927079] i915_driver_unload+0x22/0x200 [i915] [ 619.927093] ? __this_cpu_preempt_check+0x13/0x20 [ 619.927105] ? trace_hardirqs_on_caller+0xe7/0x200 [ 619.927118] ? trace_hardirqs_on+0xd/0x10 [ 619.927128] ? _raw_spin_unlock_irqrestore+0x3d/0x60 [ 619.927192] i915_pci_remove+0x14/0x20 [i915] [ 619.927205] pci_device_remove+0x34/0xb0 [ 619.927219] device_release_driver_internal+0x158/0x210 [ 619.927234] driver_detach+0x3b/0x80 [ 619.927245] bus_remove_driver+0x53/0xd0 [ 619.927256] driver_unregister+0x27/0x50 [ 619.927267] pci_unregister_driver+0x25/0xa0 [ 619.927351] i915_exit+0x1a/0xb1a [i915] [ 619.927362] SyS_delete_module+0x193/0x1e0 [ 619.927378] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 619.927386] RIP: 0033:0x7f82b46c5d37 [ 619.927393] RSP: 002b:00007ffdb6f610d8 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 [ 619.927408] RAX: ffffffffffffffda RBX: ffffffff81481ff3 RCX: 00007f82b46c5d37 [ 619.927415] RDX: 0000000000000001 RSI: 0000000000000800 RDI: 000000000224f558 [ 619.927422] RBP: ffffc90001187f88 R08: 0000000000000000 R09: 00007ffdb6f61100 [ 619.927428] R10: 000000000224f4e0 R11: 0000000000000246 R12: 0000000000000000 [ 619.927435] R13: 00007ffdb6f612b0 R14: 0000000000000000 R15: 0000000000000000 [ 619.927451] ? __this_cpu_preempt_check+0x13/0x20 or [ 641.646590] WARNING: CPU: 1 PID: 8913 at drivers/gpu/drm/i915/intel_drv.h:1750 intel_runtime_pm_get_noresume+0x8b/0x90 [i915] [ 641.646595] RPM wakelock ref not held during HW access [ 641.646600] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_generic x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec snd_hwdep crct10dif_pclmul snd_hda_core crc32_pclmul ghash_clmulni_intel snd_pcm mei_me mei i915(-) r8169 mii prime_numbers i2c_hid [last unloaded: snd_hda_intel] [ 641.646825] CPU: 1 PID: 8913 Comm: drv_module_relo Tainted: G U 4.10.0-CI-Trybot_609+ #1 [ 641.646836] Hardware name: TOSHIBA SATELLITE P50-C/06F4 , BIOS 1.20 10/08/2015 [ 641.646843] Call Trace: [ 641.646857] dump_stack+0x67/0x92 [ 641.646869] __warn+0xc6/0xe0 [ 641.646880] warn_slowpath_fmt+0x4a/0x50 [ 641.646893] ? __this_cpu_preempt_check+0x13/0x20 [ 641.646904] ? trace_hardirqs_on_caller+0xe7/0x200 [ 641.646957] intel_runtime_pm_get_noresume+0x8b/0x90 [i915] [ 641.647022] __i915_add_request+0x423/0x540 [i915] [ 641.647080] i915_gem_switch_to_kernel_context+0x148/0x2b0 [i915] [ 641.647145] i915_gem_suspend+0x2b/0x180 [i915] [ 641.647189] i915_driver_unload+0x22/0x200 [i915] [ 641.647200] ? __this_cpu_preempt_check+0x13/0x20 [ 641.647210] ? trace_hardirqs_on_caller+0xe7/0x200 [ 641.647220] ? trace_hardirqs_on+0xd/0x10 [ 641.647231] ? _raw_spin_unlock_irqrestore+0x3d/0x60 [ 641.647276] i915_pci_remove+0x14/0x20 [i915] [ 641.647293] pci_device_remove+0x34/0xb0 [ 641.647307] device_release_driver_internal+0x158/0x210 [ 641.647321] driver_detach+0x3b/0x80 [ 641.647330] bus_remove_driver+0x53/0xd0 [ 641.647338] driver_unregister+0x27/0x50 [ 641.647348] pci_unregister_driver+0x25/0xa0 [ 641.647415] i915_exit+0x1a/0xb1a [i915] [ 641.647429] SyS_delete_module+0x193/0x1e0 [ 641.647444] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 641.647453] RIP: 0033:0x7fc622bd2d37 [ 641.647463] RSP: 002b:00007ffff8ffb5c8 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 [ 641.647475] RAX: ffffffffffffffda RBX: ffffffff81481ff3 RCX: 00007fc622bd2d37 [ 641.647480] RDX: 0000000000000001 RSI: 0000000000000800 RDI: 0000000000d49118 [ 641.647485] RBP: ffffc90000997f88 R08: 0000000000000000 R09: 00007ffff8ffb5f0 [ 641.647491] R10: 0000000000d490a0 R11: 0000000000000246 R12: 0000000000000000 [ 641.647498] R13: 00007ffff8ffb7a0 R14: 0000000000000000 R15: 0000000000000000 [ 641.647510] ? __this_cpu_preempt_check+0x13/0x20 v2: Keep holding rpm until the end to cover i915_gem_sanitize() as well. Fixes: 5ab57c702069 ("drm/i915: Flush logical context image out to memory upon suspend") Fixes: 1c777c5d1dc ("drm/i915/hsw: Fix GPU hang during resume from S3-devices state") Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170302083029.19576-1-chris@chris-wilson.co.uk Reviewed-by: Imre Deak Cc: # v4.9+ --- drivers/gpu/drm/i915/i915_gem.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0ad080984877..275e9e0799b9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4244,6 +4244,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) struct drm_device *dev = &dev_priv->drm; int ret; + intel_runtime_pm_get(dev_priv); intel_suspend_gt_powersave(dev_priv); mutex_lock(&dev->struct_mutex); @@ -4258,13 +4259,13 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) */ ret = i915_gem_switch_to_kernel_context(dev_priv); if (ret) - goto err; + goto err_unlock; ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED); if (ret) - goto err; + goto err_unlock; i915_gem_retire_requests(dev_priv); GEM_BUG_ON(dev_priv->gt.active_requests); @@ -4310,11 +4311,12 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * machine in an unusable condition. */ i915_gem_sanitize(dev_priv); + goto out_rpm_put; - return 0; - -err: +err_unlock: mutex_unlock(&dev->struct_mutex); +out_rpm_put: + intel_runtime_pm_put(dev_priv); return ret; } From 1f58c8e7eac0d4a7a59037dc18dbed2a9b5bd342 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 07:41:57 +0000 Subject: [PATCH 0478/1299] drm/i915: Restore the invalid access without RPM warning A long time ago we turned off the warning as it was too painful, we had too much broken code. Turn it back on now as we are mostly clean and need to prevent returning to such orangeness. Signed-off-by: Chris Wilson Cc: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170302074157.21631-2-chris@chris-wilson.co.uk Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_drv.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 741beba0c9c0..eadc72df5a17 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1739,10 +1739,8 @@ static inline void assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) { assert_rpm_device_not_suspended(dev_priv); - /* FIXME: Needs to be converted back to WARN_ONCE, but currently causes - * too much noise. */ - if (!atomic_read(&dev_priv->pm.wakeref_count)) - DRM_DEBUG_DRIVER("RPM wakelock ref not held during HW access"); + WARN_ONCE(!atomic_read(&dev_priv->pm.wakeref_count), + "RPM wakelock ref not held during HW access"); } /** From 13e867f6faaafe4d7fdaa4a894a061d041d0dc88 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Wed, 1 Mar 2017 11:58:55 -0800 Subject: [PATCH 0479/1299] i915/HuC: Add an extra check for platforms that do not have HUC Return silently without producing much noise on platforms that have a HuC but the firmware is absent. Cc: Ander Conselvan De Oliveira Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Reviewed-by: Ander Conselvan de Oliveira Signed-off-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1488398335-13121-1-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_huc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index c28543d220a2..e660109fc51e 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -181,12 +181,14 @@ void intel_huc_init(struct drm_i915_private *dev_priv) } huc_fw->path = fw_path; + + if (huc_fw->path == NULL) + return; + huc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; DRM_DEBUG_DRIVER("HuC firmware pending, path %s\n", fw_path); - WARN(huc_fw->path == NULL, "HuC present but no fw path\n"); - intel_uc_fw_fetch(dev_priv, huc_fw); } From 9ce53745edd515ebfc1d5f9bd3ded77dabce27fd Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Thu, 2 Mar 2017 00:01:22 +0530 Subject: [PATCH 0480/1299] drm/i915/glk: Fix DSI enable I/O sequence One of the if statement covers the next line in enable I/O sequence. This patch correct the same by adding error message. Fixes: 4644848369c0 ("drm/i915/glk: Add MIPIIO Enable/disable sequence") Reported-by: Chris Wilson Signed-off-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488393082-30660-1-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index dd38b6835528..323fc097c3ee 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -425,6 +425,7 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) if (intel_wait_for_register(dev_priv, MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, GLK_ULPS_NOT_ACTIVE, 20)) + DRM_ERROR("ULPS is still active\n"); /* Exit ULPS */ val = I915_READ(MIPI_DEVICE_READY(port)); From a5509abda48ecfc133fac6268e83fc1a93dba039 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 17 Feb 2017 17:01:59 +0200 Subject: [PATCH 0481/1299] drm/i915: Fix legacy cursor vs. watermarks for ILK-BDW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to make cursor updates actually safe wrt. watermark programming we have to clear the legacy_cursor_update flag in the atomic state. That will cause the regular atomic update path to do the necessary vblank wait after the plane update if needed, otherwise the vblank wait would be skipped and we'd feed the optimal watermarks to the hardware before the plane update has actually happened. To make the slow vs. fast path determination in intel_legacy_cursor_update() a little simpler we can ignore the actual visibility of the plane (which can only get computed once we've already chosen out path) and instead we simply check whether the fb is being set or cleared by the user. This means a fully clipped but logically visible cursor will be considered visible as far as watermark programming is concerned. We can do that for the cursor since it's a fixed size plane and the clipped size doesn't play a role in the watermark computation. This should fix underruns that can occur when the cursor gets enable/disabled or the size gets changed. Hopefully it's good enough that only pure cursor movement and flips go through unthrottled. Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Uwe Kleine-König Reported-by: Uwe Kleine-König Fixes: f79f26921ee1 ("drm/i915: Add a cursor hack to allow converting legacy page flip to atomic, v3.") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170217150159.11683-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst Tested-by: Rafael Ristovski --- drivers/gpu/drm/i915/intel_display.c | 29 +++++++++++++++++----------- drivers/gpu/drm/i915/intel_pm.c | 22 ++++++++++++--------- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9d8c08c4b105..9908bba5b3d3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12955,6 +12955,17 @@ static int intel_atomic_commit(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); int ret = 0; + /* + * The intel_legacy_cursor_update() fast path takes care + * of avoiding the vblank waits for simple cursor + * movement and flips. For cursor on/off and size changes, + * we want to perform the vblank waits so that watermark + * updates happen during the correct frames. Gen9+ have + * double buffered watermarks and so shouldn't need this. + */ + if (INTEL_GEN(dev_priv) < 9) + state->legacy_cursor_update = false; + ret = drm_atomic_helper_setup_commit(state, nonblock); if (ret) return ret; @@ -13389,8 +13400,7 @@ intel_legacy_cursor_update(struct drm_plane *plane, old_plane_state->src_h != src_h || old_plane_state->crtc_w != crtc_w || old_plane_state->crtc_h != crtc_h || - !old_plane_state->visible || - old_plane_state->fb->modifier != fb->modifier) + !old_plane_state->fb != !fb) goto slow; new_plane_state = intel_plane_duplicate_state(plane); @@ -13413,10 +13423,6 @@ intel_legacy_cursor_update(struct drm_plane *plane, if (ret) goto out_free; - /* Visibility changed, must take slowpath. */ - if (!new_plane_state->visible) - goto slow_free; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); if (ret) goto out_free; @@ -13456,9 +13462,12 @@ intel_legacy_cursor_update(struct drm_plane *plane, new_plane_state->fb = old_fb; to_intel_plane_state(new_plane_state)->vma = old_vma; - intel_plane->update_plane(plane, - to_intel_crtc_state(crtc->state), - to_intel_plane_state(plane->state)); + if (plane->state->visible) + intel_plane->update_plane(plane, + to_intel_crtc_state(crtc->state), + to_intel_plane_state(plane->state)); + else + intel_plane->disable_plane(plane, crtc); intel_cleanup_plane_fb(plane, new_plane_state); @@ -13468,8 +13477,6 @@ intel_legacy_cursor_update(struct drm_plane *plane, intel_plane_destroy_state(plane, new_plane_state); return ret; -slow_free: - intel_plane_destroy_state(plane, new_plane_state); slow: return drm_atomic_helper_update_plane(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c6938350a6c4..b00c95e4a81f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1841,20 +1841,24 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, const struct intel_plane_state *pstate, uint32_t mem_value) { - /* - * We treat the cursor plane as always-on for the purposes of watermark - * calculation. Until we have two-stage watermark programming merged, - * this is necessary to avoid flickering. - */ - int cpp = 4; - int width = pstate->base.visible ? pstate->base.crtc_w : 64; + int cpp; - if (!cstate->base.active) + /* + * Treat cursor with fb as always visible since cursor updates + * can happen faster than the vrefresh rate, and the current + * watermark code doesn't handle that correctly. Cursor updates + * which set/clear the fb or change the cursor size are going + * to get throttled by intel_legacy_cursor_update() to work + * around this problem with the watermark code. + */ + if (!cstate->base.active || !pstate->base.fb) return 0; + cpp = pstate->base.fb->format->cpp[0]; + return ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, - width, cpp, mem_value); + pstate->base.crtc_w, cpp, mem_value); } /* Only for WM_LP. */ From 528e649b5c79683202a0ccd22e33a41e35f81a0b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 24 Feb 2017 10:29:02 +0100 Subject: [PATCH 0482/1299] x86/platform/intel/iosf_mbi: Add a PMIC bus access notifier Some drivers may need to acquire P-Unit managed resources from interrupt context, where they cannot call iosf_mbi_punit_acquire(). This commit adds a notifier chain which allows a driver to get notified (in a process context) before other drivers start accessing the PMIC bus, so that the driver can acquire any resources, which it may need during the window the other driver is accessing the PMIC, before hand. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=155241 Signed-off-by: Hans de Goede Tested-by: tagorereddy Reviewed-by: Andy Shevchenko Signed-off-by: Daniel Vetter --- arch/x86/include/asm/iosf_mbi.h | 56 ++++++++++++++++++++++++++++++ arch/x86/platform/intel/iosf_mbi.c | 36 +++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h index f6119d0e8cba..c313cac36f56 100644 --- a/arch/x86/include/asm/iosf_mbi.h +++ b/arch/x86/include/asm/iosf_mbi.h @@ -5,6 +5,8 @@ #ifndef IOSF_MBI_SYMS_H #define IOSF_MBI_SYMS_H +#include + #define MBI_MCR_OFFSET 0xD0 #define MBI_MDR_OFFSET 0xD4 #define MBI_MCRX_OFFSET 0xD8 @@ -47,6 +49,10 @@ #define QRK_MBI_UNIT_MM 0x05 #define QRK_MBI_UNIT_SOC 0x31 +/* Action values for the pmic_bus_access_notifier functions */ +#define MBI_PMIC_BUS_ACCESS_BEGIN 1 +#define MBI_PMIC_BUS_ACCESS_END 2 + #if IS_ENABLED(CONFIG_IOSF_MBI) bool iosf_mbi_available(void); @@ -115,6 +121,38 @@ void iosf_mbi_punit_acquire(void); */ void iosf_mbi_punit_release(void); +/** + * iosf_mbi_register_pmic_bus_access_notifier - Register PMIC bus notifier + * + * This function can be used by drivers which may need to acquire P-Unit + * managed resources from interrupt context, where iosf_mbi_punit_acquire() + * can not be used. + * + * This function allows a driver to register a notifier to get notified (in a + * process context) before other drivers start accessing the PMIC bus. + * + * This allows the driver to acquire any resources, which it may need during + * the window the other driver is accessing the PMIC, before hand. + * + * @nb: notifier_block to register + */ +int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb); + +/** + * iosf_mbi_register_pmic_bus_access_notifier - Unregister PMIC bus notifier + * + * @nb: notifier_block to unregister + */ +int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb); + +/** + * iosf_mbi_call_pmic_bus_access_notifier_chain - Call PMIC bus notifier chain + * + * @val: action to pass into listener's notifier_call function + * @v: data pointer to pass into listener's notifier_call function + */ +int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v); + #else /* CONFIG_IOSF_MBI is not enabled */ static inline bool iosf_mbi_available(void) @@ -146,6 +184,24 @@ int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask) static inline void iosf_mbi_punit_acquire(void) {} static inline void iosf_mbi_punit_release(void) {} +static inline +int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline +int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline +int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v) +{ + return 0; +} + #endif /* CONFIG_IOSF_MBI */ #endif /* IOSF_MBI_SYMS_H */ diff --git a/arch/x86/platform/intel/iosf_mbi.c b/arch/x86/platform/intel/iosf_mbi.c index ed24fa9fb65d..a952ac199741 100644 --- a/arch/x86/platform/intel/iosf_mbi.c +++ b/arch/x86/platform/intel/iosf_mbi.c @@ -35,6 +35,7 @@ static struct pci_dev *mbi_pdev; static DEFINE_SPINLOCK(iosf_mbi_lock); static DEFINE_MUTEX(iosf_mbi_punit_mutex); +static BLOCKING_NOTIFIER_HEAD(iosf_mbi_pmic_bus_access_notifier); static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset) { @@ -203,6 +204,41 @@ void iosf_mbi_punit_release(void) } EXPORT_SYMBOL(iosf_mbi_punit_release); +int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb) +{ + int ret; + + /* Wait for the bus to go inactive before registering */ + mutex_lock(&iosf_mbi_punit_mutex); + ret = blocking_notifier_chain_register( + &iosf_mbi_pmic_bus_access_notifier, nb); + mutex_unlock(&iosf_mbi_punit_mutex); + + return ret; +} +EXPORT_SYMBOL(iosf_mbi_register_pmic_bus_access_notifier); + +int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb) +{ + int ret; + + /* Wait for the bus to go inactive before unregistering */ + mutex_lock(&iosf_mbi_punit_mutex); + ret = blocking_notifier_chain_unregister( + &iosf_mbi_pmic_bus_access_notifier, nb); + mutex_unlock(&iosf_mbi_punit_mutex); + + return ret; +} +EXPORT_SYMBOL(iosf_mbi_unregister_pmic_bus_access_notifier); + +int iosf_mbi_call_pmic_bus_access_notifier_chain(unsigned long val, void *v) +{ + return blocking_notifier_call_chain( + &iosf_mbi_pmic_bus_access_notifier, val, v); +} +EXPORT_SYMBOL(iosf_mbi_call_pmic_bus_access_notifier_chain); + #ifdef CONFIG_IOSF_MBI_DEBUG static u32 dbg_mdr; static u32 dbg_mcr; From 86524e54025f4ffb0a2e4966ad244424a080329f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Feb 2017 11:27:53 +0100 Subject: [PATCH 0483/1299] i2c: designware: Rename accessor_flags to flags Rename accessor_flags to flags, so that we can use the field for other flags too. This is a preparation patch for adding cherrytrail support to the punit semaphore code. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Acked-by: Jarkko Nikula Tested-by: Takashi Iwai Acked-by: Wolfram Sang Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170210102802.20898-4-hdegoede@redhat.com --- drivers/i2c/busses/i2c-designware-core.c | 14 +++++++------- drivers/i2c/busses/i2c-designware-core.h | 2 +- drivers/i2c/busses/i2c-designware-platdrv.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c index 6d81c56184d3..8c3ba426e553 100644 --- a/drivers/i2c/busses/i2c-designware-core.c +++ b/drivers/i2c/busses/i2c-designware-core.c @@ -177,13 +177,13 @@ static u32 dw_readl(struct dw_i2c_dev *dev, int offset) { u32 value; - if (dev->accessor_flags & ACCESS_16BIT) + if (dev->flags & ACCESS_16BIT) value = readw_relaxed(dev->base + offset) | (readw_relaxed(dev->base + offset + 2) << 16); else value = readl_relaxed(dev->base + offset); - if (dev->accessor_flags & ACCESS_SWAP) + if (dev->flags & ACCESS_SWAP) return swab32(value); else return value; @@ -191,10 +191,10 @@ static u32 dw_readl(struct dw_i2c_dev *dev, int offset) static void dw_writel(struct dw_i2c_dev *dev, u32 b, int offset) { - if (dev->accessor_flags & ACCESS_SWAP) + if (dev->flags & ACCESS_SWAP) b = swab32(b); - if (dev->accessor_flags & ACCESS_16BIT) { + if (dev->flags & ACCESS_16BIT) { writew_relaxed((u16)b, dev->base + offset); writew_relaxed((u16)(b >> 16), dev->base + offset + 2); } else { @@ -339,10 +339,10 @@ int i2c_dw_init(struct dw_i2c_dev *dev) reg = dw_readl(dev, DW_IC_COMP_TYPE); if (reg == ___constant_swab32(DW_IC_COMP_TYPE_VALUE)) { /* Configure register endianess access */ - dev->accessor_flags |= ACCESS_SWAP; + dev->flags |= ACCESS_SWAP; } else if (reg == (DW_IC_COMP_TYPE_VALUE & 0x0000ffff)) { /* Configure register access mode 16bit */ - dev->accessor_flags |= ACCESS_16BIT; + dev->flags |= ACCESS_16BIT; } else if (reg != DW_IC_COMP_TYPE_VALUE) { dev_err(dev->dev, "Unknown Synopsys component type: " "0x%08x\n", reg); @@ -926,7 +926,7 @@ static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id) tx_aborted: if ((stat & (DW_IC_INTR_TX_ABRT | DW_IC_INTR_STOP_DET)) || dev->msg_err) complete(&dev->cmd_complete); - else if (unlikely(dev->accessor_flags & ACCESS_INTR_MASK)) { + else if (unlikely(dev->flags & ACCESS_INTR_MASK)) { /* workaround to trigger pending interrupt */ stat = dw_readl(dev, DW_IC_INTR_MASK); i2c_dw_disable_int(dev); diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index 26250b425e2f..2c50571fb9c1 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -103,7 +103,7 @@ struct dw_i2c_dev { unsigned int status; u32 abort_source; int irq; - u32 accessor_flags; + u32 flags; struct i2c_adapter adapter; u32 functionality; u32 master_cfg; diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 6ce431323125..3eede7b0904b 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -112,7 +112,7 @@ static int dw_i2c_acpi_configure(struct platform_device *pdev) id = acpi_match_device(pdev->dev.driver->acpi_match_table, &pdev->dev); if (id && id->driver_data) - dev->accessor_flags |= (u32)id->driver_data; + dev->flags |= (u32)id->driver_data; return 0; } From 62aee937535fcb7c5c23aafe5dfccd977bfe658e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Feb 2017 11:27:54 +0100 Subject: [PATCH 0484/1299] i2c: designware-baytrail: Pass dw_i2c_dev into helper functions Pass dw_i2c_dev into the helper functions, this is a preparation patch for the punit semaphore fixes done in the other patches in this set. Signed-off-by: Hans de Goede Reviewed-by: Takashi Iwai Tested-by: Takashi Iwai Reviewed-by: Andy Shevchenko Acked-by: Jarkko Nikula Acked-by: Wolfram Sang Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170210102802.20898-5-hdegoede@redhat.com --- drivers/i2c/busses/i2c-designware-baytrail.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-baytrail.c b/drivers/i2c/busses/i2c-designware-baytrail.c index 1590ad0a8081..a3f581cb1fba 100644 --- a/drivers/i2c/busses/i2c-designware-baytrail.c +++ b/drivers/i2c/busses/i2c-designware-baytrail.c @@ -28,14 +28,14 @@ static unsigned long acquired; -static int get_sem(struct device *dev, u32 *sem) +static int get_sem(struct dw_i2c_dev *dev, u32 *sem) { u32 data; int ret; ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, PUNIT_SEMAPHORE, &data); if (ret) { - dev_err(dev, "iosf failed to read punit semaphore\n"); + dev_err(dev->dev, "iosf failed to read punit semaphore\n"); return ret; } @@ -44,18 +44,18 @@ static int get_sem(struct device *dev, u32 *sem) return 0; } -static void reset_semaphore(struct device *dev) +static void reset_semaphore(struct dw_i2c_dev *dev) { u32 data; if (iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, PUNIT_SEMAPHORE, &data)) { - dev_err(dev, "iosf failed to reset punit semaphore during read\n"); + dev_err(dev->dev, "iosf failed to reset punit semaphore during read\n"); return; } data &= ~PUNIT_SEMAPHORE_BIT; if (iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE, PUNIT_SEMAPHORE, data)) - dev_err(dev, "iosf failed to reset punit semaphore during write\n"); + dev_err(dev->dev, "iosf failed to reset punit semaphore during write\n"); } static int baytrail_i2c_acquire(struct dw_i2c_dev *dev) @@ -83,7 +83,7 @@ static int baytrail_i2c_acquire(struct dw_i2c_dev *dev) start = jiffies; end = start + msecs_to_jiffies(SEMAPHORE_TIMEOUT); do { - ret = get_sem(dev->dev, &sem); + ret = get_sem(dev, &sem); if (!ret && sem) { acquired = jiffies; dev_dbg(dev->dev, "punit semaphore acquired after %ums\n", @@ -95,7 +95,7 @@ static int baytrail_i2c_acquire(struct dw_i2c_dev *dev) } while (time_before(jiffies, end)); dev_err(dev->dev, "punit semaphore timed out, resetting\n"); - reset_semaphore(dev->dev); + reset_semaphore(dev); ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, PUNIT_SEMAPHORE, &sem); if (ret) @@ -116,7 +116,7 @@ static void baytrail_i2c_release(struct dw_i2c_dev *dev) if (!dev->acquire_lock) return; - reset_semaphore(dev->dev); + reset_semaphore(dev); dev_dbg(dev->dev, "punit semaphore held for %ums\n", jiffies_to_msecs(jiffies - acquired)); } From e234ed2f06fad95680710976bcddae91d7fb7af9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Feb 2017 11:27:55 +0100 Subject: [PATCH 0485/1299] i2c: designware-baytrail: Only check iosf_mbi_available() for shared hosts If (!shared_host) simply return 0, this avoids delaying the probe if iosf_mbi_available() returns false when an i2c bus is not using the punit semaphore. Also move the if (!iosf_mbi_available()) check to above the dev_info, so that we do not repeat the dev_info on every probe until iosf_mbi_available() returns true. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Acked-by: Jarkko Nikula Tested-by: Takashi Iwai Acked-by: Wolfram Sang Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170210102802.20898-6-hdegoede@redhat.com --- drivers/i2c/busses/i2c-designware-baytrail.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-baytrail.c b/drivers/i2c/busses/i2c-designware-baytrail.c index a3f581cb1fba..cf022226b693 100644 --- a/drivers/i2c/busses/i2c-designware-baytrail.c +++ b/drivers/i2c/busses/i2c-designware-baytrail.c @@ -138,15 +138,16 @@ int i2c_dw_eval_lock_support(struct dw_i2c_dev *dev) if (ACPI_FAILURE(status)) return 0; - if (shared_host) { - dev_info(dev->dev, "I2C bus managed by PUNIT\n"); - dev->acquire_lock = baytrail_i2c_acquire; - dev->release_lock = baytrail_i2c_release; - dev->pm_runtime_disabled = true; - } + if (!shared_host) + return 0; if (!iosf_mbi_available()) return -EPROBE_DEFER; + dev_info(dev->dev, "I2C bus managed by PUNIT\n"); + dev->acquire_lock = baytrail_i2c_acquire; + dev->release_lock = baytrail_i2c_release; + dev->pm_runtime_disabled = true; + return 0; } From 086cb4afef45262806ee5bf26c34244e5867712c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Feb 2017 11:27:56 +0100 Subject: [PATCH 0486/1299] i2c: designware-baytrail: Disallow the CPU to enter C6 or C7 while holding the punit semaphore On my cherrytrail tablet with axp288 pmic, just doing a bunch of repeated reads from the pmic, e.g. "i2cdump -y 14 0x34" would lookup the tablet in 1 - 3 runs guaranteed. This seems to be causes by the cpu trying to enter C6 or C7 while we hold the punit bus semaphore, at which point everything just hangs. Avoid this by disallowing the CPU to enter C6 or C7 before acquiring the punit bus semaphore. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=109051 Signed-off-by: Hans de Goede Tested-by: Takashi Iwai Reviewed-by: Andy Shevchenko Acked-by: Jarkko Nikula Acked-by: Wolfram Sang Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170210102802.20898-7-hdegoede@redhat.com --- drivers/i2c/busses/i2c-designware-baytrail.c | 24 ++++++++++++++++++-- drivers/i2c/busses/i2c-designware-core.h | 9 ++++++-- drivers/i2c/busses/i2c-designware-platdrv.c | 4 +++- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-baytrail.c b/drivers/i2c/busses/i2c-designware-baytrail.c index cf022226b693..650a700f215e 100644 --- a/drivers/i2c/busses/i2c-designware-baytrail.c +++ b/drivers/i2c/busses/i2c-designware-baytrail.c @@ -16,6 +16,7 @@ #include #include #include +#include #include @@ -56,6 +57,8 @@ static void reset_semaphore(struct dw_i2c_dev *dev) data &= ~PUNIT_SEMAPHORE_BIT; if (iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE, PUNIT_SEMAPHORE, data)) dev_err(dev->dev, "iosf failed to reset punit semaphore during write\n"); + + pm_qos_update_request(&dev->pm_qos, PM_QOS_DEFAULT_VALUE); } static int baytrail_i2c_acquire(struct dw_i2c_dev *dev) @@ -72,11 +75,18 @@ static int baytrail_i2c_acquire(struct dw_i2c_dev *dev) if (!dev->release_lock) return 0; + /* + * Disallow the CPU to enter C6 or C7 state, entering these states + * requires the punit to talk to the pmic and if this happens while + * we're holding the semaphore, the SoC hangs. + */ + pm_qos_update_request(&dev->pm_qos, 0); + /* host driver writes to side band semaphore register */ ret = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE, PUNIT_SEMAPHORE, sem); if (ret) { dev_err(dev->dev, "iosf punit semaphore request failed\n"); - return ret; + goto out; } /* host driver waits for bit 0 to be set in semaphore register */ @@ -95,6 +105,7 @@ static int baytrail_i2c_acquire(struct dw_i2c_dev *dev) } while (time_before(jiffies, end)); dev_err(dev->dev, "punit semaphore timed out, resetting\n"); +out: reset_semaphore(dev); ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, PUNIT_SEMAPHORE, &sem); @@ -121,7 +132,7 @@ static void baytrail_i2c_release(struct dw_i2c_dev *dev) jiffies_to_msecs(jiffies - acquired)); } -int i2c_dw_eval_lock_support(struct dw_i2c_dev *dev) +int i2c_dw_probe_lock_support(struct dw_i2c_dev *dev) { acpi_status status; unsigned long long shared_host = 0; @@ -149,5 +160,14 @@ int i2c_dw_eval_lock_support(struct dw_i2c_dev *dev) dev->release_lock = baytrail_i2c_release; dev->pm_runtime_disabled = true; + pm_qos_add_request(&dev->pm_qos, PM_QOS_CPU_DMA_LATENCY, + PM_QOS_DEFAULT_VALUE); + return 0; } + +void i2c_dw_remove_lock_support(struct dw_i2c_dev *dev) +{ + if (dev->acquire_lock) + pm_qos_remove_request(&dev->pm_qos); +} diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index 2c50571fb9c1..94a5fd193b40 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -23,6 +23,7 @@ */ #include +#include #define DW_IC_DEFAULT_FUNCTIONALITY (I2C_FUNC_I2C | \ I2C_FUNC_SMBUS_BYTE | \ @@ -75,6 +76,7 @@ * @fp_lcnt: fast plus LCNT value * @hs_hcnt: high speed HCNT value * @hs_lcnt: high speed LCNT value + * @pm_qos: pm_qos_request used while holding a hardware lock on the bus * @acquire_lock: function to acquire a hardware lock on the bus * @release_lock: function to release a hardware lock on the bus * @pm_runtime_disabled: true if pm runtime is disabled @@ -122,6 +124,7 @@ struct dw_i2c_dev { u16 fp_lcnt; u16 hs_hcnt; u16 hs_lcnt; + struct pm_qos_request pm_qos; int (*acquire_lock)(struct dw_i2c_dev *dev); void (*release_lock)(struct dw_i2c_dev *dev); bool pm_runtime_disabled; @@ -139,7 +142,9 @@ extern u32 i2c_dw_read_comp_param(struct dw_i2c_dev *dev); extern int i2c_dw_probe(struct dw_i2c_dev *dev); #if IS_ENABLED(CONFIG_I2C_DESIGNWARE_BAYTRAIL) -extern int i2c_dw_eval_lock_support(struct dw_i2c_dev *dev); +extern int i2c_dw_probe_lock_support(struct dw_i2c_dev *dev); +extern void i2c_dw_remove_lock_support(struct dw_i2c_dev *dev); #else -static inline int i2c_dw_eval_lock_support(struct dw_i2c_dev *dev) { return 0; } +static inline int i2c_dw_probe_lock_support(struct dw_i2c_dev *dev) { return 0; } +static inline void i2c_dw_remove_lock_support(struct dw_i2c_dev *dev) {} #endif diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index 3eede7b0904b..d474db074b1a 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -238,7 +238,7 @@ static int dw_i2c_plat_probe(struct platform_device *pdev) return -EINVAL; } - r = i2c_dw_eval_lock_support(dev); + r = i2c_dw_probe_lock_support(dev); if (r) return r; @@ -307,6 +307,8 @@ static int dw_i2c_plat_remove(struct platform_device *pdev) if (!dev->pm_runtime_disabled) pm_runtime_disable(&pdev->dev); + i2c_dw_remove_lock_support(dev); + return 0; } From 519e23a7f51e853dbe5dc4abb193c0c0db303b3a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Feb 2017 11:27:57 +0100 Subject: [PATCH 0487/1299] i2c: designware-baytrail: Fix race when resetting the semaphore Use iosf_mbi_modify instead of iosf_mbi_read + iosf_mbi_write so that we keep the iosf_mbi_lock locked during the read-modify-write done to reset the semaphore. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Acked-by: Jarkko Nikula Acked-by: Wolfram Sang Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170210102802.20898-8-hdegoede@redhat.com --- drivers/i2c/busses/i2c-designware-baytrail.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-baytrail.c b/drivers/i2c/busses/i2c-designware-baytrail.c index 650a700f215e..8df529c04f8e 100644 --- a/drivers/i2c/busses/i2c-designware-baytrail.c +++ b/drivers/i2c/busses/i2c-designware-baytrail.c @@ -47,15 +47,8 @@ static int get_sem(struct dw_i2c_dev *dev, u32 *sem) static void reset_semaphore(struct dw_i2c_dev *dev) { - u32 data; - - if (iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, PUNIT_SEMAPHORE, &data)) { - dev_err(dev->dev, "iosf failed to reset punit semaphore during read\n"); - return; - } - - data &= ~PUNIT_SEMAPHORE_BIT; - if (iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE, PUNIT_SEMAPHORE, data)) + if (iosf_mbi_modify(BT_MBI_UNIT_PMC, MBI_REG_READ, PUNIT_SEMAPHORE, + 0, PUNIT_SEMAPHORE_BIT)) dev_err(dev->dev, "iosf failed to reset punit semaphore during write\n"); pm_qos_update_request(&dev->pm_qos, PM_QOS_DEFAULT_VALUE); From fd476fa22a1f432658b799b023f351f291f2db8b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Feb 2017 11:27:58 +0100 Subject: [PATCH 0488/1299] i2c: designware-baytrail: Add support for cherrytrail The cherrytrail punit has the pmic i2c bus access semaphore at a different register address. Signed-off-by: Hans de Goede Reviewed-by: Takashi Iwai Tested-by: Takashi Iwai Reviewed-by: Andy Shevchenko Acked-by: Jarkko Nikula Acked-by: Wolfram Sang Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170210102802.20898-9-hdegoede@redhat.com --- drivers/i2c/busses/i2c-designware-baytrail.c | 19 +++++++++++--- drivers/i2c/busses/i2c-designware-core.h | 2 ++ drivers/i2c/busses/i2c-designware-pcidrv.c | 26 ++++++++++++++------ drivers/i2c/busses/i2c-designware-platdrv.c | 2 +- 4 files changed, 37 insertions(+), 12 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-baytrail.c b/drivers/i2c/busses/i2c-designware-baytrail.c index 8df529c04f8e..3effc9a1778c 100644 --- a/drivers/i2c/busses/i2c-designware-baytrail.c +++ b/drivers/i2c/busses/i2c-designware-baytrail.c @@ -24,17 +24,27 @@ #define SEMAPHORE_TIMEOUT 100 #define PUNIT_SEMAPHORE 0x7 +#define PUNIT_SEMAPHORE_CHT 0x10e #define PUNIT_SEMAPHORE_BIT BIT(0) #define PUNIT_SEMAPHORE_ACQUIRE BIT(1) static unsigned long acquired; +static u32 get_sem_addr(struct dw_i2c_dev *dev) +{ + if (dev->flags & MODEL_CHERRYTRAIL) + return PUNIT_SEMAPHORE_CHT; + else + return PUNIT_SEMAPHORE; +} + static int get_sem(struct dw_i2c_dev *dev, u32 *sem) { + u32 addr = get_sem_addr(dev); u32 data; int ret; - ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, PUNIT_SEMAPHORE, &data); + ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, addr, &data); if (ret) { dev_err(dev->dev, "iosf failed to read punit semaphore\n"); return ret; @@ -47,7 +57,7 @@ static int get_sem(struct dw_i2c_dev *dev, u32 *sem) static void reset_semaphore(struct dw_i2c_dev *dev) { - if (iosf_mbi_modify(BT_MBI_UNIT_PMC, MBI_REG_READ, PUNIT_SEMAPHORE, + if (iosf_mbi_modify(BT_MBI_UNIT_PMC, MBI_REG_READ, get_sem_addr(dev), 0, PUNIT_SEMAPHORE_BIT)) dev_err(dev->dev, "iosf failed to reset punit semaphore during write\n"); @@ -56,6 +66,7 @@ static void reset_semaphore(struct dw_i2c_dev *dev) static int baytrail_i2c_acquire(struct dw_i2c_dev *dev) { + u32 addr = get_sem_addr(dev); u32 sem = PUNIT_SEMAPHORE_ACQUIRE; int ret; unsigned long start, end; @@ -76,7 +87,7 @@ static int baytrail_i2c_acquire(struct dw_i2c_dev *dev) pm_qos_update_request(&dev->pm_qos, 0); /* host driver writes to side band semaphore register */ - ret = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE, PUNIT_SEMAPHORE, sem); + ret = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE, addr, sem); if (ret) { dev_err(dev->dev, "iosf punit semaphore request failed\n"); goto out; @@ -101,7 +112,7 @@ static int baytrail_i2c_acquire(struct dw_i2c_dev *dev) out: reset_semaphore(dev); - ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, PUNIT_SEMAPHORE, &sem); + ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ, addr, &sem); if (ret) dev_err(dev->dev, "iosf failed to read punit semaphore\n"); else diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index 94a5fd193b40..a8e74caf8df4 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -135,6 +135,8 @@ struct dw_i2c_dev { #define ACCESS_16BIT 0x00000002 #define ACCESS_INTR_MASK 0x00000004 +#define MODEL_CHERRYTRAIL 0x00000100 + extern int i2c_dw_init(struct dw_i2c_dev *dev); extern void i2c_dw_disable(struct dw_i2c_dev *dev); extern void i2c_dw_disable_int(struct dw_i2c_dev *dev); diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index d6423cfac588..ed485b69b449 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -45,6 +45,7 @@ enum dw_pci_ctl_id_t { medfield, merrifield, baytrail, + cherrytrail, haswell, }; @@ -63,6 +64,7 @@ struct dw_pci_controller { u32 rx_fifo_depth; u32 clk_khz; u32 functionality; + u32 flags; struct dw_scl_sda_cfg *scl_sda_cfg; int (*setup)(struct pci_dev *pdev, struct dw_pci_controller *c); }; @@ -170,6 +172,15 @@ static struct dw_pci_controller dw_pci_controllers[] = { .functionality = I2C_FUNC_10BIT_ADDR, .scl_sda_cfg = &hsw_config, }, + [cherrytrail] = { + .bus_num = -1, + .bus_cfg = INTEL_MID_STD_CFG | DW_IC_CON_SPEED_FAST, + .tx_fifo_depth = 32, + .rx_fifo_depth = 32, + .functionality = I2C_FUNC_10BIT_ADDR, + .flags = MODEL_CHERRYTRAIL, + .scl_sda_cfg = &byt_config, + }, }; #ifdef CONFIG_PM @@ -237,6 +248,7 @@ static int i2c_dw_pci_probe(struct pci_dev *pdev, dev->base = pcim_iomap_table(pdev)[0]; dev->dev = &pdev->dev; dev->irq = pdev->irq; + dev->flags |= controller->flags; if (controller->setup) { r = controller->setup(pdev, controller); @@ -317,13 +329,13 @@ static const struct pci_device_id i2_designware_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x9c61), haswell }, { PCI_VDEVICE(INTEL, 0x9c62), haswell }, /* Braswell / Cherrytrail */ - { PCI_VDEVICE(INTEL, 0x22C1), baytrail }, - { PCI_VDEVICE(INTEL, 0x22C2), baytrail }, - { PCI_VDEVICE(INTEL, 0x22C3), baytrail }, - { PCI_VDEVICE(INTEL, 0x22C4), baytrail }, - { PCI_VDEVICE(INTEL, 0x22C5), baytrail }, - { PCI_VDEVICE(INTEL, 0x22C6), baytrail }, - { PCI_VDEVICE(INTEL, 0x22C7), baytrail }, + { PCI_VDEVICE(INTEL, 0x22C1), cherrytrail }, + { PCI_VDEVICE(INTEL, 0x22C2), cherrytrail }, + { PCI_VDEVICE(INTEL, 0x22C3), cherrytrail }, + { PCI_VDEVICE(INTEL, 0x22C4), cherrytrail }, + { PCI_VDEVICE(INTEL, 0x22C5), cherrytrail }, + { PCI_VDEVICE(INTEL, 0x22C6), cherrytrail }, + { PCI_VDEVICE(INTEL, 0x22C7), cherrytrail }, { 0,} }; MODULE_DEVICE_TABLE(pci, i2_designware_pci_ids); diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index d474db074b1a..df0ff7d82b49 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -123,7 +123,7 @@ static const struct acpi_device_id dw_i2c_acpi_match[] = { { "INT3432", 0 }, { "INT3433", 0 }, { "80860F41", 0 }, - { "808622C1", 0 }, + { "808622C1", MODEL_CHERRYTRAIL }, { "AMD0010", ACCESS_INTR_MASK }, { "AMDI0010", ACCESS_INTR_MASK }, { "AMDI0510", 0 }, From 5b2cacceb7a877f5eea60ffc7bb6ccd62c4d51cc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Feb 2017 11:27:59 +0100 Subject: [PATCH 0489/1299] i2c: designware-baytrail: Acquire P-Unit access on bus acquire Acquire P-Unit access to stop others from accessing the P-Unit while the PMIC i2c bus is in use. This is necessary because accessing the P-Unit from the kernel may result in the P-Unit trying to access the PMIC i2c bus, which results in a hang when it happens while we own the PMIC i2c bus semaphore. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=155241 Signed-off-by: Hans de Goede Tested-by: tagorereddy Acked-by: Wolfram Sang Acked-by: Jarkko Nikula Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170210102802.20898-10-hdegoede@redhat.com --- drivers/i2c/busses/i2c-designware-baytrail.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/busses/i2c-designware-baytrail.c b/drivers/i2c/busses/i2c-designware-baytrail.c index 3effc9a1778c..7eddc3b38852 100644 --- a/drivers/i2c/busses/i2c-designware-baytrail.c +++ b/drivers/i2c/busses/i2c-designware-baytrail.c @@ -62,6 +62,8 @@ static void reset_semaphore(struct dw_i2c_dev *dev) dev_err(dev->dev, "iosf failed to reset punit semaphore during write\n"); pm_qos_update_request(&dev->pm_qos, PM_QOS_DEFAULT_VALUE); + + iosf_mbi_punit_release(); } static int baytrail_i2c_acquire(struct dw_i2c_dev *dev) @@ -79,6 +81,8 @@ static int baytrail_i2c_acquire(struct dw_i2c_dev *dev) if (!dev->release_lock) return 0; + iosf_mbi_punit_acquire(); + /* * Disallow the CPU to enter C6 or C7 state, entering these states * requires the punit to talk to the pmic and if this happens while From d93a6ed370667be38c465c675229d3229fc8681e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Feb 2017 11:28:00 +0100 Subject: [PATCH 0490/1299] i2c: designware-baytrail: Call pmic_bus_access_notifier_chain Call the iosf_mbi pmic_bus_access_notifier_chain on bus acquire / release. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=155241 Signed-off-by: Hans de Goede Tested-by: tagorereddy Reviewed-by: Andy Shevchenko Acked-by: Wolfram Sang Acked-by: Jarkko Nikula Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170210102802.20898-11-hdegoede@redhat.com --- drivers/i2c/busses/i2c-designware-baytrail.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/i2c/busses/i2c-designware-baytrail.c b/drivers/i2c/busses/i2c-designware-baytrail.c index 7eddc3b38852..1749a0f5a9fa 100644 --- a/drivers/i2c/busses/i2c-designware-baytrail.c +++ b/drivers/i2c/busses/i2c-designware-baytrail.c @@ -63,6 +63,8 @@ static void reset_semaphore(struct dw_i2c_dev *dev) pm_qos_update_request(&dev->pm_qos, PM_QOS_DEFAULT_VALUE); + iosf_mbi_call_pmic_bus_access_notifier_chain(MBI_PMIC_BUS_ACCESS_END, + NULL); iosf_mbi_punit_release(); } @@ -82,6 +84,8 @@ static int baytrail_i2c_acquire(struct dw_i2c_dev *dev) return 0; iosf_mbi_punit_acquire(); + iosf_mbi_call_pmic_bus_access_notifier_chain(MBI_PMIC_BUS_ACCESS_BEGIN, + NULL); /* * Disallow the CPU to enter C6 or C7 state, entering these states From 68f60946c1b6d1504af8994f4a419769217e4f23 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Feb 2017 11:28:01 +0100 Subject: [PATCH 0491/1299] drm/i915: Add intel_uncore_suspend / resume functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename intel_uncore_early_sanitize to intel_uncore_resume, dropping the (always true) restore_forcewake argument and add a new intel_uncore_resume function to replace the intel_uncore_forcewake_reset(dev_priv, false) calls done from the suspend / runtime_suspend functions and make intel_uncore_forcewake_reset private. This is a preparation patch for adding PMIC bus access notifier support. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=155241 Signed-off-by: Hans de Goede Tested-by: tagorereddy Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170210102802.20898-12-hdegoede@redhat.com --- drivers/gpu/drm/i915/i915_drv.c | 6 +++--- drivers/gpu/drm/i915/i915_drv.h | 6 ++---- drivers/gpu/drm/i915/intel_uncore.c | 14 +++++++++----- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f6017f2cfb86..58eb308fca41 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1456,7 +1456,7 @@ static int i915_drm_suspend(struct drm_device *dev) opregion_target_state = suspend_to_idle(dev_priv) ? PCI_D1 : PCI_D3cold; intel_opregion_notify_adapter(dev_priv, opregion_target_state); - intel_uncore_forcewake_reset(dev_priv, false); + intel_uncore_suspend(dev_priv); intel_opregion_unregister(dev_priv); intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED, true); @@ -1701,7 +1701,7 @@ static int i915_drm_resume_early(struct drm_device *dev) DRM_ERROR("Resume prepare failed: %d, continuing anyway\n", ret); - intel_uncore_early_sanitize(dev_priv, true); + intel_uncore_resume_early(dev_priv); if (IS_GEN9_LP(dev_priv)) { if (!dev_priv->suspended_to_idle) @@ -2343,7 +2343,7 @@ static int intel_runtime_suspend(struct device *kdev) return ret; } - intel_uncore_forcewake_reset(dev_priv, false); + intel_uncore_suspend(dev_priv); enable_rpm_wakeref_asserts(dev_priv); WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakeref_count)); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e44c598ecb82..5f70546f7636 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2984,14 +2984,12 @@ int intel_irq_install(struct drm_i915_private *dev_priv); void intel_irq_uninstall(struct drm_i915_private *dev_priv); extern void intel_uncore_sanitize(struct drm_i915_private *dev_priv); -extern void intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, - bool restore_forcewake); extern void intel_uncore_init(struct drm_i915_private *dev_priv); extern bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv); extern bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv); extern void intel_uncore_fini(struct drm_i915_private *dev_priv); -extern void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, - bool restore); +extern void intel_uncore_suspend(struct drm_i915_private *dev_priv); +extern void intel_uncore_resume_early(struct drm_i915_private *dev_priv); const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id); void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, enum forcewake_domains domains); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index abe08885a5ba..df84b04f215e 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -250,8 +250,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer) return HRTIMER_NORESTART; } -void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, - bool restore) +static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv, + bool restore) { unsigned long irqflags; struct intel_uncore_forcewake_domain *domain; @@ -427,10 +427,14 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, intel_uncore_forcewake_reset(dev_priv, restore_forcewake); } -void intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, - bool restore_forcewake) +void intel_uncore_suspend(struct drm_i915_private *dev_priv) { - __intel_uncore_early_sanitize(dev_priv, restore_forcewake); + intel_uncore_forcewake_reset(dev_priv, false); +} + +void intel_uncore_resume_early(struct drm_i915_private *dev_priv) +{ + __intel_uncore_early_sanitize(dev_priv, true); i915_check_and_clear_faults(dev_priv); } From 264ec1a8221c60f9ccf13f58ac597da21235132d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 10 Feb 2017 11:28:02 +0100 Subject: [PATCH 0492/1299] drm/i915: Listen for PMIC bus access notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Listen for PMIC bus access notifications and get FORCEWAKE_ALL while the bus is accessed to avoid needing to do any forcewakes, which need PMIC bus access, while the PMIC bus is busy: This fixes errors like these showing up in dmesg, usually followed by a gfx or system freeze: [drm:fw_domains_get [i915]] *ERROR* render: timed out waiting for forcewake ack request. [drm:fw_domains_get [i915]] *MEDIA* render: timed out waiting for forcewake ack request. i2c_designware 808622C1:06: punit semaphore timed out, resetting i2c_designware 808622C1:06: PUNIT SEM: 2 i2c_designware 808622C1:06: couldn't acquire bus ownership Downside of this approach is that it causes wakeups whenever the PMIC bus is accessed. Unfortunately we cannot simply wait for the PMIC bus to go idle when we hit a race, as forcewakes may be done from interrupt handlers where we cannot sleep to wait for the i2c PMIC bus access to finish. Note that the notifications and thus the wakeups will only happen on baytrail / cherrytrail devices using PMICs with a shared i2c bus for P-Unit and host PMIC access (i2c busses with a _SEM method in their APCI node), e.g. an axp288 PMIC. I plan to write some patches for drivers accessing the PMIC bus to limit their bus accesses to a bare minimum (e.g. cache registers, do not update battery level more often then 4 times a minute), to limit the amount of wakeups. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=155241 Signed-off-by: Hans de Goede Tested-by: tagorereddy Reviewed-by: Ville Syrjälä [danvet: Wiggle in conflicts.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/Kconfig | 1 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_uncore.c | 39 +++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 183f5dc1c3f2..88e60c9291e4 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -19,6 +19,7 @@ config DRM_I915 select INPUT if ACPI select ACPI_VIDEO if ACPI select ACPI_BUTTON if ACPI + select IOSF_MBI help Choose this option if you have a system that has "Intel Graphics Media Accelerator" or "HD Graphics" integrated graphics, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5f70546f7636..19a51882adf5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -722,6 +722,7 @@ struct intel_uncore { const struct intel_forcewake_range *fw_domains_table; unsigned int fw_domains_table_entries; + struct notifier_block pmic_bus_access_nb; struct intel_uncore_funcs funcs; unsigned fifo_count; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index df84b04f215e..6106a54ecff5 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -25,6 +25,7 @@ #include "intel_drv.h" #include "i915_vgpu.h" +#include #include #define FORCEWAKE_ACK_TIMEOUT_MS 50 @@ -429,12 +430,16 @@ static void __intel_uncore_early_sanitize(struct drm_i915_private *dev_priv, void intel_uncore_suspend(struct drm_i915_private *dev_priv) { + iosf_mbi_unregister_pmic_bus_access_notifier( + &dev_priv->uncore.pmic_bus_access_nb); intel_uncore_forcewake_reset(dev_priv, false); } void intel_uncore_resume_early(struct drm_i915_private *dev_priv) { __intel_uncore_early_sanitize(dev_priv, true); + iosf_mbi_register_pmic_bus_access_notifier( + &dev_priv->uncore.pmic_bus_access_nb); i915_check_and_clear_faults(dev_priv); } @@ -1390,6 +1395,32 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) dev_priv->uncore.fw_domains_table_entries = ARRAY_SIZE((d)); \ } +static int i915_pmic_bus_access_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct drm_i915_private *dev_priv = container_of(nb, + struct drm_i915_private, uncore.pmic_bus_access_nb); + + switch (action) { + case MBI_PMIC_BUS_ACCESS_BEGIN: + /* + * forcewake all now to make sure that we don't need to do a + * forcewake later which on systems where this notifier gets + * called requires the punit to access to the shared pmic i2c + * bus, which will be busy after this notification, leading to: + * "render: timed out waiting for forcewake ack request." + * errors. + */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + break; + case MBI_PMIC_BUS_ACCESS_END: + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + break; + } + + return NOTIFY_OK; +} + void intel_uncore_init(struct drm_i915_private *dev_priv) { i915_check_vgpu(dev_priv); @@ -1399,6 +1430,8 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) __intel_uncore_early_sanitize(dev_priv, false); dev_priv->uncore.unclaimed_mmio_check = 1; + dev_priv->uncore.pmic_bus_access_nb.notifier_call = + i915_pmic_bus_access_notifier; switch (INTEL_INFO(dev_priv)->gen) { default: @@ -1458,6 +1491,9 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) ASSIGN_READ_MMIO_VFUNCS(vgpu); } + iosf_mbi_register_pmic_bus_access_notifier( + &dev_priv->uncore.pmic_bus_access_nb); + i915_check_and_clear_faults(dev_priv); } #undef ASSIGN_WRITE_MMIO_VFUNCS @@ -1465,6 +1501,9 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) void intel_uncore_fini(struct drm_i915_private *dev_priv) { + iosf_mbi_unregister_pmic_bus_access_notifier( + &dev_priv->uncore.pmic_bus_access_nb); + /* Paranoia: make sure we have disabled everything before we exit. */ intel_uncore_sanitize(dev_priv); intel_uncore_forcewake_reset(dev_priv, false); From e60a870d7f83517e583e6094ff5646d3a8d732db Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 11:51:30 +0000 Subject: [PATCH 0493/1299] drm/i915: Assert that fence->lock is held in an irq-safe manner Everytime we take the fence->lock (aka request->lock), we must do so with irqs disabled since it may be used from within an hardirq context. As sometimes we are taking the lock in a nested manner, assert that the caller did disable the irqs for us. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170302115130.28434-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_request.c | 5 ++++- drivers/gpu/drm/i915/intel_breadcrumbs.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 77c3ee7a3fd0..582471c00fce 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -400,12 +400,14 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) struct intel_timeline *timeline; u32 seqno; + GEM_BUG_ON(!irqs_disabled()); + assert_spin_locked(&engine->timeline->lock); + trace_i915_gem_request_execute(request); /* Transfer from per-context onto the global per-engine timeline */ timeline = engine->timeline; GEM_BUG_ON(timeline == request->timeline); - assert_spin_locked(&timeline->lock); seqno = timeline_get_seqno(timeline); GEM_BUG_ON(!seqno); @@ -446,6 +448,7 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) struct intel_engine_cs *engine = request->engine; struct intel_timeline *timeline; + GEM_BUG_ON(!irqs_disabled()); assert_spin_locked(&engine->timeline->lock); /* Only unwind in reverse order, required so that the per-context list diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 5752f23fd289..ba54dd1949e8 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -653,6 +653,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) */ /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ + GEM_BUG_ON(!irqs_disabled()); assert_spin_locked(&request->lock); seqno = i915_gem_request_global_seqno(request); @@ -709,6 +710,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; + GEM_BUG_ON(!irqs_disabled()); assert_spin_locked(&request->lock); GEM_BUG_ON(!request->signaling.wait.seqno); From 675204153e82c1048886da419b411bb95e83c797 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 13:28:01 +0000 Subject: [PATCH 0494/1299] drm/i915: s/assert_spin_locked/lockdep_assert_held/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit assert_spin_locked() becomes an unconditionally compiled BUG_ON(), adding debug code right into the heart of critical routines like interrupt handlers. text data bss dec hex 1296480 19944 2272 1318696 141f28 before (lockdep disabled) 1295984 19944 2272 1318200 141d38 after 1336261 21139 3208 1360608 14c2e0 before (lockdep enabled) 1339920 21139 3208 1364267 14d12b after Small saving for release; hopefully more instructive in debug. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302132801.599-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_request.c | 4 +-- drivers/gpu/drm/i915/i915_irq.c | 30 +++++++++++----------- drivers/gpu/drm/i915/i915_perf.c | 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 12 ++++----- drivers/gpu/drm/i915/intel_fifo_underrun.c | 14 +++++----- drivers/gpu/drm/i915/intel_hotplug.c | 2 +- drivers/gpu/drm/i915/intel_pipe_crc.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 8 +++--- drivers/gpu/drm/i915/intel_uncore.c | 4 +-- 9 files changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 582471c00fce..b881d4e1bd1e 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -401,7 +401,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) u32 seqno; GEM_BUG_ON(!irqs_disabled()); - assert_spin_locked(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline->lock); trace_i915_gem_request_execute(request); @@ -449,7 +449,7 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) struct intel_timeline *timeline; GEM_BUG_ON(!irqs_disabled()); - assert_spin_locked(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline->lock); /* Only unwind in reverse order, required so that the per-context list * is kept in seqno/ring order. diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 19892854707f..e0aa686f9c68 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -180,7 +180,7 @@ i915_hotplug_interrupt_update_locked(struct drm_i915_private *dev_priv, { uint32_t val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(bits & ~mask); val = I915_READ(PORT_HOTPLUG_EN); @@ -222,7 +222,7 @@ void ilk_update_display_irq(struct drm_i915_private *dev_priv, { uint32_t new_val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -250,7 +250,7 @@ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, uint32_t interrupt_mask, uint32_t enabled_irq_mask) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -302,7 +302,7 @@ static void snb_update_pm_irq(struct drm_i915_private *dev_priv, WARN_ON(enabled_irq_mask & ~interrupt_mask); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); new_val = dev_priv->pm_imr; new_val &= ~interrupt_mask; @@ -340,7 +340,7 @@ void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask) { i915_reg_t reg = gen6_pm_iir(dev_priv); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); I915_WRITE(reg, reset_mask); I915_WRITE(reg, reset_mask); @@ -349,7 +349,7 @@ void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask) void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); dev_priv->pm_ier |= enable_mask; I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier); @@ -359,7 +359,7 @@ void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask) void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_mask) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); dev_priv->pm_ier &= ~disable_mask; __gen6_mask_pm_irq(dev_priv, disable_mask); @@ -463,7 +463,7 @@ static void bdw_update_port_irq(struct drm_i915_private *dev_priv, uint32_t new_val; uint32_t old_val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -496,7 +496,7 @@ void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, { uint32_t new_val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -530,7 +530,7 @@ void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, WARN_ON(enabled_irq_mask & ~interrupt_mask); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (WARN_ON(!intel_irqs_enabled(dev_priv))) return; @@ -546,7 +546,7 @@ __i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, i915_reg_t reg = PIPESTAT(pipe); u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(!intel_irqs_enabled(dev_priv)); if (WARN_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK || @@ -573,7 +573,7 @@ __i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, i915_reg_t reg = PIPESTAT(pipe); u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(!intel_irqs_enabled(dev_priv)); if (WARN_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK || @@ -3399,7 +3399,7 @@ static int ironlake_irq_postinstall(struct drm_device *dev) void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (dev_priv->display_irqs_enabled) return; @@ -3414,7 +3414,7 @@ void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv) void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (!dev_priv->display_irqs_enabled) return; @@ -4090,7 +4090,7 @@ static void i915_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_en; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); /* Note HDMI and DP share hotplug bits */ /* enable bits are the same for all generations */ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a1b7eec58be2..8c121187ff39 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1008,7 +1008,7 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv) { - assert_spin_locked(&dev_priv->perf.hook_lock); + lockdep_assert_held(&dev_priv->perf.hook_lock); if (dev_priv->perf.oa.exclusive_stream->enabled) { struct i915_gem_context *ctx = diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index ba54dd1949e8..235d4645a5cf 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -164,7 +164,7 @@ void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - assert_spin_locked(&b->lock); + lockdep_assert_held(&b->lock); if (b->irq_enabled) { irq_disable(engine); @@ -228,7 +228,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) container_of(b, struct intel_engine_cs, breadcrumbs); struct drm_i915_private *i915 = engine->i915; - assert_spin_locked(&b->lock); + lockdep_assert_held(&b->lock); if (b->irq_armed) return; @@ -276,7 +276,7 @@ static inline struct intel_wait *to_wait(struct rb_node *node) static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, struct intel_wait *wait) { - assert_spin_locked(&b->lock); + lockdep_assert_held(&b->lock); /* This request is completed, so remove it from the tree, mark it as * complete, and *then* wake up the associated task. @@ -432,7 +432,7 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, { struct intel_breadcrumbs *b = &engine->breadcrumbs; - assert_spin_locked(&b->lock); + lockdep_assert_held(&b->lock); if (RB_EMPTY_NODE(&wait->node)) goto out; @@ -654,7 +654,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ GEM_BUG_ON(!irqs_disabled()); - assert_spin_locked(&request->lock); + lockdep_assert_held(&request->lock); seqno = i915_gem_request_global_seqno(request); if (!seqno) @@ -711,7 +711,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) struct intel_breadcrumbs *b = &engine->breadcrumbs; GEM_BUG_ON(!irqs_disabled()); - assert_spin_locked(&request->lock); + lockdep_assert_held(&request->lock); GEM_BUG_ON(!request->signaling.wait.seqno); spin_lock(&b->lock); diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index e660d8b4bbc3..f5ddacc90fde 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -54,7 +54,7 @@ static bool ivb_can_enable_err_int(struct drm_device *dev) struct intel_crtc *crtc; enum pipe pipe; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { crtc = intel_get_crtc_for_pipe(dev_priv, pipe); @@ -72,7 +72,7 @@ static bool cpt_can_enable_serr_int(struct drm_device *dev) enum pipe pipe; struct intel_crtc *crtc; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { crtc = intel_get_crtc_for_pipe(dev_priv, pipe); @@ -90,7 +90,7 @@ static void i9xx_check_fifo_underruns(struct intel_crtc *crtc) i915_reg_t reg = PIPESTAT(crtc->pipe); u32 pipestat = I915_READ(reg) & 0xffff0000; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if ((pipestat & PIPE_FIFO_UNDERRUN_STATUS) == 0) return; @@ -109,7 +109,7 @@ static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev, i915_reg_t reg = PIPESTAT(pipe); u32 pipestat = I915_READ(reg) & 0xffff0000; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (enable) { I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS); @@ -139,7 +139,7 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) enum pipe pipe = crtc->pipe; uint32_t err_int = I915_READ(GEN7_ERR_INT); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if ((err_int & ERR_INT_FIFO_UNDERRUN(pipe)) == 0) return; @@ -204,7 +204,7 @@ static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc) enum transcoder pch_transcoder = (enum transcoder) crtc->pipe; uint32_t serr_int = I915_READ(SERR_INT); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if ((serr_int & SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder)) == 0) return; @@ -248,7 +248,7 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); bool old; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); old = !crtc->cpu_fifo_underrun_disabled; crtc->cpu_fifo_underrun_disabled = !enable; diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 0756bdc672b3..ba763e7d7dcf 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -157,7 +157,7 @@ static void intel_hpd_irq_storm_disable(struct drm_i915_private *dev_priv) enum hpd_pin pin; bool hpd_disabled = false; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); list_for_each_entry(connector, &mode_config->connector_list, head) { if (connector->polled != DRM_CONNECTOR_POLL_HPD) diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index c0b1f99da37b..c782b7878288 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -105,7 +105,7 @@ static int i915_pipe_crc_release(struct inode *inode, struct file *filep) static int pipe_crc_data_count(struct intel_pipe_crc *pipe_crc) { - assert_spin_locked(&pipe_crc->lock); + lockdep_assert_held(&pipe_crc->lock); return CIRC_CNT(pipe_crc->head, pipe_crc->tail, INTEL_PIPE_CRC_ENTRIES_NR); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b00c95e4a81f..50e672f84276 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4672,7 +4672,7 @@ bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val) { u16 rgvswctl; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); rgvswctl = I915_READ16(MEMSWCTL); if (rgvswctl & MEMCTL_CMD_STS) { @@ -6203,7 +6203,7 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) unsigned long now = jiffies_to_msecs(jiffies), diff1; int i; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); diff1 = now - dev_priv->ips.last_time1; @@ -6308,7 +6308,7 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) u64 now, diff, diffms; u32 count; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); now = ktime_get_raw_ns(); diffms = now - dev_priv->ips.last_time2; @@ -6353,7 +6353,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) unsigned long t, corr, state1, corr2, state2; u32 pxvid, ext_v; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq)); pxvid = (pxvid >> 24) & 0x7f; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 441c51fd9746..b35b7a03eeaf 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -506,7 +506,7 @@ void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { - assert_spin_locked(&dev_priv->uncore.lock); + lockdep_assert_held(&dev_priv->uncore.lock); if (!dev_priv->uncore.funcs.force_wake_get) return; @@ -564,7 +564,7 @@ void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv, void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { - assert_spin_locked(&dev_priv->uncore.lock); + lockdep_assert_held(&dev_priv->uncore.lock); if (!dev_priv->uncore.funcs.force_wake_put) return; From 25afdf89ad8c8751be25100b55ee7f09fddaa52e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 14:53:23 +0000 Subject: [PATCH 0495/1299] drm/i915/guc: Disable irq for __i915_guc_submit wq_lock __i915_guc_submit may be, despite my assertion, called from outside of an irq-safe spinlock so we need to use a full spin_lock_irqsave and not cheat using a spin_lock. (The initial notify callback from the completed fence is called before the spinlock is taken to wake up all waiters and call their callbacks.) [ 48.166581] kernel BUG at drivers/gpu/drm/i915/i915_guc_submission.c:527! [ 48.166617] invalid opcode: 0000 [#1] PREEMPT SMP [ 48.166644] Modules linked in: i915 prime_numbers x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mei_me mei i2c_i801 netconsole i2c_hid [last unloaded: i915] [ 48.166733] CPU: 2 PID: 5 Comm: kworker/u8:0 Tainted: G U 4.10.0nightly-170302-guc_scrub+ #19 [ 48.166778] Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0054.2016.0930.1102 09/30/2016 [ 48.166835] Workqueue: i915 __intel_autoenable_gt_powersave [i915] [ 48.166865] task: ffff88084ab7cf40 task.stack: ffffc90000064000 [ 48.166921] RIP: 0010:__i915_guc_submit+0x1e6/0x2a0 [i915] [ 48.166953] RSP: 0018:ffffc90000067c80 EFLAGS: 00010202 [ 48.166979] RAX: 0000000000000202 RBX: ffff8808465e0c68 RCX: 0000000000000201 [ 48.167016] RDX: 0000000080000201 RSI: ffff88084ab7d798 RDI: ffff88082b8a8040 [ 48.167054] RBP: ffffc90000067cd8 R08: 0000000000000001 R09: 0000000000000000 [ 48.167085] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88082b8a8148 [ 48.167126] R13: 0000000000000000 R14: ffff88082f440000 R15: ffff88082e85e660 [ 48.167156] FS: 0000000000000000(0000) GS:ffff88086ed00000(0000) knlGS:0000000000000000 [ 48.167195] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 48.167226] CR2: 000055862ffcdc2c CR3: 0000000001e0f000 CR4: 00000000003406e0 [ 48.167257] Call Trace: [ 48.168112] ? trace_hardirqs_on+0xd/0x10 [ 48.168966] ? _raw_spin_unlock_irqrestore+0x4a/0x80 [ 48.169831] i915_guc_submit+0x1a/0x20 [i915] [ 48.170680] submit_notify+0x89/0xc0 [i915] [ 48.171512] __i915_sw_fence_complete+0x175/0x220 [i915] [ 48.172340] i915_sw_fence_complete+0x2a/0x50 [i915] [ 48.173158] i915_sw_fence_commit+0x21/0x30 [i915] [ 48.173968] __i915_add_request+0x238/0x530 [i915] [ 48.174764] __intel_autoenable_gt_powersave+0x8b/0xb0 [i915] [ 48.175549] process_one_work+0x218/0x690 [ 48.176318] ? process_one_work+0x197/0x690 [ 48.177183] worker_thread+0x4e/0x4a0 [ 48.178039] kthread+0x10c/0x140 [ 48.178878] ? process_one_work+0x690/0x690 [ 48.179718] ? kthread_create_on_node+0x40/0x40 [ 48.180568] ret_from_fork+0x31/0x40 [ 48.181423] Code: 02 00 00 43 89 84 ae 50 11 00 00 e8 75 01 62 e1 48 83 c4 30 5b 41 5c 41 5d 41 5e 41 5f 5d c3 48 c1 e0 20 48 09 c2 49 89 d0 eb 82 <0f> 0b 0f 0b 0f 0b 0f 0b 0f 0b 0f 0b 49 c1 e8 20 44 89 43 34 4a [ 48.183336] RIP: __i915_guc_submit+0x1e6/0x2a0 [i915] RSP: ffffc90000067c80 Reported-by: Arkadiusz Hiler Fixes: 349ab9192cc3 ("drm/i915/guc: Make wq_lock irq-safe") Fixes: 67b807a89230 ("drm/i915: Delay disabling the user interrupt for breadcrumbs") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Arkadiusz Hiler Link: http://patchwork.freedesktop.org/patch/msgid/20170302145323.12886-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin Reviewed-by: Arkadiusz Hiler Tested-by: Arkadiusz Hiler --- drivers/gpu/drm/i915/i915_guc_submission.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 7b535a32fc27..beb38e30d0e9 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -515,6 +515,7 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) unsigned int engine_id = engine->id; struct intel_guc *guc = &rq->i915->guc; struct i915_guc_client *client = guc->execbuf_client; + unsigned long flags; int b_ret; /* WA to flush out the pending GMADR writes to ring buffer. */ @@ -523,10 +524,7 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) trace_i915_gem_request_in(rq, 0); - /* We are always called with irqs disabled */ - GEM_BUG_ON(!irqs_disabled()); - - spin_lock(&client->wq_lock); + spin_lock_irqsave(&client->wq_lock, flags); guc_wq_item_append(client, rq); b_ret = guc_ring_doorbell(client); @@ -539,7 +537,7 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) guc->submissions[engine_id] += 1; guc->last_seqno[engine_id] = rq->global_seqno; - spin_unlock(&client->wq_lock); + spin_unlock_irqrestore(&client->wq_lock, flags); } static void i915_guc_submit(struct drm_i915_gem_request *rq) From f73b567462e0f086bbff5ae22040ff1bc3d87487 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 15:03:56 +0000 Subject: [PATCH 0496/1299] drm/i915: Include GT/seqno activity in engine/hangcheck debugfs Whilst investigating some mysterious failures with hangcheck not running during gem_busy/basic-hang-default, the question is why did we decide to cancel the retire_work (which queues the hangcheck)? That decision is based around GT activity, so include that information in the debug report. v2: Include the GT awake status in the error state Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170302150356.9713-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_debugfs.c | 17 +++++++++++++---- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1a28b5279bec..b84ee7a323d2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1361,14 +1361,17 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) } else seq_printf(m, "Hangcheck inactive\n"); + seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake)); + for_each_engine(engine, dev_priv, id) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node *rb; seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x]\n", + seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine)); + intel_engine_last_submit(engine), + engine->timeline->inflight_seqnos); seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, @@ -3253,6 +3256,11 @@ static int i915_engine_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); + seq_printf(m, "GT awake? %s\n", + yesno(dev_priv->gt.awake)); + seq_printf(m, "Global active requests: %d\n", + dev_priv->gt.active_requests); + for_each_engine(engine, dev_priv, id) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct drm_i915_gem_request *rq; @@ -3260,11 +3268,12 @@ static int i915_engine_info(struct seq_file *m, void *unused) u64 addr; seq_printf(m, "%s\n", engine->name); - seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", + seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), + engine->timeline->inflight_seqnos); rcu_read_lock(); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 48b34a937792..d9f29d19285f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -934,6 +934,7 @@ struct i915_gpu_state { char error_msg[128]; bool simulated; + bool awake; int iommu; u32 reset_count; u32 suspend_count; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2b1d15668192..95797700384a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -632,6 +632,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, CSR_VERSION_MINOR(csr->version)); } + err_printf(m, "GT awake: %s\n", yesno(error->awake)); err_printf(m, "EIR: 0x%08x\n", error->eir); err_printf(m, "IER: 0x%08x\n", error->ier); for (i = 0; i < error->ngtier; i++) @@ -1615,6 +1616,8 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, static void i915_capture_gen_state(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { + error->awake = dev_priv->gt.awake; + error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU error->iommu = intel_iommu_gfx_mapped; From 7afbeb6df2aa5f9e3a0fc228817a85c16dea0faa Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 24 Feb 2017 12:56:12 +0100 Subject: [PATCH 0497/1299] s390/ipl: always use load normal for CCW-type re-IPL commit 14890678687c ("s390/ipl: use load normal for LPAR re-ipl") missed to convert one code path to use load normal semantics for re-IPL. Convert the missing code path as well. Fixes: 14890678687c ("s390/ipl: use load normal for LPAR re-ipl") Reported-by: Michael Holzheu Acked-by: Michael Holzheu Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index b67dafb7b7cf..e545ffe5155a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -564,6 +564,8 @@ static struct kset *ipl_kset; static void __ipl_run(void *unused) { + if (MACHINE_IS_LPAR && ipl_info.type == IPL_TYPE_CCW) + diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); diag308(DIAG308_LOAD_CLEAR, NULL); if (MACHINE_IS_VM) __cpcmd("IPL", NULL, 0, NULL); From 2e4d88009f57057df7672fa69a32b5224af54d37 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 2 Mar 2017 15:23:42 +0100 Subject: [PATCH 0498/1299] KVM: s390: Fix guest migration for huge guests resulting in panic While we can technically not run huge page guests right now, we can setup a guest with huge pages. Trying to migrate it will trigger a VM_BUG_ON and, if the kernel is not configured to panic on a BUG, it will happily try to work on non-existing page table entries. With this patch, we always return "dirty" if we encounter a large page when migrating. This at least fixes the immediate problem until we have proper handling for both kind of pages. Fixes: 15f36eb ("KVM: s390: Add proper dirty bitmap support to S390 kvm.") Cc: # 3.16+ Signed-off-by: Janosch Frank Acked-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index b48dc5f1900b..463e5ef02304 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -608,12 +608,29 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) { spinlock_t *ptl; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; pgste_t pgste; pte_t *ptep; pte_t pte; bool dirty; - ptep = get_locked_pte(mm, addr, &ptl); + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + if (!pud) + return false; + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return false; + /* We can't run guests backed by huge pages, but userspace can + * still set them up and then try to migrate them without any + * migration support. + */ + if (pmd_large(*pmd)) + return true; + + ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (unlikely(!ptep)) return false; From e5aac87eaed761f39e747e8711faadb7306df488 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 15:15:44 +0000 Subject: [PATCH 0499/1299] drm/i915: Include power-management state in gpu error dump Useful for double checking that the device is powered up when it hung, include both the status of the power management and our rpm wakelock. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170302151544.16915-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gpu_error.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d9f29d19285f..430c40dd1622 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -935,6 +935,8 @@ struct i915_gpu_state { char error_msg[128]; bool simulated; bool awake; + bool wakelock; + bool suspended; int iommu; u32 reset_count; u32 suspend_count; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 95797700384a..061af8040498 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -633,6 +633,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } err_printf(m, "GT awake: %s\n", yesno(error->awake)); + err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock)); + err_printf(m, "PM suspended: %s\n", yesno(error->suspended)); err_printf(m, "EIR: 0x%08x\n", error->eir); err_printf(m, "IER: 0x%08x\n", error->ier); for (i = 0; i < error->ngtier; i++) @@ -1617,6 +1619,8 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { error->awake = dev_priv->gt.awake; + error->wakelock = atomic_read(&dev_priv->pm.wakeref_count); + error->suspended = dev_priv->pm.suspended; error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU From 5be6e33400992d3450e1c8234a5af353e1560580 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 20 Feb 2017 16:04:43 +0200 Subject: [PATCH 0500/1299] drm/i915: Do .init_clock_gating() earlier to avoid it clobbering watermarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently ILK-BDW explicitly disable LP1+ watermarks from their .init_clock_gating() hooks. Unfortunately that hook gets called way too late since by that time we've already initialized all the watermark state tracking which then gets out of sync with the hardware state. We may eventually want to consider killing off the explicit LP1+ disable from .init_clock_gating(). In the meantime however, we can avoid the problem by reordering the init sequence such that intel_modeset_init_hw()->intel_init_clock_gating() gets called prior to the hardware state takeover. I suppose prior to the two stage watermark programming we were magically saved by something that forced the watermarks to be reprogrammed fully after .init_clock_gating() got called. But now that no longer happens. Note that the diff might look a bit odd as it kills off one call of intel_update_cdclk(), but that's fine because intel_modeset_init_hw() does the exact same thing. Previously we just did it twice. Actually even this new init sequence is pretty bogus as .init_clock_gating() really should be called before any gem hardware init since it can configure various clock gating workarounds and whatnot that affect the GT side as well. Also intel_modeset_init() really should get split up into better defined init stages. Another "fun" detail is that intel_modeset_gem_init() is where RPS/RC6 gets configured. Why that is done from the display code is beyond me. I've decided to leave all this be for now, and just try to fix the init sequence enough for watermarks to work. Cc: stable@vger.kernel.org Cc: Gabriele Mazzotta Cc: David Purton Cc: Matt Roper Cc: Maarten Lankhorst Reported-by: Gabriele Mazzotta Reported-by: David Purton Tested-by: Gabriele Mazzotta Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96645 Fixes: ed4a6a7ca853 ("drm/i915: Add two-stage ILK-style watermark programming (v11)") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170220140443.30891-1-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9908bba5b3d3..5edf1ff8b688 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14970,12 +14970,11 @@ int intel_modeset_init(struct drm_device *dev) } } - intel_update_czclk(dev_priv); - intel_update_cdclk(dev_priv); - dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; - intel_shared_dpll_init(dev); + intel_update_czclk(dev_priv); + intel_modeset_init_hw(dev); + if (dev_priv->max_cdclk_freq == 0) intel_update_max_cdclk(dev_priv); @@ -15555,8 +15554,6 @@ void intel_modeset_gem_init(struct drm_device *dev) intel_init_gt_powersave(dev_priv); - intel_modeset_init_hw(dev); - intel_setup_overlay(dev_priv); } From c8659efac544b7df1106022d78617a1ca1d9a38c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 12:25:25 +0000 Subject: [PATCH 0501/1299] drm/i915: Drop spinlocks around adding to the client request list Adding to the tail of the client request list as the only other user is in the throttle ioctl that iterates forwards over the list. It only needs protection against deletion of a request as it reads it, it simply won't see a new request added to the end of the list, or it would be too early and rejected. We can further reduce the number of spinlocks required when throttling by removing stale requests from the client_list as we throttle. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170302122525.19675-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 14 ++++----- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 ++++++--- drivers/gpu/drm/i915/i915_gem_request.c | 34 ++++------------------ drivers/gpu/drm/i915/i915_gem_request.h | 4 +-- 5 files changed, 24 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b84ee7a323d2..15a76096fbb0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -506,7 +506,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mutex_lock(&dev->struct_mutex); request = list_first_entry_or_null(&file_priv->mm.request_list, struct drm_i915_gem_request, - client_list); + client_link); rcu_read_lock(); task = pid_task(request && request->ctx->pid ? request->ctx->pid : file->pid, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 275e9e0799b9..edc59bd45f53 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3666,16 +3666,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) return -EIO; spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_list) { + list_for_each_entry(request, &file_priv->mm.request_list, client_link) { if (time_after_eq(request->emitted_jiffies, recent_enough)) break; - /* - * Note that the request might not have been submitted yet. - * In which case emitted_jiffies will be zero. - */ - if (!request->emitted_jiffies) - continue; + if (target) { + list_del(&target->client_link); + target->file_priv = NULL; + } target = request; } @@ -4734,7 +4732,7 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) * file_priv. */ spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_list) + list_for_each_entry(request, &file_priv->mm.request_list, client_link) request->file_priv = NULL; spin_unlock(&file_priv->mm.lock); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index aa75ea2d0578..dd7181ed5eca 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1407,6 +1407,14 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine, return vma; } +static void +add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file) +{ + req->file_priv = file->driver_priv; + list_add_tail(&req->client_link, &req->file_priv->mm.request_list); +} + static int execbuf_submit(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, @@ -1772,10 +1780,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, */ params->request->batch = params->batch; - ret = i915_gem_request_add_to_client(params->request, file); - if (ret) - goto err_request; - /* * Save assorted stuff away to pass through to *_submission(). * NB: This data should be 'persistent' and not local as it will @@ -1793,6 +1797,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ret = execbuf_submit(params, args, &eb->vmas); err_request: __i915_add_request(params->request, ret == 0); + add_to_client(params->request, file); + if (out_fence) { if (ret == 0) { fd_install(out_fence_fd, out_fence->file); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index b881d4e1bd1e..c2cee9674cb0 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -82,42 +82,20 @@ const struct dma_fence_ops i915_fence_ops = { .release = i915_fence_release, }; -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file) -{ - struct drm_i915_private *dev_private; - struct drm_i915_file_private *file_priv; - - WARN_ON(!req || !file || req->file_priv); - - if (!req || !file) - return -EINVAL; - - if (req->file_priv) - return -EINVAL; - - dev_private = req->i915; - file_priv = file->driver_priv; - - spin_lock(&file_priv->mm.lock); - req->file_priv = file_priv; - list_add_tail(&req->client_list, &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); - - return 0; -} - static inline void i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) { - struct drm_i915_file_private *file_priv = request->file_priv; + struct drm_i915_file_private *file_priv; + file_priv = request->file_priv; if (!file_priv) return; spin_lock(&file_priv->mm.lock); - list_del(&request->client_list); - request->file_priv = NULL; + if (request->file_priv) { + list_del(&request->client_link); + request->file_priv = NULL; + } spin_unlock(&file_priv->mm.lock); } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 0efee879df23..5018e55922f0 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -180,7 +180,7 @@ struct drm_i915_gem_request { struct drm_i915_file_private *file_priv; /** file_priv list entry for this request */ - struct list_head client_list; + struct list_head client_link; }; extern const struct dma_fence_ops i915_fence_ops; @@ -193,8 +193,6 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence) struct drm_i915_gem_request * __must_check i915_gem_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx); -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file); void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); static inline struct drm_i915_gem_request * From e148bd17f48bd17fca2f4f089ec879fa6e47e34c Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Feb 2017 14:46:42 +0530 Subject: [PATCH 0502/1299] powerpc: Emulation support for load/store instructions on LE emulate_step() uses a number of underlying kernel functions that were initially not enabled for LE. This has been rectified since. So, fix emulate_step() for LE for the corresponding instructions. Cc: stable@vger.kernel.org # v3.18+ Reported-by: Anton Blanchard Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman --- arch/powerpc/lib/sstep.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 846dba2c6360..9c542ec70c5b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case LARX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1823,8 +1821,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case STCX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1849,8 +1845,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case LOAD: - if (regs->msr & MSR_LE) - return 0; err = read_mem(®s->gpr[op.reg], op.ea, size, regs); if (!err) { if (op.type & SIGNEXT) @@ -1862,8 +1856,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case LOAD_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); else @@ -1872,15 +1864,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case LOAD_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); goto ldst_done; #endif @@ -1903,8 +1891,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case STORE: - if (regs->msr & MSR_LE) - return 0; if ((op.type & UPDATE) && size == sizeof(long) && op.reg == 1 && op.update_reg == 1 && !(regs->msr & MSR_PR) && @@ -1917,8 +1903,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case STORE_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); else @@ -1927,15 +1911,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case STORE_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); goto ldst_done; #endif From 4ceae137bdab2232e57b2e6fd5631a22a0160938 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Feb 2017 14:46:43 +0530 Subject: [PATCH 0503/1299] powerpc: emulate_step() tests for load/store instructions Add new selftest that test emulate_step for Normal, Floating Point, Vector and Vector Scalar - load/store instructions. Test should run at boot time if CONFIG_KPROBES_SANITY_TEST and CONFIG_PPC64 is set. Sample log: emulate_step_test: ld : PASS emulate_step_test: lwz : PASS emulate_step_test: lwzx : PASS emulate_step_test: std : PASS emulate_step_test: ldarx / stdcx. : PASS emulate_step_test: lfsx : PASS emulate_step_test: stfsx : PASS emulate_step_test: lfdx : PASS emulate_step_test: stfdx : PASS emulate_step_test: lvx : PASS emulate_step_test: stvx : PASS emulate_step_test: lxvd2x : PASS emulate_step_test: stxvd2x : PASS Signed-off-by: Ravi Bangoria [mpe: Drop start/complete lines, make it all __init] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 7 + arch/powerpc/lib/Makefile | 1 + arch/powerpc/lib/test_emulate_step.c | 434 ++++++++++++++++++++++++++ 3 files changed, 442 insertions(+) create mode 100644 arch/powerpc/lib/test_emulate_step.c diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index d99bd442aacb..e7d6d86563ee 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -284,6 +284,13 @@ #define PPC_INST_BRANCH_COND 0x40800000 #define PPC_INST_LBZCIX 0x7c0006aa #define PPC_INST_STBCIX 0x7c0007aa +#define PPC_INST_LWZX 0x7c00002e +#define PPC_INST_LFSX 0x7c00042e +#define PPC_INST_STFSX 0x7c00052e +#define PPC_INST_LFDX 0x7c0004ae +#define PPC_INST_STFDX 0x7c0005ae +#define PPC_INST_LVX 0x7c0000ce +#define PPC_INST_STVX 0x7c0001ce /* macros to insert fields into opcodes */ #define ___PPC_RA(a) (((a) & 0x1f) << 16) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 0e649d72fe8d..2b5e09020cfe 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -20,6 +20,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \ obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o +obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o obj-y += checksum_$(BITS).o checksum_wrappers.o diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c new file mode 100644 index 000000000000..2534c1447554 --- /dev/null +++ b/arch/powerpc/lib/test_emulate_step.c @@ -0,0 +1,434 @@ +/* + * Simple sanity test for emulate_step load/store instructions. + * + * Copyright IBM Corp. 2016 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) "emulate_step_test: " fmt + +#include +#include +#include + +#define IMM_L(i) ((uintptr_t)(i) & 0xffff) + +/* + * Defined with TEST_ prefix so it does not conflict with other + * definitions. + */ +#define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ + ___PPC_RA(base) | ((i) & 0xfffc)) +#define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b) | \ + __PPC_EH(eh)) +#define TEST_STDCX(s, a, b) (PPC_INST_STDCX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFSX(s, a, b) (PPC_INST_STFSX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFDX(t, a, b) (PPC_INST_LFDX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFDX(s, a, b) (PPC_INST_STFDX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LVX(t, a, b) (PPC_INST_LVX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STVX(s, a, b) (PPC_INST_STVX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LXVD2X(s, a, b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) +#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) + + +static void __init init_pt_regs(struct pt_regs *regs) +{ + static unsigned long msr; + static bool msr_cached; + + memset(regs, 0, sizeof(struct pt_regs)); + + if (likely(msr_cached)) { + regs->msr = msr; + return; + } + + asm volatile("mfmsr %0" : "=r"(regs->msr)); + + regs->msr |= MSR_FP; + regs->msr |= MSR_VEC; + regs->msr |= MSR_VSX; + + msr = regs->msr; + msr_cached = true; +} + +static void __init show_result(char *ins, char *result) +{ + pr_info("%-14s : %s\n", ins, result); +} + +static void __init test_ld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* ld r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LD(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("ld", "PASS"); + else + show_result("ld", "FAIL"); +} + +static void __init test_lwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* lwz r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LWZ(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("lwz", "PASS"); + else + show_result("lwz", "FAIL"); +} + +static void __init test_lwzx(void) +{ + struct pt_regs regs; + unsigned int a[3] = {0x0, 0x0, 0x1234}; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) a; + regs.gpr[4] = 8; + regs.gpr[5] = 0x8765; + + /* lwzx r5, r3, r4 */ + stepped = emulate_step(®s, TEST_LWZX(5, 3, 4)); + if (stepped == 1 && regs.gpr[5] == a[2]) + show_result("lwzx", "PASS"); + else + show_result("lwzx", "FAIL"); +} + +static void __init test_std(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + regs.gpr[5] = 0x5678; + + /* std r5, 0(r3) */ + stepped = emulate_step(®s, TEST_STD(5, 3, 0)); + if (stepped == 1 || regs.gpr[5] == a) + show_result("std", "PASS"); + else + show_result("std", "FAIL"); +} + +static void __init test_ldarx_stdcx(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */ + + init_pt_regs(®s); + asm volatile("mfcr %0" : "=r"(regs.ccr)); + + + /*** ldarx ***/ + + regs.gpr[3] = (unsigned long) &a; + regs.gpr[4] = 0; + regs.gpr[5] = 0x5678; + + /* ldarx r5, r3, r4, 0 */ + stepped = emulate_step(®s, TEST_LDARX(5, 3, 4, 0)); + + /* + * Don't touch 'a' here. Touching 'a' can do Load/store + * of 'a' which result in failure of subsequent stdcx. + * Instead, use hardcoded value for comparison. + */ + if (stepped <= 0 || regs.gpr[5] != 0x1234) { + show_result("ldarx / stdcx.", "FAIL (ldarx)"); + return; + } + + + /*** stdcx. ***/ + + regs.gpr[5] = 0x9ABC; + + /* stdcx. r5, r3, r4 */ + stepped = emulate_step(®s, TEST_STDCX(5, 3, 4)); + + /* + * Two possible scenarios that indicates successful emulation + * of stdcx. : + * 1. Reservation is active and store is performed. In this + * case cr0.eq bit will be set to 1. + * 2. Reservation is not active and store is not performed. + * In this case cr0.eq bit will be set to 0. + */ + if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq)) + || (regs.gpr[5] != a && !(regs.ccr & cr0_eq)))) + show_result("ldarx / stdcx.", "PASS"); + else + show_result("ldarx / stdcx.", "FAIL (stdcx.)"); +} + +#ifdef CONFIG_PPC_FPU +static void __init test_lfsx_stfsx(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfsx ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfsx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFSX(10, 3, 4)); + + if (stepped == 1) + show_result("lfsx", "PASS"); + else + show_result("lfsx", "FAIL"); + + + /*** stfsx ***/ + + c.a = 678.91; + + /* stfsx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFSX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfsx", "PASS"); + else + show_result("stfsx", "FAIL"); +} + +static void __init test_lfdx_stfdx(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfdx ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfdx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFDX(10, 3, 4)); + + if (stepped == 1) + show_result("lfdx", "PASS"); + else + show_result("lfdx", "FAIL"); + + + /*** stfdx ***/ + + c.a = 987654.32; + + /* stfdx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFDX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfdx", "PASS"); + else + show_result("stfdx", "FAIL"); +} +#else +static void __init test_lfsx_stfsx(void) +{ + show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); +} + +static void __init test_lfdx_stfdx(void) +{ + show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)"); +} +#endif /* CONFIG_PPC_FPU */ + +#ifdef CONFIG_ALTIVEC +static void __init test_lvx_stvx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lvx ***/ + + cached_b[0] = c.b[0] = 923745; + cached_b[1] = c.b[1] = 2139478; + cached_b[2] = c.b[2] = 9012; + cached_b[3] = c.b[3] = 982134; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lvx vrt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LVX(10, 3, 4)); + + if (stepped == 1) + show_result("lvx", "PASS"); + else + show_result("lvx", "FAIL"); + + + /*** stvx ***/ + + c.b[0] = 4987513; + c.b[1] = 84313948; + c.b[2] = 71; + c.b[3] = 498532; + + /* stvx vrs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STVX(10, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stvx", "PASS"); + else + show_result("stvx", "FAIL"); +} +#else +static void __init test_lvx_stvx(void) +{ + show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)"); + show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)"); +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +static void __init test_lxvd2x_stxvd2x(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lxvd2x ***/ + + cached_b[0] = c.b[0] = 18233; + cached_b[1] = c.b[1] = 34863571; + cached_b[2] = c.b[2] = 834; + cached_b[3] = c.b[3] = 6138911; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_LXVD2X(39, 3, 4)); + + if (stepped == 1) + show_result("lxvd2x", "PASS"); + else + show_result("lxvd2x", "FAIL"); + + + /*** stxvd2x ***/ + + c.b[0] = 21379463; + c.b[1] = 87; + c.b[2] = 374234; + c.b[3] = 4; + + /* stxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_STXVD2X(39, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stxvd2x", "PASS"); + else + show_result("stxvd2x", "FAIL"); +} +#else +static void __init test_lxvd2x_stxvd2x(void) +{ + show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +static int __init test_emulate_step(void) +{ + test_ld(); + test_lwz(); + test_lwzx(); + test_std(); + test_ldarx_stdcx(); + test_lfsx_stfsx(); + test_lfdx_stfdx(); + test_lvx_stvx(); + test_lxvd2x_stxvd2x(); + + return 0; +} +late_initcall(test_emulate_step); From 7a70d7288c926ae88e0c773fbb506aa374e99c2d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 27 Feb 2017 14:32:41 +1100 Subject: [PATCH 0504/1299] powerpc/64: Invalidate process table caching after setting process table The POWER9 MMU reads and caches entries from the process table. When we kexec from one kernel to another, the second kernel sets its process table pointer but doesn't currently do anything to make the CPU invalidate any cached entries from the old process table. This adds a tlbie (TLB invalidate entry) instruction with parameters to invalidate caching of the process table after the new process table is installed. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pgtable-radix.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index feeda90cd06d..01c94f141164 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -186,6 +186,10 @@ static void __init radix_init_pgtable(void) */ register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } static void __init radix_init_partition_table(void) From 424f8acd328a111319ae30bf384e5dfb9bc8f8cb Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Mon, 27 Feb 2017 11:10:07 +0530 Subject: [PATCH 0505/1299] powerpc/powernv: Fix bug due to labeling ambiguity in power_enter_stop Commit 09206b600c76 ("powernv: Pass PSSCR value and mask to power9_idle_stop") added additional code in power_enter_stop() to distinguish between stop requests whose PSSCR had ESL=EC=1 from those which did not. When ESL=EC=1, we do a forward-jump to a location labelled by "1", which had the code to handle the ESL=EC=1 case. Unfortunately just a couple of instructions before this label, is the macro IDLE_STATE_ENTER_SEQ() which also has a label "1" in its expansion. As a result, the current code can result in directly executing stop instruction for deep stop requests with PSSCR ESL=EC=1, without saving the hypervisor state. Fix this BUG by labeling the location that handles ESL=EC=1 case with a more descriptive label ".Lhandle_esl_ec_set" (local label suggestion a la .Lxx from Anton Blanchard). While at it, rename the label "2" labelling the location of the code handling entry into deep stop states with ".Lhandle_deep_stop". For a good measure, change the label in IDLE_STATE_ENTER_SEQ() macro to an not-so commonly used value in order to avoid similar mishaps in the future. Fixes: 09206b600c76 ("powernv: Pass PSSCR value and mask to power9_idle_stop") Signed-off-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 4 ++-- arch/powerpc/kernel/idle_book3s.S | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index fd321eb423cb..155731557c9b 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -70,8 +70,8 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err) std r0,0(r1); \ ptesync; \ ld r0,0(r1); \ -1: cmpd cr0,r0,r0; \ - bne 1b; \ +236: cmpd cr0,r0,r0; \ + bne 236b; \ IDLE_INST; \ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 5f61cc0349c0..995728736677 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -276,19 +276,21 @@ power_enter_stop: */ andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ - bne 1f + bne .Lhandle_esl_ec_set IDLE_STATE_ENTER_SEQ(PPC_STOP) li r3,0 /* Since we didn't lose state, return 0 */ b pnv_wakeup_noloss + +.Lhandle_esl_ec_set: /* * Check if the requested state is a deep idle state. */ -1: LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) cmpd r3,r4 - bge 2f + bge .Lhandle_deep_stop IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) -2: +.Lhandle_deep_stop: /* * Entering deep idle state. * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to From a6d8a21596df041f36f4c2ccc260c459e3e851f1 Mon Sep 17 00:00:00 2001 From: Sachin Sant Date: Sun, 26 Feb 2017 11:38:39 +0530 Subject: [PATCH 0506/1299] selftest/powerpc: Fix false failures for skipped tests Tests under alignment subdirectory are skipped when executed on previous generation hardware, but harness still marks them as failed. test: test_copy_unaligned tags: git_version:unknown [SKIP] Test skipped on line 26 skip: test_copy_unaligned selftests: copy_unaligned [FAIL] The MAGIC_SKIP_RETURN_VALUE value assigned to rc variable is retained till the program exit which causes the test to be marked as failed. This patch resets the value before returning to the main() routine. With this patch the test o/p is as follows: test: test_copy_unaligned tags: git_version:unknown [SKIP] Test skipped on line 26 skip: test_copy_unaligned selftests: copy_unaligned [PASS] Signed-off-by: Sachin Sant Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/harness.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 248a820048df..66d31de60b9a 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -114,9 +114,11 @@ int test_harness(int (test_function)(void), char *name) rc = run_test(test_function, name); - if (rc == MAGIC_SKIP_RETURN_VALUE) + if (rc == MAGIC_SKIP_RETURN_VALUE) { test_skip(name); - else + /* so that skipped test is not marked as failed */ + rc = 0; + } else test_finish(name, rc); return rc; From 4dc831aa88132f835cefe876aa0206977c4d7710 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2016 13:46:20 +1100 Subject: [PATCH 0507/1299] powerpc: Fix compiling a BE kernel with a powerpc64le toolchain GCC can compile with either endian, but the default ABI version is set based on the default endianness of the toolchain. Alan Modra says: you need both -mbig and -mabi=elfv1 to make a powerpc64le gcc generate powerpc64 code The opposite is true for powerpc64 when generating -mlittle it requires -mabi=elfv2 to generate v2 ABI, which we were already doing. This change adds ABI annotations together with endianness for all cases, LE and BE. This fixes the case of building a BE kernel with a toolchain that is LE by default. Signed-off-by: Nicholas Piggin Tested-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 31286fa7873c..19b0d1a81959 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -72,8 +72,15 @@ GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +ifdef CONFIG_PPC64 +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc) +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 +endif + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) ifneq ($(cc-name),clang) cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align endif @@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) +AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) From 3fb66a70a4ae886445743354e4b60e54058bb3ff Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Thu, 16 Feb 2017 09:11:29 -0600 Subject: [PATCH 0508/1299] powerpc/booke: Fix boot crash due to null hugepd On 32-bit book-e machines, hugepd_ok() no longer takes into account null hugepd values, causing this crash at boot: Unable to handle kernel paging request for data at address 0x80000000 ... NIP [c0018378] follow_huge_addr+0x38/0xf0 LR [c001836c] follow_huge_addr+0x2c/0xf0 Call Trace: follow_huge_addr+0x2c/0xf0 (unreliable) follow_page_mask+0x40/0x3e0 __get_user_pages+0xc8/0x450 get_user_pages_remote+0x8c/0x250 copy_strings+0x110/0x390 copy_strings_kernel+0x2c/0x50 do_execveat_common+0x478/0x630 do_execve+0x2c/0x40 try_to_run_init_process+0x18/0x60 kernel_init+0xbc/0x110 ret_from_kernel_thread+0x5c/0x64 This impacts all nxp (ex-freescale) 32-bit booke platforms. This was caused by the change of hugepd_t.pd from signed to unsigned, and the update to the nohash version of hugepd_ok(). Previously hugepd_ok() could exclude all non-huge and NULL pgds using > 0, whereas now we need to explicitly check that the value is not zero and also that PD_HUGE is *clear*. This isn't protected by the pgd_none() check in __find_linux_pte_or_hugepte() because on 32-bit we use pgtable-nopud.h, which causes the pgd_none() check to be always false. Fixes: 20717e1ff526 ("powerpc/mm: Fix little-endian 4K hugetlb") Cc: stable@vger.kernel.org # v4.7+ Reported-by: Madalin-Cristian Bucur Signed-off-by: Laurentiu Tudor [mpe: Flesh out change log details.] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 0cd8a3852763..e5805ad78e12 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -230,7 +230,7 @@ static inline int hugepd_ok(hugepd_t hpd) return ((hpd_val(hpd) & 0x4) != 0); #else /* We clear the top bit to indicate hugepd */ - return ((hpd_val(hpd) & PD_HUGE) == 0); + return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0); #endif } From 8352aea3c3f44ea9065b77d3baedc58921710d49 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Mar 2017 09:00:56 +0000 Subject: [PATCH 0509/1299] drm/i915: Differentiate between hangcheck waiting for timer or scheduler Check timer_pending() as well as work_pending() to see if the timer for the hangcheck has already expired and the work is pending execution on some list somewhere. v2: Use a more compact if-chain Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170303090056.19973-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_debugfs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 15a76096fbb0..70acbbf50c75 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1354,12 +1354,14 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_runtime_pm_put(dev_priv); - if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) { - seq_printf(m, "Hangcheck active, fires in %dms\n", + if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer)) + seq_printf(m, "Hangcheck active, timer fires in %dms\n", jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires - jiffies)); - } else - seq_printf(m, "Hangcheck inactive\n"); + else if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) + seq_puts(m, "Hangcheck active, work pending\n"); + else + seq_puts(m, "Hangcheck inactive\n"); seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake)); From 0265634eb9e04a16ae99941c320718c38eb865e0 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Sat, 25 Feb 2017 08:28:16 -0300 Subject: [PATCH 0510/1299] [media] serial_ir: ensure we're ready to receive interrupts When the interrupt requested with devm_request_irq(), serial_ir.rcdev is still null so will cause null deference if the irq handler is called early on. Also ensure that timeout_timer is setup. Link: http://lkml.kernel.org/r/CA+55aFxsh2uF8gi5sN_guY3Z+tiLv7LpJYKBw+y8vqLzp+TsnQ@mail.gmail.com [mchehab@s-opensource.com: moved serial_ir_probe() back to its original place] Cc: # 4.10 Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/serial_ir.c | 123 ++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 61 deletions(-) diff --git a/drivers/media/rc/serial_ir.c b/drivers/media/rc/serial_ir.c index 923fb2299553..41b54e40176c 100644 --- a/drivers/media/rc/serial_ir.c +++ b/drivers/media/rc/serial_ir.c @@ -487,10 +487,69 @@ static void serial_ir_timeout(unsigned long arg) ir_raw_event_handle(serial_ir.rcdev); } +/* Needed by serial_ir_probe() */ +static int serial_ir_tx(struct rc_dev *dev, unsigned int *txbuf, + unsigned int count); +static int serial_ir_tx_duty_cycle(struct rc_dev *dev, u32 cycle); +static int serial_ir_tx_carrier(struct rc_dev *dev, u32 carrier); +static int serial_ir_open(struct rc_dev *rcdev); +static void serial_ir_close(struct rc_dev *rcdev); + static int serial_ir_probe(struct platform_device *dev) { + struct rc_dev *rcdev; int i, nlow, nhigh, result; + rcdev = devm_rc_allocate_device(&dev->dev, RC_DRIVER_IR_RAW); + if (!rcdev) + return -ENOMEM; + + if (hardware[type].send_pulse && hardware[type].send_space) + rcdev->tx_ir = serial_ir_tx; + if (hardware[type].set_send_carrier) + rcdev->s_tx_carrier = serial_ir_tx_carrier; + if (hardware[type].set_duty_cycle) + rcdev->s_tx_duty_cycle = serial_ir_tx_duty_cycle; + + switch (type) { + case IR_HOMEBREW: + rcdev->input_name = "Serial IR type home-brew"; + break; + case IR_IRDEO: + rcdev->input_name = "Serial IR type IRdeo"; + break; + case IR_IRDEO_REMOTE: + rcdev->input_name = "Serial IR type IRdeo remote"; + break; + case IR_ANIMAX: + rcdev->input_name = "Serial IR type AnimaX"; + break; + case IR_IGOR: + rcdev->input_name = "Serial IR type IgorPlug"; + break; + } + + rcdev->input_phys = KBUILD_MODNAME "/input0"; + rcdev->input_id.bustype = BUS_HOST; + rcdev->input_id.vendor = 0x0001; + rcdev->input_id.product = 0x0001; + rcdev->input_id.version = 0x0100; + rcdev->open = serial_ir_open; + rcdev->close = serial_ir_close; + rcdev->dev.parent = &serial_ir.pdev->dev; + rcdev->allowed_protocols = RC_BIT_ALL_IR_DECODER; + rcdev->driver_name = KBUILD_MODNAME; + rcdev->map_name = RC_MAP_RC6_MCE; + rcdev->min_timeout = 1; + rcdev->timeout = IR_DEFAULT_TIMEOUT; + rcdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT; + rcdev->rx_resolution = 250000; + + serial_ir.rcdev = rcdev; + + setup_timer(&serial_ir.timeout_timer, serial_ir_timeout, + (unsigned long)&serial_ir); + result = devm_request_irq(&dev->dev, irq, serial_ir_irq_handler, share_irq ? IRQF_SHARED : 0, KBUILD_MODNAME, &hardware); @@ -516,9 +575,6 @@ static int serial_ir_probe(struct platform_device *dev) return -EBUSY; } - setup_timer(&serial_ir.timeout_timer, serial_ir_timeout, - (unsigned long)&serial_ir); - result = hardware_init_port(); if (result < 0) return result; @@ -552,7 +608,8 @@ static int serial_ir_probe(struct platform_device *dev) sense ? "low" : "high"); dev_dbg(&dev->dev, "Interrupt %d, port %04x obtained\n", irq, io); - return 0; + + return devm_rc_register_device(&dev->dev, rcdev); } static int serial_ir_open(struct rc_dev *rcdev) @@ -723,7 +780,6 @@ static void serial_ir_exit(void) static int __init serial_ir_init_module(void) { - struct rc_dev *rcdev; int result; switch (type) { @@ -754,63 +810,9 @@ static int __init serial_ir_init_module(void) sense = !!sense; result = serial_ir_init(); - if (result) - return result; - - rcdev = devm_rc_allocate_device(&serial_ir.pdev->dev, RC_DRIVER_IR_RAW); - if (!rcdev) { - result = -ENOMEM; - goto serial_cleanup; - } - - if (hardware[type].send_pulse && hardware[type].send_space) - rcdev->tx_ir = serial_ir_tx; - if (hardware[type].set_send_carrier) - rcdev->s_tx_carrier = serial_ir_tx_carrier; - if (hardware[type].set_duty_cycle) - rcdev->s_tx_duty_cycle = serial_ir_tx_duty_cycle; - - switch (type) { - case IR_HOMEBREW: - rcdev->input_name = "Serial IR type home-brew"; - break; - case IR_IRDEO: - rcdev->input_name = "Serial IR type IRdeo"; - break; - case IR_IRDEO_REMOTE: - rcdev->input_name = "Serial IR type IRdeo remote"; - break; - case IR_ANIMAX: - rcdev->input_name = "Serial IR type AnimaX"; - break; - case IR_IGOR: - rcdev->input_name = "Serial IR type IgorPlug"; - break; - } - - rcdev->input_phys = KBUILD_MODNAME "/input0"; - rcdev->input_id.bustype = BUS_HOST; - rcdev->input_id.vendor = 0x0001; - rcdev->input_id.product = 0x0001; - rcdev->input_id.version = 0x0100; - rcdev->open = serial_ir_open; - rcdev->close = serial_ir_close; - rcdev->dev.parent = &serial_ir.pdev->dev; - rcdev->allowed_protocols = RC_BIT_ALL_IR_DECODER; - rcdev->driver_name = KBUILD_MODNAME; - rcdev->map_name = RC_MAP_RC6_MCE; - rcdev->min_timeout = 1; - rcdev->timeout = IR_DEFAULT_TIMEOUT; - rcdev->max_timeout = 10 * IR_DEFAULT_TIMEOUT; - rcdev->rx_resolution = 250000; - - serial_ir.rcdev = rcdev; - - result = rc_register_device(rcdev); - if (!result) return 0; -serial_cleanup: + serial_ir_exit(); return result; } @@ -818,7 +820,6 @@ static int __init serial_ir_init_module(void) static void __exit serial_ir_exit_module(void) { del_timer_sync(&serial_ir.timeout_timer); - rc_unregister_device(serial_ir.rcdev); serial_ir_exit(); } From 2e1e4949f9dfb053122785cd73540bb1e61f768b Mon Sep 17 00:00:00 2001 From: Qi Hou Date: Fri, 3 Mar 2017 15:57:11 +0800 Subject: [PATCH 0511/1299] i2c: add missing of_node_put in i2c_mux_del_adapters Refcount of of_node is increased with of_node_get() in i2c_mux_add_adapter(). It must be decreased with of_node_put() in i2c_mux_del_adapters(). Cc: stable@vger.kernel.org Signed-off-by: Qi Hou Reviewed-by: Zhang Xiao Signed-off-by: Peter Rosin --- drivers/i2c/i2c-mux.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c index 83768e85a919..2178266bca79 100644 --- a/drivers/i2c/i2c-mux.c +++ b/drivers/i2c/i2c-mux.c @@ -429,6 +429,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) while (muxc->num_adapters) { struct i2c_adapter *adap = muxc->adapter[--muxc->num_adapters]; struct i2c_mux_priv *priv = adap->algo_data; + struct device_node *np = adap->dev.of_node; muxc->adapter[muxc->num_adapters] = NULL; @@ -438,6 +439,7 @@ void i2c_mux_del_adapters(struct i2c_mux_core *muxc) sysfs_remove_link(&priv->adap.dev.kobj, "mux_device"); i2c_del_adapter(adap); + of_node_put(np); kfree(priv); } } From db5b15b74ed9a5c04bb808d18ffa2c773f5c18c0 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Mon, 13 Feb 2017 10:35:44 -0200 Subject: [PATCH 0512/1299] [media] lirc: fix dead lock between open and wakeup_filter The locking in lirc needs improvement, but for now just fix this potential deadlock. ====================================================== [ INFO: possible circular locking dependency detected ] 4.10.0-rc1+ #1 Not tainted ------------------------------------------------------- bash/2502 is trying to acquire lock: (ir_raw_handler_lock){+.+.+.}, at: [] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] but task is already holding lock: (&dev->lock){+.+.+.}, at: [] store_filter+0x9f/0x240 [rc_core] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&dev->lock){+.+.+.}: [] lock_acquire+0xfd/0x200 [] mutex_lock_nested+0x77/0x6d0 [] rc_open+0x2a/0x80 [rc_core] [] lirc_dev_fop_open+0xda/0x1e0 [lirc_dev] [] chrdev_open+0xb0/0x210 [] do_dentry_open+0x20a/0x2f0 [] vfs_open+0x4c/0x80 [] path_openat+0x5bc/0xc00 [] do_filp_open+0x91/0x100 [] do_sys_open+0x130/0x220 [] SyS_open+0x1e/0x20 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 -> #1 (lirc_dev_lock){+.+.+.}: [] lock_acquire+0xfd/0x200 [] mutex_lock_nested+0x77/0x6d0 [] lirc_register_driver+0x67/0x59b [lirc_dev] [] ir_lirc_register+0x1f4/0x260 [ir_lirc_codec] [] ir_raw_handler_register+0x7c/0xb0 [rc_core] [] 0xffffffffc0398010 [] do_one_initcall+0x52/0x1b0 [] do_init_module+0x5f/0x1fa [] load_module+0x2675/0x2b00 [] SYSC_finit_module+0xdf/0x110 [] SyS_finit_module+0xe/0x10 [] do_syscall_64+0x6c/0x1f0 [] return_from_SYSCALL_64+0x0/0x7a -> #0 (ir_raw_handler_lock){+.+.+.}: [] __lock_acquire+0x10f7/0x1290 [] lock_acquire+0xfd/0x200 [] mutex_lock_nested+0x77/0x6d0 [] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] [] loop_set_wakeup_filter+0x62/0xbd [rc_loopback] [] store_filter+0x1aa/0x240 [rc_core] [] dev_attr_store+0x18/0x30 [] sysfs_kf_write+0x45/0x60 [] kernfs_fop_write+0x155/0x1e0 [] __vfs_write+0x37/0x160 [] vfs_write+0xc8/0x1e0 [] SyS_write+0x58/0xc0 [] entry_SYSCALL_64_fastpath+0x1f/0xc2 other info that might help us debug this: Chain exists of: ir_raw_handler_lock --> lirc_dev_lock --> &dev->lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&dev->lock); lock(lirc_dev_lock); lock(&dev->lock); lock(ir_raw_handler_lock); *** DEADLOCK *** 4 locks held by bash/2502: #0: (sb_writers#4){.+.+.+}, at: [] vfs_write+0x195/0x1e0 #1: (&of->mutex){+.+.+.}, at: [] kernfs_fop_write+0x11f/0x1e0 #2: (s_active#215){.+.+.+}, at: [] kernfs_fop_write+0x128/0x1e0 #3: (&dev->lock){+.+.+.}, at: [] store_filter+0x9f/0x240 [rc_core] stack backtrace: CPU: 3 PID: 2502 Comm: bash Not tainted 4.10.0-rc1+ #1 Hardware name: /DG45ID, BIOS IDG4510H.86A.0135.2011.0225.1100 02/25/2011 Call Trace: dump_stack+0x86/0xc3 print_circular_bug+0x1be/0x210 __lock_acquire+0x10f7/0x1290 lock_acquire+0xfd/0x200 ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] mutex_lock_nested+0x77/0x6d0 ? ir_raw_encode_scancode+0x3e/0xb0 [rc_core] ? loop_set_wakeup_filter+0x44/0xbd [rc_loopback] ir_raw_encode_scancode+0x3e/0xb0 [rc_core] loop_set_wakeup_filter+0x62/0xbd [rc_loopback] ? loop_set_tx_duty_cycle+0x70/0x70 [rc_loopback] store_filter+0x1aa/0x240 [rc_core] dev_attr_store+0x18/0x30 sysfs_kf_write+0x45/0x60 kernfs_fop_write+0x155/0x1e0 __vfs_write+0x37/0x160 ? rcu_read_lock_sched_held+0x4a/0x80 ? rcu_sync_lockdep_assert+0x2f/0x60 ? __sb_start_write+0x10c/0x220 ? vfs_write+0x195/0x1e0 ? security_file_permission+0x3b/0xc0 vfs_write+0xc8/0x1e0 SyS_write+0x58/0xc0 entry_SYSCALL_64_fastpath+0x1f/0xc2 Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/lirc_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index a54ca531d8ef..4630f3e67538 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -436,6 +436,8 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) return -ERESTARTSYS; ir = irctls[iminor(inode)]; + mutex_unlock(&lirc_dev_lock); + if (!ir) { retval = -ENODEV; goto error; @@ -476,8 +478,6 @@ int lirc_dev_fop_open(struct inode *inode, struct file *file) } error: - mutex_unlock(&lirc_dev_lock); - nonseekable_open(inode, file); return retval; From c1305a40722f3e02f3d971acc22f5991aff239a6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 12 Feb 2017 15:01:22 -0200 Subject: [PATCH 0513/1299] [media] rc: nuvoton: fix deadlock in nvt_write_wakeup_codes nvt_write_wakeup_codes acquires the same lock as the ISR but doesn't disable interrupts on the local CPU. This caused the following deadlock. Fix this by using spin_lock_irqsave. [ 432.362008] ================================ [ 432.362074] WARNING: inconsistent lock state [ 432.362144] 4.10.0-rc7-next-20170210 #1 Not tainted [ 432.362219] -------------------------------- [ 432.362286] inconsistent {HARDIRQ-ON-W} -> {IN-HARDIRQ-W} usage. [ 432.362379] swapper/0/0 [HC1[1]:SC0[0]:HE0:SE1] takes: [ 432.362457] (&(&nvt->lock)->rlock){?.+...}, at: [] nvt_cir_isr+0x2d/0x520 [nuvoton_cir] [ 432.362611] {HARDIRQ-ON-W} state was registered at: [ 432.362686] [ 432.362698] [] __lock_acquire+0x5dc/0x1260 [ 432.362812] [ 432.362817] [] lock_acquire+0xe9/0x1d0 [ 432.362927] [ 432.362934] [] _raw_spin_lock+0x33/0x50 [ 432.363045] [ 432.363051] [] nvt_write_wakeup_codes.isra.12+0x22/0xe0 [nuvoton_cir] [ 432.363193] [ 432.363199] [] wakeup_data_store+0xdf/0xf0 [nuvoton_cir] [ 432.363327] [ 432.363333] [] dev_attr_store+0x13/0x20 [ 432.363441] [ 432.363449] [] sysfs_kf_write+0x40/0x50 [ 432.363558] [ 432.363564] [] kernfs_fop_write+0x150/0x1e0 [ 432.363676] [ 432.363685] [] __vfs_write+0x23/0x120 [ 432.363791] [ 432.363798] [] vfs_write+0xc3/0x1e0 [ 432.363902] [ 432.363909] [] SyS_write+0x44/0xa0 [ 432.364012] [ 432.364021] [] do_syscall_64+0x57/0x140 [ 432.364129] [ 432.364135] [] return_from_SYSCALL_64+0x0/0x7a [ 432.364252] irq event stamp: 415118 [ 432.364313] hardirqs last enabled at (415115): [] cpuidle_enter_state+0x11b/0x370 [ 432.364445] hardirqs last disabled at (415116): [] common_interrupt+0x8b/0x90 [ 432.364573] softirqs last enabled at (415118): [] _local_bh_enable+0x1c/0x50 [ 432.364699] softirqs last disabled at (415117): [] irq_enter+0x43/0x60 [ 432.364814] other info that might help us debug this: [ 432.364909] Possible unsafe locking scenario: [ 432.367821] CPU0 [ 432.370645] ---- [ 432.373432] lock(&(&nvt->lock)->rlock); [ 432.376228] [ 432.378982] lock(&(&nvt->lock)->rlock); [ 432.381757] *** DEADLOCK *** [ 432.389888] no locks held by swapper/0/0. [ 432.392574] stack backtrace: [ 432.397774] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.10.0-rc7-next-20170210 #1 [ 432.400375] Hardware name: ZOTAC ZBOX-CI321NANO/ZBOX-CI321NANO, BIOS B246P105 06/01/2015 [ 432.403023] Call Trace: [ 432.405636] [ 432.408208] dump_stack+0x68/0x93 [ 432.410775] print_usage_bug+0x1dd/0x1f0 [ 432.413334] mark_lock+0x559/0x5c0 [ 432.415871] ? print_shortest_lock_dependencies+0x1a0/0x1a0 [ 432.418431] __lock_acquire+0x6b1/0x1260 [ 432.420941] lock_acquire+0xe9/0x1d0 [ 432.423396] ? nvt_cir_isr+0x2d/0x520 [nuvoton_cir] [ 432.425844] _raw_spin_lock+0x33/0x50 [ 432.428252] ? nvt_cir_isr+0x2d/0x520 [nuvoton_cir] [ 432.430670] nvt_cir_isr+0x2d/0x520 [nuvoton_cir] [ 432.433085] __handle_irq_event_percpu+0x43/0x330 [ 432.435493] handle_irq_event_percpu+0x1e/0x50 [ 432.437884] handle_irq_event+0x34/0x60 [ 432.440236] handle_edge_irq+0x6a/0x150 [ 432.442561] handle_irq+0x15/0x20 [ 432.444854] do_IRQ+0x57/0x110 [ 432.447115] common_interrupt+0x90/0x90 [ 432.449380] RIP: 0010:cpuidle_enter_state+0x120/0x370 [ 432.451653] RSP: 0018:ffffffff81c03dd8 EFLAGS: 00000206 ORIG_RAX: ffffffffffffffcc [ 432.453994] RAX: ffffffff81c14500 RBX: 0000000000000008 RCX: 00000064aac6f2d2 [ 432.456349] RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffffffff81c14500 [ 432.458704] RBP: ffffffff81c03e18 R08: cccccccccccccccd R09: 0000000000000018 [ 432.461072] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880100a21260 [ 432.463450] R13: ffffffff81c7e6f8 R14: 0000000000000008 R15: ffffffff81c7e6e0 [ 432.465819] [ 432.468104] ? cpuidle_enter_state+0x11b/0x370 [ 432.470413] cpuidle_enter+0x12/0x20 [ 432.472698] call_cpuidle+0x1e/0x40 [ 432.474967] do_idle+0xe3/0x1c0 [ 432.477172] cpu_startup_entry+0x18/0x20 [ 432.479376] rest_init+0x130/0x140 [ 432.481565] start_kernel+0x3cc/0x3d9 [ 432.483750] x86_64_start_reservations+0x2a/0x2c [ 432.485980] x86_64_start_kernel+0x178/0x18b [ 432.488222] start_cpu+0x14/0x14 [ 432.490453] ? start_cpu+0x14/0x14 Fixes: 97c129747af5 "[media] rc: nuvoton-cir: Add support wakeup via sysfs filter callback" Signed-off-by: Heiner Kallweit Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/nuvoton-cir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/nuvoton-cir.c b/drivers/media/rc/nuvoton-cir.c index b109f8246b96..ec4b25bd2ec2 100644 --- a/drivers/media/rc/nuvoton-cir.c +++ b/drivers/media/rc/nuvoton-cir.c @@ -176,12 +176,13 @@ static void nvt_write_wakeup_codes(struct rc_dev *dev, { u8 tolerance, config; struct nvt_dev *nvt = dev->priv; + unsigned long flags; int i; /* hardcode the tolerance to 10% */ tolerance = DIV_ROUND_UP(count, 10); - spin_lock(&nvt->lock); + spin_lock_irqsave(&nvt->lock, flags); nvt_clear_cir_wake_fifo(nvt); nvt_cir_wake_reg_write(nvt, count, CIR_WAKE_FIFO_CMP_DEEP); @@ -203,7 +204,7 @@ static void nvt_write_wakeup_codes(struct rc_dev *dev, nvt_cir_wake_reg_write(nvt, config, CIR_WAKE_IRCON); - spin_unlock(&nvt->lock); + spin_unlock_irqrestore(&nvt->lock, flags); } static ssize_t wakeup_data_show(struct device *dev, From 413808685dd7c9b54bbc5af79da2eaddd0fc3cb2 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Wed, 22 Feb 2017 18:48:01 -0300 Subject: [PATCH 0514/1299] [media] rc: raw decoder for keymap protocol is not loaded on register When the protocol is set via the sysfs protocols attribute, the decoder is loaded. However, when it is not when a device is first plugged in or registered. Fixes: acc1c3c ("[media] media: rc: load decoder modules on-demand") Signed-off-by: Sean Young Cc: # v4.5+ Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/rc-main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 2424946740e6..a158b321b40a 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1663,6 +1663,7 @@ static int rc_setup_rx_device(struct rc_dev *dev) { int rc; struct rc_map *rc_map; + u64 rc_type; if (!dev->map_name) return -EINVAL; @@ -1677,15 +1678,18 @@ static int rc_setup_rx_device(struct rc_dev *dev) if (rc) return rc; - if (dev->change_protocol) { - u64 rc_type = (1ll << rc_map->rc_type); + rc_type = BIT_ULL(rc_map->rc_type); + if (dev->change_protocol) { rc = dev->change_protocol(dev, &rc_type); if (rc < 0) goto out_table; dev->enabled_protocols = rc_type; } + if (dev->driver_type == RC_DRIVER_IR_RAW) + ir_raw_load_modules(&rc_type); + set_bit(EV_KEY, dev->input_dev->evbit); set_bit(EV_REP, dev->input_dev->evbit); set_bit(EV_MSC, dev->input_dev->evbit); From 5df62771c556e4ec9d7ecea1f070ff6da4bce151 Mon Sep 17 00:00:00 2001 From: Sean Young Date: Thu, 23 Feb 2017 06:11:21 -0300 Subject: [PATCH 0515/1299] [media] rc: protocol is not set on register for raw IR devices ir_raw_event_register() sets up change_protocol(), and without that set, rc_setup_rx_device() does not set the protocol for the device on register. The standard udev rules run ir-keytable, which writes to the protocols file again, which hides this problem. Fixes: 7ff2c2b ("[media] rc-main: split setup and unregister functions") Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- drivers/media/rc/rc-main.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index a158b321b40a..d84533699668 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -1781,12 +1781,6 @@ int rc_register_device(struct rc_dev *dev) dev->input_name ?: "Unspecified device", path ?: "N/A"); kfree(path); - if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { - rc = rc_setup_rx_device(dev); - if (rc) - goto out_dev; - } - if (dev->driver_type == RC_DRIVER_IR_RAW || dev->driver_type == RC_DRIVER_IR_RAW_TX) { if (!raw_init) { @@ -1795,7 +1789,13 @@ int rc_register_device(struct rc_dev *dev) } rc = ir_raw_event_register(dev); if (rc < 0) - goto out_rx; + goto out_dev; + } + + if (dev->driver_type != RC_DRIVER_IR_RAW_TX) { + rc = rc_setup_rx_device(dev); + if (rc) + goto out_raw; } /* Allow the RC sysfs nodes to be accessible */ @@ -1807,8 +1807,8 @@ int rc_register_device(struct rc_dev *dev) return 0; -out_rx: - rc_free_rx_device(dev); +out_raw: + ir_raw_event_unregister(dev); out_dev: device_del(&dev->dev); out_unlock: From 4cbe4b2b175c9dcfffa2311ea9dddd34be1a4f41 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:51 +0200 Subject: [PATCH 0516/1299] drm/i915: Pass intel_crtc to fdi_link_train() hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The implementation of the fdi_link_train() hooks need an intel_crtc so just pass that instead of the generic crtc type. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-2-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_ddi.c | 13 +++--- drivers/gpu/drm/i915/intel_display.c | 63 +++++++++++++--------------- drivers/gpu/drm/i915/intel_drv.h | 2 +- 4 files changed, 37 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 430c40dd1622..b25c1df05145 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -669,7 +669,7 @@ struct drm_i915_display_funcs { struct intel_encoder *encoder, const struct drm_display_mode *adjusted_mode); void (*audio_codec_disable)(struct intel_encoder *encoder); - void (*fdi_link_train)(struct drm_crtc *crtc); + void (*fdi_link_train)(struct intel_crtc *crtc); void (*init_clock_gating)(struct drm_i915_private *dev_priv); int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index a7c08d7027fa..111f6605bf67 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -674,15 +674,14 @@ static uint32_t hsw_pll_to_ddi_pll_sel(struct intel_shared_dpll *pll) * DDI A (which is used for eDP) */ -void hsw_fdi_link_train(struct drm_crtc *crtc) +void hsw_fdi_link_train(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; u32 temp, i, rx_ctl_val, ddi_pll_sel; - for_each_encoder_on_crtc(dev, crtc, encoder) { + for_each_encoder_on_crtc(dev, &crtc->base, encoder) { WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG); intel_prepare_dp_ddi_buffers(encoder); } @@ -701,7 +700,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) /* Enable the PCH Receiver FDI PLL */ rx_ctl_val = dev_priv->fdi_rx_config | FDI_RX_ENHANCE_FRAME_ENABLE | FDI_RX_PLL_ENABLE | - FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); I915_WRITE(FDI_RX_CTL(PIPE_A), rx_ctl_val); POSTING_READ(FDI_RX_CTL(PIPE_A)); udelay(220); @@ -711,7 +710,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) I915_WRITE(FDI_RX_CTL(PIPE_A), rx_ctl_val); /* Configure Port Clock Select */ - ddi_pll_sel = hsw_pll_to_ddi_pll_sel(intel_crtc->config->shared_dpll); + ddi_pll_sel = hsw_pll_to_ddi_pll_sel(crtc->config->shared_dpll); I915_WRITE(PORT_CLK_SEL(PORT_E), ddi_pll_sel); WARN_ON(ddi_pll_sel != PORT_CLK_SEL_SPLL); @@ -731,7 +730,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) * port reversal bit */ I915_WRITE(DDI_BUF_CTL(PORT_E), DDI_BUF_CTL_ENABLE | - ((intel_crtc->config->fdi_lanes - 1) << 1) | + ((crtc->config->fdi_lanes - 1) << 1) | DDI_BUF_TRANS_SELECT(i / 2)); POSTING_READ(DDI_BUF_CTL(PORT_E)); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5edf1ff8b688..f65a37dce954 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3646,12 +3646,11 @@ static void intel_update_pipe_config(struct intel_crtc *crtc, } } -static void intel_fdi_normal_train(struct drm_crtc *crtc) +static void intel_fdi_normal_train(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -3689,12 +3688,11 @@ static void intel_fdi_normal_train(struct drm_crtc *crtc) } /* The FDI link training functions for ILK/Ibexpeak. */ -static void ironlake_fdi_link_train(struct drm_crtc *crtc) +static void ironlake_fdi_link_train(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp, tries; @@ -3715,7 +3713,7 @@ static void ironlake_fdi_link_train(struct drm_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; I915_WRITE(reg, temp | FDI_TX_ENABLE); @@ -3790,12 +3788,11 @@ static const int snb_b_fdi_train_param[] = { }; /* The FDI link training functions for SNB/Cougarpoint. */ -static void gen6_fdi_link_train(struct drm_crtc *crtc) +static void gen6_fdi_link_train(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, retry; @@ -3814,7 +3811,7 @@ static void gen6_fdi_link_train(struct drm_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; @@ -3923,12 +3920,11 @@ static void gen6_fdi_link_train(struct drm_crtc *crtc) } /* Manual link training for Ivy Bridge A0 parts */ -static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) +static void ivb_manual_fdi_link_train(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, j; @@ -3966,7 +3962,7 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); temp |= FDI_LINK_TRAIN_PATTERN_1_IVB; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; temp |= snb_b_fdi_train_param[j/2]; @@ -4437,12 +4433,12 @@ static void ivybridge_update_fdi_bc_bifurcation(struct intel_crtc *intel_crtc) /* Return which DP Port should be selected for Transcoder DP control */ static enum port -intel_trans_dp_port_sel(struct drm_crtc *crtc) +intel_trans_dp_port_sel(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct intel_encoder *encoder; - for_each_encoder_on_crtc(dev, crtc, encoder) { + for_each_encoder_on_crtc(dev, &crtc->base, encoder) { if (encoder->type == INTEL_OUTPUT_DP || encoder->type == INTEL_OUTPUT_EDP) return enc_to_dig_port(&encoder->base)->port; @@ -4459,18 +4455,17 @@ intel_trans_dp_port_sel(struct drm_crtc *crtc) * - DP transcoding bits * - transcoder */ -static void ironlake_pch_enable(struct drm_crtc *crtc) +static void ironlake_pch_enable(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; u32 temp; assert_pch_transcoder_disabled(dev_priv, pipe); if (IS_IVYBRIDGE(dev_priv)) - ivybridge_update_fdi_bc_bifurcation(intel_crtc); + ivybridge_update_fdi_bc_bifurcation(crtc); /* Write the TU size bits before fdi link training, so that error * detection works. */ @@ -4488,7 +4483,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) temp = I915_READ(PCH_DPLL_SEL); temp |= TRANS_DPLL_ENABLE(pipe); sel = TRANS_DPLLB_SEL(pipe); - if (intel_crtc->config->shared_dpll == + if (crtc->config->shared_dpll == intel_get_shared_dpll_by_id(dev_priv, DPLL_ID_PCH_PLL_B)) temp |= sel; else @@ -4503,19 +4498,19 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) * Note that enable_shared_dpll tries to do the right thing, but * get_shared_dpll unconditionally resets the pll - we need that to have * the right LVDS enable sequence. */ - intel_enable_shared_dpll(intel_crtc); + intel_enable_shared_dpll(crtc); /* set transcoder timing, panel must allow it */ assert_panel_unlocked(dev_priv, pipe); - ironlake_pch_transcoder_set_timings(intel_crtc, pipe); + ironlake_pch_transcoder_set_timings(crtc, pipe); intel_fdi_normal_train(crtc); /* For PCH DP, enable TRANS_DP_CTL */ if (HAS_PCH_CPT(dev_priv) && - intel_crtc_has_dp_encoder(intel_crtc->config)) { + intel_crtc_has_dp_encoder(crtc->config)) { const struct drm_display_mode *adjusted_mode = - &intel_crtc->config->base.adjusted_mode; + &crtc->config->base.adjusted_mode; u32 bpc = (I915_READ(PIPECONF(pipe)) & PIPECONF_BPC_MASK) >> 5; i915_reg_t reg = TRANS_DP_CTL(pipe); temp = I915_READ(reg); @@ -5289,7 +5284,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(intel_crtc); if (intel_crtc->config->has_pch_encoder) - ironlake_pch_enable(crtc); + ironlake_pch_enable(intel_crtc); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5371,7 +5366,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_pre_enable(crtc, pipe_config, old_state); if (intel_crtc->config->has_pch_encoder) - dev_priv->display.fdi_link_train(crtc); + dev_priv->display.fdi_link_train(intel_crtc); if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_enable_pipe_clock(intel_crtc); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index eadc72df5a17..9caf37170335 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1225,7 +1225,7 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state); void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder); -void hsw_fdi_link_train(struct drm_crtc *crtc); +void hsw_fdi_link_train(struct intel_crtc *crtc); void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); From 0dcdc382c3823e959e9253f2f0c4e436e7ef9a32 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:52 +0200 Subject: [PATCH 0517/1299] drm/i915: Pass intel_crtc to intel_lpt_pch_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function intel_lpt_pch_enable() needs an intel_crtc so pass that instead of the generic crtc type. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-3-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f65a37dce954..954210b974c1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4249,10 +4249,10 @@ void lpt_disable_iclkip(struct drm_i915_private *dev_priv) } /* Program iCLKIP clock to the desired frequency */ -static void lpt_program_iclkip(struct drm_crtc *crtc) +static void lpt_program_iclkip(struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - int clock = to_intel_crtc(crtc)->config->base.adjusted_mode.crtc_clock; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + int clock = crtc->config->base.adjusted_mode.crtc_clock; u32 divsel, phaseinc, auxdiv, phasedir = 0; u32 temp; @@ -4545,19 +4545,17 @@ static void ironlake_pch_enable(struct intel_crtc *crtc) ironlake_enable_pch_transcoder(dev_priv, pipe); } -static void lpt_pch_enable(struct drm_crtc *crtc) +static void lpt_pch_enable(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; assert_pch_transcoder_disabled(dev_priv, TRANSCODER_A); lpt_program_iclkip(crtc); /* Set transcoder timing. */ - ironlake_pch_transcoder_set_timings(intel_crtc, PIPE_A); + ironlake_pch_transcoder_set_timings(crtc, PIPE_A); lpt_enable_pch_transcoder(dev_priv, cpu_transcoder); } @@ -5394,7 +5392,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(intel_crtc); if (intel_crtc->config->has_pch_encoder) - lpt_pch_enable(crtc); + lpt_pch_enable(intel_crtc); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) intel_ddi_set_vc_payload_alloc(crtc, true); From 2ce42273248a8b0fe7a819cb2624896ef1c60fe1 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:53 +0200 Subject: [PATCH 0518/1299] drm/i915: Pass pipe_config to pch_enable() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using crtc->config directly is being removed in favor of passing a pipe_config. Follow the trend and pass pipe_config to pch_enable() functions. v2: s/pipe_config/crtc_state/ (Ville) - constify crtc_state. (Ville) - take crtc from crtc_state. (Ville) Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-4-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 954210b974c1..5640f66f3a60 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4455,8 +4455,9 @@ intel_trans_dp_port_sel(struct intel_crtc *crtc) * - DP transcoding bits * - transcoder */ -static void ironlake_pch_enable(struct intel_crtc *crtc) +static void ironlake_pch_enable(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); int pipe = crtc->pipe; @@ -4483,7 +4484,7 @@ static void ironlake_pch_enable(struct intel_crtc *crtc) temp = I915_READ(PCH_DPLL_SEL); temp |= TRANS_DPLL_ENABLE(pipe); sel = TRANS_DPLLB_SEL(pipe); - if (crtc->config->shared_dpll == + if (crtc_state->shared_dpll == intel_get_shared_dpll_by_id(dev_priv, DPLL_ID_PCH_PLL_B)) temp |= sel; else @@ -4508,9 +4509,9 @@ static void ironlake_pch_enable(struct intel_crtc *crtc) /* For PCH DP, enable TRANS_DP_CTL */ if (HAS_PCH_CPT(dev_priv) && - intel_crtc_has_dp_encoder(crtc->config)) { + intel_crtc_has_dp_encoder(crtc_state)) { const struct drm_display_mode *adjusted_mode = - &crtc->config->base.adjusted_mode; + &crtc_state->base.adjusted_mode; u32 bpc = (I915_READ(PIPECONF(pipe)) & PIPECONF_BPC_MASK) >> 5; i915_reg_t reg = TRANS_DP_CTL(pipe); temp = I915_READ(reg); @@ -4545,10 +4546,11 @@ static void ironlake_pch_enable(struct intel_crtc *crtc) ironlake_enable_pch_transcoder(dev_priv, pipe); } -static void lpt_pch_enable(struct intel_crtc *crtc) +static void lpt_pch_enable(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; assert_pch_transcoder_disabled(dev_priv, TRANSCODER_A); @@ -5282,7 +5284,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(intel_crtc); if (intel_crtc->config->has_pch_encoder) - ironlake_pch_enable(intel_crtc); + ironlake_pch_enable(pipe_config); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5392,7 +5394,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(intel_crtc); if (intel_crtc->config->has_pch_encoder) - lpt_pch_enable(intel_crtc); + lpt_pch_enable(pipe_config); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) intel_ddi_set_vc_payload_alloc(crtc, true); From dc4a109474c6bffacaf754440fa88ff0dd957667 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:54 +0200 Subject: [PATCH 0519/1299] drm/i915: Pass pipe_config to fdi_link_train() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is preferred to pass pipe_config to functions instead of accessing crtc->config directly. Follow suit and pass pipe_config to the fdi link train functions. v2: Add const; s/pipe_config/crtc_state/ (Ville) Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-5-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/intel_ddi.c | 9 +++++---- drivers/gpu/drm/i915/intel_display.c | 19 +++++++++++-------- drivers/gpu/drm/i915/intel_drv.h | 3 ++- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b25c1df05145..07bf2e32ffa7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -669,7 +669,8 @@ struct drm_i915_display_funcs { struct intel_encoder *encoder, const struct drm_display_mode *adjusted_mode); void (*audio_codec_disable)(struct intel_encoder *encoder); - void (*fdi_link_train)(struct intel_crtc *crtc); + void (*fdi_link_train)(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state); void (*init_clock_gating)(struct drm_i915_private *dev_priv); int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 111f6605bf67..a320378681b3 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -674,7 +674,8 @@ static uint32_t hsw_pll_to_ddi_pll_sel(struct intel_shared_dpll *pll) * DDI A (which is used for eDP) */ -void hsw_fdi_link_train(struct intel_crtc *crtc) +void hsw_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -700,7 +701,7 @@ void hsw_fdi_link_train(struct intel_crtc *crtc) /* Enable the PCH Receiver FDI PLL */ rx_ctl_val = dev_priv->fdi_rx_config | FDI_RX_ENHANCE_FRAME_ENABLE | FDI_RX_PLL_ENABLE | - FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); + FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); I915_WRITE(FDI_RX_CTL(PIPE_A), rx_ctl_val); POSTING_READ(FDI_RX_CTL(PIPE_A)); udelay(220); @@ -710,7 +711,7 @@ void hsw_fdi_link_train(struct intel_crtc *crtc) I915_WRITE(FDI_RX_CTL(PIPE_A), rx_ctl_val); /* Configure Port Clock Select */ - ddi_pll_sel = hsw_pll_to_ddi_pll_sel(crtc->config->shared_dpll); + ddi_pll_sel = hsw_pll_to_ddi_pll_sel(crtc_state->shared_dpll); I915_WRITE(PORT_CLK_SEL(PORT_E), ddi_pll_sel); WARN_ON(ddi_pll_sel != PORT_CLK_SEL_SPLL); @@ -730,7 +731,7 @@ void hsw_fdi_link_train(struct intel_crtc *crtc) * port reversal bit */ I915_WRITE(DDI_BUF_CTL(PORT_E), DDI_BUF_CTL_ENABLE | - ((crtc->config->fdi_lanes - 1) << 1) | + ((crtc_state->fdi_lanes - 1) << 1) | DDI_BUF_TRANS_SELECT(i / 2)); POSTING_READ(DDI_BUF_CTL(PORT_E)); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5640f66f3a60..649251c01639 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3688,7 +3688,8 @@ static void intel_fdi_normal_train(struct intel_crtc *crtc) } /* The FDI link training functions for ILK/Ibexpeak. */ -static void ironlake_fdi_link_train(struct intel_crtc *crtc) +static void ironlake_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -3713,7 +3714,7 @@ static void ironlake_fdi_link_train(struct intel_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; I915_WRITE(reg, temp | FDI_TX_ENABLE); @@ -3788,7 +3789,8 @@ static const int snb_b_fdi_train_param[] = { }; /* The FDI link training functions for SNB/Cougarpoint. */ -static void gen6_fdi_link_train(struct intel_crtc *crtc) +static void gen6_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -3811,7 +3813,7 @@ static void gen6_fdi_link_train(struct intel_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; @@ -3920,7 +3922,8 @@ static void gen6_fdi_link_train(struct intel_crtc *crtc) } /* Manual link training for Ivy Bridge A0 parts */ -static void ivb_manual_fdi_link_train(struct intel_crtc *crtc) +static void ivb_manual_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -3962,7 +3965,7 @@ static void ivb_manual_fdi_link_train(struct intel_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); temp |= FDI_LINK_TRAIN_PATTERN_1_IVB; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; temp |= snb_b_fdi_train_param[j/2]; @@ -4474,7 +4477,7 @@ static void ironlake_pch_enable(const struct intel_crtc_state *crtc_state) I915_READ(PIPE_DATA_M1(pipe)) & TU_SIZE_MASK); /* For PCH output, training FDI link */ - dev_priv->display.fdi_link_train(crtc); + dev_priv->display.fdi_link_train(crtc, crtc_state); /* We need to program the right clock selection before writing the pixel * mutliplier into the DPLL. */ @@ -5366,7 +5369,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_pre_enable(crtc, pipe_config, old_state); if (intel_crtc->config->has_pch_encoder) - dev_priv->display.fdi_link_train(intel_crtc); + dev_priv->display.fdi_link_train(intel_crtc, pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_enable_pipe_clock(intel_crtc); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9caf37170335..b3ded830c1b3 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1225,7 +1225,8 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state); void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder); -void hsw_fdi_link_train(struct intel_crtc *crtc); +void hsw_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state); void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); From e9ce1a625fcae9d9dd64a877535c8d10ddfb89a0 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:55 +0200 Subject: [PATCH 0520/1299] drm/i915: Pass intel_crtc to DDI functions called from crtc en/disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass intel_crtc to functions intel_ddi_enable_transcoder_func(), intel_ddi_set_pipe_settings() and intel_ddi_set_vc_payload_alloc(), instead of the generic crtc type. By changing the functions intel_ddi_get_crtc_encoder() so that it receives an intel_crtc parameter, there is no need for the drm_crtc in the callers. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-6-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 62 ++++++++++++---------------- drivers/gpu/drm/i915/intel_display.c | 8 ++-- drivers/gpu/drm/i915/intel_drv.h | 6 +-- 3 files changed, 34 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index a320378681b3..0db526ec2f44 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -818,21 +818,20 @@ void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) } static struct intel_encoder * -intel_ddi_get_crtc_encoder(struct drm_crtc *crtc) +intel_ddi_get_crtc_encoder(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_device *dev = crtc->base.dev; struct intel_encoder *intel_encoder, *ret = NULL; int num_encoders = 0; - for_each_encoder_on_crtc(dev, crtc, intel_encoder) { + for_each_encoder_on_crtc(dev, &crtc->base, intel_encoder) { ret = intel_encoder; num_encoders++; } if (num_encoders != 1) WARN(1, "%d encoders on crtc for pipe %c\n", num_encoders, - pipe_name(intel_crtc->pipe)); + pipe_name(crtc->pipe)); BUG_ON(ret == NULL); return ret; @@ -1194,12 +1193,11 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, intel_encoder); } -void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) +void intel_ddi_set_pipe_settings(struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; int type = intel_encoder->type; uint32_t temp; @@ -1207,7 +1205,7 @@ void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) WARN_ON(transcoder_is_dsi(cpu_transcoder)); temp = TRANS_MSA_SYNC_CLK; - switch (intel_crtc->config->pipe_bpp) { + switch (crtc->config->pipe_bpp) { case 18: temp |= TRANS_MSA_6_BPC; break; @@ -1227,12 +1225,10 @@ void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) } } -void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state) +void intel_ddi_set_vc_payload_alloc(struct intel_crtc *crtc, bool state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; uint32_t temp; temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); if (state == true) @@ -1242,14 +1238,12 @@ void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state) I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); } -void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) +void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe = intel_crtc->pipe; - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; enum port port = intel_ddi_get_encoder_port(intel_encoder); int type = intel_encoder->type; uint32_t temp; @@ -1258,7 +1252,7 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) temp = TRANS_DDI_FUNC_ENABLE; temp |= TRANS_DDI_SELECT_PORT(port); - switch (intel_crtc->config->pipe_bpp) { + switch (crtc->config->pipe_bpp) { case 18: temp |= TRANS_DDI_BPC_6; break; @@ -1275,9 +1269,9 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) BUG(); } - if (intel_crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) + if (crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) temp |= TRANS_DDI_PVSYNC; - if (intel_crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) + if (crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) temp |= TRANS_DDI_PHSYNC; if (cpu_transcoder == TRANSCODER_EDP) { @@ -1288,8 +1282,8 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) * using motion blur mitigation (which we don't * support). */ if (IS_HASWELL(dev_priv) && - (intel_crtc->config->pch_pfit.enabled || - intel_crtc->config->pch_pfit.force_thru)) + (crtc->config->pch_pfit.enabled || + crtc->config->pch_pfit.force_thru)) temp |= TRANS_DDI_EDP_INPUT_A_ONOFF; else temp |= TRANS_DDI_EDP_INPUT_A_ON; @@ -1307,20 +1301,20 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) } if (type == INTEL_OUTPUT_HDMI) { - if (intel_crtc->config->has_hdmi_sink) + if (crtc->config->has_hdmi_sink) temp |= TRANS_DDI_MODE_SELECT_HDMI; else temp |= TRANS_DDI_MODE_SELECT_DVI; } else if (type == INTEL_OUTPUT_ANALOG) { temp |= TRANS_DDI_MODE_SELECT_FDI; - temp |= (intel_crtc->config->fdi_lanes - 1) << 1; + temp |= (crtc->config->fdi_lanes - 1) << 1; } else if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { temp |= TRANS_DDI_MODE_SELECT_DP_SST; - temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); + temp |= DDI_PORT_WIDTH(crtc->config->lane_count); } else if (type == INTEL_OUTPUT_DP_MST) { temp |= TRANS_DDI_MODE_SELECT_DP_MST; - temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); + temp |= DDI_PORT_WIDTH(crtc->config->lane_count); } else { WARN(1, "Invalid encoder type %d for pipe %c\n", intel_encoder->type, pipe_name(pipe)); @@ -1484,14 +1478,12 @@ static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder) return 0; } -void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc) +void intel_ddi_enable_pipe_clock(struct intel_crtc *crtc) { - struct drm_crtc *crtc = &intel_crtc->base; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); enum port port = intel_ddi_get_encoder_port(intel_encoder); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; if (cpu_transcoder != TRANSCODER_EDP) I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 649251c01639..ebaf9655cbd6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5385,9 +5385,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, */ intel_color_load_luts(&pipe_config->base); - intel_ddi_set_pipe_settings(crtc); + intel_ddi_set_pipe_settings(intel_crtc); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_enable_transcoder_func(crtc); + intel_ddi_enable_transcoder_func(intel_crtc); if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(old_intel_state, pipe_config); @@ -5400,7 +5400,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, lpt_pch_enable(pipe_config); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(crtc, true); + intel_ddi_set_vc_payload_alloc(intel_crtc, true); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5522,7 +5522,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_disable_pipe(intel_crtc); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(crtc, false); + intel_ddi_set_vc_payload_alloc(intel_crtc, false); if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index b3ded830c1b3..4cb1f57ad6ba 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1230,14 +1230,14 @@ void hsw_fdi_link_train(struct intel_crtc *crtc, void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); -void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc); +void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc); void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder); void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc); void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc); bool intel_ddi_pll_select(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); -void intel_ddi_set_pipe_settings(struct drm_crtc *crtc); +void intel_ddi_set_pipe_settings(struct intel_crtc *crtc); void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, @@ -1250,7 +1250,7 @@ intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state); void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder); void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); -void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); +void intel_ddi_set_vc_payload_alloc(struct intel_crtc *crtc, bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); From 3dc38eea665f383c84cc8d858b9a7645c0b29c54 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:56 +0200 Subject: [PATCH 0521/1299] drm/i915: Remove direct usages of intel_crtc->config from DDI code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove direct usages of intel_crtc->config from the DDI code. Functions that didn't yet take a pipe_config as an argument were coverted to do so. v2: s/pipe_config/const crtc_state/ (Ville) - take crtc from crtc_state. (Ville) Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-7-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 62 ++++++++++++++-------------- drivers/gpu/drm/i915/intel_display.c | 12 +++--- drivers/gpu/drm/i915/intel_drv.h | 11 ++--- 3 files changed, 44 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 0db526ec2f44..ebccd4863d7f 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1193,11 +1193,12 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, intel_encoder); } -void intel_ddi_set_pipe_settings(struct intel_crtc *crtc) +void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; int type = intel_encoder->type; uint32_t temp; @@ -1205,7 +1206,7 @@ void intel_ddi_set_pipe_settings(struct intel_crtc *crtc) WARN_ON(transcoder_is_dsi(cpu_transcoder)); temp = TRANS_MSA_SYNC_CLK; - switch (crtc->config->pipe_bpp) { + switch (crtc_state->pipe_bpp) { case 18: temp |= TRANS_MSA_6_BPC; break; @@ -1225,10 +1226,12 @@ void intel_ddi_set_pipe_settings(struct intel_crtc *crtc) } } -void intel_ddi_set_vc_payload_alloc(struct intel_crtc *crtc, bool state) +void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, + bool state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; uint32_t temp; temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); if (state == true) @@ -1238,12 +1241,13 @@ void intel_ddi_set_vc_payload_alloc(struct intel_crtc *crtc, bool state) I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); } -void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc) +void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; enum port port = intel_ddi_get_encoder_port(intel_encoder); int type = intel_encoder->type; uint32_t temp; @@ -1252,7 +1256,7 @@ void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc) temp = TRANS_DDI_FUNC_ENABLE; temp |= TRANS_DDI_SELECT_PORT(port); - switch (crtc->config->pipe_bpp) { + switch (crtc_state->pipe_bpp) { case 18: temp |= TRANS_DDI_BPC_6; break; @@ -1269,9 +1273,9 @@ void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc) BUG(); } - if (crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) + if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) temp |= TRANS_DDI_PVSYNC; - if (crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) + if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) temp |= TRANS_DDI_PHSYNC; if (cpu_transcoder == TRANSCODER_EDP) { @@ -1282,8 +1286,8 @@ void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc) * using motion blur mitigation (which we don't * support). */ if (IS_HASWELL(dev_priv) && - (crtc->config->pch_pfit.enabled || - crtc->config->pch_pfit.force_thru)) + (crtc_state->pch_pfit.enabled || + crtc_state->pch_pfit.force_thru)) temp |= TRANS_DDI_EDP_INPUT_A_ONOFF; else temp |= TRANS_DDI_EDP_INPUT_A_ON; @@ -1301,20 +1305,20 @@ void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc) } if (type == INTEL_OUTPUT_HDMI) { - if (crtc->config->has_hdmi_sink) + if (crtc_state->has_hdmi_sink) temp |= TRANS_DDI_MODE_SELECT_HDMI; else temp |= TRANS_DDI_MODE_SELECT_DVI; } else if (type == INTEL_OUTPUT_ANALOG) { temp |= TRANS_DDI_MODE_SELECT_FDI; - temp |= (crtc->config->fdi_lanes - 1) << 1; + temp |= (crtc_state->fdi_lanes - 1) << 1; } else if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { temp |= TRANS_DDI_MODE_SELECT_DP_SST; - temp |= DDI_PORT_WIDTH(crtc->config->lane_count); + temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } else if (type == INTEL_OUTPUT_DP_MST) { temp |= TRANS_DDI_MODE_SELECT_DP_MST; - temp |= DDI_PORT_WIDTH(crtc->config->lane_count); + temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } else { WARN(1, "Invalid encoder type %d for pipe %c\n", intel_encoder->type, pipe_name(pipe)); @@ -1478,22 +1482,23 @@ static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder) return 0; } -void intel_ddi_enable_pipe_clock(struct intel_crtc *crtc) +void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); enum port port = intel_ddi_get_encoder_port(intel_encoder); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (cpu_transcoder != TRANSCODER_EDP) I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), TRANS_CLK_SEL_PORT(port)); } -void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc) +void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (cpu_transcoder != TRANSCODER_EDP) I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), @@ -1759,23 +1764,21 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - struct intel_crtc *crtc = to_intel_crtc(encoder->crtc); int type = intel_encoder->type; if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { intel_ddi_pre_enable_dp(intel_encoder, - crtc->config->port_clock, - crtc->config->lane_count, - crtc->config->shared_dpll, - intel_crtc_has_type(crtc->config, + pipe_config->port_clock, + pipe_config->lane_count, + pipe_config->shared_dpll, + intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)); } if (type == INTEL_OUTPUT_HDMI) { intel_ddi_pre_enable_hdmi(intel_encoder, pipe_config->has_hdmi_sink, pipe_config, conn_state, - crtc->config->shared_dpll); + pipe_config->shared_dpll); } } @@ -1922,8 +1925,7 @@ static void bxt_ddi_pre_pll_enable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - uint8_t mask = intel_crtc->config->lane_lat_optim_mask; + uint8_t mask = pipe_config->lane_lat_optim_mask; bxt_ddi_phy_set_lane_optim_mask(encoder, mask); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ebaf9655cbd6..b2b8a3b9f982 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5372,7 +5372,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, dev_priv->display.fdi_link_train(intel_crtc, pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_enable_pipe_clock(intel_crtc); + intel_ddi_enable_pipe_clock(pipe_config); if (INTEL_GEN(dev_priv) >= 9) skylake_pfit_enable(intel_crtc); @@ -5385,9 +5385,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, */ intel_color_load_luts(&pipe_config->base); - intel_ddi_set_pipe_settings(intel_crtc); + intel_ddi_set_pipe_settings(pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_enable_transcoder_func(intel_crtc); + intel_ddi_enable_transcoder_func(pipe_config); if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(old_intel_state, pipe_config); @@ -5400,7 +5400,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, lpt_pch_enable(pipe_config); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(intel_crtc, true); + intel_ddi_set_vc_payload_alloc(pipe_config, true); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5522,7 +5522,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_disable_pipe(intel_crtc); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(intel_crtc, false); + intel_ddi_set_vc_payload_alloc(intel_crtc->config, false); if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder); @@ -5533,7 +5533,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, ironlake_pfit_disable(intel_crtc, false); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_disable_pipe_clock(intel_crtc); + intel_ddi_disable_pipe_clock(intel_crtc->config); intel_encoders_post_disable(crtc, old_crtc_state, old_state); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 4cb1f57ad6ba..9434c293a581 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1230,14 +1230,14 @@ void hsw_fdi_link_train(struct intel_crtc *crtc, void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); -void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc); +void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state); void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder); -void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc); -void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc); +void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state); +void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state); bool intel_ddi_pll_select(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); -void intel_ddi_set_pipe_settings(struct intel_crtc *crtc); +void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state); void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, @@ -1250,7 +1250,8 @@ intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state); void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder); void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); -void intel_ddi_set_vc_payload_alloc(struct intel_crtc *crtc, bool state); +void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, + bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); From e081c8463ac947e75f20ee11c6516f089efddaf6 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:57 +0200 Subject: [PATCH 0522/1299] drm/i915: Remove duplicate DDI enabling logic from MST path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The logic to enable a DDI in intel_mst_pre_enable_dp() is essentially the same as in intel_ddi_pre_enable_dp(). So reuse the latter function by calling the post_disable hook on the intel_dig_port instead of duplicating that code. v2: Don't oops because of a NULL encoder->crtc. (Ville) v3: Warn for MST + PORT_E too. (Ville) Cc: Imre Deak Cc: Ville Syrjälä Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-8-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 2 ++ drivers/gpu/drm/i915/intel_dp_mst.c | 23 +++-------------------- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ebccd4863d7f..04676760e6fd 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1713,6 +1713,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, enum port port = intel_ddi_get_encoder_port(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + WARN_ON(link_mst && (port == PORT_A || port == PORT_E)); + intel_dp_set_link_params(intel_dp, link_rate, lane_count, link_mst); if (encoder->type == INTEL_OUTPUT_EDP) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index a8334e18c4fc..094cbdcbcd6d 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -159,26 +159,9 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); - if (intel_dp->active_mst_links == 0) { - intel_ddi_clk_select(&intel_dig_port->base, - pipe_config->shared_dpll); - - intel_display_power_get(dev_priv, - intel_dig_port->ddi_io_power_domain); - - intel_prepare_dp_ddi_buffers(&intel_dig_port->base); - intel_dp_set_link_params(intel_dp, - pipe_config->port_clock, - pipe_config->lane_count, - true); - - intel_ddi_init_dp_buf_reg(&intel_dig_port->base); - - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); - - intel_dp_start_link_train(intel_dp); - intel_dp_stop_link_train(intel_dp); - } + if (intel_dp->active_mst_links == 0) + intel_dig_port->base.pre_enable(&intel_dig_port->base, + pipe_config, NULL); ret = drm_dp_mst_allocate_vcpi(&intel_dp->mst_mgr, connector->port, From 5400367a864ddafdfb9d5d04cf6f9979665ef349 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Mar 2017 12:19:46 +0000 Subject: [PATCH 0523/1299] drm/i915: Ensure the engine is idle before manually changing HWS During reset_all_global_seqno() on seqno rollover, we have to update the HWS. This causes all in flight requests to be completed, so first we wait. However, we were only waiting for the requests themselves to be completed and clearing out the waiter rbtrees - what I had missed was the extra reference in execlists->port[]. Since commit fe9ae7a3bfdb ("drm/i915/execlists: Detect an out-of-order context switch") we can detect when the request is retired before the context switch interrupt is completed. The impact should be neglible outside of debugging. Testcase: igt/gem_exec_whisper Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170303121947.20482-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_request.c | 3 +++ drivers/gpu/drm/i915/intel_engine_cs.c | 31 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 3 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index c2cee9674cb0..b36a7644e055 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -196,6 +196,9 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) for_each_engine(engine, i915, id) { struct intel_timeline *tl = &timeline->engine[id]; + if (wait_for(intel_engine_is_idle(engine), 50)) + return -EBUSY; + if (!i915_seqno_passed(seqno, tl->seqno)) { /* spin until threads are complete */ while (intel_breadcrumbs_busy(engine)) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index c58339b22f6a..53d65dc0c9fb 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1071,6 +1071,37 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) return 0; } +/** + * intel_engine_is_idle() - Report if the engine has finished process all work + * @engine: the intel_engine_cs + * + * Return true if there are no requests pending, nothing left to be submitted + * to hardware, and that the engine is idle. + */ +bool intel_engine_is_idle(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + /* Any inflight/incomplete requests? */ + if (!i915_seqno_passed(intel_engine_get_seqno(engine), + intel_engine_last_submit(engine))) + return false; + + /* Interrupt/tasklet pending? */ + if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) + return false; + + /* Both ports drained, no more ELSP submission? */ + if (engine->execlist_port[0].request) + return false; + + /* Ring stopped? */ + if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE)) + return false; + + return true; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 3dd6eee4a08b..38580765bfd6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -664,4 +664,6 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) return batch + 6; } +bool intel_engine_is_idle(struct intel_engine_cs *engine); + #endif /* _INTEL_RINGBUFFER_H_ */ From 0542524944c2ae3264ca8e6b5f0c7a111f09a2c2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Mar 2017 12:19:47 +0000 Subject: [PATCH 0524/1299] drm/i915: Generalise wait for execlists to be idle The code to check for execlists completion is generic, so move it to intel_engine_cs.c, where we can reuse the new intel_engine_is_idle(). Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170303121947.20482-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 8 +++--- drivers/gpu/drm/i915/intel_engine_cs.c | 13 +++++++++ drivers/gpu/drm/i915/intel_lrc.c | 28 ------------------- drivers/gpu/drm/i915/intel_lrc.h | 1 - drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + .../gpu/drm/i915/selftests/i915_gem_request.c | 2 +- 6 files changed, 19 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index edc59bd45f53..f7e85fb1464f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2959,8 +2959,8 @@ i915_gem_idle_work_handler(struct work_struct *work) * new request is submitted. */ wait_for(READ_ONCE(dev_priv->gt.active_requests) || - intel_execlists_idle(dev_priv), 10); - + intel_engines_are_idle(dev_priv), + 10); if (READ_ONCE(dev_priv->gt.active_requests)) return; @@ -2985,7 +2985,7 @@ i915_gem_idle_work_handler(struct work_struct *work) if (dev_priv->gt.active_requests) goto out_unlock; - if (wait_for(intel_execlists_idle(dev_priv), 10)) + if (wait_for(intel_engines_are_idle(dev_priv), 10)) DRM_ERROR("Timeout waiting for engines to idle\n"); for_each_engine(engine, dev_priv, id) { @@ -4287,7 +4287,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * reset the GPU back to its idle, low power state. */ WARN_ON(dev_priv->gt.awake); - WARN_ON(!intel_execlists_idle(dev_priv)); + WARN_ON(!intel_engines_are_idle(dev_priv)); /* * Neither the BIOS, ourselves or any other kernel diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 53d65dc0c9fb..5fd4883db235 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1102,6 +1102,19 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return true; } +bool intel_engines_are_idle(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) { + if (!intel_engine_is_idle(engine)) + return false; + } + + return true; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index f9a8545474bc..3834a84fe084 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -498,34 +498,6 @@ static bool execlists_elsp_idle(struct intel_engine_cs *engine) return !engine->execlist_port[0].request; } -/** - * intel_execlists_idle() - Determine if all engine submission ports are idle - * @dev_priv: i915 device private - * - * Return true if there are no requests pending on any of the submission ports - * of any engines. - */ -bool intel_execlists_idle(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (!i915.enable_execlists) - return true; - - for_each_engine(engine, dev_priv, id) { - /* Interrupt/tasklet pending? */ - if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) - return false; - - /* Both ports drained, no more ELSP submission? */ - if (!execlists_elsp_idle(engine)) - return false; - } - - return true; -} - static bool execlists_elsp_ready(const struct intel_engine_cs *engine) { const struct execlist_port *port = engine->execlist_port; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index c8009c7bfbdd..5fc07761caff 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -88,6 +88,5 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists); void intel_execlists_enable_submission(struct drm_i915_private *dev_priv); -bool intel_execlists_idle(struct drm_i915_private *dev_priv); #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 38580765bfd6..55a6a3f8274c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -665,5 +665,6 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) } bool intel_engine_is_idle(struct intel_engine_cs *engine); +bool intel_engines_are_idle(struct drm_i915_private *dev_priv); #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 42bdeac93324..926b24c117d6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -303,7 +303,7 @@ static int end_live_test(struct live_test *t) { struct drm_i915_private *i915 = t->i915; - if (wait_for(intel_execlists_idle(i915), 1)) { + if (wait_for(intel_engines_are_idle(i915), 1)) { pr_err("%s(%s): GPU not idle\n", t->func, t->name); return -EIO; } From 3e490042a80f82df80141fce1dbef1baa7850160 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 28 Feb 2017 17:28:07 +0200 Subject: [PATCH 0525/1299] drm/i915/gtt: Make I915_PDPES_PER_PDP inline function The macro takes a vm pointer at some sites, and dev_priv on others We were saved as the internal macro never deferences the pointer given. As the number of pdpes depend on vm configuration, make it as a inline function that accepts vm pointer. Cc: Chris Wilson Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1488295691-9404-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 12 +++++++----- drivers/gpu/drm/i915/i915_gem_gtt.h | 26 ++++++++++++++++---------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e0c9542a90c1..5299600b4733 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -528,7 +528,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm, static int __pdp_init(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { - const unsigned int pdpes = I915_PDPES_PER_PDP(vm->i915); + const unsigned int pdpes = i915_pdpes_per_pdp(vm); unsigned int i; pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory), @@ -852,7 +852,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, gen8_pte_t *vaddr; bool ret; - GEM_BUG_ON(idx->pdpe >= I915_PDPES_PER_PDP(vm)); + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); pd = pdp->page_directory[idx->pdpe]; vaddr = kmap_atomic_px(pd->page_table[idx->pde]); do { @@ -883,7 +883,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, break; } - GEM_BUG_ON(idx->pdpe >= I915_PDPES_PER_PDP(vm)); + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); pd = pdp->page_directory[idx->pdpe]; } @@ -1036,9 +1036,10 @@ static void gen8_free_scratch(struct i915_address_space *vm) static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { + const unsigned int pdpes = i915_pdpes_per_pdp(vm); int i; - for (i = 0; i < I915_PDPES_PER_PDP(vm->i915); i++) { + for (i = 0; i < pdpes; i++) { if (pdp->page_directory[i] == vm->scratch_pd) continue; @@ -1127,7 +1128,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, gen8_initialize_pd(vm, pd); gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); pdp->used_pdpes++; - GEM_BUG_ON(pdp->used_pdpes > I915_PDPES_PER_PDP(vm)); + GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm)); mark_tlbs_dirty(i915_vm_to_ppgtt(vm)); } @@ -1201,6 +1202,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, gen8_pte_t scratch_pte, struct seq_file *m) { + struct i915_address_space *vm = &ppgtt->base; struct i915_page_directory *pd; u32 pdpe; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f7d4e194a227..562c6329aff6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -125,9 +125,6 @@ typedef u64 gen8_ppgtt_pml4e_t; #define GEN8_LEGACY_PDPES 4 #define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) -#define I915_PDPES_PER_PDP(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\ - GEN8_PML4ES_PER_PML4 : GEN8_LEGACY_PDPES) - #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ #define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ @@ -332,6 +329,12 @@ struct i915_address_space { #define i915_is_ggtt(V) (!(V)->file) +static inline bool +i915_vm_is_48bit(const struct i915_address_space *vm) +{ + return (vm->total - 1) >> 32; +} + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a @@ -457,6 +460,15 @@ static inline u32 gen6_pde_index(u32 addr) return i915_pde_index(addr, GEN6_PDE_SHIFT); } +static inline unsigned int +i915_pdpes_per_pdp(const struct i915_address_space *vm) +{ + if (i915_vm_is_48bit(vm)) + return GEN8_PML4ES_PER_PML4; + + return GEN8_LEGACY_PDPES; +} + /* Equivalent to the gen6 version, For each pde iterates over every pde * between from start until start + length. On gen8+ it simply iterates * over every page directory entry in a page directory. @@ -471,7 +483,7 @@ static inline u32 gen6_pde_index(u32 addr) #define gen8_for_each_pdpe(pd, pdp, start, length, iter) \ for (iter = gen8_pdpe_index(start); \ - length > 0 && iter < I915_PDPES_PER_PDP(dev) && \ + length > 0 && iter < i915_pdpes_per_pdp(vm) && \ (pd = (pdp)->page_directory[iter], true); \ ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT); \ temp = min(temp - start, length); \ @@ -523,12 +535,6 @@ i915_vm_to_ggtt(struct i915_address_space *vm) return container_of(vm, struct i915_ggtt, base); } -static inline bool -i915_vm_is_48bit(const struct i915_address_space *vm) -{ - return (vm->total - 1) >> 32; -} - int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915); void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915); From 92ad18ec26611e8a47639e600bd9dc42fbe2edcf Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 2 Mar 2017 12:53:26 -0500 Subject: [PATCH 0526/1299] ftrace/graph: Do not modify the EMPTY_HASH for the function_graph filter On boot up, if the kernel command line sets a graph funtion with the kernel command line options "ftrace_graph_filter" or "ftrace_graph_notrace" then it updates the corresponding function graph hash, ftrace_graph_hash or ftrace_graph_notrace_hash respectively. Unfortunately, at boot up, these variables are pointers to the "EMPTY_HASH" which is a constant used as a placeholder when a hash has no entities. The problem was that the comand line version to set the hashes updated the actual EMPTY_HASH instead of creating a new hash for the function graph. This broke the EMPTY_HASH because not only did it modify a constant (not sure how that was allowed to happen, except maybe because it was done at early boot, const variables were still mutable), but it made the filters have functions listed in them when they were actually empty. The kernel command line function needs to allocate a new hash for the function graph filters and assign the necessary variables to that new hash instead. Link: http://lkml.kernel.org/r/1488420091.7212.17.camel@linux.intel.com Cc: Namhyung Kim Fixes: b9b0c831bed2 ("ftrace: Convert graph filter to use hash tables") Reported-by: Todd Brandt Tested-by: Todd Brandt Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fd84f2e30b6d..44122e7a6418 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4421,10 +4421,9 @@ static void __init set_ftrace_early_graph(char *buf, int enable) char *func; struct ftrace_hash *hash; - if (enable) - hash = ftrace_graph_hash; - else - hash = ftrace_graph_notrace_hash; + hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); + if (WARN_ON(!hash)) + return; while (buf) { func = strsep(&buf, ","); @@ -4434,6 +4433,11 @@ static void __init set_ftrace_early_graph(char *buf, int enable) printk(KERN_DEBUG "ftrace: function %s not " "traceable\n", func); } + + if (enable) + ftrace_graph_hash = hash; + else + ftrace_graph_notrace_hash = hash; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From ab42632156becd35d3884ee5c14da2bedbf3149a Mon Sep 17 00:00:00 2001 From: David Daney Date: Wed, 1 Mar 2017 14:04:53 -0800 Subject: [PATCH 0527/1299] module: set __jump_table alignment to 8 For powerpc the __jump_table section in modules is not aligned, this causes a WARN_ON() splat when loading a module containing a __jump_table. Strict alignment became necessary with commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key"), currently in linux-next, which uses the two least significant bits of pointers to __jump_table elements. Fix by forcing __jump_table to 8, which is the same alignment used for this section in the kernel proper. Link: http://lkml.kernel.org/r/20170301220453.4756-1-david.daney@cavium.com Reviewed-by: Jason Baron Acked-by: Jessica Yu Acked-by: Michael Ellerman (powerpc) Tested-by: Sachin Sant Signed-off-by: David Daney Signed-off-by: Steven Rostedt (VMware) --- scripts/module-common.lds | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 73a2c7da0e55..53234e85192a 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -19,4 +19,6 @@ SECTIONS { . = ALIGN(8); .init_array 0 : { *(SORT(.init_array.*)) *(.init_array) } + + __jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) } } From cd8d860dcce906cd477be7d0648ba6f56a52eaa6 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 28 Feb 2017 11:32:22 -0500 Subject: [PATCH 0528/1299] jump_label: Fix anonymous union initialization Pre-4.6 gcc do not allow direct static initialization of members of anonymous structs/unions. After commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key") STATIC_KEY_INIT_{TRUE|FALSE} definitions cannot be compiled with those older compilers. Placing initializers inside curved brackets works around this problem. Link: http://lkml.kernel.org/r/1488299542-30765-1-git-send-email-boris.ostrovsky@oracle.com Fixes: 3821fd35b58d ("jump_label: Reduce the size of struct static_key") Reviewed-by: Jason Baron Compiled-by: Chris Mason Signed-off-by: Boris Ostrovsky Signed-off-by: Steven Rostedt (VMware) --- include/linux/jump_label.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 680c98b2f41c..a7f90117cf7d 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -166,10 +166,10 @@ extern void static_key_disable(struct static_key *key); */ #define STATIC_KEY_INIT_TRUE \ { .enabled = { 1 }, \ - .entries = (void *)JUMP_TYPE_TRUE } + { .entries = (void *)JUMP_TYPE_TRUE } } #define STATIC_KEY_INIT_FALSE \ { .enabled = { 0 }, \ - .entries = (void *)JUMP_TYPE_FALSE } + { .entries = (void *)JUMP_TYPE_FALSE } } #else /* !HAVE_JUMP_LABEL */ From b17ef2ed624aa7c1f68ed11acd16ecbf80fe01d7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 2 Mar 2017 17:28:45 -0500 Subject: [PATCH 0529/1299] jump_label: Add comment about initialization order for anonymous unions Commit 3821fd35b58d ("jump_label: Reduce the size of struct static_key") broke old compilers that could not handle static initialization of anonymous unions. Boris fixed it with a patch that added brackets around the static initializer. But this creates a dependency between those initializers and the structure's order of its fields. Document this dependency in case new fields are added to struct static_key in the future. Noted-by: Boris Ostrovsky Suggested-by: Chris Mason Signed-off-by: Steven Rostedt (VMware) --- include/linux/jump_label.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index a7f90117cf7d..28e04a33535a 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -90,6 +90,13 @@ extern bool static_key_initialized; struct static_key { atomic_t enabled; /* + * Note: + * To make anonymous unions work with old compilers, the static + * initialization of them requires brackets. This creates a dependency + * on the order of the struct with the initializers. If any fields + * are added, STATIC_KEY_INIT_TRUE and STATIC_KEY_INIT_FALSE may need + * to be modified. + * * bit 0 => 1 if key is initially true * 0 if initially false * bit 1 => 1 if points to struct static_key_mod From bf7165cfa23695c51998231c4efa080fe1d3548d Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Wed, 28 Sep 2016 22:55:54 -0400 Subject: [PATCH 0530/1299] tracing: Add #undef to fix compile error There are several trace include files that define TRACE_INCLUDE_FILE. Include several of them in the same .c file (as I currently have in some code I am working on), and the compile will blow up with a "warning: "TRACE_INCLUDE_FILE" redefined #define TRACE_INCLUDE_FILE syscalls" Every other include file in include/trace/events/ avoids that issue by having a #undef TRACE_INCLUDE_FILE before the #define; syscalls.h should have one, too. Link: http://lkml.kernel.org/r/20160928225554.13bd7ac6@annuminas.surriel.com Cc: stable@vger.kernel.org Fixes: b8007ef74222 ("tracing: Separate raw syscall from syscall tracer") Signed-off-by: Rik van Riel Signed-off-by: Steven Rostedt (VMware) --- include/trace/events/syscalls.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h index 14e49c798135..b35533b94277 100644 --- a/include/trace/events/syscalls.h +++ b/include/trace/events/syscalls.h @@ -1,5 +1,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM raw_syscalls +#undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE syscalls #if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) From 65a50c656276b0846bea09dd011c0a3d35b77f3e Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Thu, 2 Mar 2017 16:12:15 -0800 Subject: [PATCH 0531/1299] ftrace/graph: Add ftrace_graph_max_depth kernel parameter Early trace callgraphs can be extremely large on systems with several seconds of boot time. The max_depth parameter limits how deep the graph trace goes and reduces the output size. This parameter is the same as the max_graph_depth file in tracefs. Link: http://lkml.kernel.org/r/1488499935-23216-1-git-send-email-todd.e.brandt@linux.intel.com Signed-off-by: Todd Brandt [ changed comments about debugfs to tracefs ] Signed-off-by: Steven Rostedt (VMware) --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ kernel/trace/ftrace.c | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 21e2d8863705..1c9016b27ee9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1173,6 +1173,12 @@ functions that can be changed at run time by the set_graph_notrace file in the debugfs tracing directory. + ftrace_graph_max_depth= + [FTRACE] Used with the function graph tracer. This is + the max depth it will trace into a function. This value + can be changed at run time by the max_graph_depth file + in the tracefs tracing directory. default: 0 (no limit) + gamecon.map[2|3]= [HW,JOY] Multisystem joystick and NES/SNES/PSX pad support via parallel port (up to 5 devices per port) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 44122e7a6418..d129ae51329a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4415,6 +4415,15 @@ static int __init set_graph_notrace_function(char *str) } __setup("ftrace_graph_notrace=", set_graph_notrace_function); +static int __init set_graph_max_depth_function(char *str) +{ + if (!str) + return 0; + fgraph_max_depth = simple_strtoul(str, NULL, 0); + return 1; +} +__setup("ftrace_graph_max_depth=", set_graph_max_depth_function); + static void __init set_ftrace_early_graph(char *buf, int enable) { int ret; From 54af56dbf883f65396917a88e4ce8c37f9d52009 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 28 Feb 2017 17:28:08 +0200 Subject: [PATCH 0532/1299] drm/i915: Don't mark pdps clear if pdps are not submitted Don't mark pdps clear if never do the necessary actions with the hardware to make them clear. v2: totally get rid of confusing ppgtt bool (Chris) Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1488295691-9404-2-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3834a84fe084..c8943c42f9e2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1291,9 +1291,8 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) static int gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, - unsigned int dispatch_flags) + const unsigned int flags) { - bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); u32 *cs; int ret; @@ -1304,13 +1303,12 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, * not idle). PML4 is allocated during ppgtt init so this is * not needed in 48-bit.*/ if (req->ctx->ppgtt && - (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) { - if (!i915_vm_is_48bit(&req->ctx->ppgtt->base) && - !intel_vgpu_active(req->i915)) { - ret = intel_logical_ring_emit_pdps(req); - if (ret) - return ret; - } + (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings) && + !i915_vm_is_48bit(&req->ctx->ppgtt->base) && + !intel_vgpu_active(req->i915)) { + ret = intel_logical_ring_emit_pdps(req); + if (ret) + return ret; req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine); } @@ -1320,8 +1318,9 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, return PTR_ERR(cs); /* FIXME(BDW): Address space and security selectors. */ - *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags & - I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) | + (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); *cs++ = MI_NOOP; From 1e6437b0e064746a4a2b6b3578c639797cafe4b1 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 28 Feb 2017 17:28:09 +0200 Subject: [PATCH 0533/1299] drm/i915/gtt: Prefer i915_vm_is_48bit() over macro If we setup the vm size early, we can use the newly introduced i915_vm_is_48bit() in majority of callsites wanting to know the vm size. As we operate either with 3lvl or 4lvl page table structure, wrap the vm size query inside a function which tells us if 4lvl setup is needed for particular vm, as the following code uses the function names where level is noted. v2: use_4lvl (Chris) Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1488295691-9404-3-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 63 +++++++++++++++++------------ 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 5299600b4733..9399906e5528 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -548,13 +548,18 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp) pdp->page_directory = NULL; } +static inline bool use_4lvl(const struct i915_address_space *vm) +{ + return i915_vm_is_48bit(vm); +} + static struct i915_page_directory_pointer * alloc_pdp(struct i915_address_space *vm) { struct i915_page_directory_pointer *pdp; int ret = -ENOMEM; - WARN_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); + WARN_ON(!use_4lvl(vm)); pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); if (!pdp) @@ -582,10 +587,12 @@ static void free_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { __pdp_fini(pdp); - if (USES_FULL_48BIT_PPGTT(vm->i915)) { - cleanup_px(vm, pdp); - kfree(pdp); - } + + if (!use_4lvl(vm)) + return; + + cleanup_px(vm, pdp); + kfree(pdp); } static void gen8_initialize_pdp(struct i915_address_space *vm, @@ -739,7 +746,7 @@ static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm, gen8_ppgtt_pdpe_t *vaddr; pdp->page_directory[pdpe] = pd; - if (!USES_FULL_48BIT_PPGTT(vm->i915)) + if (!use_4lvl(vm)) return; vaddr = kmap_atomic_px(pdp); @@ -804,7 +811,7 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp; unsigned int pml4e; - GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); + GEM_BUG_ON(!use_4lvl(vm)); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { GEM_BUG_ON(pdp == vm->scratch_pdp); @@ -968,7 +975,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) goto free_pt; } - if (USES_FULL_48BIT_PPGTT(dev)) { + if (use_4lvl(vm)) { vm->scratch_pdp = alloc_pdp(vm); if (IS_ERR(vm->scratch_pdp)) { ret = PTR_ERR(vm->scratch_pdp); @@ -978,7 +985,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) gen8_initialize_pt(vm, vm->scratch_pt); gen8_initialize_pd(vm, vm->scratch_pd); - if (USES_FULL_48BIT_PPGTT(dev_priv)) + if (use_4lvl(vm)) gen8_initialize_pdp(vm, vm->scratch_pdp); return 0; @@ -995,12 +1002,13 @@ static int gen8_init_scratch(struct i915_address_space *vm) static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) { + struct i915_address_space *vm = &ppgtt->base; + struct drm_i915_private *dev_priv = vm->i915; enum vgt_g2v_type msg; - struct drm_i915_private *dev_priv = ppgtt->base.i915; int i; - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - u64 daddr = px_dma(&ppgtt->pml4); + if (use_4lvl(vm)) { + const u64 daddr = px_dma(&ppgtt->pml4); I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); @@ -1009,7 +1017,7 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); } else { for (i = 0; i < GEN8_LEGACY_PDPES; i++) { - u64 daddr = i915_page_dir_dma_addr(ppgtt, i); + const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); @@ -1026,7 +1034,7 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) static void gen8_free_scratch(struct i915_address_space *vm) { - if (USES_FULL_48BIT_PPGTT(vm->i915)) + if (use_4lvl(vm)) free_pdp(vm, vm->scratch_pdp); free_pd(vm, vm->scratch_pd); free_pt(vm, vm->scratch_pt); @@ -1072,10 +1080,10 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) if (intel_vgpu_active(dev_priv)) gen8_ppgtt_notify_vgt(ppgtt, false); - if (!USES_FULL_48BIT_PPGTT(vm->i915)) - gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp); - else + if (use_4lvl(vm)) gen8_ppgtt_cleanup_4lvl(ppgtt); + else + gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp); gen8_free_scratch(vm); } @@ -1258,9 +1266,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); u64 start = 0, length = ppgtt->base.total; - if (!USES_FULL_48BIT_PPGTT(vm->i915)) { - gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); - } else { + if (use_4lvl(vm)) { u64 pml4e; struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; @@ -1272,6 +1278,8 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) seq_printf(m, " PML4E #%llu\n", pml4e); gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m); } + } else { + gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); } } @@ -1316,12 +1324,19 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) */ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) { - struct drm_i915_private *dev_priv = ppgtt->base.i915; + struct i915_address_space *vm = &ppgtt->base; + struct drm_i915_private *dev_priv = vm->i915; int ret; + ppgtt->base.total = USES_FULL_48BIT_PPGTT(dev_priv) ? + 1ULL << 48 : + 1ULL << 32; + ret = gen8_init_scratch(&ppgtt->base); - if (ret) + if (ret) { + ppgtt->base.total = 0; return ret; + } ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.unbind_vma = ppgtt_unbind_vma; @@ -1334,14 +1349,13 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) ppgtt->base.pt_kmap_wc = true; - if (USES_FULL_48BIT_PPGTT(dev_priv)) { + if (use_4lvl(vm)) { ret = setup_px(&ppgtt->base, &ppgtt->pml4); if (ret) goto free_scratch; gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); - ppgtt->base.total = 1ULL << 48; ppgtt->switch_mm = gen8_48b_mm_switch; ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; @@ -1352,7 +1366,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) goto free_scratch; - ppgtt->base.total = 1ULL << 32; ppgtt->switch_mm = gen8_legacy_mm_switch; if (intel_vgpu_active(dev_priv)) { From e71677698b27f3b460d44f67389b43c4353522dd Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 28 Feb 2017 17:28:10 +0200 Subject: [PATCH 0534/1299] drm/i915: Avoid using word legacy with ppgtt The term legacy is subjective. Use 3lvl and 4lvl where appropriate. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1488295691-9404-4-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 18 ++++++++---------- drivers/gpu/drm/i915/i915_gem_gtt.h | 21 +++++++++++---------- drivers/gpu/drm/i915/intel_lrc.c | 4 ++-- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9399906e5528..9c9a03ee44b9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -641,12 +641,12 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, return 0; } -static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) +static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_request *req) { int i, ret; - for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { + for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); ret = gen8_write_pdp(req, i, pd_daddr); @@ -657,8 +657,8 @@ static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, return 0; } -static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) +static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_request *req) { return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); } @@ -1016,7 +1016,7 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); } else { - for (i = 0; i < GEN8_LEGACY_PDPES; i++) { + for (i = 0; i < GEN8_3LVL_PDPES; i++) { const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); @@ -1356,8 +1356,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); - ppgtt->switch_mm = gen8_48b_mm_switch; - + ppgtt->switch_mm = gen8_mm_switch_4lvl; ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; @@ -1366,8 +1365,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) goto free_scratch; - ppgtt->switch_mm = gen8_legacy_mm_switch; - if (intel_vgpu_active(dev_priv)) { ret = gen8_preallocate_top_level_pdp(ppgtt); if (ret) { @@ -1376,6 +1373,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) } } + ppgtt->switch_mm = gen8_mm_switch_3lvl; ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl; ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 562c6329aff6..fb15684c1d83 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -101,13 +101,20 @@ typedef u64 gen8_ppgtt_pml4e_t; #define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0)) #define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) -/* GEN8 legacy style address is defined as a 3 level page table: +/* GEN8 32b style address is defined as a 3 level page table: * 31:30 | 29:21 | 20:12 | 11:0 * PDPE | PDE | PTE | offset * The difference as compared to normal x86 3 level page table is the PDPEs are * programmed via register. - * - * GEN8 48b legacy style address is defined as a 4 level page table: + */ +#define GEN8_3LVL_PDPES 4 +#define GEN8_PDE_SHIFT 21 +#define GEN8_PDE_MASK 0x1ff +#define GEN8_PTE_SHIFT 12 +#define GEN8_PTE_MASK 0x1ff +#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) + +/* GEN8 48b style address is defined as a 4 level page table: * 47:39 | 38:30 | 29:21 | 20:12 | 11:0 * PML4E | PDPE | PDE | PTE | offset */ @@ -118,12 +125,6 @@ typedef u64 gen8_ppgtt_pml4e_t; /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page * tables */ #define GEN8_PDPE_MASK 0x1ff -#define GEN8_PDE_SHIFT 21 -#define GEN8_PDE_MASK 0x1ff -#define GEN8_PTE_SHIFT 12 -#define GEN8_PTE_MASK 0x1ff -#define GEN8_LEGACY_PDPES 4 -#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ @@ -466,7 +467,7 @@ i915_pdpes_per_pdp(const struct i915_address_space *vm) if (i915_vm_is_48bit(vm)) return GEN8_PML4ES_PER_PML4; - return GEN8_LEGACY_PDPES; + return GEN8_3LVL_PDPES; } /* Equivalent to the gen6 version, For each pde iterates over every pde diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c8943c42f9e2..89f38e7def9f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1265,7 +1265,7 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; struct intel_engine_cs *engine = req->engine; - const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; + const int num_lri_cmds = GEN8_3LVL_PDPES * 2; u32 *cs; int i; @@ -1274,7 +1274,7 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) return PTR_ERR(cs); *cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds); - for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { + for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i)); From 054b9acde6f9b7bd5e1e9739777d1cddda048305 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 28 Feb 2017 17:28:11 +0200 Subject: [PATCH 0535/1299] drm/i915/gtt: Setup vm callbacks late If we manage to tangle errorpaths and get call to callbacks, it is better to defensively keep them as null until object init is finished so that we get clean null deref on callsite, instead of more cryptic wreckage with partly initialized vm objects. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1488295691-9404-5-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9c9a03ee44b9..cee9c4fec52a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1338,11 +1338,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return ret; } - ppgtt->base.cleanup = gen8_ppgtt_cleanup; - ppgtt->base.unbind_vma = ppgtt_unbind_vma; - ppgtt->base.bind_vma = ppgtt_bind_vma; - ppgtt->debug_dump = gen8_dump_ppgtt; - /* There are only few exceptions for gen >=6. chv and bxt. * And we are not sure about the latter so play safe for now. */ @@ -1382,6 +1377,11 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (intel_vgpu_active(dev_priv)) gen8_ppgtt_notify_vgt(ppgtt, true); + ppgtt->base.cleanup = gen8_ppgtt_cleanup; + ppgtt->base.unbind_vma = ppgtt_unbind_vma; + ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->debug_dump = gen8_dump_ppgtt; + return 0; free_scratch: @@ -1808,13 +1808,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ppgtt->base.clear_range = gen6_ppgtt_clear_range; - ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; - ppgtt->base.unbind_vma = ppgtt_unbind_vma; - ppgtt->base.bind_vma = ppgtt_bind_vma; - ppgtt->base.cleanup = gen6_ppgtt_cleanup; ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; - ppgtt->debug_dump = gen6_dump_ppgtt; gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); gen6_write_page_range(ppgtt, 0, ppgtt->base.total); @@ -1825,6 +1819,13 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return ret; } + ppgtt->base.clear_range = gen6_ppgtt_clear_range; + ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; + ppgtt->base.unbind_vma = ppgtt_unbind_vma; + ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->base.cleanup = gen6_ppgtt_cleanup; + ppgtt->debug_dump = gen6_dump_ppgtt; + DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", ppgtt->node.size >> 20, ppgtt->node.start / PAGE_SIZE); From e9728bd888e1fa2d5c1f77de8817a96eeba31747 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:14:51 +0200 Subject: [PATCH 0536/1299] drm/i915: Track visible planes in a bitmask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a lot of place we wish to know which planes on the crtc are actually visible, or how many of them there are. Let's start tracking that in a bitmask in the crtc state. We already track enabled planes (ie. ones with an fb and crtc specified by the user) but that's not quite the same thing as enabled planes may still end up being invisible due to clipping and whatnot. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-2-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_atomic_plane.c | 6 ++ drivers/gpu/drm/i915/intel_display.c | 74 ++++++++++++++++------- drivers/gpu/drm/i915/intel_drv.h | 3 + 3 files changed, 60 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 41fd94e62d3c..1eaf840cf9ff 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -189,6 +189,12 @@ int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state, if (ret) return ret; + /* FIXME pre-g4x don't work like this */ + if (intel_state->base.visible) + crtc_state->active_planes |= BIT(intel_plane->id); + else + crtc_state->active_planes &= ~BIT(intel_plane->id); + return intel_plane_atomic_calc_changes(&crtc_state->base, state); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b2b8a3b9f982..c4301e0df022 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2670,6 +2670,29 @@ update_state_fb(struct drm_plane *plane) drm_framebuffer_reference(plane->state->fb); } +static void +intel_set_plane_visible(struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state, + bool visible) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + + plane_state->base.visible = visible; + + /* FIXME pre-g4x don't work like this */ + if (visible) { + crtc_state->base.plane_mask |= BIT(drm_plane_index(&plane->base)); + crtc_state->active_planes |= BIT(plane->id); + } else { + crtc_state->base.plane_mask &= ~BIT(drm_plane_index(&plane->base)); + crtc_state->active_planes &= ~BIT(plane->id); + } + + DRM_DEBUG_KMS("%s active planes 0x%x\n", + crtc_state->base.crtc->name, + crtc_state->active_planes); +} + static void intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct intel_initial_plane_config *plane_config) @@ -2727,8 +2750,9 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. */ - plane_state->visible = false; - crtc_state->plane_mask &= ~(1 << drm_plane_index(primary)); + intel_set_plane_visible(to_intel_crtc_state(crtc_state), + to_intel_plane_state(plane_state), + false); intel_pre_disable_primary_noatomic(&intel_crtc->base); intel_plane->disable_plane(primary, &intel_crtc->base); @@ -2768,7 +2792,11 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, drm_framebuffer_reference(fb); primary->fb = primary->state->fb = fb; primary->crtc = primary->state->crtc = &intel_crtc->base; - intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary)); + + intel_set_plane_visible(to_intel_crtc_state(crtc_state), + to_intel_plane_state(plane_state), + true); + atomic_or(to_intel_plane(primary)->frontbuffer_bit, &obj->frontbuffer_bits); } @@ -10756,11 +10784,11 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc_state); struct drm_crtc *crtc = crtc_state->crtc; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_plane *plane = plane_state->plane; + struct intel_plane *plane = to_intel_plane(plane_state->plane); struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane_state *old_plane_state = - to_intel_plane_state(plane->state); + to_intel_plane_state(plane->base.state); bool mode_changed = needs_modeset(crtc_state); bool was_crtc_enabled = crtc->state->active; bool is_crtc_enabled = crtc_state->active; @@ -10768,7 +10796,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, struct drm_framebuffer *fb = plane_state->fb; int ret; - if (INTEL_GEN(dev_priv) >= 9 && plane->type != DRM_PLANE_TYPE_CURSOR) { + if (INTEL_GEN(dev_priv) >= 9 && plane->id != PLANE_CURSOR) { ret = skl_update_scaler_plane( to_intel_crtc_state(crtc_state), to_intel_plane_state(plane_state)); @@ -10792,8 +10820,10 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, * per-plane wm computation to the .check_plane() hook, and * only combine the results from all planes in the current place? */ - if (!is_crtc_enabled) + if (!is_crtc_enabled) { plane_state->visible = visible = false; + to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id); + } if (!was_visible && !visible) return 0; @@ -10805,13 +10835,12 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, turn_on = visible && (!was_visible || mode_changed); DRM_DEBUG_ATOMIC("[CRTC:%d:%s] has [PLANE:%d:%s] with fb %i\n", - intel_crtc->base.base.id, - intel_crtc->base.name, - plane->base.id, plane->name, + intel_crtc->base.base.id, intel_crtc->base.name, + plane->base.base.id, plane->base.name, fb ? fb->base.id : -1); DRM_DEBUG_ATOMIC("[PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n", - plane->base.id, plane->name, + plane->base.base.id, plane->base.name, was_visible, visible, turn_off, turn_on, mode_changed); @@ -10819,15 +10848,15 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, pipe_config->update_wm_pre = true; /* must disable cxsr around plane enable/disable */ - if (plane->type != DRM_PLANE_TYPE_CURSOR) + if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; } else if (turn_off) { pipe_config->update_wm_post = true; /* must disable cxsr around plane enable/disable */ - if (plane->type != DRM_PLANE_TYPE_CURSOR) + if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; - } else if (intel_wm_need_update(plane, plane_state)) { + } else if (intel_wm_need_update(&plane->base, plane_state)) { /* FIXME bollocks */ pipe_config->update_wm_pre = true; pipe_config->update_wm_post = true; @@ -10839,7 +10868,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, to_intel_crtc_state(crtc_state)->wm.need_postvbl_update = true; if (visible || was_visible) - pipe_config->fb_bits |= to_intel_plane(plane)->frontbuffer_bit; + pipe_config->fb_bits |= plane->frontbuffer_bit; /* * WaCxSRDisabledForSpriteScaling:ivb @@ -10847,7 +10876,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, * cstate->update_wm was already set above, so this flag will * take effect when we commit and program watermarks. */ - if (plane->type == DRM_PLANE_TYPE_OVERLAY && IS_IVYBRIDGE(dev_priv) && + if (plane->id == PLANE_SPRITE0 && IS_IVYBRIDGE(dev_priv) && needs_scaling(to_intel_plane_state(plane_state)) && !needs_scaling(old_plane_state)) pipe_config->disable_lp_wm = true; @@ -15260,15 +15289,14 @@ static bool primary_get_hw_state(struct intel_plane *plane) /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct intel_crtc *crtc) { - struct drm_plane *primary = crtc->base.primary; - struct intel_plane_state *plane_state = - to_intel_plane_state(primary->state); + struct intel_plane *primary = to_intel_plane(crtc->base.primary); + bool visible; - plane_state->base.visible = crtc->active && - primary_get_hw_state(to_intel_plane(primary)); + visible = crtc->active && primary_get_hw_state(primary); - if (plane_state->base.visible) - crtc->base.state->plane_mask |= 1 << drm_plane_index(primary); + intel_set_plane_visible(to_intel_crtc_state(crtc->base.state), + to_intel_plane_state(primary->base.state), + visible); } static void intel_modeset_readout_hw_state(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9434c293a581..708311837faf 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -695,6 +695,9 @@ struct intel_crtc_state { /* Gamma mode programmed on the pipe */ uint32_t gamma_mode; + + /* bitmask of visible planes (enum plane_id) */ + u8 active_planes; }; struct vlv_wm_state { From f07d43d2da1cbf4f335359a5fe39f773133c2be4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:14:52 +0200 Subject: [PATCH 0537/1299] drm/i915: Track plane fifo sizes under intel_crtc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track the plane fifo sizes under intel_crtc instead of under each intel_plane. Avoids looping over the planes in a bunch of places, and later we'll move this tracking into the crtc state properly. v2: Nuke intel_plane_wm_parameters (Maarten) Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 32 ++------- drivers/gpu/drm/i915/intel_pm.c | 115 +++++++++++++------------------ 2 files changed, 54 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 708311837faf..0ffe527e1099 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -709,6 +709,10 @@ struct vlv_wm_state { bool cxsr; }; +struct vlv_fifo_state { + uint16_t plane[I915_MAX_PLANES]; +}; + struct intel_crtc { struct drm_crtc base; enum pipe pipe; @@ -758,6 +762,8 @@ struct intel_crtc { /* allow CxSR on this pipe */ bool cxsr_allowed; + + struct vlv_fifo_state fifo_state; } wm; int scanline_offset; @@ -775,25 +781,6 @@ struct intel_crtc { struct vlv_wm_state wm_state; }; -struct intel_plane_wm_parameters { - uint32_t horiz_pixels; - uint32_t vert_pixels; - /* - * For packed pixel formats: - * bytes_per_pixel - holds bytes per pixel - * For planar pixel formats: - * bytes_per_pixel - holds bytes per pixel for uv-plane - * y_bytes_per_pixel - holds bytes per pixel for y-plane - */ - uint8_t bytes_per_pixel; - uint8_t y_bytes_per_pixel; - bool enabled; - bool scaled; - u64 tiling; - unsigned int rotation; - uint16_t fifo_size; -}; - struct intel_plane { struct drm_plane base; u8 plane; @@ -803,13 +790,6 @@ struct intel_plane { int max_downscale; uint32_t frontbuffer_bit; - /* Since we need to change the watermarks before/after - * enabling/disabling the planes, we need to store the parameters here - * as the other pieces of the struct may not reflect the values we want - * for the watermark calculations. Currently only Haswell uses this. - */ - struct intel_plane_wm_parameters wm; - /* * NOTE: Do not place new plane state fields here (e.g., when adding * new plane properties). New runtime state should now be placed in diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 50e672f84276..7047b35d0e1d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -415,15 +415,14 @@ static const int pessimal_latency_ns = 5000; #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \ ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8)) -static int vlv_get_fifo_size(struct intel_plane *plane) +static void vlv_get_fifo_size(struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - int sprite0_start, sprite1_start, size; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + enum pipe pipe = crtc->pipe; + int sprite0_start, sprite1_start; - if (plane->id == PLANE_CURSOR) - return 63; - - switch (plane->pipe) { + switch (pipe) { uint32_t dsparb, dsparb2, dsparb3; case PIPE_A: dsparb = I915_READ(DSPARB); @@ -444,26 +443,21 @@ static int vlv_get_fifo_size(struct intel_plane *plane) sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20); break; default: - return 0; + MISSING_CASE(pipe); + return; } - switch (plane->id) { - case PLANE_PRIMARY: - size = sprite0_start; - break; - case PLANE_SPRITE0: - size = sprite1_start - sprite0_start; - break; - case PLANE_SPRITE1: - size = 512 - 1 - sprite1_start; - break; - default: - return 0; - } + fifo_state->plane[PLANE_PRIMARY] = sprite0_start; + fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start; + fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start; + fifo_state->plane[PLANE_CURSOR] = 63; - DRM_DEBUG_KMS("%s FIFO size: %d\n", plane->base.name, size); - - return size; + DRM_DEBUG_KMS("Pipe %c FIFO size: %d/%d/%d/%d\n", + pipe_name(pipe), + fifo_state->plane[PLANE_PRIMARY], + fifo_state->plane[PLANE_SPRITE0], + fifo_state->plane[PLANE_SPRITE1], + fifo_state->plane[PLANE_CURSOR]); } static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane) @@ -1041,8 +1035,9 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, static void vlv_compute_fifo(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->base.dev; struct vlv_wm_state *wm_state = &crtc->wm_state; + struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + struct drm_device *dev = crtc->base.dev; struct intel_plane *plane; unsigned int total_rate = 0; const int fifo_size = 512 - 1; @@ -1052,7 +1047,7 @@ static void vlv_compute_fifo(struct intel_crtc *crtc) struct intel_plane_state *state = to_intel_plane_state(plane->base.state); - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) + if (plane->id == PLANE_CURSOR) continue; if (state->base.visible) { @@ -1066,19 +1061,19 @@ static void vlv_compute_fifo(struct intel_crtc *crtc) to_intel_plane_state(plane->base.state); unsigned int rate; - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) { - plane->wm.fifo_size = 63; + if (plane->id == PLANE_CURSOR) { + fifo_state->plane[plane->id] = 63; continue; } if (!state->base.visible) { - plane->wm.fifo_size = 0; + fifo_state->plane[plane->id] = 0; continue; } rate = state->base.fb->format->cpp[0]; - plane->wm.fifo_size = fifo_size * rate / total_rate; - fifo_left -= plane->wm.fifo_size; + fifo_state->plane[plane->id] = fifo_size * rate / total_rate; + fifo_left -= fifo_state->plane[plane->id]; } fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1); @@ -1090,16 +1085,16 @@ static void vlv_compute_fifo(struct intel_crtc *crtc) if (fifo_left == 0) break; - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) + if (plane->id == PLANE_CURSOR) continue; /* give it all to the first plane if none are active */ - if (plane->wm.fifo_size == 0 && + if (fifo_state->plane[plane->id] == 0 && wm_state->num_active_planes) continue; plane_extra = min(fifo_extra, fifo_left); - plane->wm.fifo_size += plane_extra; + fifo_state->plane[plane->id] += plane_extra; fifo_left -= plane_extra; } @@ -1121,9 +1116,10 @@ static void vlv_invert_wms(struct intel_crtc *crtc) for (level = 0; level < wm_state->num_levels; level++) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; - struct intel_plane *plane; + enum plane_id plane_id; wm_state->sr[level].plane = vlv_invert_wm_value(wm_state->sr[level].plane, @@ -1132,10 +1128,10 @@ static void vlv_invert_wms(struct intel_crtc *crtc) vlv_invert_wm_value(wm_state->sr[level].cursor, 63); - for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { - wm_state->wm[level].plane[plane->id] = - vlv_invert_wm_value(wm_state->wm[level].plane[plane->id], - plane->wm.fifo_size); + for_each_plane_id_on_crtc(crtc, plane_id) { + wm_state->wm[level].plane[plane_id] = + vlv_invert_wm_value(wm_state->wm[level].plane[plane_id], + fifo_state->plane[plane_id]); } } } @@ -1144,6 +1140,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct vlv_wm_state *wm_state = &crtc->wm_state; + const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; struct intel_plane *plane; int level; @@ -1170,7 +1167,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) /* normal watermarks */ for (level = 0; level < wm_state->num_levels; level++) { int wm = vlv_compute_wm_level(crtc->config, state, level); - int max_wm = plane->wm.fifo_size; + int max_wm = fifo_state->plane[plane->id]; /* hack */ if (WARN_ON(level == 0 && wm > max_wm)) @@ -1214,32 +1211,16 @@ static void vlv_compute_wm(struct intel_crtc *crtc) static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *plane; - int sprite0_start = 0, sprite1_start = 0, fifo_size = 0; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + int sprite0_start, sprite1_start, fifo_size; - for_each_intel_plane_on_crtc(dev, crtc, plane) { - switch (plane->id) { - case PLANE_PRIMARY: - sprite0_start = plane->wm.fifo_size; - break; - case PLANE_SPRITE0: - sprite1_start = sprite0_start + plane->wm.fifo_size; - break; - case PLANE_SPRITE1: - fifo_size = sprite1_start + plane->wm.fifo_size; - break; - case PLANE_CURSOR: - WARN_ON(plane->wm.fifo_size != 63); - break; - default: - MISSING_CASE(plane->id); - break; - } - } + sprite0_start = fifo_state->plane[PLANE_PRIMARY]; + sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start; + fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start; - WARN_ON(fifo_size != 512 - 1); + WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63); + WARN_ON(fifo_size != 511); DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n", pipe_name(crtc->pipe), sprite0_start, @@ -4531,14 +4512,14 @@ void vlv_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct vlv_wm_values *wm = &dev_priv->wm.vlv; - struct intel_plane *plane; + struct intel_crtc *crtc; enum pipe pipe; u32 val; vlv_read_wm_values(dev_priv, wm); - for_each_intel_plane(dev, plane) - plane->wm.fifo_size = vlv_get_fifo_size(plane); + for_each_intel_crtc(dev, crtc) + vlv_get_fifo_size(crtc); wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; wm->level = VLV_WM_LEVEL_PM2; From 7eb4941f048f317b59d5bd5683baf76b440dc891 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:14:53 +0200 Subject: [PATCH 0538/1299] drm/i915: Move vlv wms from crtc->wm_state to crtc->wm.active.vlv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In an effort to make the vlv/chv wm code look and behave more like the ilk+ code, let's move the current active wms next to the corresponding ilk wms. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 3 +-- drivers/gpu/drm/i915/intel_pm.c | 10 +++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 0ffe527e1099..8b1900786606 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -758,6 +758,7 @@ struct intel_crtc { /* watermarks currently being used */ union { struct intel_pipe_wm ilk; + struct vlv_wm_state vlv; } active; /* allow CxSR on this pipe */ @@ -777,8 +778,6 @@ struct intel_crtc { /* scalers available on this crtc */ int num_scalers; - - struct vlv_wm_state wm_state; }; struct intel_plane { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7047b35d0e1d..b06f13fa921f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1035,7 +1035,7 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, static void vlv_compute_fifo(struct intel_crtc *crtc) { - struct vlv_wm_state *wm_state = &crtc->wm_state; + struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; struct drm_device *dev = crtc->base.dev; struct intel_plane *plane; @@ -1111,7 +1111,7 @@ static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) static void vlv_invert_wms(struct intel_crtc *crtc) { - struct vlv_wm_state *wm_state = &crtc->wm_state; + struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; int level; for (level = 0; level < wm_state->num_levels; level++) { @@ -1139,7 +1139,7 @@ static void vlv_invert_wms(struct intel_crtc *crtc) static void vlv_compute_wm(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct vlv_wm_state *wm_state = &crtc->wm_state; + struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; struct intel_plane *plane; int level; @@ -1302,7 +1302,7 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, wm->cxsr = true; for_each_intel_crtc(&dev_priv->drm, crtc) { - const struct vlv_wm_state *wm_state = &crtc->wm_state; + const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; if (!crtc->active) continue; @@ -1321,7 +1321,7 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, wm->level = VLV_WM_LEVEL_PM2; for_each_intel_crtc(&dev_priv->drm, crtc) { - struct vlv_wm_state *wm_state = &crtc->wm_state; + const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; enum pipe pipe = crtc->pipe; if (!crtc->active) From 855c79f5212acced123c1a6be3c12601f45e8da9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:14:54 +0200 Subject: [PATCH 0539/1299] drm/i915: Plop vlv wm state into crtc_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relocate the vlv/chv wm state to live under intel_crtc_state. Note that for now this just behaves as a temporary storage. But it'll be easier to conver the thing over to properly pre-computing the state when it's already in the right place. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 30 +++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_pm.c | 32 ++++++++++++++++---------------- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 8b1900786606..6d3323b9204b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -492,6 +492,22 @@ struct skl_pipe_wm { uint32_t linetime; }; +enum vlv_wm_level { + VLV_WM_LEVEL_PM2, + VLV_WM_LEVEL_PM5, + VLV_WM_LEVEL_DDR_DVFS, + NUM_VLV_WM_LEVELS, +}; + +struct vlv_wm_state { + struct vlv_pipe_wm wm[NUM_VLV_WM_LEVELS]; + struct vlv_sr_wm sr[NUM_VLV_WM_LEVELS]; + uint8_t num_active_planes; + uint8_t num_levels; + uint8_t level; + bool cxsr; +}; + struct intel_crtc_wm_state { union { struct { @@ -516,6 +532,11 @@ struct intel_crtc_wm_state { struct skl_pipe_wm optimal; struct skl_ddb_entry ddb; } skl; + + struct { + /* optimal watermarks (inverted) */ + struct vlv_wm_state optimal; + } vlv; }; /* @@ -700,15 +721,6 @@ struct intel_crtc_state { u8 active_planes; }; -struct vlv_wm_state { - struct vlv_pipe_wm wm[3]; - struct vlv_sr_wm sr[3]; - uint8_t num_active_planes; - uint8_t num_levels; - uint8_t level; - bool cxsr; -}; - struct vlv_fifo_state { uint16_t plane[I915_MAX_PLANES]; }; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b06f13fa921f..dfa85ec35482 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -957,12 +957,6 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv, #undef FW_WM_VLV -enum vlv_wm_level { - VLV_WM_LEVEL_PM2, - VLV_WM_LEVEL_PM5, - VLV_WM_LEVEL_DDR_DVFS, -}; - /* latency must be in 0.1us units. */ static unsigned int vlv_wm_method2(unsigned int pixel_rate, unsigned int pipe_htotal, @@ -1033,9 +1027,10 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, return min_t(int, wm, USHRT_MAX); } -static void vlv_compute_fifo(struct intel_crtc *crtc) +static void vlv_compute_fifo(struct intel_crtc_state *crtc_state) { - struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; struct drm_device *dev = crtc->base.dev; struct intel_plane *plane; @@ -1109,9 +1104,10 @@ static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) return fifo_size - wm; } -static void vlv_invert_wms(struct intel_crtc *crtc) +static void vlv_invert_wms(struct intel_crtc_state *crtc_state) { - struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; int level; for (level = 0; level < wm_state->num_levels; level++) { @@ -1136,10 +1132,11 @@ static void vlv_invert_wms(struct intel_crtc *crtc) } } -static void vlv_compute_wm(struct intel_crtc *crtc) +static void vlv_compute_wm(struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; + struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; struct intel_plane *plane; int level; @@ -1151,7 +1148,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) wm_state->num_active_planes = 0; - vlv_compute_fifo(crtc); + vlv_compute_fifo(crtc_state); if (wm_state->num_active_planes != 1) wm_state->cxsr = false; @@ -1166,7 +1163,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) /* normal watermarks */ for (level = 0; level < wm_state->num_levels; level++) { - int wm = vlv_compute_wm_level(crtc->config, state, level); + int wm = vlv_compute_wm_level(crtc_state, state, level); int max_wm = fifo_state->plane[plane->id]; /* hack */ @@ -1203,7 +1200,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level])); } - vlv_invert_wms(crtc); + vlv_invert_wms(crtc_state); } #define VLV_FIFO(plane, value) \ @@ -1351,11 +1348,14 @@ static bool is_enabling(int old, int new, int threshold) static void vlv_update_wm(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); enum pipe pipe = crtc->pipe; struct vlv_wm_values *old_wm = &dev_priv->wm.vlv; struct vlv_wm_values new_wm = {}; - vlv_compute_wm(crtc); + vlv_compute_wm(crtc_state); + crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; vlv_merge_wm(dev_priv, &new_wm); if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) { From 814e7f0bf7b7d5805a5ef030348c25647103b6a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:14:55 +0200 Subject: [PATCH 0540/1299] drm/i915: Plop vlv/chv fifo sizes into crtc state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the vlv/chv FIFO size tracking into the crtc_state. As with the wms for now this just acts as temporary storage. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-6-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 12 ++++++------ drivers/gpu/drm/i915/intel_pm.c | 26 +++++++++++++++----------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6d3323b9204b..26b0ce0837df 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -508,6 +508,10 @@ struct vlv_wm_state { bool cxsr; }; +struct vlv_fifo_state { + u16 plane[I915_MAX_PLANES]; +}; + struct intel_crtc_wm_state { union { struct { @@ -536,6 +540,8 @@ struct intel_crtc_wm_state { struct { /* optimal watermarks (inverted) */ struct vlv_wm_state optimal; + /* display FIFO split */ + struct vlv_fifo_state fifo_state; } vlv; }; @@ -721,10 +727,6 @@ struct intel_crtc_state { u8 active_planes; }; -struct vlv_fifo_state { - uint16_t plane[I915_MAX_PLANES]; -}; - struct intel_crtc { struct drm_crtc base; enum pipe pipe; @@ -775,8 +777,6 @@ struct intel_crtc { /* allow CxSR on this pipe */ bool cxsr_allowed; - - struct vlv_fifo_state fifo_state; } wm; int scanline_offset; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index dfa85ec35482..063bb531703c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -415,10 +415,11 @@ static const int pessimal_latency_ns = 5000; #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \ ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8)) -static void vlv_get_fifo_size(struct intel_crtc *crtc) +static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; enum pipe pipe = crtc->pipe; int sprite0_start, sprite1_start; @@ -1031,7 +1032,7 @@ static void vlv_compute_fifo(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; - struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; struct drm_device *dev = crtc->base.dev; struct intel_plane *plane; unsigned int total_rate = 0; @@ -1108,11 +1109,12 @@ static void vlv_invert_wms(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; int level; for (level = 0; level < wm_state->num_levels; level++) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; enum plane_id plane_id; @@ -1137,7 +1139,8 @@ static void vlv_compute_wm(struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; - const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; struct intel_plane *plane; int level; @@ -1206,10 +1209,12 @@ static void vlv_compute_wm(struct intel_crtc_state *crtc_state) #define VLV_FIFO(plane, value) \ (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV) -static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc) +static void vlv_pipe_set_fifo_size(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; int sprite0_start, sprite1_start, fifo_size; sprite0_start = fifo_state->plane[PLANE_PRIMARY]; @@ -1360,8 +1365,7 @@ static void vlv_update_wm(struct intel_crtc *crtc) if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) { /* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(crtc); - + vlv_pipe_set_fifo_size(crtc_state); return; } @@ -1375,7 +1379,7 @@ static void vlv_update_wm(struct intel_crtc *crtc) _intel_set_memory_cxsr(dev_priv, false); /* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(crtc); + vlv_pipe_set_fifo_size(crtc_state); vlv_write_wm_values(dev_priv, &new_wm); @@ -4519,7 +4523,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) vlv_read_wm_values(dev_priv, wm); for_each_intel_crtc(dev, crtc) - vlv_get_fifo_size(crtc); + vlv_get_fifo_size(to_intel_crtc_state(crtc->base.state)); wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; wm->level = VLV_WM_LEVEL_PM2; From 5012e604891a8ac3eafc7f59335dc688a6c388f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:14:56 +0200 Subject: [PATCH 0541/1299] drm/i915: Compute VLV/CHV FIFO sizes based on the PM2 watermarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's compute the watermarks first and the FIFO size second. This way we can make sure the FIFO split is the most accommodating to the watermarks. Previously we could have potentially computed a FIFO split that couldn't accommodate the PM2 watermarks simply due to a bad split even if the total FIFO size would have been sufficient. It'll also allow us to avoid recomputing the wms for all planes whenever the FIFO split would change. Thus we don't have to add any extra planes to the state when the FIFO needs to be repartitioned. To help with this we'll keep around copies of the non-inverted watermarks in the crtc state. For now that doesn't help too much, but once we start to do the watermark computation only for the planes that change we'll need the non-inverted values around for the other planes. v2: s/noninverted/raw/ for consistency with other platforms Fix the memset() of the "raw" watermarks Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-7-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 2 + drivers/gpu/drm/i915/intel_pm.c | 111 ++++++++++++++----------------- 2 files changed, 53 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 26b0ce0837df..6f6c29cdae73 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -538,6 +538,8 @@ struct intel_crtc_wm_state { } skl; struct { + /* "raw" watermarks (not inverted) */ + struct vlv_pipe_wm raw[NUM_VLV_WM_LEVELS]; /* optimal watermarks (inverted) */ struct vlv_wm_state optimal; /* display FIFO split */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 063bb531703c..a6040abffceb 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1028,73 +1028,70 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, return min_t(int, wm, USHRT_MAX); } -static void vlv_compute_fifo(struct intel_crtc_state *crtc_state) +static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; + const struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2]; struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; - struct drm_device *dev = crtc->base.dev; - struct intel_plane *plane; - unsigned int total_rate = 0; - const int fifo_size = 512 - 1; + unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); + int num_active_planes = hweight32(active_planes); + const int fifo_size = 511; int fifo_extra, fifo_left = fifo_size; + unsigned int total_rate; + enum plane_id plane_id; - for_each_intel_plane_on_crtc(dev, crtc, plane) { - struct intel_plane_state *state = - to_intel_plane_state(plane->base.state); + total_rate = raw->plane[PLANE_PRIMARY] + + raw->plane[PLANE_SPRITE0] + + raw->plane[PLANE_SPRITE1]; - if (plane->id == PLANE_CURSOR) - continue; + if (total_rate > fifo_size) + return -EINVAL; - if (state->base.visible) { - wm_state->num_active_planes++; - total_rate += state->base.fb->format->cpp[0]; - } - } + if (total_rate == 0) + total_rate = 1; - for_each_intel_plane_on_crtc(dev, crtc, plane) { - struct intel_plane_state *state = - to_intel_plane_state(plane->base.state); + for_each_plane_id_on_crtc(crtc, plane_id) { unsigned int rate; - if (plane->id == PLANE_CURSOR) { - fifo_state->plane[plane->id] = 63; + if ((active_planes & BIT(plane_id)) == 0) { + fifo_state->plane[plane_id] = 0; continue; } - if (!state->base.visible) { - fifo_state->plane[plane->id] = 0; - continue; - } - - rate = state->base.fb->format->cpp[0]; - fifo_state->plane[plane->id] = fifo_size * rate / total_rate; - fifo_left -= fifo_state->plane[plane->id]; + rate = raw->plane[plane_id]; + fifo_state->plane[plane_id] = fifo_size * rate / total_rate; + fifo_left -= fifo_state->plane[plane_id]; } - fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1); + fifo_state->plane[PLANE_CURSOR] = 63; + + fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1); /* spread the remainder evenly */ - for_each_intel_plane_on_crtc(dev, crtc, plane) { + for_each_plane_id_on_crtc(crtc, plane_id) { int plane_extra; if (fifo_left == 0) break; - if (plane->id == PLANE_CURSOR) - continue; - - /* give it all to the first plane if none are active */ - if (fifo_state->plane[plane->id] == 0 && - wm_state->num_active_planes) + if ((active_planes & BIT(plane_id)) == 0) continue; plane_extra = min(fifo_extra, fifo_left); - fifo_state->plane[plane->id] += plane_extra; + fifo_state->plane[plane_id] += plane_extra; fifo_left -= plane_extra; } - WARN_ON(fifo_left != 0); + WARN_ON(active_planes != 0 && fifo_left != 0); + + /* give it all to the first plane if none are active */ + if (active_planes == 0) { + WARN_ON(fifo_left != fifo_size); + fifo_state->plane[PLANE_PRIMARY] = fifo_left; + } + + return 0; } static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) @@ -1139,35 +1136,31 @@ static void vlv_compute_wm(struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; - const struct vlv_fifo_state *fifo_state = - &crtc_state->wm.vlv.fifo_state; struct intel_plane *plane; int level; memset(wm_state, 0, sizeof(*wm_state)); + memset(&crtc_state->wm.vlv.raw, 0, sizeof(crtc_state->wm.vlv.raw)); wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed; wm_state->num_levels = dev_priv->wm.max_level + 1; wm_state->num_active_planes = 0; - vlv_compute_fifo(crtc_state); - if (wm_state->num_active_planes != 1) wm_state->cxsr = false; for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { struct intel_plane_state *state = to_intel_plane_state(plane->base.state); - int level; if (!state->base.visible) continue; - /* normal watermarks */ for (level = 0; level < wm_state->num_levels; level++) { + struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; int wm = vlv_compute_wm_level(crtc_state, state, level); - int max_wm = fifo_state->plane[plane->id]; + int max_wm = plane->id == PLANE_CURSOR ? 63 : 511; /* hack */ if (WARN_ON(level == 0 && wm > max_wm)) @@ -1176,25 +1169,23 @@ static void vlv_compute_wm(struct intel_crtc_state *crtc_state) if (wm > max_wm) break; - wm_state->wm[level].plane[plane->id] = wm; + raw->plane[plane->id] = wm; } wm_state->num_levels = level; + } - if (!wm_state->cxsr) - continue; + vlv_compute_fifo(crtc_state); - /* maxfifo watermarks */ - if (plane->id == PLANE_CURSOR) { - for (level = 0; level < wm_state->num_levels; level++) - wm_state->sr[level].cursor = - wm_state->wm[level].plane[PLANE_CURSOR]; - } else { - for (level = 0; level < wm_state->num_levels; level++) - wm_state->sr[level].plane = - max(wm_state->sr[level].plane, - wm_state->wm[level].plane[plane->id]); - } + for (level = 0; level < wm_state->num_levels; level++) { + struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + + wm_state->wm[level] = *raw; + + wm_state->sr[level].plane = max3(raw->plane[PLANE_PRIMARY], + raw->plane[PLANE_SPRITE0], + raw->plane[PLANE_SPRITE1]); + wm_state->sr[level].cursor = raw->plane[PLANE_CURSOR]; } /* clear any (partially) filled invalid levels */ From ff32c54ef1ce774ad3640d4635803ad35d64b8e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:14:57 +0200 Subject: [PATCH 0542/1299] drm/i915: Compute vlv/chv wms the atomic way MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start computing the vlv/chv watermarks the atomic way, from the .compute_pipe_wm() hook. We'll recompute the actual watermarks for only planes that are part of the state, the other planes will keep their watermark from the last time it was computed. And the actual watermark programming will happen from the .initial_watermarks() hook. For now we'll just compute the optimal watermarks, and we'll hook up the intermediate watermarks properly later. The DSPARB registers responsible for the FIFO paritioning are double buffered, so they will be programming from intel_begin_crtc_commit(). v2: s/noninverted/raw/ for consistency with other platforms s/vlv_plane_wm_set/vlv_raw_plane_wm_set/ for clarity Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-8-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 8 + drivers/gpu/drm/i915/intel_display.c | 21 +- drivers/gpu/drm/i915/intel_drv.h | 2 - drivers/gpu/drm/i915/intel_pm.c | 308 ++++++++++++++++++--------- 4 files changed, 231 insertions(+), 108 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 07bf2e32ffa7..059335f23706 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -526,6 +526,14 @@ struct i915_hotplug { for_each_power_well_rev(__dev_priv, __power_well) \ for_each_if ((__power_well)->domains & (__domain_mask)) +#define for_each_intel_plane_in_state(__state, plane, plane_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.dev->mode_config.num_total_plane && \ + ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ + (plane_state) = to_intel_plane_state((__state)->base.planes[__i].state), 1); \ + (__i)++) \ + for_each_if (plane_state) + struct drm_i915_private; struct i915_mm_struct; struct i915_mmu_object; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c4301e0df022..45a7c7e18e3d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5681,6 +5681,8 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv, static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_atomic_state *old_state) { + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); struct drm_crtc *crtc = pipe_config->base.crtc; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -5725,7 +5727,8 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, intel_color_load_luts(&pipe_config->base); - intel_update_watermarks(intel_crtc); + dev_priv->display.initial_watermarks(old_intel_state, + pipe_config); intel_enable_pipe(intel_crtc); assert_vblank_disabled(crtc); @@ -5842,6 +5845,9 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, if (!IS_GEN2(dev_priv)) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); + + if (!dev_priv->display.initial_watermarks) + intel_update_watermarks(intel_crtc); } static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) @@ -11265,10 +11271,13 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) static void clear_intel_crtc_state(struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv = + to_i915(crtc_state->base.crtc->dev); struct drm_crtc_state tmp_state; struct intel_crtc_scaler_state scaler_state; struct intel_dpll_hw_state dpll_hw_state; struct intel_shared_dpll *shared_dpll; + struct intel_crtc_wm_state wm_state; bool force_thru; /* FIXME: before the switch to atomic started, a new pipe_config was @@ -11281,6 +11290,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) shared_dpll = crtc_state->shared_dpll; dpll_hw_state = crtc_state->dpll_hw_state; force_thru = crtc_state->pch_pfit.force_thru; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + wm_state = crtc_state->wm; memset(crtc_state, 0, sizeof *crtc_state); @@ -11289,6 +11300,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) crtc_state->shared_dpll = shared_dpll; crtc_state->dpll_hw_state = dpll_hw_state; crtc_state->pch_pfit.force_thru = force_thru; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + crtc_state->wm = wm_state; } static int @@ -12801,12 +12814,12 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) /* * Make sure we don't call initial_watermarks * for ILK-style watermark updates. + * + * No clue what this is supposed to achieve. */ - if (dev_priv->display.atomic_update_watermarks) + if (INTEL_GEN(dev_priv) >= 9) dev_priv->display.initial_watermarks(intel_state, to_intel_crtc_state(crtc->state)); - else - intel_update_watermarks(intel_crtc); } } } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6f6c29cdae73..2f23521f0e00 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -502,9 +502,7 @@ enum vlv_wm_level { struct vlv_wm_state { struct vlv_pipe_wm wm[NUM_VLV_WM_LEVELS]; struct vlv_sr_wm sr[NUM_VLV_WM_LEVELS]; - uint8_t num_active_planes; uint8_t num_levels; - uint8_t level; bool cxsr; }; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a6040abffceb..df76c4fd4cf3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1094,6 +1094,28 @@ static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) return 0; } +static int vlv_num_wm_levels(struct drm_i915_private *dev_priv) +{ + return dev_priv->wm.max_level + 1; +} + +/* mark all levels starting from 'level' as invalid */ +static void vlv_invalidate_wms(struct intel_crtc *crtc, + struct vlv_wm_state *wm_state, int level) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + for (; level < vlv_num_wm_levels(dev_priv); level++) { + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) + wm_state->wm[level].plane[plane_id] = USHRT_MAX; + + wm_state->sr[level].cursor = USHRT_MAX; + wm_state->sr[level].plane = USHRT_MAX; + } +} + static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) { if (wm > fifo_size) @@ -1102,105 +1124,162 @@ static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) return fifo_size - wm; } -static void vlv_invert_wms(struct intel_crtc_state *crtc_state) +/* + * Starting from 'level' set all higher + * levels to 'value' in the "raw" watermarks. + */ +static void vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, + int level, enum plane_id plane_id, u16 value) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; - const struct vlv_fifo_state *fifo_state = - &crtc_state->wm.vlv.fifo_state; - int level; + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + int num_levels = vlv_num_wm_levels(dev_priv); - for (level = 0; level < wm_state->num_levels; level++) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const int sr_fifo_size = - INTEL_INFO(dev_priv)->num_pipes * 512 - 1; - enum plane_id plane_id; + for (; level < num_levels; level++) { + struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; - wm_state->sr[level].plane = - vlv_invert_wm_value(wm_state->sr[level].plane, - sr_fifo_size); - wm_state->sr[level].cursor = - vlv_invert_wm_value(wm_state->sr[level].cursor, - 63); - - for_each_plane_id_on_crtc(crtc, plane_id) { - wm_state->wm[level].plane[plane_id] = - vlv_invert_wm_value(wm_state->wm[level].plane[plane_id], - fifo_state->plane[plane_id]); - } + raw->plane[plane_id] = value; } } -static void vlv_compute_wm(struct intel_crtc_state *crtc_state) +static void vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + enum plane_id plane_id = plane->id; + int num_levels = vlv_num_wm_levels(to_i915(plane->base.dev)); + int level; + + if (!plane_state->base.visible) { + vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0); + return; + } + + for (level = 0; level < num_levels; level++) { + struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + int wm = vlv_compute_wm_level(crtc_state, plane_state, level); + int max_wm = plane_id == PLANE_CURSOR ? 63 : 511; + + /* FIXME just bail */ + if (WARN_ON(level == 0 && wm > max_wm)) + wm = max_wm; + + if (wm > max_wm) + break; + + raw->plane[plane_id] = wm; + } + + /* mark all higher levels as invalid */ + vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX); + + DRM_DEBUG_KMS("%s wms: [0]=%d,[1]=%d,[2]=%d\n", + plane->base.name, + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]); +} + +static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, + enum plane_id plane_id, int level) +{ + const struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[level]; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + + return raw->plane[plane_id] <= fifo_state->plane[plane_id]; +} + +static bool vlv_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level) +{ + return vlv_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) && + vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) && + vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) && + vlv_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); +} + +static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_atomic_state *state = + to_intel_atomic_state(crtc_state->base.state); struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + int num_active_planes = hweight32(crtc_state->active_planes & + ~BIT(PLANE_CURSOR)); + struct intel_plane_state *plane_state; struct intel_plane *plane; - int level; + enum plane_id plane_id; + int level, ret, i; - memset(wm_state, 0, sizeof(*wm_state)); - memset(&crtc_state->wm.vlv.raw, 0, sizeof(crtc_state->wm.vlv.raw)); - - wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed; - wm_state->num_levels = dev_priv->wm.max_level + 1; - - wm_state->num_active_planes = 0; - - if (wm_state->num_active_planes != 1) - wm_state->cxsr = false; - - for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { - struct intel_plane_state *state = + for_each_intel_plane_in_state(state, plane, plane_state, i) { + const struct intel_plane_state *old_plane_state = to_intel_plane_state(plane->base.state); - if (!state->base.visible) + if (plane_state->base.crtc != &crtc->base && + old_plane_state->base.crtc != &crtc->base) continue; - for (level = 0; level < wm_state->num_levels; level++) { - struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; - int wm = vlv_compute_wm_level(crtc_state, state, level); - int max_wm = plane->id == PLANE_CURSOR ? 63 : 511; - - /* hack */ - if (WARN_ON(level == 0 && wm > max_wm)) - wm = max_wm; - - if (wm > max_wm) - break; - - raw->plane[plane->id] = wm; - } - - wm_state->num_levels = level; + vlv_plane_wm_compute(crtc_state, plane_state); } - vlv_compute_fifo(crtc_state); + /* initially allow all levels */ + wm_state->num_levels = vlv_num_wm_levels(dev_priv); + /* + * Note that enabling cxsr with no primary/sprite planes + * enabled can wedge the pipe. Hence we only allow cxsr + * with exactly one enabled primary/sprite plane. + */ + wm_state->cxsr = crtc->pipe != PIPE_C && + crtc->wm.cxsr_allowed && num_active_planes == 1; + + ret = vlv_compute_fifo(crtc_state); + if (ret) + return ret; for (level = 0; level < wm_state->num_levels; level++) { - struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; - wm_state->wm[level] = *raw; + if (!vlv_crtc_wm_is_valid(crtc_state, level)) + break; - wm_state->sr[level].plane = max3(raw->plane[PLANE_PRIMARY], + for_each_plane_id_on_crtc(crtc, plane_id) { + wm_state->wm[level].plane[plane_id] = + vlv_invert_wm_value(raw->plane[plane_id], + fifo_state->plane[plane_id]); + } + + wm_state->sr[level].plane = + vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY], raw->plane[PLANE_SPRITE0], - raw->plane[PLANE_SPRITE1]); - wm_state->sr[level].cursor = raw->plane[PLANE_CURSOR]; + raw->plane[PLANE_SPRITE1]), + sr_fifo_size); + + wm_state->sr[level].cursor = + vlv_invert_wm_value(raw->plane[PLANE_CURSOR], + 63); } - /* clear any (partially) filled invalid levels */ - for (level = wm_state->num_levels; level < dev_priv->wm.max_level + 1; level++) { - memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level])); - memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level])); - } + if (level == 0) + return -EINVAL; - vlv_invert_wms(crtc_state); + /* limit to only levels we can actually handle */ + wm_state->num_levels = level; + + /* invalidate the higher levels */ + vlv_invalidate_wms(crtc, wm_state, level); + + return 0; } #define VLV_FIFO(plane, value) \ (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV) -static void vlv_pipe_set_fifo_size(const struct intel_crtc_state *crtc_state) +static void vlv_atomic_update_fifo(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1215,10 +1294,6 @@ static void vlv_pipe_set_fifo_size(const struct intel_crtc_state *crtc_state) WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63); WARN_ON(fifo_size != 511); - DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n", - pipe_name(crtc->pipe), sprite0_start, - sprite1_start, fifo_size); - spin_lock(&dev_priv->wm.dsparb_lock); switch (crtc->pipe) { @@ -1317,11 +1392,8 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; enum pipe pipe = crtc->pipe; - if (!crtc->active) - continue; - wm->pipe[pipe] = wm_state->wm[wm->level]; - if (wm->cxsr) + if (crtc->active && wm->cxsr) wm->sr = wm_state->sr[wm->level]; wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2; @@ -1341,24 +1413,15 @@ static bool is_enabling(int old, int new, int threshold) return old < threshold && new >= threshold; } -static void vlv_update_wm(struct intel_crtc *crtc) +static void vlv_program_watermarks(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); - enum pipe pipe = crtc->pipe; struct vlv_wm_values *old_wm = &dev_priv->wm.vlv; struct vlv_wm_values new_wm = {}; - vlv_compute_wm(crtc_state); - crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; vlv_merge_wm(dev_priv, &new_wm); - if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) { - /* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(crtc_state); + if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) return; - } if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS)) chv_set_memory_dvfs(dev_priv, false); @@ -1369,17 +1432,8 @@ static void vlv_update_wm(struct intel_crtc *crtc) if (is_disabling(old_wm->cxsr, new_wm.cxsr, true)) _intel_set_memory_cxsr(dev_priv, false); - /* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(crtc_state); - vlv_write_wm_values(dev_priv, &new_wm); - DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, " - "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n", - pipe_name(pipe), new_wm.pipe[pipe].plane[PLANE_PRIMARY], new_wm.pipe[pipe].plane[PLANE_CURSOR], - new_wm.pipe[pipe].plane[PLANE_SPRITE0], new_wm.pipe[pipe].plane[PLANE_SPRITE1], - new_wm.sr.plane, new_wm.sr.cursor, new_wm.level, new_wm.cxsr); - if (is_enabling(old_wm->cxsr, new_wm.cxsr, true)) _intel_set_memory_cxsr(dev_priv, true); @@ -1392,6 +1446,18 @@ static void vlv_update_wm(struct intel_crtc *crtc) *old_wm = new_wm; } +static void vlv_initial_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + + mutex_lock(&dev_priv->wm.wm_mutex); + crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; + vlv_program_watermarks(dev_priv); + mutex_unlock(&dev_priv->wm.wm_mutex); +} + #define single_plane_enabled(mask) is_power_of_2(mask) static void g4x_update_wm(struct intel_crtc *crtc) @@ -4508,14 +4574,10 @@ void vlv_wm_get_hw_state(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); struct vlv_wm_values *wm = &dev_priv->wm.vlv; struct intel_crtc *crtc; - enum pipe pipe; u32 val; vlv_read_wm_values(dev_priv, wm); - for_each_intel_crtc(dev, crtc) - vlv_get_fifo_size(to_intel_crtc_state(crtc->base.state)); - wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; wm->level = VLV_WM_LEVEL_PM2; @@ -4553,13 +4615,53 @@ void vlv_wm_get_hw_state(struct drm_device *dev) mutex_unlock(&dev_priv->rps.hw_lock); } - for_each_pipe(dev_priv, pipe) + for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct vlv_wm_state *active = &crtc->wm.active.vlv; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + enum pipe pipe = crtc->pipe; + enum plane_id plane_id; + int level; + + vlv_get_fifo_size(crtc_state); + + active->num_levels = wm->level + 1; + active->cxsr = wm->cxsr; + + /* FIXME sanitize things more */ + for (level = 0; level < active->num_levels; level++) { + struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[level]; + + active->sr[level].plane = wm->sr.plane; + active->sr[level].cursor = wm->sr.cursor; + + for_each_plane_id_on_crtc(crtc, plane_id) { + active->wm[level].plane[plane_id] = + wm->pipe[pipe].plane[plane_id]; + + raw->plane[plane_id] = + vlv_invert_wm_value(active->wm[level].plane[plane_id], + fifo_state->plane[plane_id]); + } + } + + for_each_plane_id_on_crtc(crtc, plane_id) + vlv_raw_plane_wm_set(crtc_state, level, + plane_id, USHRT_MAX); + vlv_invalidate_wms(crtc, active, level); + + crtc_state->wm.vlv.optimal = *active; + DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", pipe_name(pipe), wm->pipe[pipe].plane[PLANE_PRIMARY], wm->pipe[pipe].plane[PLANE_CURSOR], wm->pipe[pipe].plane[PLANE_SPRITE0], wm->pipe[pipe].plane[PLANE_SPRITE1]); + } DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); @@ -7717,7 +7819,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv) } } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_setup_wm_latency(dev_priv); - dev_priv->display.update_wm = vlv_update_wm; + dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm; + dev_priv->display.initial_watermarks = vlv_initial_watermarks; + dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo; } else if (IS_PINEVIEW(dev_priv)) { if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), dev_priv->is_ddr3, From 236c48e692458fb55ec96ac1823a176f2bce09f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:14:58 +0200 Subject: [PATCH 0543/1299] drm/i915: Skip useless watermark/FIFO related work on VLV/CHV when not needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check whether anything relevant has actually change when we compute new watermarks for each plane in the state. If the watermarks for no primary/sprite planes changed we don't have to recompute the FIFO split or reprogram the DSBARB registers. And even the cursor watermarks didn't change we can skip the merge+invert step between all the planes on the pipe as well. v2: s/noninverted/raw/ for consistency with other platforms v3: Drop duplicated vlv_get_fifo_size() call during init Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-9-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_atomic.c | 1 + drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 71 +++++++++++++++++++++++------ 3 files changed, 58 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index e65818e5d2ab..50fb1f76cc5f 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -99,6 +99,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->update_wm_pre = false; crtc_state->update_wm_post = false; crtc_state->fb_changed = false; + crtc_state->fifo_changed = false; crtc_state->wm.need_postvbl_update = false; crtc_state->fb_bits = 0; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 2f23521f0e00..1e05da992aa0 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -573,6 +573,7 @@ struct intel_crtc_state { bool disable_cxsr; bool update_wm_pre, update_wm_post; /* watermarks are updated */ bool fb_changed; /* fb on any of the planes is changed */ + bool fifo_changed; /* FIFO split is changed */ /* Pipe source size (ie. panel fitter input size) * All planes will be positioned inside this space, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index df76c4fd4cf3..39bc4206be7a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1128,30 +1128,35 @@ static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) * Starting from 'level' set all higher * levels to 'value' in the "raw" watermarks. */ -static void vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, +static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, int level, enum plane_id plane_id, u16 value) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); int num_levels = vlv_num_wm_levels(dev_priv); + bool dirty = false; for (; level < num_levels; level++) { struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + dirty |= raw->plane[plane_id] != value; raw->plane[plane_id] = value; } + + return dirty; } -static void vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, +static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->base.plane); enum plane_id plane_id = plane->id; int num_levels = vlv_num_wm_levels(to_i915(plane->base.dev)); int level; + bool dirty = false; if (!plane_state->base.visible) { - vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0); - return; + dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0); + goto out; } for (level = 0; level < num_levels; level++) { @@ -1166,17 +1171,22 @@ static void vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, if (wm > max_wm) break; + dirty |= raw->plane[plane_id] != wm; raw->plane[plane_id] = wm; } /* mark all higher levels as invalid */ - vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX); + dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX); - DRM_DEBUG_KMS("%s wms: [0]=%d,[1]=%d,[2]=%d\n", - plane->base.name, - crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], - crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], - crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]); +out: + if (dirty) + DRM_DEBUG_KMS("%s wms: [0]=%d,[1]=%d,[2]=%d\n", + plane->base.name, + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]); + + return dirty; } static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, @@ -1209,10 +1219,12 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) &crtc_state->wm.vlv.fifo_state; int num_active_planes = hweight32(crtc_state->active_planes & ~BIT(PLANE_CURSOR)); + bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base); struct intel_plane_state *plane_state; struct intel_plane *plane; enum plane_id plane_id; int level, ret, i; + unsigned int dirty = 0; for_each_intel_plane_in_state(state, plane, plane_state, i) { const struct intel_plane_state *old_plane_state = @@ -1222,7 +1234,37 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) old_plane_state->base.crtc != &crtc->base) continue; - vlv_plane_wm_compute(crtc_state, plane_state); + if (vlv_plane_wm_compute(crtc_state, plane_state)) + dirty |= BIT(plane->id); + } + + /* + * DSPARB registers may have been reset due to the + * power well being turned off. Make sure we restore + * them to a consistent state even if no primary/sprite + * planes are initially active. + */ + if (needs_modeset) + crtc_state->fifo_changed = true; + + if (!dirty) + return 0; + + /* cursor changes don't warrant a FIFO recompute */ + if (dirty & ~BIT(PLANE_CURSOR)) { + const struct intel_crtc_state *old_crtc_state = + to_intel_crtc_state(crtc->base.state); + const struct vlv_fifo_state *old_fifo_state = + &old_crtc_state->wm.vlv.fifo_state; + + ret = vlv_compute_fifo(crtc_state); + if (ret) + return ret; + + if (needs_modeset || + memcmp(old_fifo_state, fifo_state, + sizeof(*fifo_state)) != 0) + crtc_state->fifo_changed = true; } /* initially allow all levels */ @@ -1235,10 +1277,6 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed && num_active_planes == 1; - ret = vlv_compute_fifo(crtc_state); - if (ret) - return ret; - for (level = 0; level < wm_state->num_levels; level++) { const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; @@ -1287,6 +1325,9 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, &crtc_state->wm.vlv.fifo_state; int sprite0_start, sprite1_start, fifo_size; + if (!crtc_state->fifo_changed) + return; + sprite0_start = fifo_state->plane[PLANE_PRIMARY]; sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start; fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start; From 4841da51a753ca65909441e561236de82b6b48a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:14:59 +0200 Subject: [PATCH 0544/1299] drm/i915: Compute proper intermediate wms for vlv/cvh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the watermark registers arent double buffered on VLV/CHV, we'll need to play around with intermediate watermarks same was as we do on ILK-BDW. The watermark registers on VLV/CHV contain inverted values, so to find the intermediate watermark value we just take the minimum of the active and optimal values. This also means that, unlike ILK-BDW, there's no chance that we'd fail to find a working intermediate watermarks. As long as both the active and optimal watermarks are valid the intermediate watermarks will come out valid as well. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-10-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 2 ++ drivers/gpu/drm/i915/intel_pm.c | 59 +++++++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 1e05da992aa0..9bab6124cde8 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -538,6 +538,8 @@ struct intel_crtc_wm_state { struct { /* "raw" watermarks (not inverted) */ struct vlv_pipe_wm raw[NUM_VLV_WM_LEVELS]; + /* intermediate watermarks (inverted) */ + struct vlv_wm_state intermediate; /* optimal watermarks (inverted) */ struct vlv_wm_state optimal; /* display FIFO split */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 39bc4206be7a..824d5b0806b9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1401,6 +1401,45 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, #undef VLV_FIFO +static int vlv_compute_intermediate_wm(struct drm_device *dev, + struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) +{ + struct vlv_wm_state *intermediate = &crtc_state->wm.vlv.intermediate; + const struct vlv_wm_state *optimal = &crtc_state->wm.vlv.optimal; + const struct vlv_wm_state *active = &crtc->wm.active.vlv; + int level; + + intermediate->num_levels = min(optimal->num_levels, active->num_levels); + intermediate->cxsr = optimal->cxsr & active->cxsr; + + for (level = 0; level < intermediate->num_levels; level++) { + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) { + intermediate->wm[level].plane[plane_id] = + min(optimal->wm[level].plane[plane_id], + active->wm[level].plane[plane_id]); + } + + intermediate->sr[level].plane = min(optimal->sr[level].plane, + active->sr[level].plane); + intermediate->sr[level].cursor = min(optimal->sr[level].cursor, + active->sr[level].cursor); + } + + vlv_invalidate_wms(crtc, intermediate, level); + + /* + * If our intermediate WM are identical to the final WM, then we can + * omit the post-vblank programming; only update if it's different. + */ + if (memcmp(intermediate, optimal, sizeof(*intermediate)) == 0) + crtc_state->wm.need_postvbl_update = false; + + return 0; +} + static void vlv_merge_wm(struct drm_i915_private *dev_priv, struct vlv_wm_values *wm) { @@ -1494,7 +1533,22 @@ static void vlv_initial_watermarks(struct intel_atomic_state *state, struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); mutex_lock(&dev_priv->wm.wm_mutex); - crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; + crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate; + vlv_program_watermarks(dev_priv); + mutex_unlock(&dev_priv->wm.wm_mutex); +} + +static void vlv_optimize_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + + if (!crtc_state->wm.need_postvbl_update) + return; + + mutex_lock(&dev_priv->wm.wm_mutex); + intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; vlv_program_watermarks(dev_priv); mutex_unlock(&dev_priv->wm.wm_mutex); } @@ -4695,6 +4749,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) vlv_invalidate_wms(crtc, active, level); crtc_state->wm.vlv.optimal = *active; + crtc_state->wm.vlv.intermediate = *active; DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", pipe_name(pipe), @@ -7861,7 +7916,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv) } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_setup_wm_latency(dev_priv); dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm; + dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm; dev_priv->display.initial_watermarks = vlv_initial_watermarks; + dev_priv->display.optimize_watermarks = vlv_optimize_watermarks; dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo; } else if (IS_PINEVIEW(dev_priv)) { if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), From 5eeb798bbe398bbbcb09d7e6851825cb584b5bf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:15:00 +0200 Subject: [PATCH 0545/1299] drm/i915: Nuke crtc->wm.cxsr_allowed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove crtc->wm.cxsr_allowed and just rely on crtc_state->disable_cxsr instead. This was used only by vlv/chv to indicate whether to enable cxsr in the wm computation. That doesn't really work anymore, and as far as the optimal watermarks go we'll just consider the number of planes and the current pipe, and for the intermediate watermarks we'll also start to consider disable_cxsr which is set appropriately when planes are being enabled/disabled. We'll also flip over the crtc_state->wm.need_postvbl_update setup so that it's the wm code that will set it. Previously the generic code set it up, and then the wm code cleared it again if it thought it's not needed after all. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-11-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 44 ++++++++-------------------- drivers/gpu/drm/i915/intel_drv.h | 3 -- drivers/gpu/drm/i915/intel_pm.c | 14 ++++----- 3 files changed, 20 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 45a7c7e18e3d..83f30560217d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4997,8 +4997,6 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits); - crtc->wm.cxsr_allowed = true; - if (pipe_config->update_wm_post && pipe_config->base.active) intel_update_watermarks(crtc); @@ -5045,22 +5043,18 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state) intel_pre_disable_primary(&crtc->base); } - if (pipe_config->disable_cxsr && HAS_GMCH_DISPLAY(dev_priv)) { - crtc->wm.cxsr_allowed = false; - - /* - * Vblank time updates from the shadow to live plane control register - * are blocked if the memory self-refresh mode is active at that - * moment. So to make sure the plane gets truly disabled, disable - * first the self-refresh mode. The self-refresh enable bit in turn - * will be checked/applied by the HW only at the next frame start - * event which is after the vblank start event, so we need to have a - * wait-for-vblank between disabling the plane and the pipe. - */ - if (old_crtc_state->base.active && - intel_set_memory_cxsr(dev_priv, false)) - intel_wait_for_vblank(dev_priv, crtc->pipe); - } + /* + * Vblank time updates from the shadow to live plane control register + * are blocked if the memory self-refresh mode is active at that + * moment. So to make sure the plane gets truly disabled, disable + * first the self-refresh mode. The self-refresh enable bit in turn + * will be checked/applied by the HW only at the next frame start + * event which is after the vblank start event, so we need to have a + * wait-for-vblank between disabling the plane and the pipe. + */ + if (HAS_GMCH_DISPLAY(dev_priv) && old_crtc_state->base.active && + pipe_config->disable_cxsr && intel_set_memory_cxsr(dev_priv, false)) + intel_wait_for_vblank(dev_priv, crtc->pipe); /* * IVB workaround: must disable low power watermarks for at least @@ -10868,11 +10862,6 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, pipe_config->update_wm_post = true; } - /* Pre-gen9 platforms need two-step watermark updates */ - if ((pipe_config->update_wm_pre || pipe_config->update_wm_post) && - INTEL_GEN(dev_priv) < 9 && dev_priv->display.optimize_watermarks) - to_intel_crtc_state(crtc_state)->wm.need_postvbl_update = true; - if (visible || was_visible) pipe_config->fb_bits |= plane->frontbuffer_bit; @@ -12616,12 +12605,7 @@ static bool needs_vblank_wait(struct intel_crtc_state *crtc_state) if (crtc_state->update_wm_post) return true; - /* - * cxsr is re-enabled after vblank. - * This is already handled by crtc_state->update_wm_post, - * but added for clarity. - */ - if (crtc_state->disable_cxsr) + if (crtc_state->wm.need_postvbl_update) return true; return false; @@ -13890,8 +13874,6 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) intel_crtc->cursor_cntl = ~0; intel_crtc->cursor_size = ~0; - intel_crtc->wm.cxsr_allowed = true; - /* initialize shared scalers */ intel_crtc_init_scalers(intel_crtc, crtc_state); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9bab6124cde8..f503313f6e65 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -777,9 +777,6 @@ struct intel_crtc { struct intel_pipe_wm ilk; struct vlv_wm_state vlv; } active; - - /* allow CxSR on this pipe */ - bool cxsr_allowed; } wm; int scanline_offset; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 824d5b0806b9..af7fb532d806 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1274,8 +1274,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) * enabled can wedge the pipe. Hence we only allow cxsr * with exactly one enabled primary/sprite plane. */ - wm_state->cxsr = crtc->pipe != PIPE_C && - crtc->wm.cxsr_allowed && num_active_planes == 1; + wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1; for (level = 0; level < wm_state->num_levels; level++) { const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; @@ -1411,7 +1410,8 @@ static int vlv_compute_intermediate_wm(struct drm_device *dev, int level; intermediate->num_levels = min(optimal->num_levels, active->num_levels); - intermediate->cxsr = optimal->cxsr & active->cxsr; + intermediate->cxsr = optimal->cxsr && active->cxsr && + !crtc_state->disable_cxsr; for (level = 0; level < intermediate->num_levels; level++) { enum plane_id plane_id; @@ -1434,8 +1434,8 @@ static int vlv_compute_intermediate_wm(struct drm_device *dev, * If our intermediate WM are identical to the final WM, then we can * omit the post-vblank programming; only update if it's different. */ - if (memcmp(intermediate, optimal, sizeof(*intermediate)) == 0) - crtc_state->wm.need_postvbl_update = false; + if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0) + crtc_state->wm.need_postvbl_update = true; return 0; } @@ -2628,8 +2628,8 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, * If our intermediate WM are identical to the final WM, then we can * omit the post-vblank programming; only update if it's different. */ - if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) == 0) - newstate->wm.need_postvbl_update = false; + if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0) + newstate->wm.need_postvbl_update = true; return 0; } From b4ede6dfa0c5a952161bac6c5f840469a13386ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:15:01 +0200 Subject: [PATCH 0546/1299] drm/i915: Only use update_wm_{pre,post} for pre-ilk platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that vlv/chv have more proper wm programming support, let's reduce the the update_wm_{pre,post} flags to only cover the pre-ilk platforms. When we finally convert those as well we can drop these flags entirely. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-12-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 83f30560217d..089f7e86c706 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10845,21 +10845,25 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, turn_off, turn_on, mode_changed); if (turn_on) { - pipe_config->update_wm_pre = true; + if (INTEL_GEN(dev_priv) < 5) + pipe_config->update_wm_pre = true; /* must disable cxsr around plane enable/disable */ if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; } else if (turn_off) { - pipe_config->update_wm_post = true; + if (INTEL_GEN(dev_priv) < 5) + pipe_config->update_wm_post = true; /* must disable cxsr around plane enable/disable */ if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; } else if (intel_wm_need_update(&plane->base, plane_state)) { - /* FIXME bollocks */ - pipe_config->update_wm_pre = true; - pipe_config->update_wm_post = true; + if (INTEL_GEN(dev_priv) < 5) { + /* FIXME bollocks */ + pipe_config->update_wm_pre = true; + pipe_config->update_wm_post = true; + } } if (visible || was_visible) From 602ae835505603eca8b50d813e00a56f56158434 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:15:02 +0200 Subject: [PATCH 0547/1299] drm/i915: Sanitize VLV/CHV watermarks properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clear out the watermark for all disabled planes to 0. This is required to avoid falsely thinking that the inherited watermarks are bogus in case the watermark is actually higher than the FIFO size. v2: s/noninverted/raw/ for consistency with other platforms Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-13-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 17 ++++++---- drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 50 +++++++++++++++++++++++++++- 3 files changed, 61 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 089f7e86c706..1c22f8639b28 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3478,7 +3478,8 @@ __intel_display_resume(struct drm_device *dev, } /* ignore any reset values/BIOS leftovers in the WM registers */ - to_intel_atomic_state(state)->skip_intermediate_wm = true; + if (!HAS_GMCH_DISPLAY(to_i915(dev))) + to_intel_atomic_state(state)->skip_intermediate_wm = true; ret = drm_atomic_commit(state); @@ -14877,7 +14878,8 @@ static void sanitize_watermarks(struct drm_device *dev) * intermediate watermarks (since we don't trust the current * watermarks). */ - intel_state->skip_intermediate_wm = true; + if (!HAS_GMCH_DISPLAY(dev_priv)) + intel_state->skip_intermediate_wm = true; ret = intel_atomic_check(dev, state); if (ret) { @@ -15040,7 +15042,8 @@ int intel_modeset_init(struct drm_device *dev) * Note that we need to do this after reconstructing the BIOS fb's * since the watermark calculation done here will use pstate->fb. */ - sanitize_watermarks(dev); + if (!HAS_GMCH_DISPLAY(dev_priv)) + sanitize_watermarks(dev); return 0; } @@ -15511,12 +15514,14 @@ intel_modeset_setup_hw_state(struct drm_device *dev) pll->on = false; } - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_wm_get_hw_state(dev); - else if (IS_GEN9(dev_priv)) + vlv_wm_sanitize(dev_priv); + } else if (IS_GEN9(dev_priv)) { skl_wm_get_hw_state(dev); - else if (HAS_PCH_SPLIT(dev_priv)) + } else if (HAS_PCH_SPLIT(dev_priv)) { ilk_wm_get_hw_state(dev); + } for_each_intel_crtc(dev, crtc) { u64 put_domains; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f503313f6e65..3ef044efe98c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1824,6 +1824,7 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb /* out */); void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc, struct skl_pipe_wm *out); +void vlv_wm_sanitize(struct drm_i915_private *dev_priv); bool intel_can_enable_sagv(struct drm_atomic_state *state); int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index af7fb532d806..7caed0006ee7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4725,7 +4725,6 @@ void vlv_wm_get_hw_state(struct drm_device *dev) active->num_levels = wm->level + 1; active->cxsr = wm->cxsr; - /* FIXME sanitize things more */ for (level = 0; level < active->num_levels; level++) { struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; @@ -4763,6 +4762,55 @@ void vlv_wm_get_hw_state(struct drm_device *dev) wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); } +void vlv_wm_sanitize(struct drm_i915_private *dev_priv) +{ + struct intel_plane *plane; + struct intel_crtc *crtc; + + mutex_lock(&dev_priv->wm.wm_mutex); + + for_each_intel_plane(&dev_priv->drm, plane) { + struct intel_crtc *crtc = + intel_get_crtc_for_pipe(dev_priv, plane->pipe); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + enum plane_id plane_id = plane->id; + int level; + + if (plane_state->base.visible) + continue; + + for (level = 0; level < wm_state->num_levels; level++) { + struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[level]; + + raw->plane[plane_id] = 0; + + wm_state->wm[level].plane[plane_id] = + vlv_invert_wm_value(raw->plane[plane_id], + fifo_state->plane[plane_id]); + } + } + + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + crtc_state->wm.vlv.intermediate = + crtc_state->wm.vlv.optimal; + crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; + } + + vlv_program_watermarks(dev_priv); + + mutex_unlock(&dev_priv->wm.wm_mutex); +} + void ilk_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); From 1a10ae6ba8c3df4af9bcdb6f40348af71e72bd13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:15:03 +0200 Subject: [PATCH 0548/1299] drm/i915: Workaround VLV/CHV sprite1->sprite0 enable underrun MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On VLV/CHV enabling sprite0 when sprite1 has already been enabled may lead to an underrun. This only happens when sprite0 FIFO size is zero prior to enabling it. Hence an effective workaround is to always allocate at least one cacheline for sprite0 when sprite1 is active. I've not observed this sort of failure during any other type of plane enable/disable sequence. v2: s/noninverted/raw/ for consistency with other platforms Testcase: igt/kms_plane_blinker Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-14-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_pm.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7caed0006ee7..6e2acfd687a1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1028,6 +1028,12 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, return min_t(int, wm, USHRT_MAX); } +static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes) +{ + return (active_planes & (BIT(PLANE_SPRITE0) | + BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1); +} + static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); @@ -1038,12 +1044,25 @@ static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) int num_active_planes = hweight32(active_planes); const int fifo_size = 511; int fifo_extra, fifo_left = fifo_size; + int sprite0_fifo_extra = 0; unsigned int total_rate; enum plane_id plane_id; + /* + * When enabling sprite0 after sprite1 has already been enabled + * we tend to get an underrun unless sprite0 already has some + * FIFO space allcoated. Hence we always allocate at least one + * cacheline for sprite0 whenever sprite1 is enabled. + * + * All other plane enable sequences appear immune to this problem. + */ + if (vlv_need_sprite0_fifo_workaround(active_planes)) + sprite0_fifo_extra = 1; + total_rate = raw->plane[PLANE_PRIMARY] + raw->plane[PLANE_SPRITE0] + - raw->plane[PLANE_SPRITE1]; + raw->plane[PLANE_SPRITE1] + + sprite0_fifo_extra; if (total_rate > fifo_size) return -EINVAL; @@ -1064,6 +1083,9 @@ static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) fifo_left -= fifo_state->plane[plane_id]; } + fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra; + fifo_left -= sprite0_fifo_extra; + fifo_state->plane[PLANE_CURSOR] = 63; fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1); From 7373728ddf5ebb199b8e000d8e097eb534ab6e6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:15:04 +0200 Subject: [PATCH 0549/1299] drm/i915: Kill level 0 wm hack for VLV/CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now compute the watermarks correctly, so just return an error if we can't support the configuration. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-15-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_pm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6e2acfd687a1..9e4c7a0b0407 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1186,10 +1186,6 @@ static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, int wm = vlv_compute_wm_level(crtc_state, plane_state, level); int max_wm = plane_id == PLANE_CURSOR ? 63 : 511; - /* FIXME just bail */ - if (WARN_ON(level == 0 && wm > max_wm)) - wm = max_wm; - if (wm > max_wm) break; From 722595362cad7f254d8930b9576e4202010917b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:15:05 +0200 Subject: [PATCH 0550/1299] drm/i915: Add plane update/disable tracepoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tracepoints for plane programming. The tracepoints will dump the frame and scanline counters, so this can be used to verify eg. that the plane gets reprogrammed at the right time with respect to watermark programming (if we have appropriate tracepoints for that as well). v2: Rebase due to legacy cursor changes Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-16-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_irq.c | 3 ++ drivers/gpu/drm/i915/i915_trace.h | 56 +++++++++++++++++++++++ drivers/gpu/drm/i915/intel_atomic_plane.c | 11 ++++- drivers/gpu/drm/i915/intel_display.c | 15 ++++-- 4 files changed, 80 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e0aa686f9c68..675c28d54658 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -783,6 +783,9 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) enum pipe pipe = crtc->pipe; int position, vtotal; + if (!crtc->active) + return -1; + vtotal = mode->crtc_vtotal; if (mode->flags & DRM_MODE_FLAG_INTERLACE) vtotal /= 2; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 2dbef3147a60..6cb03bd40aef 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -14,6 +14,62 @@ #define TRACE_SYSTEM i915 #define TRACE_INCLUDE_FILE i915_trace +/* plane updates */ + +TRACE_EVENT(intel_update_plane, + TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), + TP_ARGS(plane, crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(const char *, name) + __field(u32, frame) + __field(u32, scanline) + __array(int, src, 4) + __array(int, dst, 4) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->name = plane->name; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + memcpy(__entry->src, &plane->state->src, sizeof(__entry->src)); + memcpy(__entry->dst, &plane->state->dst, sizeof(__entry->dst)); + ), + + TP_printk("pipe %c, plane %s, frame=%u, scanline=%u, " DRM_RECT_FP_FMT " -> " DRM_RECT_FMT, + pipe_name(__entry->pipe), __entry->name, + __entry->frame, __entry->scanline, + DRM_RECT_FP_ARG((const struct drm_rect *)__entry->src), + DRM_RECT_ARG((const struct drm_rect *)__entry->dst)) +); + +TRACE_EVENT(intel_disable_plane, + TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), + TP_ARGS(plane, crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(const char *, name) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->name = plane->name; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + ), + + TP_printk("pipe %c, plane %s, frame=%u, scanline=%u", + pipe_name(__entry->pipe), __entry->name, + __entry->frame, __entry->scanline) +); + /* pipe updates */ TRACE_EVENT(i915_pipe_update_start, diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 1eaf840cf9ff..cfb47293fd53 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -231,12 +231,19 @@ static void intel_plane_atomic_update(struct drm_plane *plane, to_intel_plane_state(plane->state); struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc; - if (intel_state->base.visible) + if (intel_state->base.visible) { + trace_intel_update_plane(plane, + to_intel_crtc(crtc)); + intel_plane->update_plane(plane, to_intel_crtc_state(crtc->state), intel_state); - else + } else { + trace_intel_disable_plane(plane, + to_intel_crtc(crtc)); + intel_plane->disable_plane(plane, crtc); + } } const struct drm_plane_helper_funcs intel_plane_helper_funcs = { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1c22f8639b28..72e2c91707d4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2754,6 +2754,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, to_intel_plane_state(plane_state), false); intel_pre_disable_primary_noatomic(&intel_crtc->base); + trace_intel_disable_plane(primary, intel_crtc); intel_plane->disable_plane(primary, &intel_crtc->base); return; @@ -3447,10 +3448,14 @@ static void intel_update_primary_planes(struct drm_device *dev) struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); - if (plane_state->base.visible) + if (plane_state->base.visible) { + trace_intel_update_plane(&plane->base, + to_intel_crtc(crtc)); + plane->update_plane(&plane->base, to_intel_crtc_state(crtc->state), plane_state); + } } } @@ -13491,12 +13496,15 @@ intel_legacy_cursor_update(struct drm_plane *plane, new_plane_state->fb = old_fb; to_intel_plane_state(new_plane_state)->vma = old_vma; - if (plane->state->visible) + if (plane->state->visible) { + trace_intel_update_plane(plane, to_intel_crtc(crtc)); intel_plane->update_plane(plane, to_intel_crtc_state(crtc->state), to_intel_plane_state(plane->state)); - else + } else { + trace_intel_disable_plane(plane, to_intel_crtc(crtc)); intel_plane->disable_plane(plane, crtc); + } intel_cleanup_plane_fb(plane, new_plane_state); @@ -15145,6 +15153,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) continue; + trace_intel_disable_plane(&plane->base, crtc); plane->disable_plane(&plane->base, &crtc->base); } } From c137d6605fd73635900597d386e3fa8af26d7787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:15:06 +0200 Subject: [PATCH 0551/1299] drm/i915: Add VLV/CHV watermark/FIFO programming tracepoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tracepoints for observing the WM/FIFO programming on VLV/CHV. When compared with the plane and pipe update tracepoints this can be used to verify that everything is performed in the right sequence. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-17-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_trace.h | 71 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 4 ++ 2 files changed, 75 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 6cb03bd40aef..ca8198785311 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -14,6 +14,77 @@ #define TRACE_SYSTEM i915 #define TRACE_INCLUDE_FILE i915_trace +/* watermark/fifo updates */ + +TRACE_EVENT(vlv_wm, + TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm), + TP_ARGS(crtc, wm), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u32, level) + __field(u32, cxsr) + __field(u32, primary) + __field(u32, sprite0) + __field(u32, sprite1) + __field(u32, cursor) + __field(u32, sr_plane) + __field(u32, sr_cursor) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->level = wm->level; + __entry->cxsr = wm->cxsr; + __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY]; + __entry->sprite0 = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0]; + __entry->sprite1 = wm->pipe[crtc->pipe].plane[PLANE_SPRITE1]; + __entry->cursor = wm->pipe[crtc->pipe].plane[PLANE_CURSOR]; + __entry->sr_plane = wm->sr.plane; + __entry->sr_cursor = wm->sr.cursor; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, level=%d, cxsr=%d, wm %d/%d/%d/%d, sr %d/%d", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline, __entry->level, __entry->cxsr, + __entry->primary, __entry->sprite0, __entry->sprite1, __entry->cursor, + __entry->sr_plane, __entry->sr_cursor) +); + +TRACE_EVENT(vlv_fifo_size, + TP_PROTO(struct intel_crtc *crtc, u32 sprite0_start, u32 sprite1_start, u32 fifo_size), + TP_ARGS(crtc, sprite0_start, sprite1_start, fifo_size), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u32, sprite0_start) + __field(u32, sprite1_start) + __field(u32, fifo_size) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->sprite0_start = sprite0_start; + __entry->sprite1_start = sprite1_start; + __entry->fifo_size = fifo_size; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, %d/%d/%d", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline, __entry->sprite0_start, + __entry->sprite1_start, __entry->fifo_size) +); + /* plane updates */ TRACE_EVENT(intel_update_plane, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9e4c7a0b0407..dc85501cfff3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -888,6 +888,8 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv, enum pipe pipe; for_each_pipe(dev_priv, pipe) { + trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm); + I915_WRITE(VLV_DDL(pipe), (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) | (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) | @@ -1352,6 +1354,8 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63); WARN_ON(fifo_size != 511); + trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size); + spin_lock(&dev_priv->wm.dsparb_lock); switch (crtc->pipe) { From 1489bba82433d8960d6bfef7453eae77ef9c926a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:15:07 +0200 Subject: [PATCH 0552/1299] drm/i915: Add cxsr toggle tracepoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a tracepoint for observing changes in the cxsr state. The tracepoint will dump out the frame and scanline counters for each pipe so that the information can be compared with eg. plane update tracepoints. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-18-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_trace.h | 30 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 2 ++ 2 files changed, 32 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index ca8198785311..2537a03555cc 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -16,6 +16,36 @@ /* watermark/fifo updates */ +TRACE_EVENT(intel_memory_cxsr, + TP_PROTO(struct drm_i915_private *dev_priv, bool old, bool new), + TP_ARGS(dev_priv, old, new), + + TP_STRUCT__entry( + __array(u32, frame, 3) + __array(u32, scanline, 3) + __field(bool, old) + __field(bool, new) + ), + + TP_fast_assign( + enum pipe pipe; + for_each_pipe(dev_priv, pipe) { + __entry->frame[pipe] = + dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); + __entry->scanline[pipe] = + intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + } + __entry->old = old; + __entry->new = new; + ), + + TP_printk("%s->%s, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", + onoff(__entry->old), onoff(__entry->new), + __entry->frame[PIPE_A], __entry->scanline[PIPE_A], + __entry->frame[PIPE_B], __entry->scanline[PIPE_B], + __entry->frame[PIPE_C], __entry->scanline[PIPE_C]) +); + TRACE_EVENT(vlv_wm, TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm), TP_ARGS(crtc, wm), diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index dc85501cfff3..99e09f63d4b3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -377,6 +377,8 @@ static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enabl return false; } + trace_intel_memory_cxsr(dev_priv, was_enabled, enable); + DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n", enableddisabled(enable), enableddisabled(was_enabled)); From 53a7915cd28b17536609bed6416701b40e8a10de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 2 Mar 2017 19:15:08 +0200 Subject: [PATCH 0553/1299] drm/i915: Add FIFO underrun tracepoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tracepoints for display FIFO underruns. Makes it more convenient to correlate the underruns with other display tracepoints. v2: s/i915/intel/ in the tracepoint name Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-19-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_trace.h | 43 ++++++++++++++++++++++ drivers/gpu/drm/i915/intel_fifo_underrun.c | 11 +++++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 2537a03555cc..5503f5ab1e98 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -16,6 +16,49 @@ /* watermark/fifo updates */ +TRACE_EVENT(intel_cpu_fifo_underrun, + TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pipe), + TP_ARGS(dev_priv, pipe), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + __entry->pipe = pipe; + __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); + __entry->scanline = intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + ), + + TP_printk("pipe %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), + __entry->frame, __entry->scanline) +); + +TRACE_EVENT(intel_pch_fifo_underrun, + TP_PROTO(struct drm_i915_private *dev_priv, enum transcoder pch_transcoder), + TP_ARGS(dev_priv, pch_transcoder), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + enum pipe pipe = (enum pipe)pch_transcoder; + __entry->pipe = pipe; + __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); + __entry->scanline = intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + ), + + TP_printk("pch transcoder %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), + __entry->frame, __entry->scanline) +); + TRACE_EVENT(intel_memory_cxsr, TP_PROTO(struct drm_i915_private *dev_priv, bool old, bool new), TP_ARGS(dev_priv, old, new), diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index f5ddacc90fde..966e255ca053 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -98,6 +98,7 @@ static void i9xx_check_fifo_underruns(struct intel_crtc *crtc) I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS); POSTING_READ(reg); + trace_intel_cpu_fifo_underrun(dev_priv, crtc->pipe); DRM_ERROR("pipe %c underrun\n", pipe_name(crtc->pipe)); } @@ -147,6 +148,7 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) I915_WRITE(GEN7_ERR_INT, ERR_INT_FIFO_UNDERRUN(pipe)); POSTING_READ(GEN7_ERR_INT); + trace_intel_cpu_fifo_underrun(dev_priv, pipe); DRM_ERROR("fifo underrun on pipe %c\n", pipe_name(pipe)); } @@ -212,6 +214,7 @@ static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc) I915_WRITE(SERR_INT, SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder)); POSTING_READ(SERR_INT); + trace_intel_pch_fifo_underrun(dev_priv, pch_transcoder); DRM_ERROR("pch fifo underrun on pch transcoder %s\n", transcoder_name(pch_transcoder)); } @@ -368,9 +371,11 @@ void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, crtc->cpu_fifo_underrun_disabled) return; - if (intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false)) + if (intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false)) { + trace_intel_cpu_fifo_underrun(dev_priv, pipe); DRM_ERROR("CPU pipe %c FIFO underrun\n", pipe_name(pipe)); + } intel_fbc_handle_fifo_underrun_irq(dev_priv); } @@ -388,9 +393,11 @@ void intel_pch_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, enum transcoder pch_transcoder) { if (intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, - false)) + false)) { + trace_intel_pch_fifo_underrun(dev_priv, pch_transcoder); DRM_ERROR("PCH transcoder %s FIFO underrun\n", transcoder_name(pch_transcoder)); + } } /** From 24754d751cb86f6069315d8d613e23afcab06c91 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Mar 2017 14:45:57 +0000 Subject: [PATCH 0554/1299] drm/i915: Take reference for signaling the request from hardirq Being inside a spinlock signaling that the hardware just completed a request doesn't prevent a second thread already spotting that the request is complete, freeing it and reallocating it! The code currently tries to prevent this using RCU -- but that only prevents the request from being freed, it doesn't prevent us from reallocating it - that requires us to take a reference. [ 206.922985] BUG: spinlock already unlocked on CPU#4, gem_exec_parall/7796 [ 206.922994] lock: 0xffff8801c6047120, .magic: dead4ead, .owner: /-1, .owner_cpu: -1 [ 206.923000] CPU: 4 PID: 7796 Comm: gem_exec_parall Not tainted 4.10.0-CI-Patchwork_4008+ #1 [ 206.923006] Hardware name: System manufacturer System Product Name/Z170M-PLUS, BIOS 1805 06/20/2016 [ 206.923012] Call Trace: [ 206.923014] [ 206.923019] dump_stack+0x67/0x92 [ 206.923023] spin_dump+0x73/0xc0 [ 206.923027] do_raw_spin_unlock+0x79/0xb0 [ 206.923031] _raw_spin_unlock_irqrestore+0x27/0x60 [ 206.923042] dma_fence_signal+0x160/0x230 [ 206.923060] notify_ring+0xae/0x2e0 [i915] [ 206.923073] ? ibx_hpd_irq_handler+0xc0/0xc0 [i915] [ 206.923086] gen8_gt_irq_handler+0x219/0x290 [i915] [ 206.923100] gen8_irq_handler+0x8e/0x6b0 [i915] [ 206.923105] __handle_irq_event_percpu+0x58/0x370 [ 206.923109] handle_irq_event_percpu+0x1e/0x50 [ 206.923113] handle_irq_event+0x34/0x60 [ 206.923117] handle_edge_irq+0xbe/0x150 [ 206.923122] handle_irq+0x15/0x20 [ 206.923126] do_IRQ+0x63/0x130 [ 206.923142] ? i915_mutex_lock_interruptible+0x39/0x140 [i915] [ 206.923148] common_interrupt+0x90/0x90 [ 206.923153] RIP: 0010:osq_lock+0x77/0x110 [ 206.923157] RSP: 0018:ffffc90001cabaa0 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff6e [ 206.923164] RAX: 0000000000000000 RBX: ffff880236d1abc0 RCX: ffff8801ef642fc0 [ 206.923169] RDX: ffff8801ef6427c0 RSI: ffffffff81c6e7fd RDI: ffffffff81c7c848 [ 206.923175] RBP: ffffc90001cabab8 R08: 00000000692bb19b R09: 08c1493200000000 [ 206.923180] R10: 0000000000000001 R11: 0000000000000001 R12: ffff880236cdabc0 [ 206.923185] R13: ffff8802207f00b0 R14: ffffffffa00b7cd9 R15: ffff8802207f0070 [ 206.923191] [ 206.923206] ? i915_mutex_lock_interruptible+0x39/0x140 [i915] [ 206.923213] __mutex_lock+0x649/0x990 [ 206.923217] ? __mutex_lock+0xb0/0x990 [ 206.923221] ? _raw_spin_unlock+0x2c/0x50 [ 206.923226] ? __pm_runtime_resume+0x56/0x80 [ 206.923242] ? i915_mutex_lock_interruptible+0x39/0x140 [i915] [ 206.923249] mutex_lock_interruptible_nested+0x16/0x20 [ 206.923264] i915_mutex_lock_interruptible+0x39/0x140 [i915] [ 206.923270] ? __pm_runtime_resume+0x56/0x80 [ 206.923285] i915_gem_do_execbuffer.isra.15+0x442/0x1d10 [i915] [ 206.923291] ? __lock_acquire+0x449/0x1b50 [ 206.923296] ? __might_fault+0x3e/0x90 [ 206.923301] ? __might_fault+0x87/0x90 [ 206.923305] ? __might_fault+0x3e/0x90 [ 206.923320] i915_gem_execbuffer2+0xb5/0x220 [i915] [ 206.923327] drm_ioctl+0x200/0x450 [ 206.923341] ? i915_gem_execbuffer+0x330/0x330 [i915] [ 206.923348] do_vfs_ioctl+0x90/0x6e0 [ 206.923352] ? __fget+0x108/0x200 [ 206.923356] ? expand_files+0x2b0/0x2b0 [ 206.923361] SyS_ioctl+0x3c/0x70 [ 206.923365] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 206.923369] RIP: 0033:0x7fdd75fc6357 [ 206.923373] RSP: 002b:00007fdd20e59bf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 206.923380] RAX: ffffffffffffffda RBX: ffffffff81481ff3 RCX: 00007fdd75fc6357 [ 206.923385] RDX: 00007fdd20e59c70 RSI: 0000000040406469 RDI: 0000000000000003 [ 206.923390] RBP: ffffc90001cabf88 R08: 0000000000000040 R09: 00000000000003f7 [ 206.923396] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [ 206.923401] R13: 0000000000000003 R14: 0000000040406469 R15: 0000000001cf9cb0 [ 206.923408] ? __this_cpu_preempt_check+0x13/0x20 Fixes: 56299fb7d904 ("drm/i915: Signal first fence from irq handler if complete") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100051 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170303144557.4815-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_irq.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 675c28d54658..cb1424abb7c0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1042,8 +1042,6 @@ static void notify_ring(struct intel_engine_cs *engine) atomic_inc(&engine->irq_count); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - rcu_read_lock(); - spin_lock(&engine->breadcrumbs.lock); wait = engine->breadcrumbs.first_wait; if (wait) { @@ -1060,7 +1058,7 @@ static void notify_ring(struct intel_engine_cs *engine) */ if (i915_seqno_passed(intel_engine_get_seqno(engine), wait->seqno)) - rq = wait->request; + rq = i915_gem_request_get(wait->request); wake_up_process(wait->tsk); } else { @@ -1068,10 +1066,10 @@ static void notify_ring(struct intel_engine_cs *engine) } spin_unlock(&engine->breadcrumbs.lock); - if (rq) + if (rq) { dma_fence_signal(&rq->fence); - - rcu_read_unlock(); + i915_gem_request_put(rq); + } trace_intel_engine_notify(engine, wait); } From b66255f0f77902ef41b09163a6a092d2d905e151 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Mar 2017 17:14:22 +0000 Subject: [PATCH 0555/1299] drm/i915: Refactor wakeup of the next breadcrumb waiter Refactor the common task of updating the first_waiter, serialised with the interrupt handler. When we update the first_waiter, we also need to wakeup the new bottom-half in order to complete the actions that we may have delegated to it (such as checking the irq-seqno coherency or waking up other lower priority concurrent waiters). Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170303171422.4735-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 48 +++++++++--------------- 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 235d4645a5cf..2b26f84480cc 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -287,6 +287,22 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, wake_up_process(wait->tsk); /* implicit smp_wmb() */ } +static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, + struct rb_node *next) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + GEM_BUG_ON(!b->irq_armed); + b->first_wait = to_wait(next); + + /* We always wake up the next waiter that takes over as the bottom-half + * as we may delegate not only the irq-seqno barrier to the next waiter + * but also the task of waking up concurrent waiters. + */ + if (next) + wake_up_process(to_wait(next)->tsk); +} + static bool __intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_wait *wait) { @@ -357,21 +373,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, GEM_BUG_ON(!next && !first); if (next && next != &wait->node) { GEM_BUG_ON(first); - b->first_wait = to_wait(next); - /* As there is a delay between reading the current - * seqno, processing the completed tasks and selecting - * the next waiter, we may have missed the interrupt - * and so need for the next bottom-half to wakeup. - * - * Also as we enable the IRQ, we may miss the - * interrupt for that seqno, so we have to wake up - * the next bottom-half in order to do a coherent check - * in case the seqno passed. - */ - __intel_breadcrumbs_enable_irq(b); - if (test_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted)) - wake_up_process(to_wait(next)->tsk); + __intel_breadcrumbs_next(engine, next); } do { @@ -473,21 +475,7 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, } } - if (next) { - /* In our haste, we may have completed the first waiter - * before we enabled the interrupt. Do so now as we - * have a second waiter for a future seqno. Afterwards, - * we have to wake up that waiter in case we missed - * the interrupt, or if we have to handle an - * exception rather than a seqno completion. - */ - b->first_wait = to_wait(next); - if (b->first_wait->seqno != wait->seqno) - __intel_breadcrumbs_enable_irq(b); - wake_up_process(b->first_wait->tsk); - } else { - b->first_wait = NULL; - } + __intel_breadcrumbs_next(engine, next); } else { GEM_BUG_ON(rb_first(&b->waiters) == &wait->node); } From 61d3dc708077de316bdcafd66016c2285da07275 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Mar 2017 19:08:24 +0000 Subject: [PATCH 0556/1299] drm/i915: Split breadcrumbs spinlock into two As we now take the breadcrumbs spinlock within the interrupt handler, we wish to minimise its hold time. During the interrupt we do not care about the state of the full rbtree, only that of the first element, so we can guard that with a separate lock. v2: Rename first_wait to irq_wait to make it clearer that it is guarded by irq_lock. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170303190824.1330-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 12 +-- drivers/gpu/drm/i915/i915_drv.h | 8 +- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +- drivers/gpu/drm/i915/i915_irq.c | 6 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 80 ++++++++++--------- drivers/gpu/drm/i915/intel_ringbuffer.h | 8 +- .../drm/i915/selftests/intel_breadcrumbs.c | 4 +- 7 files changed, 68 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 70acbbf50c75..478f19d2f3d8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -726,14 +726,14 @@ static void i915_ring_seqno_info(struct seq_file *m, seq_printf(m, "Current sequence (%s): %x\n", engine->name, intel_engine_get_seqno(engine)); - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); seq_printf(m, "Waiting (%s): %s [%d] on %x\n", engine->name, w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); } static int i915_gem_seqno_info(struct seq_file *m, void *data) @@ -1380,14 +1380,14 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) &dev_priv->gpu_error.missed_irq_rings)), yesno(engine->hangcheck.stalled)); - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); seq_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, @@ -3385,14 +3385,14 @@ static int i915_engine_info(struct seq_file *m, void *unused) I915_READ(RING_PP_DIR_DCLV(engine))); } - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); seq_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); seq_puts(m, "\n"); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 059335f23706..95050115c700 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4097,16 +4097,16 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * the seqno before we believe it coherent since they see * irq_posted == false but we are still running). */ - spin_lock_irqsave(&b->lock, flags); - if (b->first_wait && b->first_wait->tsk != current) + spin_lock_irqsave(&b->irq_lock, flags); + if (b->irq_wait && b->irq_wait->tsk != current) /* Note that if the bottom-half is changed as we * are sending the wake-up, the new bottom-half will * be woken by whomever made the change. We only have * to worry about when we steal the irq-posted for * ourself. */ - wake_up_process(b->first_wait->tsk); - spin_unlock_irqrestore(&b->lock, flags); + wake_up_process(b->irq_wait->tsk); + spin_unlock_irqrestore(&b->irq_lock, flags); if (__i915_gem_request_completed(req, seqno)) return true; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 061af8040498..8effc59f5cb5 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1111,7 +1111,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (RB_EMPTY_ROOT(&b->waiters)) return; - if (!spin_trylock_irq(&b->lock)) { + if (!spin_trylock_irq(&b->rb_lock)) { ee->waiters = ERR_PTR(-EDEADLK); return; } @@ -1119,7 +1119,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, count = 0; for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) count++; - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); waiter = NULL; if (count) @@ -1129,7 +1129,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (!waiter) return; - if (!spin_trylock_irq(&b->lock)) { + if (!spin_trylock_irq(&b->rb_lock)) { kfree(waiter); ee->waiters = ERR_PTR(-EDEADLK); return; @@ -1147,7 +1147,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (++ee->num_waiters == count) break; } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); } static void error_record_engine_registers(struct i915_gpu_state *error, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index cb1424abb7c0..29b002dacbd5 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1042,8 +1042,8 @@ static void notify_ring(struct intel_engine_cs *engine) atomic_inc(&engine->irq_count); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - spin_lock(&engine->breadcrumbs.lock); - wait = engine->breadcrumbs.first_wait; + spin_lock(&engine->breadcrumbs.irq_lock); + wait = engine->breadcrumbs.irq_wait; if (wait) { /* We use a callback from the dma-fence to submit * requests after waiting on our own requests. To @@ -1064,7 +1064,7 @@ static void notify_ring(struct intel_engine_cs *engine) } else { __intel_engine_disarm_breadcrumbs(engine); } - spin_unlock(&engine->breadcrumbs.lock); + spin_unlock(&engine->breadcrumbs.irq_lock); if (rq) { dma_fence_signal(&rq->fence); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 2b26f84480cc..6032d2a937d5 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -31,7 +31,9 @@ static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) struct intel_wait *wait; unsigned int result = 0; - wait = b->first_wait; + lockdep_assert_held(&b->irq_lock); + + wait = b->irq_wait; if (wait) { result = ENGINE_WAKEUP_WAITER; if (wake_up_process(wait->tsk)) @@ -47,9 +49,9 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) unsigned long flags; unsigned int result; - spin_lock_irqsave(&b->lock, flags); + spin_lock_irqsave(&b->irq_lock, flags); result = __intel_breadcrumbs_wakeup(b); - spin_unlock_irqrestore(&b->lock, flags); + spin_unlock_irqrestore(&b->irq_lock, flags); return result; } @@ -117,10 +119,10 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) * coherent seqno check. */ - spin_lock_irqsave(&b->lock, flags); + spin_lock_irqsave(&b->irq_lock, flags); if (!__intel_breadcrumbs_wakeup(b)) __intel_engine_disarm_breadcrumbs(engine); - spin_unlock_irqrestore(&b->lock, flags); + spin_unlock_irqrestore(&b->irq_lock, flags); if (!b->irq_armed) return; @@ -164,7 +166,7 @@ void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - lockdep_assert_held(&b->lock); + lockdep_assert_held(&b->irq_lock); if (b->irq_enabled) { irq_disable(engine); @@ -182,7 +184,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) if (!b->irq_armed) return; - spin_lock_irqsave(&b->lock, flags); + spin_lock_irqsave(&b->irq_lock, flags); /* We only disarm the irq when we are idle (all requests completed), * so if there remains a sleeping waiter, it missed the request @@ -193,7 +195,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) __intel_engine_disarm_breadcrumbs(engine); - spin_unlock_irqrestore(&b->lock, flags); + spin_unlock_irqrestore(&b->irq_lock, flags); } static bool use_fake_irq(const struct intel_breadcrumbs *b) @@ -228,7 +230,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) container_of(b, struct intel_engine_cs, breadcrumbs); struct drm_i915_private *i915 = engine->i915; - lockdep_assert_held(&b->lock); + lockdep_assert_held(&b->irq_lock); if (b->irq_armed) return; @@ -276,7 +278,7 @@ static inline struct intel_wait *to_wait(struct rb_node *node) static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, struct intel_wait *wait) { - lockdep_assert_held(&b->lock); + lockdep_assert_held(&b->rb_lock); /* This request is completed, so remove it from the tree, mark it as * complete, and *then* wake up the associated task. @@ -292,8 +294,10 @@ static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, { struct intel_breadcrumbs *b = &engine->breadcrumbs; + spin_lock(&b->irq_lock); GEM_BUG_ON(!b->irq_armed); - b->first_wait = to_wait(next); + b->irq_wait = to_wait(next); + spin_unlock(&b->irq_lock); /* We always wake up the next waiter that takes over as the bottom-half * as we may delegate not only the irq-seqno barrier to the next waiter @@ -384,8 +388,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, } if (first) { + spin_lock(&b->irq_lock); GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); - b->first_wait = wait; + b->irq_wait = wait; /* After assigning ourselves as the new bottom-half, we must * perform a cursory check to prevent a missed interrupt. * Either we miss the interrupt whilst programming the hardware, @@ -395,9 +400,10 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, * and so we miss the wake up. */ __intel_breadcrumbs_enable_irq(b); + spin_unlock(&b->irq_lock); } - GEM_BUG_ON(!b->first_wait); - GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node); + GEM_BUG_ON(!b->irq_wait); + GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node); return first; } @@ -408,9 +414,9 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_breadcrumbs *b = &engine->breadcrumbs; bool first; - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); first = __intel_engine_add_wait(engine, wait); - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); return first; } @@ -434,12 +440,12 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, { struct intel_breadcrumbs *b = &engine->breadcrumbs; - lockdep_assert_held(&b->lock); + lockdep_assert_held(&b->rb_lock); if (RB_EMPTY_NODE(&wait->node)) goto out; - if (b->first_wait == wait) { + if (b->irq_wait == wait) { const int priority = wakeup_priority(b, wait->tsk); struct rb_node *next; @@ -484,9 +490,9 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, rb_erase(&wait->node, &b->waiters); out: - GEM_BUG_ON(b->first_wait == wait); + GEM_BUG_ON(b->irq_wait == wait); GEM_BUG_ON(rb_first(&b->waiters) != - (b->first_wait ? &b->first_wait->node : NULL)); + (b->irq_wait ? &b->irq_wait->node : NULL)); } void intel_engine_remove_wait(struct intel_engine_cs *engine, @@ -501,9 +507,9 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, if (RB_EMPTY_NODE(&wait->node)) return; - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); __intel_engine_remove_wait(engine, wait); - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); } static bool signal_valid(const struct drm_i915_gem_request *request) @@ -573,7 +579,7 @@ static int intel_breadcrumbs_signaler(void *arg) dma_fence_signal(&request->fence); local_bh_enable(); /* kick start the tasklets */ - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); /* Wake up all other completed waiters and select the * next bottom-half for the next user interrupt. @@ -596,7 +602,7 @@ static int intel_breadcrumbs_signaler(void *arg) rb_erase(&request->signaling.node, &b->signals); RB_CLEAR_NODE(&request->signaling.node); - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); i915_gem_request_put(request); } else { @@ -653,7 +659,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) request->signaling.wait.seqno = seqno; i915_gem_request_get(request); - spin_lock(&b->lock); + spin_lock(&b->rb_lock); /* First add ourselves into the list of waiters, but register our * bottom-half as the signaller thread. As per usual, only the oldest @@ -687,7 +693,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) if (first) rcu_assign_pointer(b->first_signal, request); - spin_unlock(&b->lock); + spin_unlock(&b->rb_lock); if (wakeup) wake_up_process(b->signaler); @@ -702,7 +708,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) lockdep_assert_held(&request->lock); GEM_BUG_ON(!request->signaling.wait.seqno); - spin_lock(&b->lock); + spin_lock(&b->rb_lock); if (!RB_EMPTY_NODE(&request->signaling.node)) { if (request == rcu_access_pointer(b->first_signal)) { @@ -718,7 +724,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) __intel_engine_remove_wait(engine, &request->signaling.wait); - spin_unlock(&b->lock); + spin_unlock(&b->rb_lock); request->signaling.wait.seqno = 0; } @@ -728,7 +734,9 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; struct task_struct *tsk; - spin_lock_init(&b->lock); + spin_lock_init(&b->rb_lock); + spin_lock_init(&b->irq_lock); + setup_timer(&b->fake_irq, intel_breadcrumbs_fake_irq, (unsigned long)engine); @@ -766,7 +774,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; cancel_fake_irq(engine); - spin_lock_irq(&b->lock); + spin_lock_irq(&b->irq_lock); if (b->irq_enabled) irq_enable(engine); @@ -785,7 +793,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) if (b->irq_armed) enable_fake_irq(b); - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->irq_lock); } void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) @@ -793,7 +801,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; /* The engines should be idle and all requests accounted for! */ - WARN_ON(READ_ONCE(b->first_wait)); + WARN_ON(READ_ONCE(b->irq_wait)); WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); WARN_ON(rcu_access_pointer(b->first_signal)); WARN_ON(!RB_EMPTY_ROOT(&b->signals)); @@ -809,10 +817,10 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; bool busy = false; - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); - if (b->first_wait) { - wake_up_process(b->first_wait->tsk); + if (b->irq_wait) { + wake_up_process(b->irq_wait->tsk); busy |= intel_engine_flag(engine); } @@ -821,7 +829,7 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) busy |= intel_engine_flag(engine); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); return busy; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 55a6a3f8274c..9d6b83a16cc8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -235,10 +235,12 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { - spinlock_t lock; /* protects the lists of requests; irqsafe */ + spinlock_t irq_lock; /* protects irq_*; irqsafe */ + struct intel_wait *irq_wait; /* oldest waiter by retirement */ + + spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root signals; /* sorted by retirement */ - struct intel_wait *first_wait; /* oldest waiter by retirement */ struct task_struct *signaler; /* used for fence signalling */ struct drm_i915_gem_request __rcu *first_signal; struct timer_list fake_irq; /* used after a missed interrupt */ @@ -639,7 +641,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) { - return READ_ONCE(engine->breadcrumbs.first_wait); + return READ_ONCE(engine->breadcrumbs.irq_wait); } unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 621be1ca53d8..19860a372d90 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -37,7 +37,7 @@ static int check_rbtree(struct intel_engine_cs *engine, struct rb_node *rb; int n; - if (&b->first_wait->node != rb_first(&b->waiters)) { + if (&b->irq_wait->node != rb_first(&b->waiters)) { pr_err("First waiter does not match first element of wait-tree\n"); return -EINVAL; } @@ -90,7 +90,7 @@ static int check_rbtree_empty(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - if (b->first_wait) { + if (b->irq_wait) { pr_err("Empty breadcrumbs still has a waiter\n"); return -EINVAL; } From 237ae7c79e2683723a6bf83e1d7db40fc890b285 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 1 Mar 2017 20:26:15 +0000 Subject: [PATCH 0557/1299] drm/i915: Don't use enums for hardware engine id Generally we are using macros for any hardware identifiers as these may change between Gens. Do the same with hardware engine ids. v2: move hw engine defs to i915_reg.h (Chris) Signed-off-by: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Cc: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170301202615.118632-1-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_reg.h | 6 +++++ drivers/gpu/drm/i915/intel_engine_cs.c | 4 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 32 ++++++++++++------------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0d9ae27fddff..243050ea4938 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -77,7 +77,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MASKED_BIT_ENABLE(a) ({ typeof(a) _a = (a); _MASKED_FIELD(_a, _a); }) #define _MASKED_BIT_DISABLE(a) (_MASKED_FIELD((a), 0)) +/* Engine ID */ +#define RCS_HW 0 +#define VCS_HW 1 +#define BCS_HW 2 +#define VECS_HW 3 +#define VCS2_HW 4 /* PCI config space */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 5fd4883db235..73fe718516a5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -28,8 +28,8 @@ static const struct engine_info { const char *name; - unsigned exec_id; - enum intel_engine_hw_id hw_id; + unsigned int exec_id; + unsigned int hw_id; u32 mmio_base; unsigned irq_shift; int (*init_legacy)(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9d6b83a16cc8..0ef491df5b4e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -186,26 +186,26 @@ struct i915_ctx_workarounds { struct drm_i915_gem_request; struct intel_render_state; +/* + * Engine IDs definitions. + * Keep instances of the same type engine together. + */ +enum intel_engine_id { + RCS = 0, + BCS, + VCS, + VCS2, +#define _VCS(n) (VCS + (n)) + VECS +}; + struct intel_engine_cs { struct drm_i915_private *i915; const char *name; - enum intel_engine_id { - RCS = 0, - BCS, - VCS, - VCS2, /* Keep instances of the same type engine together. */ - VECS - } id; -#define _VCS(n) (VCS + (n)) + enum intel_engine_id id; unsigned int exec_id; - enum intel_engine_hw_id { - RCS_HW = 0, - VCS_HW, - BCS_HW, - VECS_HW, - VCS2_HW - } hw_id; - enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */ + unsigned int hw_id; + unsigned int guc_id; u32 mmio_base; unsigned int irq_shift; struct intel_ring *buffer; From 2bc756e7dde20972300bb8e2e46dd430d6d2d5db Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Mar 2017 23:22:29 +0100 Subject: [PATCH 0558/1299] cpufreq: intel_pstate: Do not use performance_limits in passive mode Using performance_limits in the passive mode doesn't make sense, because in that mode the global limits are applied to the frequency selected by the scaling governor. The maximum and minimum P-state limits in performance_limits are both set to 100 percent which will put all CPUs into the turbo range regardless of what governor is used and what frequencies are selected by it (that is particularly undesirable on CPUs with the generic powersave governor attached). For this reason, make intel_pstate_register_driver() always point limits to powersave_limits in the passive mode. Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9ee13f195c37..7fc1f8a0ef44 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2442,8 +2442,11 @@ static int intel_pstate_register_driver(void) intel_pstate_init_limits(&powersave_limits); intel_pstate_set_performance_limits(&performance_limits); - limits = IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) ? - &performance_limits : &powersave_limits; + if (IS_ENABLED(CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE) && + intel_pstate_driver == &intel_pstate) + limits = &performance_limits; + else + limits = &powersave_limits; ret = cpufreq_register_driver(intel_pstate_driver); if (ret) { From 7f17326fc0b69afbda7aed6c4ce8e2328b74203b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Mar 2017 23:24:36 +0100 Subject: [PATCH 0559/1299] cpufreq: intel_pstate: Fix intel_cpufreq_verify_policy() The intel_pstate_update_perf_limits() called from intel_cpufreq_verify_policy() may cause global P-state limits to change which is generally confusing and unnecessary. In the passive mode the global limits are only applied to the frequency selected by the scaling governor (they are not taken into account by governors when making decisions anyway), so making them follow the per-policy limits serves no purpose and may go against user expectations (as it generally causes the global attributes in sysfs to change even though they have not been written to in some cases). Fix that by dropping the intel_pstate_update_perf_limits() invocation from intel_cpufreq_verify_policy() (which also reduces the code size by a few lines). This change does not affect the per-CPU limits case, because those limits allow any P-state to be set by default in the passive mode and it removes the only piece of code updating them in that mode, so the per-policy settings will be the only ones taken into account in that case as expected. Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 7fc1f8a0ef44..04b2aa162276 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2306,7 +2306,6 @@ static struct cpufreq_driver intel_pstate = { static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) { struct cpudata *cpu = all_cpu_data[policy->cpu]; - struct perf_limits *perf_limits = limits; update_turbo_state(); policy->cpuinfo.max_freq = limits->turbo_disabled ? @@ -2314,15 +2313,6 @@ static int intel_cpufreq_verify_policy(struct cpufreq_policy *policy) cpufreq_verify_within_cpu_limits(policy); - if (per_cpu_limits) - perf_limits = cpu->perf_limits; - - mutex_lock(&intel_pstate_limits_lock); - - intel_pstate_update_perf_limits(policy, perf_limits); - - mutex_unlock(&intel_pstate_limits_lock); - return 0; } From 6407829901f074cf6beef566d6af9a0b0e238f0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 3 Mar 2017 23:51:31 +0100 Subject: [PATCH 0560/1299] cpufreq: intel_pstate: Avoid triggering cpu_frequency tracepoint unnecessarily In the passive mode the cpu_frequency trace event is already triggered by the cpufreq core or by scaling governors, so intel_pstate should not trigger it once again for the same P-state updates. In addition to that, the frequency returned by intel_cpufreq_fast_switch() and passed via freqs.new from intel_cpufreq_target() to cpufreq_freq_transition_end() should reflect the P-state actually set, so make that happen. Fixes: 001c76f05b01 (cpufreq: intel_pstate: Generic governors support) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 04b2aa162276..5e066b332598 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1879,13 +1879,11 @@ static int intel_pstate_prepare_request(struct cpudata *cpu, int pstate) intel_pstate_get_min_max(cpu, &min_perf, &max_perf); pstate = clamp_t(int, pstate, min_perf, max_perf); - trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); return pstate; } static void intel_pstate_update_pstate(struct cpudata *cpu, int pstate) { - pstate = intel_pstate_prepare_request(cpu, pstate); if (pstate == cpu->pstate.current_pstate) return; @@ -1905,6 +1903,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) update_turbo_state(); + target_pstate = intel_pstate_prepare_request(cpu, target_pstate); + trace_cpu_frequency(target_pstate * cpu->pstate.scaling, cpu->cpu); intel_pstate_update_pstate(cpu, target_pstate); sample = &cpu->sample; @@ -2365,6 +2365,7 @@ static int intel_cpufreq_target(struct cpufreq_policy *policy, wrmsrl_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, target_pstate)); } + freqs.new = target_pstate * cpu->pstate.scaling; cpufreq_freq_transition_end(policy, &freqs, false); return 0; @@ -2378,8 +2379,9 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy, target_freq = intel_cpufreq_turbo_update(cpu, policy, target_freq); target_pstate = DIV_ROUND_UP(target_freq, cpu->pstate.scaling); + target_pstate = intel_pstate_prepare_request(cpu, target_pstate); intel_pstate_update_pstate(cpu, target_pstate); - return target_freq; + return target_pstate * cpu->pstate.scaling; } static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) From 2a9c4f40ab2c281f95d41577ff0f7f78668feb73 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 2 Mar 2017 17:41:24 +1100 Subject: [PATCH 0561/1299] powerpc/powernv: Fix opal tracepoints with JUMP_LABEL=n The recent commit to allow calling OPAL calls in real mode, commit ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode calls"), introduced a bug when CONFIG_JUMP_LABEL=n. The commit moved the "mfmsr r12" prior to the call to OPAL_BRANCH, but we missed that OPAL_BRANCH clobbers r12 when jump labels are disabled. This leads to us using the tracepoint refcount as the MSR value, typically zero, and saving that into PACASAVEDMSR. When we return from OPAL we use that value as the MSR value for rfid, meaning we switch to 32-bit BE real mode - hilarity ensues. Fix it by using r11 in OPAL_BRANCH, which is not live at the time the macro is used in OPAL_CALL. Fixes: ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode calls") Suggested-by: Paul Mackerras Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-wrappers.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6693f75e93d1..da8a0f7a035c 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -39,8 +39,8 @@ opal_tracepoint_refcount: BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r12,opal_tracepoint_refcount@toc(r2); \ - cmpdi r12,0; \ + ld r11,opal_tracepoint_refcount@toc(r2); \ + cmpdi r11,0; \ bne- LABEL; \ 1: From 6ad966d7303b70165228dba1ee8da1a05c10eefe Mon Sep 17 00:00:00 2001 From: Shile Zhang Date: Sat, 4 Feb 2017 17:03:40 +0800 Subject: [PATCH 0562/1299] powerpc/64: Fix checksum folding in csum_add() Paul's patch to fix checksum folding, commit b492f7e4e07a ("powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold") missed a case in csum_add(). Fix it. Signed-off-by: Shile Zhang Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 4e63787dc3be..842124b199b5 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -112,7 +112,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #ifdef __powerpc64__ res += (__force u64)addend; - return (__force __wsum)((u32)res + (res >> 32)); + return (__force __wsum) from64to32(res); #else asm("addc %0,%0,%1;" "addze %0,%0;" From 6c4f0fa643cb9e775dcc976e3db00d649468ff1d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 2 Mar 2017 14:03:20 +0530 Subject: [PATCH 0563/1299] cpufreq: schedutil: move cached_raw_freq to struct sugov_policy cached_raw_freq applies to the entire cpufreq policy and not individual CPUs. Apart from wasting per-cpu memory, it is actually wrong to keep it in struct sugov_cpu as we may end up comparing next_freq with a stale cached_raw_freq of a random CPU. Move cached_raw_freq to struct sugov_policy. Fixes: 5cbea46984d6 (cpufreq: schedutil: map raw required frequency to driver frequency) Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 8f8de3d4d6b7..3ab2aeaec6ec 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -36,6 +36,7 @@ struct sugov_policy { u64 last_freq_update_time; s64 freq_update_delay_ns; unsigned int next_freq; + unsigned int cached_raw_freq; /* The next fields are only needed if fast switch cannot be used. */ struct irq_work irq_work; @@ -52,7 +53,6 @@ struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; - unsigned int cached_raw_freq; unsigned long iowait_boost; unsigned long iowait_boost_max; u64 last_update; @@ -146,9 +146,9 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, freq = (freq + (freq >> 2)) * util / max; - if (freq == sg_cpu->cached_raw_freq && sg_policy->next_freq != UINT_MAX) + if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX) return sg_policy->next_freq; - sg_cpu->cached_raw_freq = freq; + sg_policy->cached_raw_freq = freq; return cpufreq_driver_resolve_freq(policy, freq); } @@ -580,6 +580,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->next_freq = UINT_MAX; sg_policy->work_in_progress = false; sg_policy->need_freq_update = false; + sg_policy->cached_raw_freq = 0; for_each_cpu(cpu, policy->cpus) { struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); @@ -590,7 +591,6 @@ static int sugov_start(struct cpufreq_policy *policy) sg_cpu->max = 0; sg_cpu->flags = SCHED_CPUFREQ_RT; sg_cpu->last_update = 0; - sg_cpu->cached_raw_freq = 0; sg_cpu->iowait_boost = 0; sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, From 655cb1ebff4b7918fc560502c3297af2d3c7d114 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 2 Mar 2017 14:03:21 +0530 Subject: [PATCH 0564/1299] cpufreq: schedutil: Pass sg_policy to get_next_freq() get_next_freq() uses sg_cpu only to get sg_policy, which the callers of get_next_freq() already have. Pass sg_policy instead of sg_cpu to get_next_freq(), to make it more efficient. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 3ab2aeaec6ec..cd7cd489f739 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -116,7 +116,7 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, /** * get_next_freq - Compute a new frequency for a given cpufreq policy. - * @sg_cpu: schedutil cpu object to compute the new frequency for. + * @sg_policy: schedutil policy object to compute the new frequency for. * @util: Current CPU utilization. * @max: CPU capacity. * @@ -136,10 +136,9 @@ static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time, * next_freq (as calculated above) is returned, subject to policy min/max and * cpufreq driver limitations. */ -static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util, - unsigned long max) +static unsigned int get_next_freq(struct sugov_policy *sg_policy, + unsigned long util, unsigned long max) { - struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; unsigned int freq = arch_scale_freq_invariant() ? policy->cpuinfo.max_freq : policy->cur; @@ -213,7 +212,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, } else { sugov_get_util(&util, &max); sugov_iowait_boost(sg_cpu, &util, &max); - next_f = get_next_freq(sg_cpu, util, max); + next_f = get_next_freq(sg_policy, util, max); } sugov_update_commit(sg_policy, time, next_f); } @@ -267,7 +266,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, sugov_iowait_boost(j_sg_cpu, &util, &max); } - return get_next_freq(sg_cpu, util, max); + return get_next_freq(sg_policy, util, max); } static void sugov_update_shared(struct update_util_data *hook, u64 time, From d82f26925599cae83c38d17d07ae982356e81318 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 28 Feb 2017 16:44:16 -0500 Subject: [PATCH 0565/1299] cpufreq: Add the "cpufreq.off=1" cmdline option Add the "cpufreq.off=1" cmdline option. At boot-time, this allows a user to request CONFIG_CPU_FREQ=n behavior from a kernel built with CONFIG_CPU_FREQ=y. This is analogous to the existing "cpuidle.off=1" option and CONFIG_CPU_IDLE=y This capability is valuable when we need to debug end-user issues in the BIOS or in Linux. It is also convenient for enabling comparisons, which may otherwise require a new kernel, or help from BIOS SETUP, which may be buggy or unavailable. Signed-off-by: Len Brown Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ drivers/cpufreq/cpufreq.c | 1 + 2 files changed, 4 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index be7c0d9506b1..3988d2311f97 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -662,6 +662,9 @@ cpuidle.off=1 [CPU_IDLE] disable the cpuidle sub-system + cpufreq.off=1 [CPU_FREQ] + disable the cpufreq sub-system + cpu_init_udelay=N [X86] Delay for N microsec between assert and de-assert of APIC INIT to start processors. This delay occurs diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 80a785ad17e8..7790db2645d7 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2532,4 +2532,5 @@ static int __init cpufreq_core_init(void) return 0; } +module_param(off, int, 0444); core_initcall(cpufreq_core_init); From cd59b4bed9d11a2aefc4bb44eed9de0e6c1eea06 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 1 Mar 2017 00:07:36 +0100 Subject: [PATCH 0566/1299] cpufreq: intel_pstate: Fix global settings in active mode Commit 111b8b3fe4fa (cpufreq: intel_pstate: Always keep all limits settings in sync) changed intel_pstate to invoke cpufreq_update_policy() for every registered CPU on global sysfs attributes updates, but that led to undesirable effects in the active mode if the "performance" P-state selection algorithm is configufred for one CPU and the "powersave" one is chosen for all of the other CPUs. Namely, in that case, the following is possible: # cd /sys/devices/system/cpu/ # cat intel_pstate/max_perf_pct 100 # cat intel_pstate/min_perf_pct 26 # echo performance > cpufreq/policy0/scaling_governor # cat intel_pstate/max_perf_pct 100 # cat intel_pstate/min_perf_pct 100 # echo 94 > intel_pstate/min_perf_pct # cat intel_pstate/min_perf_pct 26 The reason why this happens is because intel_pstate attempts to maintain two sets of global limits in the active mode, one for the "performance" P-state selection algorithm and one for the "powersave" P-state selection algorithm, but the P-state selection algorithms are set per policy, so the global limits cannot reflect all of them at the same time if they are different for different policies. In the particular situation above, the attempt to change min_perf_pct to 94 caused cpufreq_update_policy() to be run for a CPU with the "powersave" P-state selection algorithm and intel_pstate_set_policy() called by it silently switched the global limits to the "powersave" set which finally was reflected by the sysfs interface. To prevent that from happening, modify intel_pstate_update_policies() to always switch back to the set of limits that was used right before it has been invoked. Fixes: 111b8b3fe4fa (cpufreq: intel_pstate: Always keep all limits settings in sync) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 5e066b332598..436a4c5ba70d 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -973,11 +973,20 @@ static int intel_pstate_resume(struct cpufreq_policy *policy) } static void intel_pstate_update_policies(void) + __releases(&intel_pstate_limits_lock) + __acquires(&intel_pstate_limits_lock) { + struct perf_limits *saved_limits = limits; int cpu; + mutex_unlock(&intel_pstate_limits_lock); + for_each_possible_cpu(cpu) cpufreq_update_policy(cpu); + + mutex_lock(&intel_pstate_limits_lock); + + limits = saved_limits; } /************************** debugfs begin ************************/ @@ -1185,10 +1194,10 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, limits->no_turbo = clamp_t(int, input, 0, 1); - mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_limits_lock); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1222,10 +1231,10 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits->max_perf_pct); limits->max_perf = div_ext_fp(limits->max_perf_pct, 100); - mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_limits_lock); + mutex_unlock(&intel_pstate_driver_lock); return count; @@ -1259,10 +1268,10 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits->min_perf_pct); limits->min_perf = div_ext_fp(limits->min_perf_pct, 100); - mutex_unlock(&intel_pstate_limits_lock); - intel_pstate_update_policies(); + mutex_unlock(&intel_pstate_limits_lock); + mutex_unlock(&intel_pstate_driver_lock); return count; From d74b19929130c9b5395a497030dcf161353cd526 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 1 Mar 2017 00:11:05 +0100 Subject: [PATCH 0567/1299] cpufreq: intel_pstate: Fix intel_pstate_verify_policy() The code added to intel_pstate_verify_policy() by commit 1443ebbacfd7 (cpufreq: intel_pstate: Fix sysfs limits enforcement for performance policy) should use perf_limits instead of limits, because otherwise setting global limits via sysfs may affect policies inconsistently. For example, in the sequence of shell commands below, the scaling_min_freq attribute for policy1 and policy2 should be affected in the same way, because scaling_governor is set in the same way for both of them: # cat cpufreq/policy1/scaling_governor powersave # cat cpufreq/policy2/scaling_governor powersave # echo performance > cpufreq/policy0/scaling_governor # echo 94 > intel_pstate/min_perf_pct # cat cpufreq/policy0/scaling_min_freq 2914000 # cat cpufreq/policy1/scaling_min_freq 2914000 # cat cpufreq/policy2/scaling_min_freq 800000 The are affected differently, because intel_pstate_verify_policy() is invoked with limits set to &performance_limits (left behind by policy0) for policy1 and with limits set to &powersave_limits (left behind by policy1) for policy2. Since perf_limits is set to the set of limits matching the policy being updated, using it instead of limits fixes the inconsistency. Fixes: 1443ebbacfd7 (cpufreq: intel_pstate: Fix sysfs limits enforcement for performance policy) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 436a4c5ba70d..3dc546601c88 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2212,9 +2212,9 @@ static int intel_pstate_verify_policy(struct cpufreq_policy *policy) unsigned int max_freq, min_freq; max_freq = policy->cpuinfo.max_freq * - limits->max_sysfs_pct / 100; + perf_limits->max_sysfs_pct / 100; min_freq = policy->cpuinfo.max_freq * - limits->min_sysfs_pct / 100; + perf_limits->min_sysfs_pct / 100; cpufreq_verify_within_limits(policy, min_freq, max_freq); } From a240c4aa5d0f9c8bb0db380ab9c1ec1f68b923ad Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Mar 2017 23:29:12 +0100 Subject: [PATCH 0568/1299] cpufreq: intel_pstate: Do not reinit performance limits in ->setpolicy If the current P-state selection algorithm is set to "performance" in intel_pstate_set_policy(), the limits may be initialized from scratch, but only if no_turbo is not set and the maximum frequency allowed for the given CPU (i.e. the policy object representing it) is at least equal to the max frequency supported by the CPU. In all of the other cases, the limits will not be updated. For example, the following can happen: # cat intel_pstate/status active # echo performance > cpufreq/policy0/scaling_governor # cat intel_pstate/min_perf_pct 100 # echo 94 > intel_pstate/min_perf_pct # cat intel_pstate/min_perf_pct 100 # cat cpufreq/policy0/scaling_max_freq 3100000 echo 3000000 > cpufreq/policy0/scaling_max_freq # cat intel_pstate/min_perf_pct 94 # echo 95 > intel_pstate/min_perf_pct # cat intel_pstate/min_perf_pct 95 That is confusing for two reasons. First, the initial attempt to change min_perf_pct to 94 seems to have no effect, even though setting the global limits should always work. Second, after changing scaling_max_freq for policy0 the global min_perf_pct attribute shows 94, even though it should have not been affected by that operation in principle. Moreover, the final attempt to change min_perf_pct to 95 worked as expected, because scaling_max_freq for the only policy with scaling_governor equal to "performance" was different from the maximum at that time. To make all that confusion go away, modify intel_pstate_set_policy() so that it doesn't reinitialize the limits at all. At the same time, change intel_pstate_set_performance_limits() to set min_sysfs_pct to 100 in the "performance" limits set so that switching the P-state selection algorithm to "performance" causes intel_pstate/min_perf_pct in sysfs to go to 100 (or whatever value min_sysfs_pct in the "performance" limits is set to later). That requires per-CPU limits to be initialized explicitly rather than by copying the global limits to avoid setting min_sysfs_pct in the per-CPU limits to 100. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 3dc546601c88..f1f8fbe0b4c4 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -382,6 +382,7 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits) intel_pstate_init_limits(limits); limits->min_perf_pct = 100; limits->min_perf = int_ext_tofp(1); + limits->min_sysfs_pct = 100; } static DEFINE_MUTEX(intel_pstate_driver_lock); @@ -2146,16 +2147,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) mutex_lock(&intel_pstate_limits_lock); if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + pr_debug("set performance\n"); if (!perf_limits) { limits = &performance_limits; perf_limits = limits; } - if (policy->max >= policy->cpuinfo.max_freq && - !limits->no_turbo) { - pr_debug("set performance\n"); - intel_pstate_set_performance_limits(perf_limits); - goto out; - } } else { pr_debug("set powersave\n"); if (!perf_limits) { @@ -2166,7 +2162,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) } intel_pstate_update_perf_limits(policy, perf_limits); - out: + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { /* * NOHZ_FULL CPUs need this as the governor callback may not @@ -2257,13 +2253,8 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy) cpu = all_cpu_data[policy->cpu]; - /* - * We need sane value in the cpu->perf_limits, so inherit from global - * perf_limits limits, which are seeded with values based on the - * CONFIG_CPU_FREQ_DEFAULT_GOV_*, during boot up. - */ if (per_cpu_limits) - memcpy(cpu->perf_limits, limits, sizeof(struct perf_limits)); + intel_pstate_init_limits(cpu->perf_limits); policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling; From f38837b08d23e66de17d46d030e0d9ac5172ad1f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 5 Mar 2017 09:41:08 -0800 Subject: [PATCH 0569/1299] bpf: add get_next_key callback to LPM map map_get_next_key callback is mandatory. Supply dummy handler. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Reported-by: Dmitry Vyukov Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/lpm_trie.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 8bfe0afaee10..b37bd9ab7f57 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -500,9 +500,15 @@ static void trie_free(struct bpf_map *map) raw_spin_unlock(&trie->lock); } +static int trie_get_next_key(struct bpf_map *map, void *key, void *next_key) +{ + return -ENOTSUPP; +} + static const struct bpf_map_ops trie_ops = { .map_alloc = trie_alloc, .map_free = trie_free, + .map_get_next_key = trie_get_next_key, .map_lookup_elem = trie_lookup_elem, .map_update_elem = trie_update_elem, .map_delete_elem = trie_delete_elem, From 0878fff1f42c18e448ab5b8b4f6a3eb32365b5b6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 5 Mar 2017 12:34:49 -0800 Subject: [PATCH 0570/1299] net: phy: Do not perform software reset for Generic PHY The Generic PHY driver is a catch-all PHY driver and it should preserve whatever prior initialization has been done by boot loader or firmware agents. For specific PHY device configuration it is expected that a specialized PHY driver would take over that role. Resetting the generic PHY was a bad idea that has lead to several complaints and downstream workarounds e.g: in OpenWrt/LEDE so restore the behavior prior to 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()"). Reported-by: Felix Fietkau Fixes: 87aa9f9c61ad ("net: phy: consolidate PHY reset in phy_init_hw()") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 +- include/linux/phy.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index daec6555f3b1..5198ccfa347f 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1864,7 +1864,7 @@ static struct phy_driver genphy_driver[] = { .phy_id = 0xffffffff, .phy_id_mask = 0xffffffff, .name = "Generic PHY", - .soft_reset = genphy_soft_reset, + .soft_reset = genphy_no_soft_reset, .config_init = genphy_config_init, .features = PHY_GBIT_FEATURES | SUPPORTED_MII | SUPPORTED_AUI | SUPPORTED_FIBRE | diff --git a/include/linux/phy.h b/include/linux/phy.h index 772476028a65..43a774873aa9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -837,6 +837,10 @@ int genphy_read_status(struct phy_device *phydev); int genphy_suspend(struct phy_device *phydev); int genphy_resume(struct phy_device *phydev); int genphy_soft_reset(struct phy_device *phydev); +static inline int genphy_no_soft_reset(struct phy_device *phydev) +{ + return 0; +} void phy_driver_unregister(struct phy_driver *drv); void phy_drivers_unregister(struct phy_driver *drv, int n); int phy_driver_register(struct phy_driver *new_driver, struct module *owner); From 36fc579761b50784b63dafd0f2e796b659e0f5ee Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Mon, 20 Feb 2017 16:25:45 +0100 Subject: [PATCH 0571/1299] drm/edid: Add EDID_QUIRK_FORCE_8BPC quirk for Rotel RSX-1058 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rotel RSX-1058 is a receiver with 4 HDMI inputs and a HDMI output, all 1.1. When a sink that supports deep color is connected to the output, the receiver will send EDIDs that advertise this capability, even if it isn't possible with HDMI versions earlier than 1.3. Currently the kernel is assuming that deep color is possible and the sink displays an error. This quirk will make sure that deep color isn't used with this particular receiver. Fixes: 7a0baa623446 ("Revert "drm/i915: Disable 12bpc hdmi for now"") Signed-off-by: Tomeu Vizoso Link: http://patchwork.freedesktop.org/patch/msgid/20170220152545.13153-1-tomeu.vizoso@collabora.com Cc: stable@vger.kernel.org Cc: Matt Horan Tested-by: Matt Horan Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99869 Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/drm_edid.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index c8baab9bee0d..ba58f1b11d1e 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -148,6 +148,9 @@ static const struct edid_quirk { /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */ { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC }, + + /* Rotel RSX-1058 forwards sink's EDID but only does HDMI 1.1*/ + { "ETR", 13896, EDID_QUIRK_FORCE_8BPC }, }; /* From 505b681539a7e14aeb866515d3ef1a67375839bc Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 6 Mar 2017 08:34:44 +0100 Subject: [PATCH 0572/1299] drm/i915: Update DRIVER_DATE to 20170306 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 95050115c700..56e569f536ec 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,8 +79,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170206" -#define DRIVER_TIMESTAMP 1486372993 +#define DRIVER_DATE "20170306" +#define DRIVER_TIMESTAMP 1488785683 #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ From 7369090a9fb57c3fc705ce355d2e4523a5a24716 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 31 Jan 2017 13:24:54 +0200 Subject: [PATCH 0573/1299] usb: dwc3: gadget: make Set Endpoint Configuration macros safe Some gadget drivers are bad, bad boys. We notice that ADB was passing bad Burst Size which caused top bits of param0 to be overwritten which confused DWC3 when running this command. In order to avoid future issues, we're going to make sure values passed by macros are always safe for the controller. Note that ADB still needs a fix to *not* pass bad values. Cc: # v3.2+ Reported-by: Mohamed Abbas Sugested-by: Adam Andruszak Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/dwc3/gadget.h b/drivers/usb/dwc3/gadget.h index 3129bcf74d7d..265e223ab645 100644 --- a/drivers/usb/dwc3/gadget.h +++ b/drivers/usb/dwc3/gadget.h @@ -28,23 +28,23 @@ struct dwc3; #define gadget_to_dwc(g) (container_of(g, struct dwc3, gadget)) /* DEPCFG parameter 1 */ -#define DWC3_DEPCFG_INT_NUM(n) ((n) << 0) +#define DWC3_DEPCFG_INT_NUM(n) (((n) & 0x1f) << 0) #define DWC3_DEPCFG_XFER_COMPLETE_EN (1 << 8) #define DWC3_DEPCFG_XFER_IN_PROGRESS_EN (1 << 9) #define DWC3_DEPCFG_XFER_NOT_READY_EN (1 << 10) #define DWC3_DEPCFG_FIFO_ERROR_EN (1 << 11) #define DWC3_DEPCFG_STREAM_EVENT_EN (1 << 13) -#define DWC3_DEPCFG_BINTERVAL_M1(n) ((n) << 16) +#define DWC3_DEPCFG_BINTERVAL_M1(n) (((n) & 0xff) << 16) #define DWC3_DEPCFG_STREAM_CAPABLE (1 << 24) -#define DWC3_DEPCFG_EP_NUMBER(n) ((n) << 25) +#define DWC3_DEPCFG_EP_NUMBER(n) (((n) & 0x1f) << 25) #define DWC3_DEPCFG_BULK_BASED (1 << 30) #define DWC3_DEPCFG_FIFO_BASED (1 << 31) /* DEPCFG parameter 0 */ -#define DWC3_DEPCFG_EP_TYPE(n) ((n) << 1) -#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) ((n) << 3) -#define DWC3_DEPCFG_FIFO_NUMBER(n) ((n) << 17) -#define DWC3_DEPCFG_BURST_SIZE(n) ((n) << 22) +#define DWC3_DEPCFG_EP_TYPE(n) (((n) & 0x3) << 1) +#define DWC3_DEPCFG_MAX_PACKET_SIZE(n) (((n) & 0x7ff) << 3) +#define DWC3_DEPCFG_FIFO_NUMBER(n) (((n) & 0x1f) << 17) +#define DWC3_DEPCFG_BURST_SIZE(n) (((n) & 0xf) << 22) #define DWC3_DEPCFG_DATA_SEQ_NUM(n) ((n) << 26) /* This applies for core versions earlier than 1.94a */ #define DWC3_DEPCFG_IGN_SEQ_NUM (1 << 31) From 2bfa0719ac2a9b2f3c91345873d3cdebd0296ba9 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 31 Jan 2017 14:54:45 +0200 Subject: [PATCH 0574/1299] usb: gadget: function: f_fs: pass companion descriptor along If we're dealing with SuperSpeed endpoints, we need to make sure to pass along the companion descriptor and initialize fields needed by the Gadget API. Eventually, f_fs.c should be converted to use config_ep_by_speed() like all other functions, though. Cc: Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index a5b7cd615698..7e976f00cb02 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1834,11 +1834,14 @@ static int ffs_func_eps_enable(struct ffs_function *func) spin_lock_irqsave(&func->ffs->eps_lock, flags); while(count--) { struct usb_endpoint_descriptor *ds; + struct usb_ss_ep_comp_descriptor *comp_desc = NULL; + int needs_comp_desc = false; int desc_idx; - if (ffs->gadget->speed == USB_SPEED_SUPER) + if (ffs->gadget->speed == USB_SPEED_SUPER) { desc_idx = 2; - else if (ffs->gadget->speed == USB_SPEED_HIGH) + needs_comp_desc = true; + } else if (ffs->gadget->speed == USB_SPEED_HIGH) desc_idx = 1; else desc_idx = 0; @@ -1855,6 +1858,14 @@ static int ffs_func_eps_enable(struct ffs_function *func) ep->ep->driver_data = ep; ep->ep->desc = ds; + + comp_desc = (struct usb_ss_ep_comp_descriptor *)(ds + + USB_DT_ENDPOINT_SIZE); + ep->ep->maxburst = comp_desc->bMaxBurst + 1; + + if (needs_comp_desc) + ep->ep->comp_desc = comp_desc; + ret = usb_ep_enable(ep->ep); if (likely(!ret)) { epfile->ep = ep; From cf3113d893d4427b166ec8695460efa7aa660923 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 17 Feb 2017 11:12:44 +0200 Subject: [PATCH 0575/1299] usb: dwc3: gadget: properly increment dequeue pointer on ep_dequeue If request was already started, this means we had to stop the transfer. With that we also need to ignore all TRBs used by the request, however TRBs can only be modified after completion of END_TRANSFER command. So what we have to do here is wait for END_TRANSFER completion and only after that jump over TRBs by clearing HWO and incrementing dequeue pointer. Note that we have 2 possible types of transfers here: i) Linear buffer request ii) SG-list based request SG-list based requests will have r->num_pending_sgs set to a valid number (> 0). Linear requests, normally use a single TRB. For each of these two cases, if r->unaligned flag is set, one extra TRB has been used to align transfer size to wMaxPacketSize. All of these cases need to be taken into consideration so we don't mess up our TRB ring pointers. Tested-by: Janusz Dziedzic Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 63 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 4db97ecae885..f875b3f4b0ec 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1342,6 +1342,68 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, if (r == req) { /* wait until it is processed */ dwc3_stop_active_transfer(dwc, dep->number, true); + + /* + * If request was already started, this means we had to + * stop the transfer. With that we also need to ignore + * all TRBs used by the request, however TRBs can only + * be modified after completion of END_TRANSFER + * command. So what we do here is that we wait for + * END_TRANSFER completion and only after that, we jump + * over TRBs by clearing HWO and incrementing dequeue + * pointer. + * + * Note that we have 2 possible types of transfers here: + * + * i) Linear buffer request + * ii) SG-list based request + * + * SG-list based requests will have r->num_pending_sgs + * set to a valid number (> 0). Linear requests, + * normally use a single TRB. + * + * For each of these two cases, if r->unaligned flag is + * set, one extra TRB has been used to align transfer + * size to wMaxPacketSize. + * + * All of these cases need to be taken into + * consideration so we don't mess up our TRB ring + * pointers. + */ + wait_event_lock_irq(dep->wait_end_transfer, + !(dep->flags & DWC3_EP_END_TRANSFER_PENDING), + dwc->lock); + + if (!r->trb) + goto out1; + + if (r->num_pending_sgs) { + struct dwc3_trb *trb; + int i = 0; + + for (i = 0; i < r->num_pending_sgs; i++) { + trb = r->trb + i; + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + dwc3_ep_inc_deq(dep); + } + + if (r->unaligned) { + trb = r->trb + r->num_pending_sgs + 1; + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + dwc3_ep_inc_deq(dep); + } + } else { + struct dwc3_trb *trb = r->trb; + + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + dwc3_ep_inc_deq(dep); + + if (r->unaligned) { + trb = r->trb + 1; + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + dwc3_ep_inc_deq(dep); + } + } goto out1; } dev_err(dwc->dev, "request %p was not queued to %s\n", @@ -1352,6 +1414,7 @@ static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, out1: /* giveback the request */ + dep->queued_requests--; dwc3_gadget_giveback(dep, req, -ECONNRESET); out0: From 2e46565cf622dd0534a9d8bffe152a577b48d7aa Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 24 Feb 2017 19:11:28 +0100 Subject: [PATCH 0576/1299] USB: serial: digi_acceleport: fix OOB-event processing A recent change claimed to fix an off-by-one error in the OOB-port completion handler, but instead introduced such an error. This could specifically led to modem-status changes going unnoticed, effectively breaking TIOCMGET. Note that the offending commit fixes a loop-condition underflow and is marked for stable, but should not be backported without this fix. Reported-by: Ben Hutchings Fixes: 2d380889215f ("USB: serial: digi_acceleport: fix OOB data sanity check") Cc: stable # v2.6.30: 2d380889215f Signed-off-by: Johan Hovold --- drivers/usb/serial/digi_acceleport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index ab78111e0968..6537d3ca2797 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -1500,7 +1500,7 @@ static int digi_read_oob_callback(struct urb *urb) return -1; /* handle each oob command */ - for (i = 0; i < urb->actual_length - 4; i += 4) { + for (i = 0; i < urb->actual_length - 3; i += 4) { opcode = buf[i]; line = buf[i + 1]; status = buf[i + 2]; From 8f1117abb408808af9cc4c948925c726bec4755a Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Mon, 6 Mar 2017 13:05:24 +0800 Subject: [PATCH 0577/1299] drm/i915/gvt: handle workload lifecycle properly Currently i915 has a request replay mechanism which can make sure the request can be replayed after a GPU reset. With this mechanism, gvt should wait until the GVT request seqno passed before complete the current workload. So that there should be a context switch interrupt come before gvt free the workload. In this way, workload lifecylce matches with the i915 request lifecycle. The workload can only be freed after the request is completed. v2: use gvt_dbg_sched instead of gvt_err to print when wait again Signed-off-by: Chuanxiao Dong Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 49 +++++++++++++++++++--------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index e355a82ccabd..d3a56c949025 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -151,6 +151,15 @@ static int shadow_context_status_change(struct notifier_block *nb, case INTEL_CONTEXT_SCHEDULE_OUT: intel_gvt_restore_render_mmio(workload->vgpu, workload->ring_id); + /* If the status is -EINPROGRESS means this workload + * doesn't meet any issue during dispatching so when + * get the SCHEDULE_OUT set the status to be zero for + * good. If the status is NOT -EINPROGRESS means there + * is something wrong happened during dispatching and + * the status should not be set to zero + */ + if (workload->status == -EINPROGRESS) + workload->status = 0; atomic_set(&workload->shadow_ctx_active, 0); break; default: @@ -362,15 +371,23 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload = scheduler->current_workload[ring_id]; vgpu = workload->vgpu; - if (!workload->status && !vgpu->resetting) { + /* For the workload w/ request, needs to wait for the context + * switch to make sure request is completed. + * For the workload w/o request, directly complete the workload. + */ + if (workload->req) { wait_event(workload->shadow_ctx_status_wq, !atomic_read(&workload->shadow_ctx_active)); - update_guest_context(workload); + i915_gem_request_put(fetch_and_zero(&workload->req)); - for_each_set_bit(event, workload->pending_events, - INTEL_GVT_EVENT_MAX) - intel_vgpu_trigger_virtual_event(vgpu, event); + if (!workload->status && !vgpu->resetting) { + update_guest_context(workload); + + for_each_set_bit(event, workload->pending_events, + INTEL_GVT_EVENT_MAX) + intel_vgpu_trigger_virtual_event(vgpu, event); + } } gvt_dbg_sched("ring id %d complete workload %p status %d\n", @@ -400,7 +417,6 @@ static int workload_thread(void *priv) int ring_id = p->ring_id; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_vgpu_workload *workload = NULL; - long lret; int ret; bool need_force_wake = IS_SKYLAKE(gvt->dev_priv); DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -449,23 +465,24 @@ static int workload_thread(void *priv) gvt_dbg_sched("ring id %d wait workload %p\n", workload->ring_id, workload); - - lret = i915_wait_request(workload->req, +retry: + i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT); - if (lret < 0) { - workload->status = lret; - gvt_err("fail to wait workload, skip\n"); - } else { - workload->status = 0; + /* I915 has replay mechanism and a request will be replayed + * if there is i915 reset. So the seqno will be updated anyway. + * If the seqno is not updated yet after waiting, which means + * the replay may still be in progress and we can wait again. + */ + if (!i915_gem_request_completed(workload->req)) { + gvt_dbg_sched("workload %p not completed, wait again\n", + workload); + goto retry; } complete: gvt_dbg_sched("will complete workload %p, status: %d\n", workload, workload->status); - if (workload->req) - i915_gem_request_put(fetch_and_zero(&workload->req)); - complete_current_workload(gvt, ring_id); if (need_force_wake) From c2e04fdab33181b53b5a2f9662b7b607b720f79f Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 6 Mar 2017 17:08:30 +0800 Subject: [PATCH 0578/1299] drm/i915/gvt: protect RO and Rsvd bits of virtual vgpu configuration space Per PCI specification, Configuration Register has different types (RO, RW, RW1C, Rsvd). For RO Register bits are read-only and cannot be altered by software. For RW1C Register bits indicate status when read. A Set bit indicates a status event which is Cleared by writing a 1b. Writing a 0b to RW1C bits has no effect. Reserved Register is for future implementations, and they are read-only and must return zero when read. Current vGPU configuration write emulation just copy the value as it is. So we haven't emulated RO, RW1C and Rsvd Registers correctly. This patch is following the Spec to correct emulation logic. We add a function vgpu_cfg_mem_write to wrap the access to vGPU configuration memory. The write function uses a RW Register bitmap to avoid RO bits be overwritten, and emulate RW1C behavior for the particular status Register. v2: new = src[i] --> new = src[i] & mask (zhenyu) Signed-off-by: Changbin Du Cc: Xiaoguang Chen Cc: Zhiyuan Lv Cc: Min He Reviewed-by: Zhenyu Wang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cfg_space.c | 54 ++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index a77e050b85a3..b7d7721e72fa 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -41,6 +41,54 @@ enum { INTEL_GVT_PCI_BAR_MAX, }; +/* bitmap for writable bits (RW or RW1C bits, but cannot co-exist in one + * byte) byte by byte in standard pci configuration space. (not the full + * 256 bytes.) + */ +static const u8 pci_cfg_space_rw_bmp[PCI_INTERRUPT_LINE + 4] = { + [PCI_COMMAND] = 0xff, 0x07, + [PCI_STATUS] = 0x00, 0xf9, /* the only one RW1C byte */ + [PCI_CACHE_LINE_SIZE] = 0xff, + [PCI_BASE_ADDRESS_0 ... PCI_CARDBUS_CIS - 1] = 0xff, + [PCI_ROM_ADDRESS] = 0x01, 0xf8, 0xff, 0xff, + [PCI_INTERRUPT_LINE] = 0xff, +}; + +/** + * vgpu_pci_cfg_mem_write - write virtual cfg space memory + * + * Use this function to write virtual cfg space memory. + * For standard cfg space, only RW bits can be changed, + * and we emulates the RW1C behavior of PCI_STATUS register. + */ +static void vgpu_pci_cfg_mem_write(struct intel_vgpu *vgpu, unsigned int off, + u8 *src, unsigned int bytes) +{ + u8 *cfg_base = vgpu_cfg_space(vgpu); + u8 mask, new, old; + int i = 0; + + for (; i < bytes && (off + i < sizeof(pci_cfg_space_rw_bmp)); i++) { + mask = pci_cfg_space_rw_bmp[off + i]; + old = cfg_base[off + i]; + new = src[i] & mask; + + /** + * The PCI_STATUS high byte has RW1C bits, here + * emulates clear by writing 1 for these bits. + * Writing a 0b to RW1C bits has no effect. + */ + if (off + i == PCI_STATUS + 1) + new = (~new & old) & mask; + + cfg_base[off + i] = (old & ~mask) | new; + } + + /* For other configuration space directly copy as it is. */ + if (i < bytes) + memcpy(cfg_base + off + i, src + i, bytes - i); +} + /** * intel_vgpu_emulate_cfg_read - emulate vGPU configuration space read * @@ -123,7 +171,7 @@ static int emulate_pci_command_write(struct intel_vgpu *vgpu, u8 changed = old ^ new; int ret; - memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); if (!(changed & PCI_COMMAND_MEMORY)) return 0; @@ -277,10 +325,10 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, if (ret) return ret; - memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); break; default: - memcpy(vgpu_cfg_space(vgpu) + offset, p_data, bytes); + vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes); break; } return 0; From 68925176296a8b995e503349200e256674bfe5ac Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 17 Feb 2017 14:32:18 +0000 Subject: [PATCH 0579/1299] arm64: KVM: VHE: Clear HCR_TGE when invalidating guest TLBs When invalidating guest TLBs, special care must be taken to actually shoot the guest TLBs and not the host ones if we're running on a VHE system. This is controlled by the HCR_EL2.TGE bit, which we forget to clear before invalidating TLBs. Address the issue by introducing two wrappers (__tlb_switch_to_guest and __tlb_switch_to_host) that take care of both the VTTBR_EL2 and HCR_EL2.TGE switching. Reported-by: Tomasz Nowicki Tested-by: Tomasz Nowicki Reviewed-by: Christoffer Dall Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/tlb.c | 64 ++++++++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index e8e7ba2bc11f..9e1d2b75eecd 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -18,14 +18,62 @@ #include #include +static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm) +{ + u64 val; + + /* + * With VHE enabled, we have HCR_EL2.{E2H,TGE} = {1,1}, and + * most TLB operations target EL2/EL0. In order to affect the + * guest TLBs (EL1/EL0), we need to change one of these two + * bits. Changing E2H is impossible (goodbye TTBR1_EL2), so + * let's flip TGE before executing the TLB operation. + */ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + val = read_sysreg(hcr_el2); + val &= ~HCR_TGE; + write_sysreg(val, hcr_el2); + isb(); +} + +static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm) +{ + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); +} + +static hyp_alternate_select(__tlb_switch_to_guest, + __tlb_switch_to_guest_nvhe, + __tlb_switch_to_guest_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + +static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm) +{ + /* + * We're done with the TLB operation, let's restore the host's + * view of HCR_EL2. + */ + write_sysreg(0, vttbr_el2); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); +} + +static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm) +{ + write_sysreg(0, vttbr_el2); +} + +static hyp_alternate_select(__tlb_switch_to_host, + __tlb_switch_to_host_nvhe, + __tlb_switch_to_host_vhe, + ARM64_HAS_VIRT_HOST_EXTN); + void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { dsb(ishst); /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); /* * We could do so much better if we had the VA as well. @@ -46,7 +94,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) @@ -55,14 +103,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) /* Switch to requested VMID */ kvm = kern_hyp_va(kvm); - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); __tlbi(vmalls12e1is); dsb(ish); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) @@ -70,14 +117,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu) struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm); /* Switch to requested VMID */ - write_sysreg(kvm->arch.vttbr, vttbr_el2); - isb(); + __tlb_switch_to_guest()(kvm); __tlbi(vmalle1); dsb(nsh); isb(); - write_sysreg(0, vttbr_el2); + __tlb_switch_to_host()(kvm); } void __hyp_text __kvm_flush_vm_context(void) From 4dfc050571523ac2bc02cbf948dd47621f7dd83f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 21 Feb 2017 11:32:47 +0000 Subject: [PATCH 0580/1299] KVM: arm/arm64: vgic-v3: Don't pretend to support IRQ/FIQ bypass Our GICv3 emulation always presents ICC_SRE_EL1 with DIB/DFB set to zero, which implies that there is a way to bypass the GIC and inject raw IRQ/FIQ by driving the CPU pins. Of course, we don't allow that when the GIC is configured, but we fail to indicate that to the guest. The obvious fix is to set these bits (and never let them being changed again). Reported-by: Peter Maydell Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 ++ virt/kvm/arm/vgic/vgic-v3.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 672cfef72fc8..97cbca19430d 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -373,6 +373,8 @@ #define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) #define ICC_IGRPEN1_EL1_SHIFT 0 #define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) +#define ICC_SRE_EL1_DIB (1U << 2) +#define ICC_SRE_EL1_DFB (1U << 1) #define ICC_SRE_EL1_SRE (1U << 0) /* diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index edc6ee2dc852..be0f4c3e0142 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -229,10 +229,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) /* * If we are emulating a GICv3, we do it in an non-GICv2-compatible * way, so we force SRE to 1 to demonstrate this to the guest. + * Also, we don't support any form of IRQ/FIQ bypass. * This goes with the spec allowing the value to be RAO/WI. */ if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - vgic_v3->vgic_sre = ICC_SRE_EL1_SRE; + vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB | + ICC_SRE_EL1_DFB | + ICC_SRE_EL1_SRE); vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE; } else { vgic_v3->vgic_sre = 0; From a69e2fb70350a66f91175cd2625f1e8215c5b6e9 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 3 Mar 2017 11:58:44 +1100 Subject: [PATCH 0581/1299] powerpc/xics: Work around limitations of OPAL XICS priority handling The CPPR (Current Processor Priority Register) of a XICS interrupt presentation controller contains a value N, such that only interrupts with a priority "more favoured" than N will be received by the CPU, where "more favoured" means "less than". So if the CPPR has the value 5 then only interrupts with a priority of 0-4 inclusive will be received. In theory the CPPR can support a value of 0 to 255 inclusive. In practice Linux only uses values of 0, 4, 5 and 0xff. Setting the CPPR to 0 rejects all interrupts, setting it to 0xff allows all interrupts. The values 4 and 5 are used to differentiate IPIs from external interrupts. Setting the CPPR to 5 allows IPIs to be received but not external interrupts. The CPPR emulation in the OPAL XICS implementation only directly supports priorities 0 and 0xff. All other priorities are considered equivalent, and mapped to a single priority value internally. This means when using icp-opal we can not allow IPIs but not externals. This breaks Linux's use of priority values when a CPU is hot unplugged. After migrating IRQs away from the CPU that is being offlined, we set the priority to 5, meaning we still want the offline CPU to receive IPIs. But the effect of the OPAL XICS emulation's use of a single priority value is that all interrupts are rejected by the CPU. With the CPU offline, and not receiving IPIs, we may not be able to wake it up to bring it back online. The first part of the fix is in icp_opal_set_cpu_priority(). CPPR values of 0 to 4 inclusive will correctly cause all interrupts to be rejected, so we pass those CPPR values through to OPAL. However if we are called with a CPPR of 5 or greater, the caller is expecting to be able to allow IPIs but not external interrupts. We know this doesn't work, so instead of rejecting all interrupts we choose the opposite which is to allow all interrupts. This is still not correct behaviour, but we know for the only existing caller (xics_migrate_irqs_away()), that it is the better option. The other part of the fix is in xics_migrate_irqs_away(). Instead of setting priority (CPPR) to 0, and then back to 5 before migrating IRQs, we migrate the IRQs before setting the priority back to 5. This should have no effect on an ICP backend with a working set_priority(), and on icp-opal it means we will keep all interrupts blocked until after we've finished doing the IRQ migration. Additionally we wait for 5ms after doing the migration to make sure there are no IRQs in flight. Fixes: d74361881f0d ("powerpc/xics: Add ICP OPAL backend") Cc: stable@vger.kernel.org # v4.8+ Suggested-by: Michael Ellerman Reported-by: Vaidyanathan Srinivasan Tested-by: Vaidyanathan Srinivasan Signed-off-by: Balbir Singh [mpe: Rewrote comments and change log, change delay to 5ms] Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xics/icp-opal.c | 10 ++++++++++ arch/powerpc/sysdev/xics/xics-common.c | 17 ++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index f9670eabfcfa..b53f80f0b4d8 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -91,6 +91,16 @@ static unsigned int icp_opal_get_irq(void) static void icp_opal_set_cpu_priority(unsigned char cppr) { + /* + * Here be dragons. The caller has asked to allow only IPI's and not + * external interrupts. But OPAL XIVE doesn't support that. So instead + * of allowing no interrupts allow all. That's still not right, but + * currently the only caller who does this is xics_migrate_irqs_away() + * and it works in that case. + */ + if (cppr >= DEFAULT_PRIORITY) + cppr = LOWEST_PRIORITY; + xics_set_base_cppr(cppr); opal_int_set_cppr(cppr); iosync(); diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 69d858e51ac7..23efe4e42172 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -198,9 +199,6 @@ void xics_migrate_irqs_away(void) /* Remove ourselves from the global interrupt queue */ xics_set_cpu_giq(xics_default_distrib_server, 0); - /* Allow IPIs again... */ - icp_ops->set_priority(DEFAULT_PRIORITY); - for_each_irq_desc(virq, desc) { struct irq_chip *chip; long server; @@ -255,6 +253,19 @@ void xics_migrate_irqs_away(void) unlock: raw_spin_unlock_irqrestore(&desc->lock, flags); } + + /* Allow "sufficient" time to drop any inflight IRQ's */ + mdelay(5); + + /* + * Allow IPIs again. This is done at the very end, after migrating all + * interrupts, the expectation is that we'll only get woken up by an IPI + * interrupt beyond this point, but leave externals masked just to be + * safe. If we're using icp-opal this may actually allow all + * interrupts anyway, but that should be OK. + */ + icp_ops->set_priority(DEFAULT_PRIORITY); + } #endif /* CONFIG_HOTPLUG_CPU */ From 12cc9fd6b2d8ee307a735b3b9faed0d17b719463 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 28 Feb 2017 17:03:47 +1100 Subject: [PATCH 0582/1299] powerpc: Parse the command line before calling CAS On POWER9 the hypervisor requires the guest to decide whether it would like to use a hash or radix mmu model at the time it calls ibm,client-architecture-support (CAS) based on what the hypervisor has said it's allowed to do. It is possible to disable radix by passing "disable_radix" on the command line. The next patch will add support for the new CAS format, thus we need to parse the command line before calling CAS so we can correctly select which mmu we would like to use. Signed-off-by: Suraj Jitindar Singh Reviewed-by: Paul Mackerras Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a3944540fe0d..bf3966d12565 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2993,6 +2993,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_check_initrd(r3, r4); + /* + * Do early parsing of command line + */ + early_cmdline_parse(); + #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * On pSeries, inform the firmware about our capabilities @@ -3008,11 +3013,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, if (of_platform != PLATFORM_POWERMAC) copy_and_flush(0, kbase, 0x100, 0); - /* - * Do early parsing of command line - */ - early_cmdline_parse(); - /* * Initialize memory management within prom_init */ From 014d02cbf16b3106dc8e93281d2a9c189751ed5e Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 28 Feb 2017 17:03:48 +1100 Subject: [PATCH 0583/1299] powerpc: Update to new option-vector-5 format for CAS On POWER9 the ibm,client-architecture-support (CAS) negotiation process has been updated to change how the host to guest negotiation is done for the new hash/radix mmu as well as the nest mmu, process tables and guest translation shootdown (GTSE). This is documented in the unreleased PAPR ACR "CAS option vector additions for P9". The host tells the guest which options it supports in ibm,arch-vec-5-platform-support. The guest then chooses a subset of these to request in the CAS call and these are agreed to in the ibm,architecture-vec-5 property of the chosen node. Thus we read ibm,arch-vec-5-platform-support and make our selection before calling CAS. We then parse the ibm,architecture-vec-5 property of the chosen node to check whether we should run as hash or radix. ibm,arch-vec-5-platform-support format: index value pairs: ... index: Option vector 5 byte number val: Some representation of supported values Signed-off-by: Suraj Jitindar Singh Acked-by: Paul Mackerras [mpe: Don't print about unknown options, be consistent with OV5_FEAT] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/prom.h | 18 ++++-- arch/powerpc/kernel/prom_init.c | 110 +++++++++++++++++++++++++++++++- arch/powerpc/mm/init_64.c | 42 +++++++++--- 3 files changed, 153 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 4a90634e8322..35c00d7a0cf8 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -160,12 +160,18 @@ struct of_drconf_cell { #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ #define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */ -#define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix MMU supported */ -#define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash MMU supported */ -#define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no segmentation) */ -#define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB or proc table */ -#define OV5_MMU_SLB 0x1800 /* always use SLB */ -#define OV5_MMU_GTSE 0x1808 /* Guest translation shootdown */ +/* MMU Base Architecture */ +#define OV5_MMU_SUPPORT 0x18C0 /* MMU Mode Support Mask */ +#define OV5_MMU_HASH 0x1800 /* Hash MMU Only */ +#define OV5_MMU_RADIX 0x1840 /* Radix MMU Only */ +#define OV5_MMU_EITHER 0x1880 /* Hash or Radix Supported */ +#define OV5_MMU_DYNAMIC 0x18C0 /* Hash or Radix Can Switch Later */ +#define OV5_NMMU 0x1820 /* Nest MMU Available */ +/* Hash Table Extensions */ +#define OV5_HASH_SEG_TBL 0x1980 /* In Memory Segment Tables Available */ +#define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */ +/* Radix Table Extensions */ +#define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index bf3966d12565..1c1b44ec7642 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -168,6 +168,14 @@ static unsigned long __initdata prom_tce_alloc_start; static unsigned long __initdata prom_tce_alloc_end; #endif +static bool __initdata prom_radix_disable; + +struct platform_support { + bool hash_mmu; + bool radix_mmu; + bool radix_gtse; +}; + /* Platforms codes are now obsolete in the kernel. Now only used within this * file and ultimately gone too. Feel free to change them if you need, they * are not shared with anything outside of this file anymore @@ -626,6 +634,12 @@ static void __init early_cmdline_parse(void) prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); #endif } + + opt = strstr(prom_cmd_line, "disable_radix"); + if (opt) { + prom_debug("Radix disabled from cmdline\n"); + prom_radix_disable = true; + } } #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) @@ -695,6 +709,8 @@ struct option_vector5 { u8 byte22; u8 intarch; u8 mmu; + u8 hash_ext; + u8 radix_ext; } __packed; struct option_vector6 { @@ -850,8 +866,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .reserved3 = 0, .subprocessors = 1, .intarch = 0, - .mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) | - OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE), + .mmu = 0, + .hash_ext = 0, + .radix_ext = 0, }, /* option vector 6: IBM PAPR hints */ @@ -990,6 +1007,92 @@ static int __init prom_count_smt_threads(void) } +static void __init prom_parse_mmu_model(u8 val, + struct platform_support *support) +{ + switch (val) { + case OV5_FEAT(OV5_MMU_DYNAMIC): + case OV5_FEAT(OV5_MMU_EITHER): /* Either Available */ + prom_debug("MMU - either supported\n"); + support->radix_mmu = !prom_radix_disable; + support->hash_mmu = true; + break; + case OV5_FEAT(OV5_MMU_RADIX): /* Only Radix */ + prom_debug("MMU - radix only\n"); + if (prom_radix_disable) { + /* + * If we __have__ to do radix, we're better off ignoring + * the command line rather than not booting. + */ + prom_printf("WARNING: Ignoring cmdline option disable_radix\n"); + } + support->radix_mmu = true; + break; + case OV5_FEAT(OV5_MMU_HASH): + prom_debug("MMU - hash only\n"); + support->hash_mmu = true; + break; + default: + prom_debug("Unknown mmu support option: 0x%x\n", val); + break; + } +} + +static void __init prom_parse_platform_support(u8 index, u8 val, + struct platform_support *support) +{ + switch (index) { + case OV5_INDX(OV5_MMU_SUPPORT): /* MMU Model */ + prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support); + break; + case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */ + if (val & OV5_FEAT(OV5_RADIX_GTSE)) { + prom_debug("Radix - GTSE supported\n"); + support->radix_gtse = true; + } + break; + } +} + +static void __init prom_check_platform_support(void) +{ + struct platform_support supported = { + .hash_mmu = false, + .radix_mmu = false, + .radix_gtse = false + }; + int prop_len = prom_getproplen(prom.chosen, + "ibm,arch-vec-5-platform-support"); + if (prop_len > 1) { + int i; + u8 vec[prop_len]; + prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n", + prop_len); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", + &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 + , vec[i] + , vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], + &supported); + } + } + + if (supported.radix_mmu && supported.radix_gtse) { + /* Radix preferred - but we require GTSE for now */ + prom_debug("Asking for radix with GTSE\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); + ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE); + } else if (supported.hash_mmu) { + /* Default to hash mmu (if we can) */ + prom_debug("Asking for hash\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH); + } else { + /* We're probably on a legacy hypervisor */ + prom_debug("Assuming legacy hash support\n"); + } +} static void __init prom_send_capabilities(void) { @@ -997,6 +1100,9 @@ static void __init prom_send_capabilities(void) prom_arg_t ret; u32 cores; + /* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */ + prom_check_platform_support(); + root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { /* We need to tell the FW about the number of cores we support. diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 6aa3b76aa0d6..9be992083d2a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -356,18 +356,42 @@ static void early_check_vec5(void) unsigned long root, chosen; int size; const u8 *vec5; + u8 mmu_supported; root = of_get_flat_dt_root(); chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); - if (chosen == -FDT_ERR_NOTFOUND) - return; - vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); - if (!vec5) - return; - if (size <= OV5_INDX(OV5_MMU_RADIX_300) || - !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300))) - /* Hypervisor doesn't support radix */ + if (chosen == -FDT_ERR_NOTFOUND) { cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); + if (!vec5) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + if (size <= OV5_INDX(OV5_MMU_SUPPORT)) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + + /* Check for supported configuration */ + mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] & + OV5_FEAT(OV5_MMU_SUPPORT); + if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) { + /* Hypervisor only supports radix - check enabled && GTSE */ + if (!early_radix_enabled()) { + pr_warn("WARNING: Ignoring cmdline option disable_radix\n"); + } + if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] & + OV5_FEAT(OV5_RADIX_GTSE))) { + pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n"); + } + /* Do radix anyway - the hypervisor said we had to */ + cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; + } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) { + /* Hypervisor only supports hash - disable radix */ + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + } } void __init mmu_early_init_devtree(void) @@ -383,7 +407,7 @@ void __init mmu_early_init_devtree(void) * even though the ibm,architecture-vec-5 property created by * skiboot doesn't have the necessary bits set. */ - if (early_radix_enabled() && !(mfmsr() & MSR_HV)) + if (!(mfmsr() & MSR_HV)) early_check_vec5(); if (early_radix_enabled()) From 6ba422c75facb1b1e0e206c464ee121b8073f7e0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 5 Mar 2017 10:54:34 +1100 Subject: [PATCH 0584/1299] powerpc/64: Avoid panic during boot due to divide by zero in init_cache_info() I see a panic in early boot when building with a recent gcc toolchain. The issue is a divide by zero, which is undefined. Older toolchains let us get away with it: int foo(int a) { return a / 0; } foo: li 9,0 divw 3,3,9 extsw 3,3 blr But newer ones catch it: foo: trap Add a check to avoid the divide by zero. Fixes: e2827fe5c156 ("powerpc/64: Clean up ppc64_caches using a struct per cache") Signed-off-by: Anton Blanchard Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index adf2084f214b..9cfaa8b69b5f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -408,7 +408,10 @@ static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, info->line_size = lsize; info->block_size = bsize; info->log_block_size = __ilog2(bsize); - info->blocks_per_page = PAGE_SIZE / bsize; + if (bsize) + info->blocks_per_page = PAGE_SIZE / bsize; + else + info->blocks_per_page = 0; if (sets == 0) info->assoc = 0xffff; From 3b3e78552d3077ec70d2640e629e07e3ab416a6a Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Sun, 5 Mar 2017 19:16:44 +0100 Subject: [PATCH 0585/1299] x86/reboot/quirks: Add ASUS EeeBook X205TA/W reboot quirk Without the parameter reboot=a, ASUS EeeBook X205TA/W will hang when it should reboot. This adds the appropriate quirk, thus fixing the problem. Signed-off-by: Matjaz Hegedic Link: http://lkml.kernel.org/r/1488737804-20681-1-git-send-email-matjaz.hegedic@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 01c9cda3e1b7..4194d6f9bb29 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -231,6 +231,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), }, }, + { /* Handle problems with rebooting on ASUS EeeBook X205TAW */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TAW", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + }, + }, /* Certec */ { /* Handle problems with rebooting on Certec BPC600 */ From f2853308b6409b97799b0beceacd9da43a82fe43 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 6 Mar 2017 10:57:48 +0200 Subject: [PATCH 0586/1299] x86/build/x86_64_defconfig: Enable CONFIG_R8169 Very common PCIe ethernet card. Already enabled in i386_defconfig. Signed-off-by: Andy Shevchenko Cc: Peter Zijlstra Cc: Konstantin Khlebnikov Link: http://lkml.kernel.org/r/20170306085748.85957-1-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/configs/x86_64_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 7ef4a099defc..6205d3b81e6d 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -176,6 +176,7 @@ CONFIG_E1000E=y CONFIG_SKY2=y CONFIG_FORCEDETH=y CONFIG_8139TOO=y +CONFIG_R8169=y CONFIG_FDDI=y CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set From 9c7a00868c3a77c86ab07a2c51f3bb4897bd8550 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 Mar 2017 21:51:32 +1100 Subject: [PATCH 0587/1299] powerpc/64: Fix L1D cache shape vector reporting L1I values It seems we didn't pay quite enough attention when testing the new cache shape vectors, which means we didn't notice the bug where the vector for the L1D was using the L1I values. Fix it, resulting in eg: L1I cache size: 0x8000 32768B 32K L1I line size: 0x80 8-way associative L1D cache size: 0x10000 65536B 64K L1D line size: 0x80 8-way associative Fixes: 98a5f361b862 ("powerpc: Add new cache geometry aux vectors") Cut-and-paste-bug-by: Benjamin Herrenschmidt Badly-reviewed-by: Michael Ellerman Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/elf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 93b9b84568e8..09bde6e34f5d 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -144,8 +144,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define ARCH_DLINFO_CACHE_GEOMETRY \ NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size); \ NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i)); \ - NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1i.size); \ - NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1i)); \ + NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size); \ + NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d)); \ NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size); \ NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2)); \ NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size); \ From d9321a03efcda867b3a8c6327e01808516f0acd7 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Mon, 6 Mar 2017 10:56:51 +0200 Subject: [PATCH 0588/1299] drm/i915/glk: Remove MODULE_FIRMWARE() tag from Geminilake's DMC Geminilake's DMC is not yet available in the linux-firmware repository. To prevent userspace tools such as mkinitramfs to complain about missing firmware, remove the MODULE_FIRMWARE() tag for now. Fixes: dbb28b5c3d3c ("drm/i915/DMC/GLK: Load DMC on GLK") Cc: Rodrigo Vivi Cc: Anusha Srivatsa Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: Signed-off-by: Ander Conselvan de Oliveira Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170306085651.14008-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_csr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 14659c7e2858..02a90257161b 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -35,7 +35,6 @@ */ #define I915_CSR_GLK "i915/glk_dmc_ver1_03.bin" -MODULE_FIRMWARE(I915_CSR_GLK); #define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 3) #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" From 567f0792a6ad11c0c2620944b8eeb777359fb85a Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 28 Feb 2017 15:28:47 +0100 Subject: [PATCH 0589/1299] drm/i915: Move updating color management to before vblank evasion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This cannot be done reliably during vblank evasasion since the color management registers are not double buffered. The original commit that moved it always during vblank evasion was wrong, so revert it to before vblank evasion again. Signed-off-by: Maarten Lankhorst Fixes: 20a34e78f0d7 ("drm/i915: Update color management during vblank evasion.") Cc: stable@vger.kernel.org # v4.7+ Link: http://patchwork.freedesktop.org/patch/msgid/1488292128-14540-1-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_display.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 72e2c91707d4..b2b0661ccbed 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13343,17 +13343,19 @@ static void intel_begin_crtc_commit(struct drm_crtc *crtc, to_intel_atomic_state(old_crtc_state->state); bool modeset = needs_modeset(crtc->state); + if (!modeset && + (intel_cstate->base.color_mgmt_changed || + intel_cstate->update_pipe)) { + intel_color_set_csc(crtc->state); + intel_color_load_luts(crtc->state); + } + /* Perform vblank evasion around commit operation */ intel_pipe_update_start(intel_crtc); if (modeset) goto out; - if (crtc->state->color_mgmt_changed || to_intel_crtc_state(crtc->state)->update_pipe) { - intel_color_set_csc(crtc->state); - intel_color_load_luts(crtc->state); - } - if (intel_cstate->update_pipe) intel_update_pipe_config(intel_crtc, old_intel_cstate); else if (INTEL_GEN(dev_priv) >= 9) From e1edbd44e23b4119161fe4accc967878bae93831 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 28 Feb 2017 15:28:48 +0100 Subject: [PATCH 0590/1299] drm/i915: Complain if we take too long under vblank evasion. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of only complaining when we actually miss a vblank, always complain if we take longer than 100 us. This will make it easier to find cases where we potentially miss vblanks. Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1488292128-14540-2-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Ville Syrjälä [mlankhorst: Add commit message.] --- drivers/gpu/drm/i915/intel_sprite.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 27e0752d1578..375ca91b308c 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -65,6 +65,8 @@ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, 1000 * adjusted_mode->crtc_htotal); } +#define VBLANK_EVASION_TIME_US 100 + /** * intel_pipe_update_start() - start update of a set of display registers * @crtc: the crtc of which the registers are going to be updated @@ -92,7 +94,8 @@ void intel_pipe_update_start(struct intel_crtc *crtc) vblank_start = DIV_ROUND_UP(vblank_start, 2); /* FIXME needs to be calibrated sensibly */ - min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, 100); + min = vblank_start - intel_usecs_to_scanlines(adjusted_mode, + VBLANK_EVASION_TIME_US); max = vblank_start - 1; local_irq_disable(); @@ -191,7 +194,12 @@ void intel_pipe_update_end(struct intel_crtc *crtc, struct intel_flip_work *work ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time), crtc->debug.min_vbl, crtc->debug.max_vbl, crtc->debug.scanline_start, scanline_end); - } + } else if (ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time) > + VBLANK_EVASION_TIME_US) + DRM_WARN("Atomic update on pipe (%c) took %lld us, max time under evasion is %u us\n", + pipe_name(pipe), + ktime_us_delta(end_vbl_time, crtc->debug.start_vbl_time), + VBLANK_EVASION_TIME_US); } static void From a7d2475af7aedcb9b5c6343989a8bfadbf84429b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 Mar 2017 22:53:59 +1100 Subject: [PATCH 0591/1299] powerpc: Sort the selects under CONFIG_PPC We have a big list of selects under CONFIG_PPC, and currently they're completely unsorted. This means people tend to add new selects at the bottom of the list, and so two commits which both add a new select will often conflict. Instead sort it alphabetically. This is nicer in and of itself, but also means two commits that add a new select will have a greater chance of not conflicting. Add a note at the top and bottom asking people to keep it sorted. And while we're here pad out the 'if' expressions to make them stand out. Suggested-by: Stephen Rothwell Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 166 ++++++++++++++++++++++--------------------- 1 file changed, 86 insertions(+), 80 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 494091762bd7..97a8bc8a095c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -80,93 +80,99 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK config PPC bool default y - select BUILDTIME_EXTABLE_SORT + # + # Please keep this list sorted alphabetically. + # + select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE + select ARCH_HAS_SG_CHAIN + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_WANT_IPC_PARSE_VERSION select BINFMT_ELF - select ARCH_HAS_ELF_RANDOMIZE + select BUILDTIME_EXTABLE_SORT + select CLONE_BACKWARDS + select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select EDAC_ATOMIC_SCRUB + select EDAC_SUPPORT + select GENERIC_ATOMIC64 if PPC32 + select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CMOS_UPDATE + select GENERIC_CPU_AUTOPROBE + select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL + select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select GENERIC_TIME_VSYSCALL_OLD + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HARDENED_USERCOPY + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK + select HAVE_CBPF_JIT if !PPC64 + select HAVE_CONTEXT_TRACKING if PPC64 + select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_STACKOVERFLOW + select HAVE_DMA_API_DEBUG + select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL + select HAVE_EBPF_JIT if PPC64 + select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER + select HAVE_GCC_PLUGINS + select HAVE_GENERIC_RCU_GUP + select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) + select HAVE_IDE + select HAVE_IOREMAP_PROT + select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_KERNEL_GZIP + select HAVE_KPROBES + select HAVE_KRETPROBES + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI if PERF_EVENTS + select HAVE_OPROFILE + select HAVE_OPTPROBES if PPC64 + select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS_NMI if PPC64 + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP + select HAVE_RCU_TABLE_FREE if SMP + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_VIRT_CPU_ACCOUNTING + select IRQ_DOMAIN + select IRQ_FORCED_THREADING + select MODULES_USE_ELF_RELA + select NO_BOOTMEM select OF select OF_EARLY_FLATTREE select OF_RESERVED_MEM - select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL - select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_GCC_PLUGINS - select SYSCTL_EXCEPTION_TRACE - select VIRT_TO_BUS if !PPC64 - select HAVE_IDE - select HAVE_IOREMAP_PROT - select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) - select HAVE_KPROBES - select HAVE_OPTPROBES if PPC64 - select HAVE_ARCH_KGDB - select HAVE_KRETPROBES - select HAVE_ARCH_TRACEHOOK - select HAVE_MEMBLOCK - select HAVE_MEMBLOCK_NODE_MAP - select HAVE_DMA_API_DEBUG - select HAVE_OPROFILE - select HAVE_DEBUG_KMEMLEAK - select ARCH_HAS_SG_CHAIN - select GENERIC_ATOMIC64 if PPC32 - select HAVE_PERF_EVENTS - select HAVE_PERF_REGS - select HAVE_PERF_USER_STACK_DUMP - select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) - select ARCH_WANT_IPC_PARSE_VERSION - select SPARSE_IRQ - select IRQ_DOMAIN - select GENERIC_IRQ_SHOW - select GENERIC_IRQ_SHOW_LEVEL - select IRQ_FORCED_THREADING - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CBPF_JIT if !PPC64 - select HAVE_EBPF_JIT if PPC64 - select HAVE_ARCH_JUMP_LABEL - select ARCH_HAVE_NMI_SAFE_CMPXCHG - select ARCH_HAS_GCOV_PROFILE_ALL - select GENERIC_SMP_IDLE_THREAD - select GENERIC_CMOS_UPDATE - select GENERIC_TIME_VSYSCALL_OLD - select GENERIC_CLOCKEVENTS - select GENERIC_CLOCKEVENTS_BROADCAST if SMP - select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST - select GENERIC_STRNCPY_FROM_USER - select GENERIC_STRNLEN_USER - select HAVE_MOD_ARCH_SPECIFIC - select MODULES_USE_ELF_RELA - select CLONE_BACKWARDS - select ARCH_USE_BUILTIN_BSWAP + select OLD_SIGACTION if PPC32 select OLD_SIGSUSPEND - select OLD_SIGACTION if PPC32 - select HAVE_DEBUG_STACKOVERFLOW - select HAVE_IRQ_EXIT_ON_IRQ_STACK - select ARCH_USE_CMPXCHG_LOCKREF if PPC64 - select HAVE_ARCH_AUDITSYSCALL - select ARCH_SUPPORTS_ATOMIC_RMW - select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN - select NO_BOOTMEM - select HAVE_GENERIC_RCU_GUP - select HAVE_PERF_EVENTS_NMI if PPC64 - select HAVE_NMI if PERF_EVENTS - select EDAC_SUPPORT - select EDAC_ATOMIC_SCRUB - select ARCH_HAS_DMA_SET_COHERENT_MASK - select ARCH_HAS_DEVMEM_IS_ALLOWED - select HAVE_ARCH_SECCOMP_FILTER - select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS - select GENERIC_CPU_AUTOPROBE - select HAVE_VIRT_CPU_ACCOUNTING - select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE - select HAVE_ARCH_HARDENED_USERCOPY - select HAVE_KERNEL_GZIP - select HAVE_CONTEXT_TRACKING if PPC64 + select SPARSE_IRQ + select SYSCTL_EXCEPTION_TRACE + select VIRT_TO_BUS if !PPC64 + # + # Please keep this list sorted alphabetically. + # config GENERIC_CSUM def_bool n From 606142af57dad981b78707234cfbd15f9f7b7125 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Wed, 15 Feb 2017 18:29:15 -0200 Subject: [PATCH 0592/1299] [media] dw2102: don't do DMA on stack On Kernel 4.9, WARNINGs about doing DMA on stack are hit at the dw2102 driver: one in su3000_power_ctrl() and the other in tt_s2_4600_frontend_attach(). Both were due to the use of buffers on the stack as parameters to dvb_usb_generic_rw() and the resulting attempt to do DMA with them. The device was non-functional as a result. So, switch this driver over to use a buffer within the device state structure, as has been done with other DVB-USB drivers. Tested with TechnoTrend TT-connect S2-4600. [mchehab@osg.samsung.com: fixed a warning at su3000_i2c_transfer() that state var were dereferenced before check 'd'] Signed-off-by: Jonathan McDowell Cc: Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dw2102.c | 242 +++++++++++++++++------------ 1 file changed, 144 insertions(+), 98 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c index 6ca502d834b4..4f42d57f81d9 100644 --- a/drivers/media/usb/dvb-usb/dw2102.c +++ b/drivers/media/usb/dvb-usb/dw2102.c @@ -68,6 +68,7 @@ struct dw2102_state { u8 initialized; u8 last_lock; + u8 data[MAX_XFER_SIZE + 4]; struct i2c_client *i2c_client_demod; struct i2c_client *i2c_client_tuner; @@ -661,62 +662,72 @@ static int su3000_i2c_transfer(struct i2c_adapter *adap, struct i2c_msg msg[], int num) { struct dvb_usb_device *d = i2c_get_adapdata(adap); - u8 obuf[0x40], ibuf[0x40]; + struct dw2102_state *state; if (!d) return -ENODEV; + + state = d->priv; + if (mutex_lock_interruptible(&d->i2c_mutex) < 0) return -EAGAIN; + if (mutex_lock_interruptible(&d->data_mutex) < 0) { + mutex_unlock(&d->i2c_mutex); + return -EAGAIN; + } switch (num) { case 1: switch (msg[0].addr) { case SU3000_STREAM_CTRL: - obuf[0] = msg[0].buf[0] + 0x36; - obuf[1] = 3; - obuf[2] = 0; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 0, 0) < 0) + state->data[0] = msg[0].buf[0] + 0x36; + state->data[1] = 3; + state->data[2] = 0; + if (dvb_usb_generic_rw(d, state->data, 3, + state->data, 0, 0) < 0) err("i2c transfer failed."); break; case DW2102_RC_QUERY: - obuf[0] = 0x10; - if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 2, 0) < 0) + state->data[0] = 0x10; + if (dvb_usb_generic_rw(d, state->data, 1, + state->data, 2, 0) < 0) err("i2c transfer failed."); - msg[0].buf[1] = ibuf[0]; - msg[0].buf[0] = ibuf[1]; + msg[0].buf[1] = state->data[0]; + msg[0].buf[0] = state->data[1]; break; default: /* always i2c write*/ - obuf[0] = 0x08; - obuf[1] = msg[0].addr; - obuf[2] = msg[0].len; + state->data[0] = 0x08; + state->data[1] = msg[0].addr; + state->data[2] = msg[0].len; - memcpy(&obuf[3], msg[0].buf, msg[0].len); + memcpy(&state->data[3], msg[0].buf, msg[0].len); - if (dvb_usb_generic_rw(d, obuf, msg[0].len + 3, - ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, msg[0].len + 3, + state->data, 1, 0) < 0) err("i2c transfer failed."); } break; case 2: /* always i2c read */ - obuf[0] = 0x09; - obuf[1] = msg[0].len; - obuf[2] = msg[1].len; - obuf[3] = msg[0].addr; - memcpy(&obuf[4], msg[0].buf, msg[0].len); + state->data[0] = 0x09; + state->data[1] = msg[0].len; + state->data[2] = msg[1].len; + state->data[3] = msg[0].addr; + memcpy(&state->data[4], msg[0].buf, msg[0].len); - if (dvb_usb_generic_rw(d, obuf, msg[0].len + 4, - ibuf, msg[1].len + 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, msg[0].len + 4, + state->data, msg[1].len + 1, 0) < 0) err("i2c transfer failed."); - memcpy(msg[1].buf, &ibuf[1], msg[1].len); + memcpy(msg[1].buf, &state->data[1], msg[1].len); break; default: warn("more than 2 i2c messages at a time is not handled yet."); break; } + mutex_unlock(&d->data_mutex); mutex_unlock(&d->i2c_mutex); return num; } @@ -844,17 +855,23 @@ static int su3000_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) static int su3000_power_ctrl(struct dvb_usb_device *d, int i) { struct dw2102_state *state = (struct dw2102_state *)d->priv; - u8 obuf[] = {0xde, 0}; + int ret = 0; info("%s: %d, initialized %d", __func__, i, state->initialized); if (i && !state->initialized) { + mutex_lock(&d->data_mutex); + + state->data[0] = 0xde; + state->data[1] = 0; + state->initialized = 1; /* reset board */ - return dvb_usb_generic_rw(d, obuf, 2, NULL, 0, 0); + ret = dvb_usb_generic_rw(d, state->data, 2, NULL, 0, 0); + mutex_unlock(&d->data_mutex); } - return 0; + return ret; } static int su3000_read_mac_address(struct dvb_usb_device *d, u8 mac[6]) @@ -1309,49 +1326,57 @@ static int prof_7500_frontend_attach(struct dvb_usb_adapter *d) return 0; } -static int su3000_frontend_attach(struct dvb_usb_adapter *d) +static int su3000_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[3] = { 0xe, 0x80, 0 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + mutex_lock(&d->data_mutex); + + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0; + + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x02; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x02; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(300); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 0; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config, - &d->dev->i2c_adap); - if (d->fe_adap[0].fe == NULL) + mutex_unlock(&d->data_mutex); + + adap->fe_adap[0].fe = dvb_attach(ds3000_attach, &su3000_ds3000_config, + &d->i2c_adap); + if (adap->fe_adap[0].fe == NULL) return -EIO; - if (dvb_attach(ts2020_attach, d->fe_adap[0].fe, + if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe, &dw2104_ts2020_config, - &d->dev->i2c_adap)) { + &d->i2c_adap)) { info("Attached DS3000/TS2020!"); return 0; } @@ -1360,47 +1385,55 @@ static int su3000_frontend_attach(struct dvb_usb_adapter *d) return -EIO; } -static int t220_frontend_attach(struct dvb_usb_adapter *d) +static int t220_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[3] = { 0xe, 0x87, 0 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + mutex_lock(&d->data_mutex); + + state->data[0] = 0xe; + state->data[1] = 0x87; + state->data[2] = 0x0; + + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x86; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x86; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x80; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(50); - obuf[0] = 0xe; - obuf[1] = 0x80; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 1; - if (dvb_usb_generic_rw(d->dev, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config, - &d->dev->i2c_adap, NULL); - if (d->fe_adap[0].fe != NULL) { - if (dvb_attach(tda18271_attach, d->fe_adap[0].fe, 0x60, - &d->dev->i2c_adap, &tda18271_config)) { + mutex_unlock(&d->data_mutex); + + adap->fe_adap[0].fe = dvb_attach(cxd2820r_attach, &cxd2820r_config, + &d->i2c_adap, NULL); + if (adap->fe_adap[0].fe != NULL) { + if (dvb_attach(tda18271_attach, adap->fe_adap[0].fe, 0x60, + &d->i2c_adap, &tda18271_config)) { info("Attached TDA18271HD/CXD2820R!"); return 0; } @@ -1410,23 +1443,30 @@ static int t220_frontend_attach(struct dvb_usb_adapter *d) return -EIO; } -static int m88rs2000_frontend_attach(struct dvb_usb_adapter *d) +static int m88rs2000_frontend_attach(struct dvb_usb_adapter *adap) { - u8 obuf[] = { 0x51 }; - u8 ibuf[] = { 0 }; + struct dvb_usb_device *d = adap->dev; + struct dw2102_state *state = d->priv; - if (dvb_usb_generic_rw(d->dev, obuf, 1, ibuf, 1, 0) < 0) + mutex_lock(&d->data_mutex); + + state->data[0] = 0x51; + + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); - d->fe_adap[0].fe = dvb_attach(m88rs2000_attach, &s421_m88rs2000_config, - &d->dev->i2c_adap); + mutex_unlock(&d->data_mutex); - if (d->fe_adap[0].fe == NULL) + adap->fe_adap[0].fe = dvb_attach(m88rs2000_attach, + &s421_m88rs2000_config, + &d->i2c_adap); + + if (adap->fe_adap[0].fe == NULL) return -EIO; - if (dvb_attach(ts2020_attach, d->fe_adap[0].fe, + if (dvb_attach(ts2020_attach, adap->fe_adap[0].fe, &dw2104_ts2020_config, - &d->dev->i2c_adap)) { + &d->i2c_adap)) { info("Attached RS2000/TS2020!"); return 0; } @@ -1439,44 +1479,50 @@ static int tt_s2_4600_frontend_attach(struct dvb_usb_adapter *adap) { struct dvb_usb_device *d = adap->dev; struct dw2102_state *state = d->priv; - u8 obuf[3] = { 0xe, 0x80, 0 }; - u8 ibuf[] = { 0 }; struct i2c_adapter *i2c_adapter; struct i2c_client *client; struct i2c_board_info board_info; struct m88ds3103_platform_data m88ds3103_pdata = {}; struct ts2020_config ts2020_config = {}; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + mutex_lock(&d->data_mutex); + + state->data[0] = 0xe; + state->data[1] = 0x80; + state->data[2] = 0x0; + + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x02; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x02; + state->data[2] = 1; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); msleep(300); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 0; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 0; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0xe; - obuf[1] = 0x83; - obuf[2] = 1; + state->data[0] = 0xe; + state->data[1] = 0x83; + state->data[2] = 1; - if (dvb_usb_generic_rw(d, obuf, 3, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 3, state->data, 1, 0) < 0) err("command 0x0e transfer failed."); - obuf[0] = 0x51; + state->data[0] = 0x51; - if (dvb_usb_generic_rw(d, obuf, 1, ibuf, 1, 0) < 0) + if (dvb_usb_generic_rw(d, state->data, 1, state->data, 1, 0) < 0) err("command 0x51 transfer failed."); + mutex_unlock(&d->data_mutex); + /* attach demod */ m88ds3103_pdata.clk = 27000000; m88ds3103_pdata.i2c_wr_max = 33; From 8e51533780ba223a3562ff4382c6b6f350c7e9a4 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Fri, 10 Feb 2017 17:21:00 -0600 Subject: [PATCH 0593/1299] pinctrl: qcom: add get_direction function The get_direction callback function allows gpiolib to know the current direction (input vs output) for a given GPIO. This is particularly useful on ACPI systems, where the GPIOs are configured only by firmware (typically UEFI), so the only way to know the initial values to query the hardware directly. Without this function, gpiolib thinks that all GPIOs are configured for input. Signed-off-by: Timur Tabi Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-msm.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index f8e9e1c2b2f6..c978be5eb9eb 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -422,6 +422,20 @@ static int msm_gpio_direction_output(struct gpio_chip *chip, unsigned offset, in return 0; } +static int msm_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) +{ + struct msm_pinctrl *pctrl = gpiochip_get_data(chip); + const struct msm_pingroup *g; + u32 val; + + g = &pctrl->soc->groups[offset]; + + val = readl(pctrl->regs + g->ctl_reg); + + /* 0 = output, 1 = input */ + return val & BIT(g->oe_bit) ? 0 : 1; +} + static int msm_gpio_get(struct gpio_chip *chip, unsigned offset) { const struct msm_pingroup *g; @@ -510,6 +524,7 @@ static void msm_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) static struct gpio_chip msm_gpio_template = { .direction_input = msm_gpio_direction_input, .direction_output = msm_gpio_direction_output, + .get_direction = msm_gpio_get_direction, .get = msm_gpio_get, .set = msm_gpio_set, .request = gpiochip_generic_request, From 96e1ce8fcd66d850196758c038bd9d6f7857c518 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Mon, 20 Feb 2017 21:00:42 +0900 Subject: [PATCH 0594/1299] pinctrl: uniphier: change pin names of aio/xirq for LD11 This patch changes pin names of AIO and XIRQ according to updated specification. Signed-off-by: Kunihiko Hayashi Acked-by: Masahiro Yamada Signed-off-by: Linus Walleij --- drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c index 77a0236ee781..83f8864fa76a 100644 --- a/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c +++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-ld11.c @@ -390,22 +390,22 @@ static const struct pinctrl_pin_desc uniphier_ld11_pins[] = { UNIPHIER_PINCTRL_PIN(140, "AO1D0", 140, 140, UNIPHIER_PIN_DRV_1BIT, 140, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(141, "TCON0", 141, + UNIPHIER_PINCTRL_PIN(141, "AO1D1", 141, 141, UNIPHIER_PIN_DRV_1BIT, 141, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(142, "TCON1", 142, + UNIPHIER_PINCTRL_PIN(142, "AO1D2", 142, 142, UNIPHIER_PIN_DRV_1BIT, 142, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(143, "TCON2", 143, + UNIPHIER_PINCTRL_PIN(143, "XIRQ9", 143, 143, UNIPHIER_PIN_DRV_1BIT, 143, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(144, "TCON3", 144, + UNIPHIER_PINCTRL_PIN(144, "XIRQ10", 144, 144, UNIPHIER_PIN_DRV_1BIT, 144, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(145, "TCON4", 145, + UNIPHIER_PINCTRL_PIN(145, "XIRQ11", 145, 145, UNIPHIER_PIN_DRV_1BIT, 145, UNIPHIER_PIN_PULL_DOWN), - UNIPHIER_PINCTRL_PIN(146, "TCON5", 146, + UNIPHIER_PINCTRL_PIN(146, "XIRQ13", 146, 146, UNIPHIER_PIN_DRV_1BIT, 146, UNIPHIER_PIN_PULL_DOWN), UNIPHIER_PINCTRL_PIN(147, "PWMA", 147, From b3bd0f2849f4480fc4f40bb954d27e58f4f0786b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 1 Mar 2017 23:50:19 +0100 Subject: [PATCH 0595/1299] staging/vc04_services: add CONFIG_OF dependency After several hours of debugging this obviously bogus but elaborate gcc-7.0.1 warning, drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c: In function 'vchiq_complete_bulk': drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c:603:4: error: argument 2 null where non-null expected [-Werror=nonnull] memcpy((char *)page_address(pages[0]) + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pagelist->offset, ~~~~~~~~~~~~~~~~~ fragments, ~~~~~~~~~~ head_bytes); ~~~~~~~~~~~ In file included from include/linux/string.h:18:0, from include/linux/bitmap.h:8, from include/linux/cpumask.h:11, from include/linux/interrupt.h:9, from drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c:37: arch/arm/include/asm/string.h:16:15: note: in a call to function 'memcpy' declared here extern void * memcpy(void *, const void *, __kernel_size_t) __nocapture(2); ^~~~~~ I have concluded that gcc was technically right in the first place: vchiq_complete_bulk is an externally visible function that calls free_pagelist(), which in turn derives a pointer from the global g_fragments_base variable. g_fragments_base is initialized in vchiq_platform_init(), but we only get there if of_property_read_u32() successfully reads the cache line size. When CONFIG_OF is disabled, this always fails, and g_fragments_base is guaranteed to be NULL when vchiq_complete_bulk() gets called. This adds a CONFIG_OF Kconfig dependency, which is also technically correct but nonobvious, and thus seems like a good fit for the warning. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/vc04_services/Kconfig b/drivers/staging/vc04_services/Kconfig index e61e4ca064a8..74094fff4367 100644 --- a/drivers/staging/vc04_services/Kconfig +++ b/drivers/staging/vc04_services/Kconfig @@ -1,6 +1,7 @@ config BCM2835_VCHIQ tristate "Videocore VCHIQ" depends on HAS_DMA + depends on OF depends on RASPBERRYPI_FIRMWARE || (COMPILE_TEST && !RASPBERRYPI_FIRMWARE) default y help From e1c0c91bdaec0702ebbfdc546926989dd63d19ef Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Mar 2017 09:29:15 +0000 Subject: [PATCH 0596/1299] drm/i915: Wake up all waiters before idling When we idle, we wakeup the first waiter (checking to see if it missed an earlier wakeup) and disarm the breadcrumbs. However, we now assert that there are no waiter when the interrupt is disabled, triggering an assert if there were multiple waiters when we idled. [ 420.842275] invalid opcode: 0000 [#1] PREEMPT SMP [ 420.842285] Modules linked in: vgem snd_hda_codec_realtek x86_pkg_temp_thermal snd_hda_codec_generic intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_intel snd_hda_codec snd_hwdep mei_me snd_hda_core mei snd_pcm lpc_ich i915 r8169 mii prime_numbers [ 420.842357] CPU: 4 PID: 8714 Comm: kms_pipe_crc_ba Tainted: G U W 4.10.0-CI-CI_DRM_2280+ #1 [ 420.842377] Hardware name: Hewlett-Packard HP Pro 3500 Series/2ABF, BIOS 8.11 10/24/2012 [ 420.842395] task: ffff880117ddce40 task.stack: ffffc90001114000 [ 420.842439] RIP: 0010:__intel_engine_remove_wait+0x1f4/0x200 [i915] [ 420.842454] RSP: 0018:ffffc90001117b18 EFLAGS: 00010046 [ 420.842467] RAX: 0000000000000000 RBX: ffff88010c25c2a8 RCX: 0000000000000001 [ 420.842481] RDX: 0000000000000001 RSI: 00000000ffffffff RDI: ffffc90001117c50 [ 420.842495] RBP: ffffc90001117b58 R08: 0000000011e52352 R09: c4d16acc00000000 [ 420.842511] R10: ffffffff82789eb0 R11: ffff880117ddce40 R12: ffffc90001117c50 [ 420.842525] R13: ffffc90001117c50 R14: 0000000000000078 R15: 0000000000000000 [ 420.842540] FS: 00007fe47dda0a40(0000) GS:ffff88011fb00000(0000) knlGS:0000000000000000 [ 420.842559] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 420.842571] CR2: 00007fd6c0a2cec4 CR3: 000000010a5e5000 CR4: 00000000001406e0 [ 420.842586] Call Trace: [ 420.842595] ? do_raw_spin_lock+0xad/0xb0 [ 420.842635] intel_engine_remove_wait.part.3+0x26/0x40 [i915] [ 420.842678] intel_engine_remove_wait+0xe/0x20 [i915] [ 420.842721] i915_wait_request+0x4f0/0x8c0 [i915] [ 420.842736] ? wake_up_q+0x70/0x70 [ 420.842747] ? wake_up_q+0x70/0x70 [ 420.842787] i915_gem_object_wait_fence+0x7d/0x1a0 [i915] [ 420.842829] i915_gem_object_wait+0x30d/0x520 [i915] [ 420.842842] ? __this_cpu_preempt_check+0x13/0x20 [ 420.842884] i915_gem_wait_ioctl+0x12e/0x2e0 [i915] [ 420.842924] ? i915_gem_wait_ioctl+0x22/0x2e0 [i915] [ 420.842939] drm_ioctl+0x200/0x450 [ 420.842976] ? i915_gem_set_wedged+0x90/0x90 [i915] [ 420.842993] do_vfs_ioctl+0x90/0x6e0 [ 420.843003] ? entry_SYSCALL_64_fastpath+0x5/0xb1 [ 420.843017] ? __this_cpu_preempt_check+0x13/0x20 [ 420.843030] ? trace_hardirqs_on_caller+0xe7/0x200 [ 420.843042] SyS_ioctl+0x3c/0x70 [ 420.843054] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 420.843065] RIP: 0033:0x7fe47c4b9357 [ 420.843075] RSP: 002b:00007ffc3c0633c8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 420.843094] RAX: ffffffffffffffda RBX: ffffffff81482393 RCX: 00007fe47c4b9357 [ 420.843109] RDX: 00007ffc3c063400 RSI: 00000000c010646c RDI: 0000000000000004 [ 420.843123] RBP: ffffc90001117f88 R08: 0000000000000008 R09: 0000000000000000 [ 420.843137] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [ 420.843151] R13: 0000000000000004 R14: 00000000c010646c R15: 0000000000000000 [ 420.843168] ? __this_cpu_preempt_check+0x13/0x20 [ 420.843180] Code: 81 48 c7 c1 40 6a 16 a0 48 c7 c2 47 29 15 a0 be 17 01 00 00 48 c7 c7 10 6a 16 a0 e8 c7 ea fe e0 e9 5d ff ff ff 0f 0b 0f 0b 0f 0b <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 55 48 89 e5 e8 67 41 7e e1 [ 420.843325] RIP: __intel_engine_remove_wait+0x1f4/0x200 [i915] RSP: ffffc90001117b18 Fixes: b66255f0f779 ("drm/i915: Refactor wakeup of the next breadcrumb waiter") Fixes: 67b807a89230 ("drm/i915: Delay disabling the user interrupt for breadcrumbs") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170306092916.11623-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 6032d2a937d5..ba73dc5b0328 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -167,6 +167,7 @@ void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; lockdep_assert_held(&b->irq_lock); + GEM_BUG_ON(b->irq_wait); if (b->irq_enabled) { irq_disable(engine); @@ -179,23 +180,30 @@ void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; + struct intel_wait *wait, *n; if (!b->irq_armed) return; - spin_lock_irqsave(&b->irq_lock, flags); - /* We only disarm the irq when we are idle (all requests completed), - * so if there remains a sleeping waiter, it missed the request + * so if the bottom-half remains asleep, it missed the request * completion. */ - if (__intel_breadcrumbs_wakeup(b) & ENGINE_WAKEUP_ASLEEP) - missed_breadcrumb(engine); + spin_lock_irq(&b->rb_lock); + rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { + RB_CLEAR_NODE(&wait->node); + if (wake_up_process(wait->tsk) && wait == b->irq_wait) + missed_breadcrumb(engine); + } + b->waiters = RB_ROOT; + + spin_lock(&b->irq_lock); + b->irq_wait = NULL; __intel_engine_disarm_breadcrumbs(engine); + spin_unlock(&b->irq_lock); - spin_unlock_irqrestore(&b->irq_lock, flags); + spin_unlock_irq(&b->rb_lock); } static bool use_fake_irq(const struct intel_breadcrumbs *b) From eb38d913c27f32f4df173791051fecf6aca34173 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 2 Mar 2017 10:44:58 +0200 Subject: [PATCH 0597/1299] Revert "usb: gadget: uvc: Add missing call for additional setup data" This reverts commit 4fbac5206afd01b717d4bdc58793d471f3391b4b. This commit breaks g_webcam when used with uvc-gadget [1]. The user space application (e.g. uvc-gadget) is responsible for sending response to UVC class specific requests on control endpoint in uvc_send_response() in uvc_v4l2.c. The bad commit was causing a duplicate response to be sent with incorrect response data thus causing UVC probe to fail at the host and broken control transfer endpoint at the gadget. [1] - git://git.ideasonboard.org/uvc-gadget.git Cc: # v4.9+ Acked-by: Laurent Pinchart Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_uvc.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index 27ed51b5082f..29b41b5dee04 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -258,13 +258,6 @@ uvc_function_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl) memcpy(&uvc_event->req, ctrl, sizeof(uvc_event->req)); v4l2_event_queue(&uvc->vdev, &v4l2_event); - /* Pass additional setup data to userspace */ - if (uvc->event_setup_out && uvc->event_length) { - uvc->control_req->length = uvc->event_length; - return usb_ep_queue(uvc->func.config->cdev->gadget->ep0, - uvc->control_req, GFP_ATOMIC); - } - return 0; } From 5bbc852676ae08e818241cf66a3ffe4be44225c4 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 28 Feb 2017 14:25:45 +0800 Subject: [PATCH 0598/1299] usb: gadget: dummy_hcd: clear usb_gadget region before registration When the user does device unbind and rebind test, the kernel will show below dump due to usb_gadget memory region is dirty after unbind. Clear usb_gadget region for every new probe. root@imx6qdlsolo:/sys/bus/platform/drivers/dummy_udc# echo dummy_udc.0 > bind [ 102.523312] kobject (eddd78b0): tried to init an initialized object, something is seriously wrong. [ 102.532447] CPU: 0 PID: 734 Comm: sh Not tainted 4.10.0-rc7-00872-g1b2b8e9 #1298 [ 102.539866] Hardware name: Freescale i.MX6 SoloX (Device Tree) [ 102.545717] Backtrace: [ 102.548225] [] (dump_backtrace) from [] (show_stack+0x18/0x1c) [ 102.555822] r7:ede34000 r6:60010013 r5:00000000 r4:c0f29418 [ 102.561512] [] (show_stack) from [] (dump_stack+0xb4/0xe8) [ 102.568764] [] (dump_stack) from [] (kobject_init+0x80/0x9c) [ 102.576187] r10:0000001f r9:eddd7000 r8:eeaf8c10 r7:eddd78a8 r6:c177891c r5:c0f3b060 [ 102.584036] r4:eddd78b0 r3:00000000 [ 102.587641] [] (kobject_init) from [] (device_initialize+0x28/0xf8) [ 102.595665] r5:eebc4800 r4:eddd78a8 [ 102.599268] [] (device_initialize) from [] (device_register+0x14/0x20) [ 102.607556] r7:eddd78a8 r6:00000000 r5:eebc4800 r4:eddd78a8 [ 102.613256] [] (device_register) from [] (usb_add_gadget_udc_release+0x8c/0x1ec) [ 102.622410] r5:eebc4800 r4:eddd7860 [ 102.626015] [] (usb_add_gadget_udc_release) from [] (usb_add_gadget_udc+0x14/0x18) [ 102.635351] r10:0000001f r9:eddd7000 r8:eddd788c r7:bf003770 r6:eddd77f8 r5:eddd7818 [ 102.643198] r4:eddd785c r3:eddd7b24 [ 102.646834] [] (usb_add_gadget_udc) from [] (dummy_udc_probe+0x170/0x1c4 [dummy_hcd]) [ 102.656458] [] (dummy_udc_probe [dummy_hcd]) from [] (platform_drv_probe+0x54/0xb8) [ 102.665881] r10:00000008 r9:c1778960 r8:bf004128 r7:fffffdfb r6:bf004128 r5:eeaf8c10 [ 102.673727] r4:eeaf8c10 [ 102.676293] [] (platform_drv_probe) from [] (driver_probe_device+0x264/0x474) [ 102.685186] r7:00000000 r6:00000000 r5:c1778960 r4:eeaf8c10 [ 102.690876] [] (driver_probe_device) from [] (bind_store+0xb8/0x14c) [ 102.698994] r10:eeb3bb4c r9:ede34000 r8:0000000c r7:eeaf8c44 r6:bf004128 r5:c0f3b668 [ 102.706840] r4:eeaf8c10 [ 102.709402] [] (bind_store) from [] (drv_attr_store+0x28/0x34) [ 102.716998] r9:ede34000 r8:00000000 r7:ee3863c0 r6:ee3863c0 r5:c0538c80 r4:c053970c [ 102.724776] [] (drv_attr_store) from [] (sysfs_kf_write+0x50/0x54) [ 102.732711] r5:c0538c80 r4:0000000c [ 102.736313] [] (sysfs_kf_write) from [] (kernfs_fop_write+0x100/0x214) [ 102.744599] r7:ee3863c0 r6:eeb3bb40 r5:00000000 r4:00000000 [ 102.750287] [] (kernfs_fop_write) from [] (__vfs_write+0x34/0x120) [ 102.758231] r10:00000000 r9:ede34000 r8:c0108bc4 r7:0000000c r6:ede35f80 r5:c029bd84 [ 102.766077] r4:ee223780 [ 102.768638] [] (__vfs_write) from [] (vfs_write+0xa8/0x170) [ 102.775974] r9:ede34000 r8:c0108bc4 r7:ede35f80 r6:01861cb0 r5:ee223780 r4:0000000c [ 102.783743] [] (vfs_write) from [] (SyS_write+0x4c/0xa8) [ 102.790818] r9:ede34000 r8:c0108bc4 r7:0000000c r6:01861cb0 r5:ee223780 r4:ee223780 [ 102.798595] [] (SyS_write) from [] (ret_fast_syscall+0x0/0x1c) [ 102.806188] r7:00000004 r6:b6e83d58 r5:01861cb0 r4:0000000c Fixes: 90fccb529d24 ("usb: gadget: Gadget directory cleanup - group UDC drivers") Cc: stable Acked-by: Alan Stern Signed-off-by: Peter Chen Tested-by: Xiaolong Ye Reported-by: Fengguang Wu Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/dummy_hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index c60abe3a68f9..8cabc5944d5f 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -1031,6 +1031,8 @@ static int dummy_udc_probe(struct platform_device *pdev) int rc; dum = *((void **)dev_get_platdata(&pdev->dev)); + /* Clear usb_gadget region for new registration to udc-core */ + memzero_explicit(&dum->gadget, sizeof(struct usb_gadget)); dum->gadget.name = gadget_name; dum->gadget.ops = &dummy_ops; dum->gadget.max_speed = USB_SPEED_SUPER; From 181df2d458f3e84eb4809083655fd2fd4ef33642 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Mar 2017 09:29:16 +0000 Subject: [PATCH 0599/1299] drm/i915: Take rpm wakelock for releasing the fence on unbind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unbind the vma may happen at any time, outside of the normal GT wakeref. As such it relies on having a wakeref of its own. However, we can forgo clearing the register whilst the device is asleep and just mark it as unused - so that when we do wake up the device, we will clear the unused fence register (see i915_gem_restore_fences). [22423.944631] WARNING: CPU: 3 PID: 26178 at drivers/gpu/drm/i915/intel_drv.h:1739 i915_vma_put_fence+0xf3/0x100 [i915] [22423.946053] RPM wakelock ref not held during HW access [22423.946056] Modules linked in: vgem(E) i915(E) nls_ascii(E) nls_cp437(E) vfat(E) fat(E) x86_pkg_temp_thermal(E) crct10dif_pclmul(E) crc32_pclmul(E) crc32c_intel(E) ghash_clmulni_intel(E) intel_gtt(E) i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) evdev(E) aesni_intel(E) aes_x86_64(E) crypto_simd(E) cryptd(E) glue_helper(E) sysimgblt(E) fb_sys_fops(E) prime_numbers(E) drm(E) efivars(E) mei_me(E) lpc_ich(E) mei(E) mfd_core(E) battery(E) video(E) acpi_pad(E) button(E) tpm_tis(E) tpm_tis_core(E) tpm(E) autofs4(E) i2c_i801(E) thermal(E) fan(E) i2c_designware_platform(E) i2c_designware_core(E) [22423.946438] CPU: 2 PID: 26178 Comm: gem_concurrent_ Tainted: G E 4.10.0+ #101 [22423.946513] Hardware name: ��������������������������������� ���������������������������������/���������������������������������, BIOS RYBDWi35.86A.0246.2 [22423.946600] Call Trace: [22423.946641] dump_stack+0x68/0x9f [22423.946703] __warn+0x107/0x130 [22423.946763] warn_slowpath_fmt+0xa8/0xe0 [22423.946825] ? __warn+0x130/0x130 [22423.946868] ? free_hot_cold_page_list+0x53/0x70 [22423.946942] ? mark_lock+0xcc/0x7f0 [22423.946997] ? __lock_is_held+0x84/0x100 [22423.947115] ? i915_vma_put_fence+0x64/0x100 [i915] [22423.947224] i915_vma_put_fence+0xf3/0x100 [i915] [22423.947335] i915_vma_unbind+0x4da/0x560 [i915] [22423.947387] ? rb_erase+0x812/0x8a0 [22423.947439] ? kfree+0xa2/0xd0 [22423.947562] i915_vma_close+0x159/0x180 [i915] [22423.947674] intel_ring_free+0x31/0x50 [i915] [22423.947776] i915_gem_context_free+0x1ff/0x3d0 [i915] [22423.947887] context_close+0x106/0x110 [i915] [22423.947989] context_idr_cleanup+0xc/0x10 [i915] [22423.948041] idr_for_each+0x14d/0x1d0 [22423.948158] ? context_close+0x110/0x110 [i915] [22423.948206] ? get_from_free_list+0x70/0x70 [22423.948261] ? __lock_is_held+0x84/0x100 [22423.948325] ? __mutex_unlock_slowpath+0xd4/0x400 [22423.948448] i915_gem_context_close+0x4b/0x90 [i915] [22423.948544] i915_driver_preclose+0x28/0x50 [i915] [22423.948620] drm_release+0x175/0x690 [drm] [22423.948681] ? fcntl_setlk+0x5e0/0x5e0 [22423.948746] __fput+0x17d/0x300 [22423.948807] ____fput+0x9/0x10 [22423.948859] task_work_run+0xa7/0xe0 [22423.948924] do_exit+0x4d2/0x13e0 [22423.948986] ? mm_update_next_owner+0x320/0x320 [22423.949051] ? __do_page_fault+0x209/0x5c0 [22423.949110] ? mark_held_locks+0x23/0xc0 [22423.949166] ? entry_SYSCALL_64_fastpath+0x5/0xb1 [22423.949232] do_group_exit+0x93/0x160 [22423.949289] SyS_exit_group+0x18/0x20 [22423.949350] entry_SYSCALL_64_fastpath+0x1c/0xb1 [22423.949403] RIP: 0033:0x7f9cc2e154c8 [22423.949484] RSP: 002b:00007ffd7e81b448 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 [22423.949557] RAX: ffffffffffffffda RBX: ffffffff810ef1f0 RCX: 00007f9cc2e154c8 [22423.949617] RDX: 0000000000000000 RSI: 000000000000003c RDI: 0000000000000000 [22423.949677] RBP: ffff880367e9ff98 R08: 00000000000000e7 R09: ffffffffffffff88 [22423.949741] R10: 00007f9cc1d5c000 R11: 0000000000000246 R12: 00007f9cc30f6c30 [22423.949798] R13: 0000000000000000 R14: 00007f9cc30f6c20 R15: 0000000000000003 [22423.949868] ? trace_hardirqs_off_caller+0xc0/0x110 v2: Move the rpm check down a layer so that we still perform the vma/fence update required for the deferred mmio write on resume. v3: Don't touch i915_gem_object_set_cache_level() and leave the rpm to the low level routines (such as i915_vma_put_fence). v4: vma may be null in fence_write, so extract drm_i915_private from fence->i915 Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170306092916.11623-3-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_fence_reg.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_fence_reg.c b/drivers/gpu/drm/i915/i915_gem_fence_reg.c index fadbe8f4c745..5fe2cd8c8f28 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence_reg.c +++ b/drivers/gpu/drm/i915/i915_gem_fence_reg.c @@ -248,7 +248,14 @@ static int fence_update(struct drm_i915_fence_reg *fence, list_move(&fence->link, &fence->i915->mm.fence_list); } - fence_write(fence, vma); + /* We only need to update the register itself if the device is awake. + * If the device is currently powered down, we will defer the write + * to the runtime resume, see i915_gem_restore_fences(). + */ + if (intel_runtime_pm_get_if_in_use(fence->i915)) { + fence_write(fence, vma); + intel_runtime_pm_put(fence->i915); + } if (vma) { if (fence->vma != vma) { @@ -278,8 +285,6 @@ i915_vma_put_fence(struct i915_vma *vma) { struct drm_i915_fence_reg *fence = vma->fence; - assert_rpm_wakelock_held(vma->vm->i915); - if (!fence) return 0; From 077dbaee9df53c597df532a08d721d03f4570f3d Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Thu, 23 Feb 2017 10:48:55 +0100 Subject: [PATCH 0600/1299] irqchip/crossbar: Fix incorrect type of local variables The max and entry variables are unsigned according to the dt-bindings. Fix following 3 sparse issues (-Wtypesign): drivers/irqchip/irq-crossbar.c:222:52: warning: incorrect type in argument 3 (different signedness) drivers/irqchip/irq-crossbar.c:222:52: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:222:52: got int * drivers/irqchip/irq-crossbar.c:245:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:245:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:245:56: got int * drivers/irqchip/irq-crossbar.c:263:56: warning: incorrect type in argument 4 (different signedness) drivers/irqchip/irq-crossbar.c:263:56: expected unsigned int [usertype] *out_value drivers/irqchip/irq-crossbar.c:263:56: got int * Signed-off-by: Franck Demathieu Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-crossbar.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-crossbar.c b/drivers/irqchip/irq-crossbar.c index 1eef56a89b1f..05bbf171df37 100644 --- a/drivers/irqchip/irq-crossbar.c +++ b/drivers/irqchip/irq-crossbar.c @@ -198,7 +198,8 @@ static const struct irq_domain_ops crossbar_domain_ops = { static int __init crossbar_of_init(struct device_node *node) { - int i, size, max = 0, reserved = 0, entry; + int i, size, reserved = 0; + u32 max = 0, entry; const __be32 *irqsr; int ret = -ENOMEM; From b3e228473e6cec7cf83b4025b4570c8066ab2dd8 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Thu, 2 Mar 2017 16:11:47 +0100 Subject: [PATCH 0601/1299] irqdomain: Add empty irq_domain_check_msi_remap Fix following build error for s390: drivers/vfio/vfio_iommu_type1.c: In function 'vfio_iommu_type1_attach_group': drivers/vfio/vfio_iommu_type1.c:1290:25: error: implicit declaration of function 'irq_domain_check_msi_remap' Acked-by: Marc Zyngier Reviewed-by: Eric Auger Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 188eced6813e..9f3616085423 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -524,6 +524,10 @@ static inline struct irq_domain *irq_find_matching_fwnode( { return NULL; } +static inline bool irq_domain_check_msi_remap(void) +{ + return false; +} #endif /* !CONFIG_IRQ_DOMAIN */ #endif /* _LINUX_IRQDOMAIN_H */ From 38355b2a44776c25b0f2ad466e8c51bb805b3032 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Tue, 28 Feb 2017 10:55:30 +0000 Subject: [PATCH 0602/1299] usb: gadget: configs: plug memory leak When binding a gadget to a device, "name" is stored in gi->udc_name, but this does not happen when unregistering and the string is leaked. Signed-off-by: John Keeping Signed-off-by: Felipe Balbi --- drivers/usb/gadget/configfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 78c44979dde3..cbff3b02840d 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -269,6 +269,7 @@ static ssize_t gadget_dev_desc_UDC_store(struct config_item *item, ret = unregister_gadget(gi); if (ret) goto err; + kfree(name); } else { if (gi->composite.gadget_driver.udc_name) { ret = -EBUSY; From 73561128eb72ca04c3be6e240f162a861bf68a86 Mon Sep 17 00:00:00 2001 From: Franck Demathieu Date: Mon, 27 Feb 2017 11:52:46 +0100 Subject: [PATCH 0603/1299] usb: dwc3: Fix incorrect type for utmi mode The utmi mode is unsigned according the dt-bindings. Fix sparse issue (-Wtypesign): drivers/usb/dwc3/dwc3-omap.c:391:50: warning: incorrect type in argument 3 (different signedness) drivers/usb/dwc3/dwc3-omap.c:391:50: expected unsigned int [usertype] *out_value drivers/usb/dwc3/dwc3-omap.c:391:50: got int * Signed-off-by: Franck Demathieu Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 2092e46b1380..4a595777969e 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -392,7 +392,7 @@ static void dwc3_omap_set_utmi_mode(struct dwc3_omap *omap) { u32 reg; struct device_node *node = omap->dev->of_node; - int utmi_mode = 0; + u32 utmi_mode = 0; reg = dwc3_omap_read_utmi_ctrl(omap); From 4242820277b5378c9e4064e79306f326d731472f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 22 Feb 2017 11:33:27 +0100 Subject: [PATCH 0604/1299] usb: gadget: udc: atmel: fix debug output The debug output now contains the wrong variable, as seen from the compiler warning: drivers/usb/gadget/udc/atmel_usba_udc.c: In function 'usba_ep_enable': drivers/usb/gadget/udc/atmel_usba_udc.c:632:550: error: 'ept_cfg' may be used uninitialized in this function [-Werror=maybe-uninitialized] DBG(DBG_ERR, "%s: EPT_CFG = 0x%lx (maxpacket = %lu)\n", This changes the debug output the same way as the other code. Fixes: 741d2558bf0a ("usb: gadget: udc: atmel: Update endpoint allocation scheme") Signed-off-by: Arnd Bergmann Acked-by: Alexandre Belloni Acked-by: Nicolas Ferre Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/atmel_usba_udc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index 11bbce28bc23..2035906b8ced 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -610,7 +610,7 @@ usba_ep_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) { struct usba_ep *ep = to_usba_ep(_ep); struct usba_udc *udc = ep->udc; - unsigned long flags, ept_cfg, maxpacket; + unsigned long flags, maxpacket; unsigned int nr_trans; DBG(DBG_GADGET, "%s: ep_enable: desc=%p\n", ep->ep.name, desc); @@ -630,7 +630,7 @@ usba_ep_enable(struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc) ep->is_in = 0; DBG(DBG_ERR, "%s: EPT_CFG = 0x%lx (maxpacket = %lu)\n", - ep->ep.name, ept_cfg, maxpacket); + ep->ep.name, ep->ept_cfg, maxpacket); if (usb_endpoint_dir_in(desc)) { ep->is_in = 1; From b6e7aeeaf235901c42ec35de4633c7c69501d303 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 21 Feb 2017 22:33:11 +0100 Subject: [PATCH 0605/1299] USB: gadgetfs: Fix a potential memory leak in 'dev_config()' 'kbuf' is allocated just a few lines above using 'memdup_user()'. If the 'if (dev->buf)' test fails, this memory is never released. Signed-off-by: Christophe JAILLET Signed-off-by: Felipe Balbi --- drivers/usb/gadget/legacy/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index a2615d64d07c..0513dfa008e6 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1782,8 +1782,10 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) spin_lock_irq (&dev->lock); value = -EINVAL; - if (dev->buf) + if (dev->buf) { + kfree(kbuf); goto fail; + } dev->buf = kbuf; /* full or low speed config */ From 3ba534df815f233535b5a3dd3de41055666bbd21 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Thu, 16 Feb 2017 14:54:12 +0100 Subject: [PATCH 0606/1299] Revert "usb: gadget: f_fs: Fix ExtCompat descriptor validation" This reverts commit ac670a3a650b899fc020b81f63e810d06015b865. This introduce bug we already fixed in commit 53642399aa71 ("usb: gadget: f_fs: Fix wrong check on reserved1 wof OS_DESC_EXT_COMPAT") Next FFS (adb) SS enumeration fail with Windows OS. Signed-off-by: Janusz Dziedzic Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 7e976f00cb02..a0085571824d 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2264,7 +2264,7 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type, if (len < sizeof(*d) || d->bFirstInterfaceNumber >= ffs->interfaces_count || - d->Reserved1) + !d->Reserved1) return -EINVAL; for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i) if (d->Reserved2[i]) From 1551e35ea4189c1f7199fe278395fc94196715f2 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 15 Feb 2017 14:16:26 +0200 Subject: [PATCH 0607/1299] usb: dwc3: gadget: Fix system suspend/resume on TI platforms On TI platforms (dra7, am437x), the DWC3_DSTS_DEVCTRLHLT bit is not set after the device controller is stopped via DWC3_DCTL_RUN_STOP. If we don't disconnect and stop the gadget, it stops working after a system resume with the trace below. There is no point in preventing gadget disconnect and gadget stop during system suspend/resume as we're going to suspend in any case, whether DEVCTRLHLT timed out or not. [ 141.727480] ------------[ cut here ]------------ [ 141.732349] WARNING: CPU: 1 PID: 2135 at drivers/usb/dwc3/gadget.c:2384 dwc3_stop_active_transfer.constprop.4+0xc4/0xe4 [dwc3] [ 141.744299] Modules linked in: usb_f_ss_lb g_zero libcomposite xhci_plat_hcd xhci_hcd usbcore dwc3 evdev udc_core m25p80 usb_common spi_nor snd_soc_davinci_mcasp snd_soc_simple_card snd_soc_edma snd_soc_tlv3e [ 141.792163] CPU: 1 PID: 2135 Comm: irq/456-dwc3 Not tainted 4.10.0-rc8 #1138 [ 141.799547] Hardware name: Generic DRA74X (Flattened Device Tree) [ 141.805940] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 141.814066] [] (show_stack) from [] (dump_stack+0xac/0xe0) [ 141.821648] [] (dump_stack) from [] (__warn+0xd8/0x104) [ 141.828955] [] (__warn) from [] (warn_slowpath_null+0x20/0x28) [ 141.836902] [] (warn_slowpath_null) from [] (dwc3_stop_active_transfer.constprop.4+0xc4/0xe4 [dwc3]) [ 141.848329] [] (dwc3_stop_active_transfer.constprop.4 [dwc3]) from [] (__dwc3_gadget_ep_disable+0x64/0x528 [dwc3]) [ 141.861034] [] (__dwc3_gadget_ep_disable [dwc3]) from [] (dwc3_gadget_ep_disable+0x3c/0xc8 [dwc3]) [ 141.872280] [] (dwc3_gadget_ep_disable [dwc3]) from [] (usb_ep_disable+0x11c/0x18c [udc_core]) [ 141.883160] [] (usb_ep_disable [udc_core]) from [] (disable_ep+0x18/0x54 [usb_f_ss_lb]) [ 141.893408] [] (disable_ep [usb_f_ss_lb]) from [] (disable_endpoints+0x18/0x50 [usb_f_ss_lb]) [ 141.904168] [] (disable_endpoints [usb_f_ss_lb]) from [] (disable_source_sink+0x2c/0x34 [usb_f_ss_lb]) [ 141.915771] [] (disable_source_sink [usb_f_ss_lb]) from [] (reset_config+0x48/0x7c [libcomposite]) [ 141.927012] [] (reset_config [libcomposite]) from [] (composite_disconnect+0x2c/0x54 [libcomposite]) [ 141.938444] [] (composite_disconnect [libcomposite]) from [] (usb_gadget_udc_reset+0x10/0x34 [udc_core]) [ 141.950237] [] (usb_gadget_udc_reset [udc_core]) from [] (dwc3_gadget_reset_interrupt+0x64/0x698 [dwc3]) [ 141.962022] [] (dwc3_gadget_reset_interrupt [dwc3]) from [] (dwc3_thread_interrupt+0x618/0x1a3c [dwc3]) [ 141.973723] [] (dwc3_thread_interrupt [dwc3]) from [] (irq_thread_fn+0x1c/0x54) [ 141.983215] [] (irq_thread_fn) from [] (irq_thread+0x120/0x1f0) [ 141.991247] [] (irq_thread) from [] (kthread+0xf8/0x138) [ 141.998641] [] (kthread) from [] (ret_from_fork+0x14/0x24) [ 142.006213] ---[ end trace b4ecfe9f175b9a9c ]--- Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f875b3f4b0ec..3db5eeae2bfb 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -3291,15 +3291,10 @@ void dwc3_gadget_exit(struct dwc3 *dwc) int dwc3_gadget_suspend(struct dwc3 *dwc) { - int ret; - if (!dwc->gadget_driver) return 0; - ret = dwc3_gadget_run_stop(dwc, false, false); - if (ret < 0) - return ret; - + dwc3_gadget_run_stop(dwc, false, false); dwc3_disconnect_gadget(dwc); __dwc3_gadget_stop(dwc); From 0913750f9fb6f26bcd00c8f9dd9a8d1b8d031246 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 15 Feb 2017 13:38:22 +0200 Subject: [PATCH 0608/1299] usb: dwc3-omap: Fix missing break in dwc3_omap_set_mailbox() We need to break from all cases if we want to treat each one of them separately. Reported-by: Gustavo A. R. Silva Fixes: d2728fb3e01f ("usb: dwc3: omap: Pass VBUS and ID events transparently") Cc: #v4.8+ Signed-off-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-omap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 4a595777969e..f8d0747810e7 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -250,6 +250,7 @@ static void dwc3_omap_set_mailbox(struct dwc3_omap *omap, val = dwc3_omap_read_utmi_ctrl(omap); val |= USBOTGSS_UTMI_OTG_CTRL_IDDIG; dwc3_omap_write_utmi_ctrl(omap, val); + break; case OMAP_DWC3_VBUS_OFF: val = dwc3_omap_read_utmi_ctrl(omap); From df7545719a14fa7b481896fb8689e23d0a00f682 Mon Sep 17 00:00:00 2001 From: Petr Cvek Date: Fri, 24 Feb 2017 02:54:56 +0100 Subject: [PATCH 0609/1299] usb: gadget: pxa27x: Test for a valid argument pointer A call usb_put_phy(udc->transceiver) must be tested for a valid pointer. Use an already existing test for usb_unregister_notifier call. Acked-by: Robert Jarzmik Reported-by: Robert Jarzmik Signed-off-by: Petr Cvek Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/pxa27x_udc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c index e1335ad5bce9..832c4fdbe985 100644 --- a/drivers/usb/gadget/udc/pxa27x_udc.c +++ b/drivers/usb/gadget/udc/pxa27x_udc.c @@ -2534,9 +2534,10 @@ static int pxa_udc_remove(struct platform_device *_dev) usb_del_gadget_udc(&udc->gadget); pxa_cleanup_debugfs(udc); - if (!IS_ERR_OR_NULL(udc->transceiver)) + if (!IS_ERR_OR_NULL(udc->transceiver)) { usb_unregister_notifier(udc->transceiver, &pxa27x_udc_phy); - usb_put_phy(udc->transceiver); + usb_put_phy(udc->transceiver); + } udc->transceiver = NULL; the_controller = NULL; From ef5e2fa9f65befa12f1113c734602d2c1964d2a5 Mon Sep 17 00:00:00 2001 From: Raz Manor Date: Thu, 9 Feb 2017 09:41:08 +0200 Subject: [PATCH 0610/1299] usb: gadget: udc: net2280: Fix tmp reusage in net2280 driver In the function scan_dma_completions() there is a reusage of tmp variable. That coused a wrong value being used in some case when reading a short packet terminated transaction from an endpoint, in 2 concecutive reads. This was my logic for the patch: The req->td->dmadesc equals to 0 iff: -- There was a transaction ending with a short packet, and -- The read() to read it was shorter than the transaction length, and -- The read() to complete it is longer than the residue. I believe this is true from the printouts of various cases, but I can't be positive it is correct. Entering this if, there should be no more data in the endpoint (a short packet terminated the transaction). If there is, the transaction wasn't really done and we should exit and wait for it to finish entirely. That is the inner if. That inner if should never happen, but it is there to be on the safe side. That is why it is marked with the comment /* paranoia */. The size of the data available in the endpoint is ep->dma->dmacount and it is read to tmp. This entire clause is based on my own educated guesses. If we passed that inner if without breaking in the original code, than tmp & DMA_BYTE_MASK_COUNT== 0. That means we will always pass dma bytes count of 0 to dma_done(), meaning all the requested bytes were read. dma_done() reports back to the upper layer that the request (read()) was done and how many bytes were read. In the original code that would always be the request size, regardless of the actual size of the data. That did not make sense to me at all. However, the original value of tmp is req->td->dmacount, which is the dmacount value when the request's dma transaction was finished. And that is a much more reasonable value to report back to the caller. To recreate the problem: Read from a bulk out endpoint in a loop, 1024 * n bytes in each iteration. Connect the PLX to a host you can control. Send to that endpoint 1024 * n + x bytes, such that 0 < x < 1024 * n and (x % 1024) != 0 You would expect the first read() to return 1024 * n and the second read() to return x. But you will get the first read to return 1024 * n and the second one to return 1024 * n. That is true for every positive integer n. Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Signed-off-by: Raz Manor Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/net2280.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index 85504419ab31..3828c2ec8623 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -1146,15 +1146,15 @@ static int scan_dma_completions(struct net2280_ep *ep) */ while (!list_empty(&ep->queue)) { struct net2280_request *req; - u32 tmp; + u32 req_dma_count; req = list_entry(ep->queue.next, struct net2280_request, queue); if (!req->valid) break; rmb(); - tmp = le32_to_cpup(&req->td->dmacount); - if ((tmp & BIT(VALID_BIT)) != 0) + req_dma_count = le32_to_cpup(&req->td->dmacount); + if ((req_dma_count & BIT(VALID_BIT)) != 0) break; /* SHORT_PACKET_TRANSFERRED_INTERRUPT handles "usb-short" @@ -1163,40 +1163,41 @@ static int scan_dma_completions(struct net2280_ep *ep) */ if (unlikely(req->td->dmadesc == 0)) { /* paranoia */ - tmp = readl(&ep->dma->dmacount); - if (tmp & DMA_BYTE_COUNT_MASK) + u32 const ep_dmacount = readl(&ep->dma->dmacount); + + if (ep_dmacount & DMA_BYTE_COUNT_MASK) break; /* single transfer mode */ - dma_done(ep, req, tmp, 0); + dma_done(ep, req, req_dma_count, 0); num_completed++; break; } else if (!ep->is_in && (req->req.length % ep->ep.maxpacket) && !(ep->dev->quirks & PLX_PCIE)) { - tmp = readl(&ep->regs->ep_stat); + u32 const ep_stat = readl(&ep->regs->ep_stat); /* AVOID TROUBLE HERE by not issuing short reads from * your gadget driver. That helps avoids errata 0121, * 0122, and 0124; not all cases trigger the warning. */ - if ((tmp & BIT(NAK_OUT_PACKETS)) == 0) { + if ((ep_stat & BIT(NAK_OUT_PACKETS)) == 0) { ep_warn(ep->dev, "%s lost packet sync!\n", ep->ep.name); req->req.status = -EOVERFLOW; } else { - tmp = readl(&ep->regs->ep_avail); - if (tmp) { + u32 const ep_avail = readl(&ep->regs->ep_avail); + if (ep_avail) { /* fifo gets flushed later */ ep->out_overflow = 1; ep_dbg(ep->dev, "%s dma, discard %d len %d\n", - ep->ep.name, tmp, + ep->ep.name, ep_avail, req->req.length); req->req.status = -EOVERFLOW; } } } - dma_done(ep, req, tmp, 0); + dma_done(ep, req, req_dma_count, 0); num_completed++; } From 587d7e72aedca91cee80c0a56811649c3efab765 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 2 Mar 2017 12:41:48 -0800 Subject: [PATCH 0611/1299] kvm: nVMX: VMCLEAR should not cause the vCPU to shut down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VMCLEAR should silently ignore a failure to clear the launch state of the VMCS referenced by the operand. Signed-off-by: Jim Mattson [Changed "kvm_write_guest(vcpu->kvm" to "kvm_vcpu_write_guest(vcpu".] Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 283aa8601833..3b626d6dc3ac 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7258,9 +7258,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) static int handle_vmclear(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 zero = 0; gpa_t vmptr; - struct vmcs12 *vmcs12; - struct page *page; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -7271,22 +7270,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - /* - * For accurate processor emulation, VMCLEAR beyond available - * physical memory should do nothing at all. However, it is - * possible that a nested vmx bug, not a guest hypervisor bug, - * resulted in this case, so let's shut down before doing any - * more damage: - */ - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); - return 1; - } - vmcs12 = kmap(page); - vmcs12->launch_state = 0; - kunmap(page); - nested_release_page(page); + kvm_vcpu_write_guest(vcpu, + vmptr + offsetof(struct vmcs12, launch_state), + &zero, sizeof(zero)); nested_free_vmcs02(vmx, vmptr); From c771c14baa3319c85512e575671bf0bb18824c11 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Thu, 2 Mar 2017 15:02:06 -0800 Subject: [PATCH 0612/1299] iomap: invalidate page caches should be after iomap_dio_complete() in direct write After XFS switching to iomap based DIO (commit acdda3aae146 ("xfs: use iomap_dio_rw")), I started to notice dio29/dio30 tests failures from LTP run on ppc64 hosts, and they can be reproduced on x86_64 hosts with 512B/1k block size XFS too. dio29 diotest3 -b 65536 -n 100 -i 1000 -o 1024000 dio30 diotest6 -b 65536 -n 100 -i 1000 -o 1024000 The failure message is like: bufcmp: offset 0: Expected: 0x62, got 0x0 diotest03 1 TPASS : Read with Direct IO, Write without diotest03 2 TFAIL : diotest3.c:142: comparsion failed; child=98 offset=1425408 diotest03 3 TFAIL : diotest3.c:194: Write Direct-child 98 failed Direct write wrote 0x62 but buffer read got zero. This is because, when doing direct write to a hole or preallocated file, we invalidate the page caches before converting the extent from unwritten state to normal state, which is done by iomap_dio_complete(), thus leave a window for other buffer reader to cache the unwritten state extent. Consider this case, with sub-page blocksize XFS, two processes are direct writing to different blocksize-aligned regions (say 512B) of the same preallocated file, and reading the region back via buffered I/O to compare contents. process A, region [0,512] process B, region [512,1024] xfs_file_write_iter xfs_file_aio_dio_write iomap_dio_rw iomap_apply invalidate_inode_pages2_range xfs_file_write_iter xfs_file_aio_dio_write iomap_dio_rw iomap_apply invalidate_inode_pages2_range iomap_dio_complete xfs_file_read_iter xfs_file_buffered_aio_read generic_file_read_iter do_generic_file_read iomap_dio_complete xfs_file_read_iter Process A first invalidates page caches, at this point the underlying extent is still in unwritten state (iomap_dio_complete not called yet), and process B finishs direct write and populates page caches via readahead, which caches zeros in page for region A, then process A reads zeros from page cache, instead of the actual data. Fix it by invalidating page caches after converting unwritten extent to make sure we read content from disk after extent state changed, as what we did before switching to iomap based dio. Also introduce a new 'start' variable to save the original write offset (iomap_dio_complete() updates iocb->ki_pos), and a 'err' variable for invalidating caches result, cause we can't reuse 'ret' anymore. Signed-off-by: Eryu Guan Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/iomap.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index 3ca1a8e44135..141c3cd55a8b 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -846,7 +846,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = file_inode(iocb->ki_filp); size_t count = iov_iter_count(iter); - loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0; + loff_t pos = iocb->ki_pos, start = pos; + loff_t end = iocb->ki_pos + count - 1, ret = 0; unsigned int flags = IOMAP_DIRECT; struct blk_plug plug; struct iomap_dio *dio; @@ -887,12 +888,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, } if (mapping->nrpages) { - ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); + ret = filemap_write_and_wait_range(mapping, start, end); if (ret) goto out_free_dio; ret = invalidate_inode_pages2_range(mapping, - iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); + start >> PAGE_SHIFT, end >> PAGE_SHIFT); WARN_ON_ONCE(ret); ret = 0; } @@ -941,6 +942,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, __set_current_state(TASK_RUNNING); } + ret = iomap_dio_complete(dio); + /* * Try again to invalidate clean pages which might have been cached by * non-direct readahead, or faulted in by get_user_pages() if the source @@ -949,12 +952,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, * this invalidation fails, tough, the write still worked... */ if (iov_iter_rw(iter) == WRITE && mapping->nrpages) { - ret = invalidate_inode_pages2_range(mapping, - iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); - WARN_ON_ONCE(ret); + int err = invalidate_inode_pages2_range(mapping, + start >> PAGE_SHIFT, end >> PAGE_SHIFT); + WARN_ON_ONCE(err); } - return iomap_dio_complete(dio); + return ret; out_free_dio: kfree(dio); From 312eb712e15868236dd03c67971ab2c1d79b4ce6 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 17 Feb 2017 18:44:11 +0100 Subject: [PATCH 0613/1299] cgroup: Fix indenting in PID controller documentation Follow the common documentation style in the file and indent the interface file description by a tab instead of just a space. Signed-off-by: Tobias Klauser Signed-off-by: Tejun Heo --- Documentation/cgroup-v2.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index 3b8449f8ac7e..49d7c997fa1e 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -1142,16 +1142,17 @@ used by the kernel. pids.max - A read-write single value file which exists on non-root cgroups. The - default is "max". + A read-write single value file which exists on non-root + cgroups. The default is "max". - Hard limit of number of processes. + Hard limit of number of processes. pids.current - A read-only single value file which exists on all cgroups. + A read-only single value file which exists on all cgroups. - The number of processes currently in the cgroup and its descendants. + The number of processes currently in the cgroup and its + descendants. Organisational operations are not blocked by cgroup policies, so it is possible to have pids.current > pids.max. This can be done by either From 1d18c2747f937f1b5ec65ce6bf4ccb9ca1aea9e8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 1 Mar 2017 15:39:07 -0500 Subject: [PATCH 0614/1299] cgroup/pids: remove spurious suspicious RCU usage warning pids_can_fork() is special in that the css association is guaranteed to be stable throughout the function and thus doesn't need RCU protection around task_css access. When determining the css to charge the pid, task_css_check() is used to override the RCU sanity check. While adding a warning message on fork rejection from pids limit, 135b8b37bd91 ("cgroup: Add pids controller event when fork fails because of pid limit") incorrectly added a task_css access which is neither RCU protected or explicitly annotated. This triggers the following suspicious RCU usage warning when RCU debugging is enabled. cgroup: fork rejected by pids controller in =============================== [ ERR: suspicious RCU usage. ] 4.10.0-work+ #1 Not tainted ------------------------------- ./include/linux/cgroup.h:435 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 0 1 lock held by bash/1748: #0: (&cgroup_threadgroup_rwsem){+++++.}, at: [] _do_fork+0xe6/0x6e0 stack backtrace: CPU: 3 PID: 1748 Comm: bash Not tainted 4.10.0-work+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.3-1.fc25 04/01/2014 Call Trace: dump_stack+0x68/0x93 lockdep_rcu_suspicious+0xd7/0x110 pids_can_fork+0x1c7/0x1d0 cgroup_can_fork+0x67/0xc0 copy_process.part.58+0x1709/0x1e90 _do_fork+0xe6/0x6e0 SyS_clone+0x19/0x20 do_syscall_64+0x5c/0x140 entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:0x7f7853fab93a RSP: 002b:00007ffc12d05c90 EFLAGS: 00000246 ORIG_RAX: 0000000000000038 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7853fab93a RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000001200011 RBP: 00007ffc12d05cc0 R08: 0000000000000000 R09: 00007f78548db700 R10: 00007f78548db9d0 R11: 0000000000000246 R12: 00000000000006d4 R13: 0000000000000001 R14: 0000000000000000 R15: 000055e3ebe2c04d /asdf There's no reason to dereference task_css again here when the associated css is already available. Fix it by replacing the task_cgroup() call with css->cgroup. Signed-off-by: Tejun Heo Reported-by: Mike Galbraith Fixes: 135b8b37bd91 ("cgroup: Add pids controller event when fork fails because of pid limit") Cc: Kenny Yu Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Tejun Heo --- kernel/cgroup/pids.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index e756dae49300..2237201d66d5 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -229,7 +229,7 @@ static int pids_can_fork(struct task_struct *task) /* Only log the first time events_limit is incremented. */ if (atomic64_inc_return(&pids->events_limit) == 1) { pr_info("cgroup: fork rejected by pids controller in "); - pr_cont_cgroup_path(task_cgroup(current, pids_cgrp_id)); + pr_cont_cgroup_path(css->cgroup); pr_cont("\n"); } cgroup_file_notify(&pids->events_file); From b6a6759daf55dade2b65089957832759d502acfb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 25 Feb 2017 01:56:48 -0800 Subject: [PATCH 0615/1299] cgroups: censor kernel pointer in debug files As found in grsecurity, this avoids exposing a kernel pointer through the cgroup debug entries. Signed-off-by: Kees Cook Signed-off-by: Tejun Heo --- kernel/cgroup/cgroup-v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 56eba9caa632..1dc22f6b49f5 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1329,7 +1329,7 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v) struct task_struct *task; int count = 0; - seq_printf(seq, "css_set %p\n", cset); + seq_printf(seq, "css_set %pK\n", cset); list_for_each_entry(task, &cset->tasks, cg_list) { if (count++ > MAX_TASKS_SHOWN_PER_CSS) From d85fc67dd11e9a32966140677d4d6429ca540b25 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Fri, 3 Mar 2017 09:00:09 -0800 Subject: [PATCH 0616/1299] libata: transport: Remove circular dependency at free time Without this patch, failed probe would not free resources like irq. ata port tdev object currently hold a reference to the ata port object. Therefore the ata port object release function will not get called until the ata_tport_release is called. But that would never happen, releasing the last reference of ata port dev is done by scsi_host_release, which is called by ata_host_release when the ata port object is released. The ata device objects actually do not need to explicitly hold a reference to their real counterpart, given the transport objects are the children of these objects and device_add() is call for each child. We know the parent will not be deleted until we call the child's device_del(). Reported-by: Matthew Whitehead Tested-by: Matthew Whitehead Suggested-by: Tejun Heo Signed-off-by: Gwendal Grignou Signed-off-by: Tejun Heo --- drivers/ata/libata-transport.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index 46698232e6bf..19e6e539a061 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -224,7 +224,6 @@ static DECLARE_TRANSPORT_CLASS(ata_port_class, static void ata_tport_release(struct device *dev) { - put_device(dev->parent); } /** @@ -284,7 +283,7 @@ int ata_tport_add(struct device *parent, device_initialize(dev); dev->type = &ata_port_type; - dev->parent = get_device(parent); + dev->parent = parent; dev->release = ata_tport_release; dev_set_name(dev, "ata%d", ap->print_id); transport_setup_device(dev); @@ -348,7 +347,6 @@ static DECLARE_TRANSPORT_CLASS(ata_link_class, static void ata_tlink_release(struct device *dev) { - put_device(dev->parent); } /** @@ -410,7 +408,7 @@ int ata_tlink_add(struct ata_link *link) int error; device_initialize(dev); - dev->parent = get_device(&ap->tdev); + dev->parent = &ap->tdev; dev->release = ata_tlink_release; if (ata_is_host_link(link)) dev_set_name(dev, "link%d", ap->print_id); @@ -589,7 +587,6 @@ static DECLARE_TRANSPORT_CLASS(ata_dev_class, static void ata_tdev_release(struct device *dev) { - put_device(dev->parent); } /** @@ -662,7 +659,7 @@ static int ata_tdev_add(struct ata_device *ata_dev) int error; device_initialize(dev); - dev->parent = get_device(&link->tdev); + dev->parent = &link->tdev; dev->release = ata_tdev_release; if (ata_is_host_link(link)) dev_set_name(dev, "dev%d.%d", ap->print_id,ata_dev->devno); From 0580b762a4d6b70817476b90042813f8573283fa Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 6 Mar 2017 15:26:54 -0500 Subject: [PATCH 0617/1299] libata: drop WARN from protocol error in ata_sff_qc_issue() ata_sff_qc_issue() expects upper layers to never issue commands on a command protocol that it doesn't implement. While the assumption holds fine with the usual IO path, nothing filters based on the command protocol in the passthrough path (which was added later), allowing the warning to be tripped with a passthrough command with the right (well, wrong) protocol. Failing with AC_ERR_SYSTEM is the right thing to do anyway. Remove the unnecessary WARN. Reported-by: Dmitry Vyukov Link: http://lkml.kernel.org/r/CACT4Y+bXkvevNZU8uP6X0QVqsj6wNoUA_1exfTSOzc+SmUtMOA@mail.gmail.com Signed-off-by: Tejun Heo --- drivers/ata/libata-sff.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 2bd92dca3e62..274d6d7193d7 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1482,7 +1482,6 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) break; default: - WARN_ON_ONCE(1); return AC_ERR_SYSTEM; } From 637fdbae60d6cb9f6e963c1079d7e0445c86ff7d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 6 Mar 2017 15:33:42 -0500 Subject: [PATCH 0618/1299] workqueue: trigger WARN if queue_delayed_work() is called with NULL @wq If queue_delayed_work() gets called with NULL @wq, the kernel will oops asynchronuosly on timer expiration which isn't too helpful in tracking down the offender. This actually happened with smc. __queue_delayed_work() already does several input sanity checks synchronously. Add NULL @wq check. Reported-by: Dave Jones Link: http://lkml.kernel.org/r/20170227171439.jshx3qplflyrgcv7@codemonkey.org.uk Signed-off-by: Tejun Heo --- kernel/workqueue.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 072cbc9b175d..c0168b7da1ea 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1507,6 +1507,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; + WARN_ON_ONCE(!wq); WARN_ON_ONCE(timer->function != delayed_work_timer_fn || timer->data != (unsigned long)dwork); WARN_ON_ONCE(timer_pending(timer)); From 320661b08dd6f1746d5c7ab4eb435ec64b97cd45 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sat, 25 Feb 2017 13:00:19 -0800 Subject: [PATCH 0619/1299] percpu: acquire pcpu_lock when updating pcpu_nr_empty_pop_pages Update to pcpu_nr_empty_pop_pages in pcpu_alloc() is currently done without holding pcpu_lock. This can lead to bad updates to the variable. Add missing lock calls. Fixes: b539b87fed37 ("percpu: implmeent pcpu_nr_empty_pop_pages and chunk->nr_populated") Signed-off-by: Tahsin Erdogan Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # v3.18+ --- mm/percpu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/percpu.c b/mm/percpu.c index 5696039b5c07..60a6488e9e6d 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1011,8 +1011,11 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, mutex_unlock(&pcpu_alloc_mutex); } - if (chunk != pcpu_reserved_chunk) + if (chunk != pcpu_reserved_chunk) { + spin_lock_irqsave(&pcpu_lock, flags); pcpu_nr_empty_pop_pages -= occ_pages; + spin_unlock_irqrestore(&pcpu_lock, flags); + } if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) pcpu_schedule_balance_work(); From 8a1df543de8ad879d3c80bdda4c67ac4f82e7ee0 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Sat, 25 Feb 2017 12:59:26 -0800 Subject: [PATCH 0620/1299] percpu: remove unused chunk_alloc parameter from pcpu_get_pages() pcpu_get_pages() doesn't use chunk_alloc parameter, remove it. Fixes: fbbb7f4e149f ("percpu: remove the usage of separate populated bitmap in percpu-vm") Signed-off-by: Tahsin Erdogan Signed-off-by: Tejun Heo --- mm/percpu-vm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index 538998a137d2..9ac639499bd1 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ -21,7 +21,6 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, /** * pcpu_get_pages - get temp pages array - * @chunk: chunk of interest * * Returns pointer to array of pointers to struct page which can be indexed * with pcpu_page_idx(). Note that there is only one array and accesses @@ -30,7 +29,7 @@ static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, * RETURNS: * Pointer to temp pages array on success. */ -static struct page **pcpu_get_pages(struct pcpu_chunk *chunk_alloc) +static struct page **pcpu_get_pages(void) { static struct page **pages; size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); @@ -275,7 +274,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, { struct page **pages; - pages = pcpu_get_pages(chunk); + pages = pcpu_get_pages(); if (!pages) return -ENOMEM; @@ -313,7 +312,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, * successful population attempt so the temp pages array must * be available now. */ - pages = pcpu_get_pages(chunk); + pages = pcpu_get_pages(); BUG_ON(!pages); /* unmap and free */ From 040757f738e13caaa9c5078bca79aa97e11dde88 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 5 Mar 2017 15:03:22 -0600 Subject: [PATCH 0621/1299] ucount: Remove the atomicity from ucount->count Always increment/decrement ucount->count under the ucounts_lock. The increments are there already and moving the decrements there means the locking logic of the code is simpler. This simplification in the locking logic fixes a race between put_ucounts and get_ucounts that could result in a use-after-free because the count could go zero then be found by get_ucounts and then be freed by put_ucounts. A bug presumably this one was found by a combination of syzkaller and KASAN. JongWhan Kim reported the syzkaller failure and Dmitry Vyukov spotted the race in the code. Cc: stable@vger.kernel.org Fixes: f6b2db1a3e8d ("userns: Make the count of user namespaces per user") Reported-by: JongHwan Kim Reported-by: Dmitry Vyukov Reviewed-by: Andrei Vagin Signed-off-by: "Eric W. Biederman" --- include/linux/user_namespace.h | 2 +- kernel/ucount.c | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index be765234c0a2..32354b4b4b2b 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -72,7 +72,7 @@ struct ucounts { struct hlist_node node; struct user_namespace *ns; kuid_t uid; - atomic_t count; + int count; atomic_t ucount[UCOUNT_COUNTS]; }; diff --git a/kernel/ucount.c b/kernel/ucount.c index 62630a40ab3a..b4eeee03934f 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -144,7 +144,7 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) new->ns = ns; new->uid = uid; - atomic_set(&new->count, 0); + new->count = 0; spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -155,8 +155,10 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) ucounts = new; } } - if (!atomic_add_unless(&ucounts->count, 1, INT_MAX)) + if (ucounts->count == INT_MAX) ucounts = NULL; + else + ucounts->count += 1; spin_unlock_irq(&ucounts_lock); return ucounts; } @@ -165,13 +167,15 @@ static void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - if (atomic_dec_and_test(&ucounts->count)) { - spin_lock_irqsave(&ucounts_lock, flags); + spin_lock_irqsave(&ucounts_lock, flags); + ucounts->count -= 1; + if (!ucounts->count) hlist_del_init(&ucounts->node); - spin_unlock_irqrestore(&ucounts_lock, flags); + else + ucounts = NULL; + spin_unlock_irqrestore(&ucounts_lock, flags); - kfree(ucounts); - } + kfree(ucounts); } static inline bool atomic_inc_below(atomic_t *v, int u) From bd571195c9535c0b074fc7cd1b541b93817ed647 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Mar 2017 15:58:03 +0100 Subject: [PATCH 0622/1299] scsi: qedi: fix build error without DEBUG_FS Without CONFIG_DEBUG_FS, we run into a link error: drivers/scsi/qedi/qedi_iscsi.o: In function `qedi_ep_poll': qedi_iscsi.c:(.text.qedi_ep_poll+0x134): undefined reference to `do_not_recover' drivers/scsi/qedi/qedi_iscsi.o: In function `qedi_ep_disconnect': qedi_iscsi.c:(.text.qedi_ep_disconnect+0x36c): undefined reference to `do_not_recover' drivers/scsi/qedi/qedi_iscsi.o: In function `qedi_ep_connect': qedi_iscsi.c:(.text.qedi_ep_connect+0x350): undefined reference to `do_not_recover' drivers/scsi/qedi/qedi_fw.o: In function `qedi_tmf_work': qedi_fw.c:(.text.qedi_tmf_work+0x3b4): undefined reference to `do_not_recover' This defines the symbol as a constant in this case, as there is no way to set it to anything other than zero without DEBUG_FS. In addition, I'm renaming it to qedi_do_not_recover in order to put it into a driver specific namespace, as "do_not_recover" is a really bad name for a kernel-wide global identifier when it is used only in one driver. Fixes: ace7f46ba5fd ("scsi: qedi: Add QLogic FastLinQ offload iSCSI driver framework.") Reviewed-by: Johannes Thumshirn Signed-off-by: Arnd Bergmann Acked-by: Manish Rangankar Signed-off-by: Martin K. Petersen --- drivers/scsi/qedi/qedi_debugfs.c | 16 ++++++++-------- drivers/scsi/qedi/qedi_fw.c | 4 ++-- drivers/scsi/qedi/qedi_gbl.h | 8 +++++++- drivers/scsi/qedi/qedi_iscsi.c | 8 ++++---- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/drivers/scsi/qedi/qedi_debugfs.c b/drivers/scsi/qedi/qedi_debugfs.c index 955936274241..59417199bf36 100644 --- a/drivers/scsi/qedi/qedi_debugfs.c +++ b/drivers/scsi/qedi/qedi_debugfs.c @@ -14,7 +14,7 @@ #include #include -int do_not_recover; +int qedi_do_not_recover; static struct dentry *qedi_dbg_root; void @@ -74,22 +74,22 @@ qedi_dbg_exit(void) static ssize_t qedi_dbg_do_not_recover_enable(struct qedi_dbg_ctx *qedi_dbg) { - if (!do_not_recover) - do_not_recover = 1; + if (!qedi_do_not_recover) + qedi_do_not_recover = 1; QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n", - do_not_recover); + qedi_do_not_recover); return 0; } static ssize_t qedi_dbg_do_not_recover_disable(struct qedi_dbg_ctx *qedi_dbg) { - if (do_not_recover) - do_not_recover = 0; + if (qedi_do_not_recover) + qedi_do_not_recover = 0; QEDI_INFO(qedi_dbg, QEDI_LOG_DEBUGFS, "do_not_recover=%d\n", - do_not_recover); + qedi_do_not_recover); return 0; } @@ -141,7 +141,7 @@ qedi_dbg_do_not_recover_cmd_read(struct file *filp, char __user *buffer, if (*ppos) return 0; - cnt = sprintf(buffer, "do_not_recover=%d\n", do_not_recover); + cnt = sprintf(buffer, "do_not_recover=%d\n", qedi_do_not_recover); cnt = min_t(int, count, cnt - *ppos); *ppos += cnt; return cnt; diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c index c9f0ef4e11b3..2bce3efc66a4 100644 --- a/drivers/scsi/qedi/qedi_fw.c +++ b/drivers/scsi/qedi/qedi_fw.c @@ -1461,9 +1461,9 @@ static void qedi_tmf_work(struct work_struct *work) get_itt(tmf_hdr->rtt), get_itt(ctask->itt), cmd->task_id, qedi_conn->iscsi_conn_id); - if (do_not_recover) { + if (qedi_do_not_recover) { QEDI_ERR(&qedi->dbg_ctx, "DONT SEND CLEANUP/ABORT %d\n", - do_not_recover); + qedi_do_not_recover); goto abort_ret; } diff --git a/drivers/scsi/qedi/qedi_gbl.h b/drivers/scsi/qedi/qedi_gbl.h index 8e488de88ece..63d793f46064 100644 --- a/drivers/scsi/qedi/qedi_gbl.h +++ b/drivers/scsi/qedi/qedi_gbl.h @@ -12,8 +12,14 @@ #include "qedi_iscsi.h" +#ifdef CONFIG_DEBUG_FS +extern int qedi_do_not_recover; +#else +#define qedi_do_not_recover (0) +#endif + extern uint qedi_io_tracing; -extern int do_not_recover; + extern struct scsi_host_template qedi_host_template; extern struct iscsi_transport qedi_iscsi_transport; extern const struct qed_iscsi_ops *qedi_ops; diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index b9f79d36142d..4cc474364c50 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -833,7 +833,7 @@ qedi_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, return ERR_PTR(ret); } - if (do_not_recover) { + if (qedi_do_not_recover) { ret = -ENOMEM; return ERR_PTR(ret); } @@ -957,7 +957,7 @@ static int qedi_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) struct qedi_endpoint *qedi_ep; int ret = 0; - if (do_not_recover) + if (qedi_do_not_recover) return 1; qedi_ep = ep->dd_data; @@ -1025,7 +1025,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) } if (test_bit(QEDI_IN_RECOVERY, &qedi->flags)) { - if (do_not_recover) { + if (qedi_do_not_recover) { QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO, "Do not recover cid=0x%x\n", qedi_ep->iscsi_cid); @@ -1039,7 +1039,7 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) } } - if (do_not_recover) + if (qedi_do_not_recover) goto ep_exit_recover; switch (qedi_ep->state) { From 934767c56b0d9dbb95a40e9e6e4d9dcdc3a165ad Mon Sep 17 00:00:00 2001 From: Raghava Aditya Renukunta Date: Thu, 2 Mar 2017 09:21:33 -0800 Subject: [PATCH 0623/1299] scsi: aacraid: Fix typo in blink status The return status of the adapter check on KERNEL_PANIC is supposed to be the upper 16 bits of the OMR status register. Fixes: c421530bf848604e (scsi: aacraid: Reorder Adpater status check) Reported-by: Dan Carpenter Signed-off-by: Raghava Aditya Renukunta Reviewed-by: Dave Carroll Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/src.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/aacraid/src.c b/drivers/scsi/aacraid/src.c index 2e5338dec621..7b0410e0f569 100644 --- a/drivers/scsi/aacraid/src.c +++ b/drivers/scsi/aacraid/src.c @@ -468,7 +468,7 @@ static int aac_src_check_health(struct aac_dev *dev) return -1; err_blink: - return (status > 16) & 0xFF; + return (status >> 16) & 0xFF; } static inline u32 aac_get_vector(struct aac_dev *dev) From 23456565acf6d452e0368f7380aecd584c019c67 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 2 Mar 2017 17:14:47 -0800 Subject: [PATCH 0624/1299] scsi: qla2xxx: Fix ql_dump_buffer Recent printk changes for KERN_CONT cause this logging to be defectively emitted on multiple lines. Fix it. Also reduces object size a trivial amount. $ size drivers/scsi/qla2xxx/qla_dbg.o* text data bss dec hex filename 39125 0 0 39125 98d5 drivers/scsi/qla2xxx/qla_dbg.o.new 39164 0 0 39164 98fc drivers/scsi/qla2xxx/qla_dbg.o.old Signed-off-by: Joe Perches Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_dbg.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 21d9fb7fc887..51b4179469d1 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -2707,13 +2707,9 @@ ql_dump_buffer(uint32_t level, scsi_qla_host_t *vha, int32_t id, "%-+5d 0 1 2 3 4 5 6 7 8 9 A B C D E F\n", size); ql_dbg(level, vha, id, "----- -----------------------------------------------\n"); - for (cnt = 0; cnt < size; cnt++, buf++) { - if (cnt % 16 == 0) - ql_dbg(level, vha, id, "%04x:", cnt & ~0xFU); - printk(" %02x", *buf); - if (cnt % 16 == 15) - printk("\n"); + for (cnt = 0; cnt < size; cnt += 16) { + ql_dbg(level, vha, id, "%04x: ", cnt); + print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, + buf + cnt, min(16U, size - cnt), false); } - if (cnt % 16 != 0) - printk("\n"); } From c527de41aea24c2cdb6638818008d810013b4d39 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Mar 2017 07:57:16 -0700 Subject: [PATCH 0625/1299] scsi: vmw_pvscsi: handle the return value from pci_alloc_irq_vectors correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It returns the number of vectors allocated when successful, so check for a negative error only. Fixes: 2e48e349 ("scsi: vmw_pvscsi: switch to pci_alloc_irq_vectors") Signed-off-by: Christoph Hellwig Reported-by: Loïc Yhuel Tested-by: Loïc Yhuel Signed-off-by: Martin K. Petersen --- drivers/scsi/vmw_pvscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index ef474a748744..c374e3b5c678 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -1487,7 +1487,7 @@ static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id) irq_flag &= ~PCI_IRQ_MSI; error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag); - if (error) + if (error < 0) goto out_reset_adapter; adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true); From db6b2060bcae1ce1f9bde73a423e710b625ae1ef Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 4 Mar 2017 00:07:04 -0800 Subject: [PATCH 0626/1299] scsi: qedf: Fix defective logging format and argument mismatches Add __printf compiler verification of format and arguments. Fix fallout. Signed-off-by: Joe Perches Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_dbg.h | 13 ++++++++----- drivers/scsi/qedf/qedf_fip.c | 2 +- drivers/scsi/qedf/qedf_io.c | 4 ++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/qedf/qedf_dbg.h b/drivers/scsi/qedf/qedf_dbg.h index 23bd70628a2f..7d173f48a81e 100644 --- a/drivers/scsi/qedf/qedf_dbg.h +++ b/drivers/scsi/qedf/qedf_dbg.h @@ -81,14 +81,17 @@ struct qedf_dbg_ctx { #define QEDF_INFO(pdev, level, fmt, ...) \ qedf_dbg_info(pdev, __func__, __LINE__, level, fmt, \ ## __VA_ARGS__) - -extern void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line, +__printf(4, 5) +void qedf_dbg_err(struct qedf_dbg_ctx *qedf, const char *func, u32 line, const char *fmt, ...); -extern void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line, +__printf(4, 5) +void qedf_dbg_warn(struct qedf_dbg_ctx *qedf, const char *func, u32 line, const char *, ...); -extern void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func, +__printf(4, 5) +void qedf_dbg_notice(struct qedf_dbg_ctx *qedf, const char *func, u32 line, const char *, ...); -extern void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line, +__printf(5, 6) +void qedf_dbg_info(struct qedf_dbg_ctx *qedf, const char *func, u32 line, u32 info, const char *fmt, ...); /* GRC Dump related defines */ diff --git a/drivers/scsi/qedf/qedf_fip.c b/drivers/scsi/qedf/qedf_fip.c index 868d423380d1..ed58b9104f58 100644 --- a/drivers/scsi/qedf/qedf_fip.c +++ b/drivers/scsi/qedf/qedf_fip.c @@ -203,7 +203,7 @@ void qedf_fip_recv(struct qedf_ctx *qedf, struct sk_buff *skb) case FIP_DT_MAC: mp = (struct fip_mac_desc *)desc; QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_LL2, - "fd_mac=%pM.\n", __func__, mp->fd_mac); + "fd_mac=%pM\n", mp->fd_mac); ether_addr_copy(cvl_mac, mp->fd_mac); break; case FIP_DT_NAME: diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index ee0dcf9d3aba..46debe5034af 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -1342,7 +1342,7 @@ void qedf_scsi_completion(struct qedf_ctx *qedf, struct fcoe_cqe *cqe, } else { refcount = kref_read(&io_req->refcount); QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, - "%d:0:%d:%d xid=0x%0x op=0x%02x " + "%d:0:%d:%lld xid=0x%0x op=0x%02x " "lba=%02x%02x%02x%02x cdb_status=%d " "fcp_resid=0x%x refcount=%d.\n", qedf->lport->host->host_no, sc_cmd->device->id, @@ -1426,7 +1426,7 @@ void qedf_scsi_done(struct qedf_ctx *qedf, struct qedf_ioreq *io_req, sc_cmd->result = result << 16; refcount = kref_read(&io_req->refcount); - QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%d: Completing " + QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_IO, "%d:0:%d:%lld: Completing " "sc_cmd=%p result=0x%08x op=0x%02x lba=0x%02x%02x%02x%02x, " "allowed=%d retries=%d refcount=%d.\n", qedf->lport->host->host_no, sc_cmd->device->id, From fd2b18b4a7bf01453324733a18297ae9a197a4ae Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 6 Mar 2017 10:32:27 -0800 Subject: [PATCH 0627/1299] scsi: qedf: Use vsprintf extension %pad Using %llx for a dma_addr_t can lead to format/argument mismatches. Use %pad and the address of the dma_addr_t instead. Signed-off-by: Joe Perches Acked-by: Chad Dupuis Signed-off-by: Martin K. Petersen --- drivers/scsi/qedf/qedf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index d9d7a86b5f8b..8e2a160490e6 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -2456,8 +2456,8 @@ static int qedf_alloc_bdq(struct qedf_ctx *qedf) } QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_DISC, - "BDQ PBL addr=0x%p dma=0x%llx.\n", qedf->bdq_pbl, - qedf->bdq_pbl_dma); + "BDQ PBL addr=0x%p dma=%pad\n", + qedf->bdq_pbl, &qedf->bdq_pbl_dma); /* * Populate BDQ PBL with physical and virtual address of individual From 33cc559a81397bc813c392617d40ddfdfa3cffbd Mon Sep 17 00:00:00 2001 From: Tomas Jasek Date: Fri, 3 Mar 2017 13:45:48 +0100 Subject: [PATCH 0628/1299] scsi: lpfc: replace init_timer by setup_timer This patch shortens every init_timer in lpfc module followed by function and data assignment using setup_timer. This is purely cleanup patch, it does not add new functionality nor remove any existing functionality. An init_timer call in this form: init_timer(&vport->fc_disctmo); vport->fc_disctmo.function = lpfc_disc_timeout; vport->fc_disctmo.data = vport; is shortened to: setup_timer(&vport->fc_disctmo, lpfc_disc_timeout, vport); It increases readability and reduces chances of mistakes done by developers. Signed-off-by: Tomas Jasek Signed-off-by: Jiri Slaby Cc: James Smart Cc: Dick Kennedy Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Cc: Acked-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 5 ++-- drivers/scsi/lpfc/lpfc_init.c | 47 ++++++++++++-------------------- 2 files changed, 19 insertions(+), 33 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 194a14d5f8a9..2612dac75186 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4344,9 +4344,8 @@ lpfc_initialize_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, { INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp); INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp); - init_timer(&ndlp->nlp_delayfunc); - ndlp->nlp_delayfunc.function = lpfc_els_retry_delay; - ndlp->nlp_delayfunc.data = (unsigned long)ndlp; + setup_timer(&ndlp->nlp_delayfunc, lpfc_els_retry_delay, + (unsigned long)ndlp); ndlp->nlp_DID = did; ndlp->vport = vport; ndlp->phba = vport->phba; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 0ee429d773f3..f395f2e4aa97 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3734,17 +3734,14 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) INIT_LIST_HEAD(&vport->rcv_buffer_list); spin_lock_init(&vport->work_port_lock); - init_timer(&vport->fc_disctmo); - vport->fc_disctmo.function = lpfc_disc_timeout; - vport->fc_disctmo.data = (unsigned long)vport; + setup_timer(&vport->fc_disctmo, lpfc_disc_timeout, + (unsigned long)vport); - init_timer(&vport->els_tmofunc); - vport->els_tmofunc.function = lpfc_els_timeout; - vport->els_tmofunc.data = (unsigned long)vport; + setup_timer(&vport->els_tmofunc, lpfc_els_timeout, + (unsigned long)vport); - init_timer(&vport->delayed_disc_tmo); - vport->delayed_disc_tmo.function = lpfc_delayed_disc_tmo; - vport->delayed_disc_tmo.data = (unsigned long)vport; + setup_timer(&vport->delayed_disc_tmo, lpfc_delayed_disc_tmo, + (unsigned long)vport); error = scsi_add_host_with_dma(shost, dev, &phba->pcidev->dev); if (error) @@ -5406,21 +5403,15 @@ lpfc_setup_driver_resource_phase1(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->luns); /* MBOX heartbeat timer */ - init_timer(&psli->mbox_tmo); - psli->mbox_tmo.function = lpfc_mbox_timeout; - psli->mbox_tmo.data = (unsigned long) phba; + setup_timer(&psli->mbox_tmo, lpfc_mbox_timeout, (unsigned long)phba); /* Fabric block timer */ - init_timer(&phba->fabric_block_timer); - phba->fabric_block_timer.function = lpfc_fabric_block_timeout; - phba->fabric_block_timer.data = (unsigned long) phba; + setup_timer(&phba->fabric_block_timer, lpfc_fabric_block_timeout, + (unsigned long)phba); /* EA polling mode timer */ - init_timer(&phba->eratt_poll); - phba->eratt_poll.function = lpfc_poll_eratt; - phba->eratt_poll.data = (unsigned long) phba; + setup_timer(&phba->eratt_poll, lpfc_poll_eratt, + (unsigned long)phba); /* Heartbeat timer */ - init_timer(&phba->hb_tmofunc); - phba->hb_tmofunc.function = lpfc_hb_timeout; - phba->hb_tmofunc.data = (unsigned long)phba; + setup_timer(&phba->hb_tmofunc, lpfc_hb_timeout, (unsigned long)phba); return 0; } @@ -5446,9 +5437,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) */ /* FCP polling mode timer */ - init_timer(&phba->fcp_poll_timer); - phba->fcp_poll_timer.function = lpfc_poll_timeout; - phba->fcp_poll_timer.data = (unsigned long) phba; + setup_timer(&phba->fcp_poll_timer, lpfc_poll_timeout, + (unsigned long)phba); /* Host attention work mask setup */ phba->work_ha_mask = (HA_ERATT | HA_MBATT | HA_LATT); @@ -5617,14 +5607,11 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) * Initialize timers used by driver */ - init_timer(&phba->rrq_tmr); - phba->rrq_tmr.function = lpfc_rrq_timeout; - phba->rrq_tmr.data = (unsigned long)phba; + setup_timer(&phba->rrq_tmr, lpfc_rrq_timeout, (unsigned long)phba); /* FCF rediscover timer */ - init_timer(&phba->fcf.redisc_wait); - phba->fcf.redisc_wait.function = lpfc_sli4_fcf_redisc_wait_tmo; - phba->fcf.redisc_wait.data = (unsigned long)phba; + setup_timer(&phba->fcf.redisc_wait, lpfc_sli4_fcf_redisc_wait_tmo, + (unsigned long)phba); /* * Control structure for handling external multi-buffer mailbox From 70e5afd57d6d97d69bf5fd0187c2bb5a79990504 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:21 -0800 Subject: [PATCH 0629/1299] scsi: lpfc: remove redundant assignment of sgel From: Colin Ian King In the NVMET_FCOP_RSP case, sgel is assigned but never used and hence is redundant and can be removed. Detected by CoverityScan, CID#1411658 ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvmet.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index c421e1738ee9..e59a0a89d357 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1445,7 +1445,6 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, case NVMET_FCOP_RSP: /* Words 0 - 2 */ - sgel = &rsp->sg[0]; physaddr = rsp->rspdma; wqe->fcp_trsp.bde.tus.f.bdeFlags = BUFF_TYPE_BDE_64; wqe->fcp_trsp.bde.tus.f.bdeSize = rsp->rsplen; From 7aabe84b8a1bb7c3f75fb6a23dc96f8999bdcc8f Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:22 -0800 Subject: [PATCH 0630/1299] scsi: lpfc: sanity check hrq is null before dereferencing it From: Colin Ian King The sanity check for hrq should be moved to before the deference of hrq to ensure we don't perform a null pointer deference. Detected by CoverityScan, CID#1411650 ("Dereference before null check") Signed-off-by: Colin Ian King Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d8d8693040ac..562a5286bc47 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -15185,17 +15185,17 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp, drq = drqp[idx]; cq = cqp[idx]; - if (hrq->entry_count != drq->entry_count) { - status = -EINVAL; - goto out; - } - /* sanity check on queue memory */ if (!hrq || !drq || !cq) { status = -ENODEV; goto out; } + if (hrq->entry_count != drq->entry_count) { + status = -EINVAL; + goto out; + } + if (idx == 0) { bf_set(lpfc_mbx_rq_create_num_pages, &rq_create->u.request, From 332ba3b5d6d27a60d445704ed7c88c7e9f958a30 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:23 -0800 Subject: [PATCH 0631/1299] scsi: lpfc: don't dereference dma_buf->iocbq before null check From: Colin Ian King dma_buf->iocbq is being dereferenced immediately before it is being null checked, so we have a potential null pointer dereference bug. Fix this by only dereferencing it only once we have passed a null check on the pointer. Detected by CoverityScan, CID#1411652 ("Dereference before null check") Signed-off-by: Colin Ian King Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index c61d8d692ede..5986c7957199 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -646,7 +646,6 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) } dma_buf->iocbq = lpfc_sli_get_iocbq(phba); - dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET; if (!dma_buf->iocbq) { kfree(dma_buf->context); pci_pool_free(phba->lpfc_drb_pool, dma_buf->dbuf.virt, @@ -658,6 +657,7 @@ lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba) "2621 Ran out of nvmet iocb/WQEs\n"); return NULL; } + dma_buf->iocbq->iocb_flag = LPFC_IO_NVMET; nvmewqe = dma_buf->iocbq; wqe = (union lpfc_wqe128 *)&nvmewqe->wqe; /* Initialize WQE */ From d11f54b7d1d40463024be3b40ab7be2c8a84fa43 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:24 -0800 Subject: [PATCH 0632/1299] scsi: lpfc: fix missing spin_unlock on sql_list_lock From: Colin Ian King In the case where sglq is null, the current code just returns without unlocking the spinlock sql_list_lock. Fix this by breaking out of the while loop and the exit path will then unlock and return NULL as was the original intention. Detected by CoverityScan, CID#1411635 ("Missing unlock") Signed-off-by: Colin Ian King Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_sli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 562a5286bc47..7389c130ffcc 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -952,7 +952,7 @@ __lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq) start_sglq = sglq; while (!found) { if (!sglq) - return NULL; + break; if (ndlp && ndlp->active_rrqs_xri_bitmap && test_bit(sglq->sli4_lxritag, ndlp->active_rrqs_xri_bitmap)) { From 5d181531bc6169e19a02a27d202cf0e982db9d0e Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:25 -0800 Subject: [PATCH 0633/1299] scsi: lpfc: Fix crash during Hardware error recovery on SLI3 adapters if REG_VPI fails, the driver was incorrectly issuing INIT_VFI (a SLI4 command) on a SLI3 adapter. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 2d26440e6f2f..d260a1391baf 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -8371,11 +8371,17 @@ lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) spin_lock_irq(shost->host_lock); vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI; spin_unlock_irq(shost->host_lock); - if (vport->port_type == LPFC_PHYSICAL_PORT - && !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) - lpfc_issue_init_vfi(vport); - else + if (mb->mbxStatus == MBX_NOT_FINISHED) + break; + if ((vport->port_type == LPFC_PHYSICAL_PORT) && + !(vport->fc_flag & FC_LOGO_RCVD_DID_CHNG)) { + if (phba->sli_rev == LPFC_SLI_REV4) + lpfc_issue_init_vfi(vport); + else + lpfc_initial_flogi(vport); + } else { lpfc_initial_fdisc(vport); + } break; } } else { From 8b3616392d32d2b04ce649caf6684da1b7050d8c Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:26 -0800 Subject: [PATCH 0634/1299] scsi: lpfc: Fix RCTL value on NVME LS request and response NVME LS requests and responses had wrong R_CTL values. Use the FC4 ELS Request and Response defines (defines badly named, they are FC4 LS's) instead of the base ELS values. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 609a908ea9db..6346d4dee9ff 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -316,7 +316,7 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, bf_set(wqe_dfctl, &wqe->gen_req.wge_ctl, 0); bf_set(wqe_si, &wqe->gen_req.wge_ctl, 1); bf_set(wqe_la, &wqe->gen_req.wge_ctl, 1); - bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_DD_UNSOL_CTL); + bf_set(wqe_rctl, &wqe->gen_req.wge_ctl, FC_RCTL_ELS4_REQ); bf_set(wqe_type, &wqe->gen_req.wge_ctl, FC_TYPE_NVME); /* Word 6 */ diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index e59a0a89d357..6c2221aac394 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1114,7 +1114,7 @@ lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba, bf_set(wqe_dfctl, &wqe->xmit_sequence.wge_ctl, 0); bf_set(wqe_ls, &wqe->xmit_sequence.wge_ctl, 1); bf_set(wqe_la, &wqe->xmit_sequence.wge_ctl, 0); - bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_DD_SOL_CTL); + bf_set(wqe_rctl, &wqe->xmit_sequence.wge_ctl, FC_RCTL_ELS4_REP); bf_set(wqe_type, &wqe->xmit_sequence.wge_ctl, FC_TYPE_NVME); /* Word 6 */ From b06a622ffe8dddcc09dad23dc768f7d1540fb4d6 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:27 -0800 Subject: [PATCH 0635/1299] scsi: lpfc: Fix NVME CMD IU byte swapped word 1 problem Word 1 in NVME CMD IU appears byte swapped from value placed in WQE Should be Big Endian value in WQE word 16 Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 6346d4dee9ff..bf3ccc5d59ac 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -620,15 +620,15 @@ lpfc_nvme_adj_fcp_sgls(struct lpfc_vport *vport, * Embed the payload in the last half of the WQE * WQE words 16-30 get the NVME CMD IU payload * - * WQE Word 16 is already setup with flags - * WQE words 17-19 get payload Words 2-4 + * WQE words 16-19 get payload Words 1-4 * WQE words 20-21 get payload Words 6-7 * WQE words 22-29 get payload Words 16-23 */ - wptr = &wqe->words[17]; /* WQE ptr */ + wptr = &wqe->words[16]; /* WQE ptr */ dptr = (uint32_t *)nCmd->cmdaddr; /* payload ptr */ - dptr += 2; /* Skip Words 0-1 in payload */ + dptr++; /* Skip Word 0 in payload */ + *wptr++ = *dptr++; /* Word 1 */ *wptr++ = *dptr++; /* Word 2 */ *wptr++ = *dptr++; /* Word 3 */ *wptr++ = *dptr++; /* Word 4 */ @@ -978,9 +978,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_WRITE_CMD); - /* Word 16 */ - wqe->words[16] = LPFC_NVME_EMBED_WRITE; - phba->fc4NvmeOutputRequests++; } else { /* Word 7 */ @@ -1002,9 +999,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD); - /* Word 16 */ - wqe->words[16] = LPFC_NVME_EMBED_READ; - phba->fc4NvmeInputRequests++; } } else { @@ -1026,9 +1020,6 @@ lpfc_nvme_prep_io_cmd(struct lpfc_vport *vport, /* Word 11 */ bf_set(wqe_cmd_type, &wqe->generic.wqe_com, NVME_READ_CMD); - /* Word 16 */ - wqe->words[16] = LPFC_NVME_EMBED_CMD; - phba->fc4NvmeControlRequests++; } /* From a5068b46958870633ea340692f301507ef78d00b Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:28 -0800 Subject: [PATCH 0636/1299] scsi: lpfc: Fix IO submission if WQ is full For both initiator and target: if WQ is full, return -EBUSY. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index bf3ccc5d59ac..e1bf31eca845 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1310,7 +1310,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, "sid: x%x did: x%x oxid: x%x\n", ret, vport->fc_myDID, ndlp->nlp_DID, lpfc_ncmd->cur_iocbq.sli4_xritag); - ret = -EINVAL; + ret = -EBUSY; goto out_free_nvme_buf; } diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 6c2221aac394..735e2ba70198 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -571,6 +571,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6102 Bad state IO x%x aborted\n", ctxp->oxid); + rc = -ENXIO; goto aerr; } @@ -580,6 +581,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6152 FCP Drop IO x%x: Prep\n", ctxp->oxid); + rc = -ENXIO; goto aerr; } @@ -618,8 +620,9 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, ctxp->wqeq->hba_wqidx = 0; nvmewqeq->context2 = NULL; nvmewqeq->context3 = NULL; + rc = -EBUSY; aerr: - return -ENXIO; + return rc; } static void From 3ebd9b4701ef77358030a0ef9cb6586db2149712 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:29 -0800 Subject: [PATCH 0637/1299] scsi: lpfc: Fix nvme allocation bug on failed nvme_fc_register_localport nvme bufs get allocated even when the registration fails. Move allocation into the rsgistration success path. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_nvme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index e1bf31eca845..26cde7c040ba 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2164,10 +2164,10 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) lport->vport = vport; INIT_LIST_HEAD(&lport->rport_list); vport->nvmei_support = 1; + len = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max); + vport->phba->total_nvme_bufs += len; } - len = lpfc_new_nvme_buf(vport, phba->sli4_hba.nvme_xri_max); - vport->phba->total_nvme_bufs += len; return ret; } From 318083ad9230ff13cdac34ae4c4135e0c4e2d9ad Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:30 -0800 Subject: [PATCH 0638/1299] scsi: lpfc: add NVME exchange aborts previous code did little more than log a message. This patch adds abort path support, modeled after the SCSI code paths. Currently addresses only the initiator path. Target path under development, but stubbed out. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_hbadisc.c | 2 + drivers/scsi/lpfc/lpfc_init.c | 7 ++++ drivers/scsi/lpfc/lpfc_nvme.c | 64 +++++++++++++++++++++++++++++--- drivers/scsi/lpfc/lpfc_nvme.h | 1 + drivers/scsi/lpfc/lpfc_nvmet.c | 21 +++++++++-- drivers/scsi/lpfc/lpfc_sli.c | 51 ++++++++++++++++++++++++- drivers/scsi/lpfc/lpfc_sli4.h | 6 +++ 8 files changed, 143 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 0bba2e30b4f0..de6cd57dc7f6 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -692,6 +692,7 @@ struct lpfc_hba { * capability */ #define HBA_NVME_IOQ_FLUSH 0x80000 /* NVME IO queues flushed. */ +#define NVME_XRI_ABORT_EVENT 0x100000 uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/ struct lpfc_dmabuf slim2p; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 2612dac75186..bd8635d303d2 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -641,6 +641,8 @@ lpfc_work_done(struct lpfc_hba *phba) lpfc_handle_rrq_active(phba); if (phba->hba_flag & FCP_XRI_ABORT_EVENT) lpfc_sli4_fcp_xri_abort_event_proc(phba); + if (phba->hba_flag & NVME_XRI_ABORT_EVENT) + lpfc_sli4_nvme_xri_abort_event_proc(phba); if (phba->hba_flag & ELS_XRI_ABORT_EVENT) lpfc_sli4_els_xri_abort_event_proc(phba); if (phba->hba_flag & ASYNC_EVENT) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index f395f2e4aa97..a0cba631869e 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5723,6 +5723,8 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Initialize the Abort nvme buffer list used by driver */ spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list); + /* Fast-path XRI aborted CQ Event work queue list */ + INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue); } /* This abort list used by worker thread */ @@ -8960,6 +8962,11 @@ lpfc_sli4_cq_event_release_all(struct lpfc_hba *phba) /* Pending ELS XRI abort events */ list_splice_init(&phba->sli4_hba.sp_els_xri_aborted_work_queue, &cqelist); + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + /* Pending NVME XRI abort events */ + list_splice_init(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue, + &cqelist); + } /* Pending asynnc events */ list_splice_init(&phba->sli4_hba.sp_asynce_work_queue, &cqelist); diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 26cde7c040ba..b9012fee1632 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1277,6 +1277,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, pnvme_fcreq->private = (void *)lpfc_ncmd; lpfc_ncmd->nvmeCmd = pnvme_fcreq; lpfc_ncmd->nrport = rport; + lpfc_ncmd->ndlp = ndlp; lpfc_ncmd->start_time = jiffies; lpfc_nvme_prep_io_cmd(vport, lpfc_ncmd, ndlp); @@ -1812,10 +1813,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba, pdma_phys_sgl1, cur_xritag); if (status) { /* failure, put on abort nvme list */ - lpfc_ncmd->exch_busy = 1; + lpfc_ncmd->flags |= LPFC_SBUF_XBUSY; } else { /* success, put on NVME buffer list */ - lpfc_ncmd->exch_busy = 0; + lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; lpfc_ncmd->status = IOSTAT_SUCCESS; num_posted++; } @@ -1845,10 +1846,10 @@ lpfc_post_nvme_sgl_list(struct lpfc_hba *phba, struct lpfc_nvme_buf, list); if (status) { /* failure, put on abort nvme list */ - lpfc_ncmd->exch_busy = 1; + lpfc_ncmd->flags |= LPFC_SBUF_XBUSY; } else { /* success, put on NVME buffer list */ - lpfc_ncmd->exch_busy = 0; + lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; lpfc_ncmd->status = IOSTAT_SUCCESS; num_posted++; } @@ -2090,7 +2091,7 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd) unsigned long iflag = 0; lpfc_ncmd->nonsg_phys = 0; - if (lpfc_ncmd->exch_busy) { + if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) { spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag); lpfc_ncmd->nvmeCmd = NULL; @@ -2453,3 +2454,56 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) "6168: State error: lport %p, rport%p FCID x%06x\n", vport->localport, ndlp->rport, ndlp->nlp_DID); } + +/** + * lpfc_sli4_nvme_xri_aborted - Fast-path process of NVME xri abort + * @phba: pointer to lpfc hba data structure. + * @axri: pointer to the fcp xri abort wcqe structure. + * + * This routine is invoked by the worker thread to process a SLI4 fast-path + * FCP aborted xri. + **/ +void +lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, + struct sli4_wcqe_xri_aborted *axri) +{ + uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri); + uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri); + struct lpfc_nvme_buf *lpfc_ncmd, *next_lpfc_ncmd; + struct lpfc_nodelist *ndlp; + unsigned long iflag = 0; + int rrq_empty = 0; + + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) + return; + spin_lock_irqsave(&phba->hbalock, iflag); + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(lpfc_ncmd, next_lpfc_ncmd, + &phba->sli4_hba.lpfc_abts_nvme_buf_list, + list) { + if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) { + list_del(&lpfc_ncmd->list); + lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; + lpfc_ncmd->status = IOSTAT_SUCCESS; + spin_unlock( + &phba->sli4_hba.abts_nvme_buf_list_lock); + + rrq_empty = list_empty(&phba->active_rrq_list); + spin_unlock_irqrestore(&phba->hbalock, iflag); + ndlp = lpfc_ncmd->ndlp; + if (ndlp) { + lpfc_set_rrq_active( + phba, ndlp, + lpfc_ncmd->cur_iocbq.sli4_lxritag, + rxid, 1); + lpfc_sli4_abts_err_handler(phba, ndlp, axri); + } + lpfc_release_nvme_buf(phba, lpfc_ncmd); + if (rrq_empty) + lpfc_worker_wake_up(phba); + return; + } + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + spin_unlock_irqrestore(&phba->hbalock, iflag); +} diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index b2fae5e813f8..1347deb8dd6c 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -57,6 +57,7 @@ struct lpfc_nvme_buf { struct list_head list; struct nvmefc_fcp_req *nvmeCmd; struct lpfc_nvme_rport *nrport; + struct lpfc_nodelist *ndlp; uint32_t timeout; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 735e2ba70198..48ce9858bc11 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -734,6 +734,21 @@ lpfc_nvmet_update_targetport(struct lpfc_hba *phba) return 0; } +/** + * lpfc_sli4_nvmet_xri_aborted - Fast-path process of nvmet xri abort + * @phba: pointer to lpfc hba data structure. + * @axri: pointer to the nvmet xri abort wcqe structure. + * + * This routine is invoked by the worker thread to process a SLI4 fast-path + * NVMET aborted xri. + **/ +void +lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, + struct sli4_wcqe_xri_aborted *axri) +{ + /* TODO: work in progress */ +} + void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) { @@ -958,7 +973,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, atomic_inc(&tgtp->rcv_fcp_cmd_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6159 FCP Drop IO x%x: nvmet_fc_rcv_fcp_req x%x\n", + "6159 FCP Drop IO x%x: err x%x\n", ctxp->oxid, rc); dropit: lpfc_nvmeio_data(phba, "NVMET FCP DROP: xri x%x sz %d from %06x\n", @@ -1683,8 +1698,8 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp; lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6067 %s: Entrypoint: sid %x xri %x\n", __func__, - sid, xri); + "6067 Abort: sid %x xri x%x/x%x\n", + sid, xri, ctxp->wqeq->sli4_xritag); tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 7389c130ffcc..8a606d0d2c79 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -12212,6 +12212,41 @@ void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *phba) } } +/** + * lpfc_sli4_nvme_xri_abort_event_proc - Process nvme xri abort event + * @phba: pointer to lpfc hba data structure. + * + * This routine is invoked by the worker thread to process all the pending + * SLI4 NVME abort XRI events. + **/ +void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba) +{ + struct lpfc_cq_event *cq_event; + + /* First, declare the fcp xri abort event has been handled */ + spin_lock_irq(&phba->hbalock); + phba->hba_flag &= ~NVME_XRI_ABORT_EVENT; + spin_unlock_irq(&phba->hbalock); + /* Now, handle all the fcp xri abort events */ + while (!list_empty(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue)) { + /* Get the first event from the head of the event queue */ + spin_lock_irq(&phba->hbalock); + list_remove_head(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue, + cq_event, struct lpfc_cq_event, list); + spin_unlock_irq(&phba->hbalock); + /* Notify aborted XRI for NVME work queue */ + if (phba->nvmet_support) { + lpfc_sli4_nvmet_xri_aborted(phba, + &cq_event->cqe.wcqe_axri); + } else { + lpfc_sli4_nvme_xri_aborted(phba, + &cq_event->cqe.wcqe_axri); + } + /* Free the event processed back to the free pool */ + lpfc_sli4_cq_event_release(phba, cq_event); + } +} + /** * lpfc_sli4_els_xri_abort_event_proc - Process els xri abort event * @phba: pointer to lpfc hba data structure. @@ -12709,10 +12744,22 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba, spin_unlock_irqrestore(&phba->hbalock, iflags); workposted = true; break; + case LPFC_NVME: + spin_lock_irqsave(&phba->hbalock, iflags); + list_add_tail(&cq_event->list, + &phba->sli4_hba.sp_nvme_xri_aborted_work_queue); + /* Set the nvme xri abort event flag */ + phba->hba_flag |= NVME_XRI_ABORT_EVENT; + spin_unlock_irqrestore(&phba->hbalock, iflags); + workposted = true; + break; default: lpfc_printf_log(phba, KERN_ERR, LOG_SLI, - "0603 Invalid work queue CQE subtype (x%x)\n", - cq->subtype); + "0603 Invalid CQ subtype %d: " + "%08x %08x %08x %08x\n", + cq->subtype, wcqe->word0, wcqe->parameter, + wcqe->word2, wcqe->word3); + lpfc_sli4_cq_event_release(phba, cq_event); workposted = false; break; } diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 91153c9f6d18..710458cf11d6 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -642,6 +642,7 @@ struct lpfc_sli4_hba { struct list_head sp_asynce_work_queue; struct list_head sp_fcp_xri_aborted_work_queue; struct list_head sp_els_xri_aborted_work_queue; + struct list_head sp_nvme_xri_aborted_work_queue; struct list_head sp_unsol_work_queue; struct lpfc_sli4_link link_state; struct lpfc_sli4_lnk_info lnk_info; @@ -794,9 +795,14 @@ void lpfc_sli4_fcf_redisc_event_proc(struct lpfc_hba *); int lpfc_sli4_resume_rpi(struct lpfc_nodelist *, void (*)(struct lpfc_hba *, LPFC_MBOXQ_t *), void *); void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *); +void lpfc_sli4_nvme_xri_abort_event_proc(struct lpfc_hba *phba); void lpfc_sli4_els_xri_abort_event_proc(struct lpfc_hba *); void lpfc_sli4_fcp_xri_aborted(struct lpfc_hba *, struct sli4_wcqe_xri_aborted *); +void lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, + struct sli4_wcqe_xri_aborted *axri); +void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, + struct sli4_wcqe_xri_aborted *axri); void lpfc_sli4_els_xri_aborted(struct lpfc_hba *, struct sli4_wcqe_xri_aborted *); void lpfc_sli4_vport_delete_els_xri_aborted(struct lpfc_vport *); From 96418b5e2c8867da3279d877f5d1ffabfe460c3d Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:31 -0800 Subject: [PATCH 0639/1299] scsi: lpfc: Fix eh_deadline setting for sli3 adapters. A previous change unilaterally removed the hba reset entry point from the sli3 host template. This was done to allow tape devices being used for back up from being removed. Why was this done ? When there was non-responding device on the fabric, the error escalation policy would escalate to the reset handler. When the reset handler was called, it would reset the adapter, dropping link, thus logging out and terminating all i/o's - on any target. If there was a tape device on the same adapter that wasn't in error, it would kill the tape i/o's, effectively killing the tape device state. With the reset point removed, the adapter reset avoided the fabric logout, allowing the other devices to continue to operate unaffected. A hack - yes. Hint: we really need a transport I_T nexus reset callback added to the eh process (in between the SCSI target reset and hba reset points), so a fc logout could occur to the one bad target only and stop the error escalation process. This patch commonizes the approach so it can be used for sli3 and sli4 adapters, but mandates the admin, via module parameter, specifically identify which adapters the resets are to be removed for. Additionally, bus_reset, which sends Target Reset TMFs to all targets, is also removed from the template as it too has the same effect as the adapter reset. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Laurence Oberman Tested-by: Laurence Oberman Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 1 + drivers/scsi/lpfc/lpfc_attr.c | 6 ++++ drivers/scsi/lpfc/lpfc_crtn.h | 4 ++- drivers/scsi/lpfc/lpfc_init.c | 61 +++++++++++++++++++++++++++++++++-- drivers/scsi/lpfc/lpfc_scsi.c | 3 +- 5 files changed, 69 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index de6cd57dc7f6..763f32dd2d23 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -99,6 +99,7 @@ struct lpfc_sli2_slim; #define FC_MAX_ADPTMSG 64 #define MAX_HBAEVT 32 +#define MAX_HBAS_NO_RESET 16 /* Number of MSI-X vectors the driver uses */ #define LPFC_MSIX_VECTORS 2 diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 4114cf4308d0..b741dcb8ee64 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -3010,6 +3010,12 @@ MODULE_PARM_DESC(lpfc_poll, "FCP ring polling mode control:" static DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR, lpfc_poll_show, lpfc_poll_store); +int lpfc_no_hba_reset_cnt; +unsigned long lpfc_no_hba_reset[MAX_HBAS_NO_RESET] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +module_param_array(lpfc_no_hba_reset, ulong, &lpfc_no_hba_reset_cnt, 0444); +MODULE_PARM_DESC(lpfc_no_hba_reset, "WWPN of HBAs that should not be reset"); + LPFC_ATTR(sli_mode, 0, 0, 3, "SLI mode selector:" " 0 - auto (SLI-3 if supported)," diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 843dd73004da..54e6ac42fbcd 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -384,7 +384,7 @@ void lpfc_free_sysfs_attr(struct lpfc_vport *); extern struct device_attribute *lpfc_hba_attrs[]; extern struct device_attribute *lpfc_vport_attrs[]; extern struct scsi_host_template lpfc_template; -extern struct scsi_host_template lpfc_template_s3; +extern struct scsi_host_template lpfc_template_no_hr; extern struct scsi_host_template lpfc_template_nvme; extern struct scsi_host_template lpfc_vport_template; extern struct fc_function_template lpfc_transport_functions; @@ -554,3 +554,5 @@ void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_wcqe_complete *abts_cmpl); extern int lpfc_enable_nvmet_cnt; extern unsigned long long lpfc_enable_nvmet[]; +extern int lpfc_no_hba_reset_cnt; +extern unsigned long lpfc_no_hba_reset[]; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index a0cba631869e..4fa21a9fd883 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3555,6 +3555,44 @@ lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba) return rc; } +static uint64_t +lpfc_get_wwpn(struct lpfc_hba *phba) +{ + uint64_t wwn; + int rc; + LPFC_MBOXQ_t *mboxq; + MAILBOX_t *mb; + + + mboxq = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, + GFP_KERNEL); + if (!mboxq) + return (uint64_t)-1; + + /* First get WWN of HBA instance */ + lpfc_read_nv(phba, mboxq); + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + if (rc != MBX_SUCCESS) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6019 Mailbox failed , mbxCmd x%x " + "READ_NV, mbxStatus x%x\n", + bf_get(lpfc_mqe_command, &mboxq->u.mqe), + bf_get(lpfc_mqe_status, &mboxq->u.mqe)); + mempool_free(mboxq, phba->mbox_mem_pool); + return (uint64_t) -1; + } + mb = &mboxq->u.mb; + memcpy(&wwn, (char *)mb->un.varRDnvp.portname, sizeof(uint64_t)); + /* wwn is WWPN of HBA instance */ + mempool_free(mboxq, phba->mbox_mem_pool); + if (phba->sli_rev == LPFC_SLI_REV4) + return be64_to_cpu(wwn); + else + return (((wwn & 0xffffffff00000000) >> 32) | + ((wwn & 0x00000000ffffffff) << 32)); + +} + /** * lpfc_sli4_nvme_sgl_update - update xri-sgl sizing and mapping * @phba: pointer to lpfc hba data structure. @@ -3676,17 +3714,32 @@ lpfc_create_port(struct lpfc_hba *phba, int instance, struct device *dev) struct lpfc_vport *vport; struct Scsi_Host *shost = NULL; int error = 0; + int i; + uint64_t wwn; + bool use_no_reset_hba = false; + + wwn = lpfc_get_wwpn(phba); + + for (i = 0; i < lpfc_no_hba_reset_cnt; i++) { + if (wwn == lpfc_no_hba_reset[i]) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "6020 Setting use_no_reset port=%llx\n", + wwn); + use_no_reset_hba = true; + break; + } + } if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { if (dev != &phba->pcidev->dev) { shost = scsi_host_alloc(&lpfc_vport_template, sizeof(struct lpfc_vport)); } else { - if (phba->sli_rev == LPFC_SLI_REV4) + if (!use_no_reset_hba) shost = scsi_host_alloc(&lpfc_template, sizeof(struct lpfc_vport)); else - shost = scsi_host_alloc(&lpfc_template_s3, + shost = scsi_host_alloc(&lpfc_template_no_hr, sizeof(struct lpfc_vport)); } } else if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { @@ -5472,7 +5525,8 @@ lpfc_sli_driver_resource_setup(struct lpfc_hba *phba) /* Initialize the host templates the configured values. */ lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt; - lpfc_template_s3.sg_tablesize = phba->cfg_sg_seg_cnt; + lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt; + lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt; /* There are going to be 2 reserved BDEs: 1 FCP cmnd + 1 FCP rsp */ if (phba->cfg_enable_bg) { @@ -5693,6 +5747,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Initialize the host templates with the updated values. */ lpfc_vport_template.sg_tablesize = phba->cfg_sg_seg_cnt; lpfc_template.sg_tablesize = phba->cfg_sg_seg_cnt; + lpfc_template_no_hr.sg_tablesize = phba->cfg_sg_seg_cnt; if (phba->cfg_sg_dma_buf_size <= LPFC_MIN_SG_SLI4_BUF_SZ) phba->cfg_sg_dma_buf_size = LPFC_MIN_SG_SLI4_BUF_SZ; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 9d6384af9fce..bcfd6d6ab16d 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5953,7 +5953,7 @@ struct scsi_host_template lpfc_template_nvme = { .track_queue_depth = 0, }; -struct scsi_host_template lpfc_template_s3 = { +struct scsi_host_template lpfc_template_no_hr = { .module = THIS_MODULE, .name = LPFC_DRIVER_NAME, .proc_name = LPFC_DRIVER_NAME, @@ -6015,7 +6015,6 @@ struct scsi_host_template lpfc_vport_template = { .eh_abort_handler = lpfc_abort_handler, .eh_device_reset_handler = lpfc_device_reset_handler, .eh_target_reset_handler = lpfc_target_reset_handler, - .eh_bus_reset_handler = lpfc_bus_reset_handler, .slave_alloc = lpfc_slave_alloc, .slave_configure = lpfc_slave_configure, .slave_destroy = lpfc_slave_destroy, From 856984b71cefab1580e9439e5382db1247d689b0 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:32 -0800 Subject: [PATCH 0640/1299] scsi: lpfc: add transport eh_timed_out reference Christoph's prior patch missed the template for the sli3 adapters, which is now the "no host reset" template. Add the transport eh_timed_out handler to the no host reset template Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_scsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index bcfd6d6ab16d..54fd0c81ceaf 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5959,6 +5959,7 @@ struct scsi_host_template lpfc_template_no_hr = { .proc_name = LPFC_DRIVER_NAME, .info = lpfc_info, .queuecommand = lpfc_queuecommand, + .eh_timed_out = fc_eh_timed_out, .eh_abort_handler = lpfc_abort_handler, .eh_device_reset_handler = lpfc_device_reset_handler, .eh_target_reset_handler = lpfc_target_reset_handler, From 166d721120c1cf768af11706c3e0411324bf138f Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:33 -0800 Subject: [PATCH 0641/1299] scsi: lpfc: Rework lpfc Kconfig for NVME options Reworked Kconfig so that lfpc only requires the scsi stack. NVME Initiator and NVME Target support can be enabled if the other NVMe subsystems have been enabled. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/Kconfig | 19 ++++++++++++++++--- drivers/scsi/lpfc/lpfc_nvme.c | 18 +++++++++++++++--- drivers/scsi/lpfc/lpfc_nvmet.c | 10 ++++++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 230043c1c90f..4bf55b5d78be 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1241,19 +1241,32 @@ config SCSI_LPFC tristate "Emulex LightPulse Fibre Channel Support" depends on PCI && SCSI depends on SCSI_FC_ATTRS - depends on NVME_FC && NVME_TARGET_FC select CRC_T10DIF - help + ---help--- This lpfc driver supports the Emulex LightPulse Family of Fibre Channel PCI host adapters. config SCSI_LPFC_DEBUG_FS bool "Emulex LightPulse Fibre Channel debugfs Support" depends on SCSI_LPFC && DEBUG_FS - help + ---help--- This makes debugging information from the lpfc driver available via the debugfs filesystem. +config LPFC_NVME_INITIATOR + bool "Emulex LightPulse Fibre Channel NVME Initiator Support" + depends on SCSI_LPFC && NVME_FC + ---help--- + This enables NVME Initiator support in the Emulex lpfc driver. + +config LPFC_NVME_TARGET + bool "Emulex LightPulse Fibre Channel NVME Initiator Support" + depends on SCSI_LPFC && NVME_TARGET_FC + ---help--- + This enables NVME Target support in the Emulex lpfc driver. + Target enablement must still be enabled on a per adapter + basis by module parameters. + config SCSI_SIM710 tristate "Simple 53c710 SCSI support (Compaq, NCR machines)" depends on (EISA || MCA) && SCSI diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index b9012fee1632..0a4c19081409 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2127,11 +2127,12 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd) int lpfc_nvme_create_localport(struct lpfc_vport *vport) { + int ret = 0; struct lpfc_hba *phba = vport->phba; struct nvme_fc_port_info nfcp_info; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; - int len, ret = 0; + int len; /* Initialize this localport instance. The vport wwn usage ensures * that NPIV is accounted for. @@ -2148,8 +2149,12 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) /* localport is allocated from the stack, but the registration * call allocates heap memory as well as the private area. */ +#ifdef CONFIG_LPFC_NVME_INITIATOR ret = nvme_fc_register_localport(&nfcp_info, &lpfc_nvme_template, &vport->phba->pcidev->dev, &localport); +#else + ret = -ENOMEM; +#endif if (!ret) { lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_NVME_DISC, "6005 Successfully registered local " @@ -2185,6 +2190,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) void lpfc_nvme_destroy_localport(struct lpfc_vport *vport) { +#ifdef CONFIG_LPFC_NVME_INITIATOR struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport = NULL, *rport_next = NULL; @@ -2200,7 +2206,6 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, "6011 Destroying NVME localport %p\n", localport); - list_for_each_entry_safe(rport, rport_next, &lport->rport_list, list) { /* The last node ref has to get released now before the rport * private memory area is released by the transport. @@ -2214,6 +2219,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) "6008 rport fail destroy %x\n", ret); wait_for_completion_timeout(&rport->rport_unreg_done, 5); } + /* lport's rport list is clear. Unregister * lport and release resources. */ @@ -2237,6 +2243,7 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) "Failed, status x%x\n", ret); } +#endif } void @@ -2267,6 +2274,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport) int lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { +#ifdef CONFIG_LPFC_NVME_INITIATOR int ret = 0; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; @@ -2340,7 +2348,6 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) rpinfo.port_role |= FC_PORT_ROLE_NVME_INITIATOR; rpinfo.port_name = wwn_to_u64(ndlp->nlp_portname.u.wwn); rpinfo.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn); - ret = nvme_fc_register_remoteport(localport, &rpinfo, &remote_port); if (!ret) { @@ -2376,6 +2383,9 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) ndlp->nlp_type, ndlp->nlp_DID, ndlp); } return ret; +#else + return 0; +#endif } /* lpfc_nvme_unregister_port - unbind the DID and port_role from this rport. @@ -2393,6 +2403,7 @@ lpfc_nvme_register_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) void lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) { +#ifdef CONFIG_LPFC_NVME_INITIATOR int ret; struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; @@ -2450,6 +2461,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) return; input_err: +#endif lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, "6168: State error: lport %p, rport%p FCID x%06x\n", vport->localport, ndlp->rport, ndlp->nlp_DID); diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 48ce9858bc11..a69ca6ea1d6c 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -671,9 +671,13 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; +#ifdef CONFIG_LPFC_NVME_TARGET error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, &phba->pcidev->dev, &phba->targetport); +#else + error = -ENOMEM; +#endif if (error) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, "6025 Cannot register NVME targetport " @@ -752,6 +756,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) { +#ifdef CONFIG_LPFC_NVME_TARGET struct lpfc_nvmet_tgtport *tgtp; if (phba->nvmet_support == 0) @@ -763,6 +768,7 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) wait_for_completion_timeout(&tgtp->tport_unreg_done, 5); } phba->targetport = NULL; +#endif } /** @@ -782,6 +788,7 @@ static void lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct hbq_dmabuf *nvmebuf) { +#ifdef CONFIG_LPFC_NVME_TARGET struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; struct lpfc_nvmet_rcv_ctx *ctxp; @@ -862,6 +869,7 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, atomic_inc(&tgtp->xmt_ls_abort); lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid); +#endif } /** @@ -883,6 +891,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, struct rqb_dmabuf *nvmebuf, uint64_t isr_timestamp) { +#ifdef CONFIG_LPFC_NVME_TARGET struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; @@ -988,6 +997,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */ lpfc_nvmet_rq_post(phba, NULL, &nvmebuf->hbuf); } +#endif } /** From 43140ca68d1a071ddbe92f10a3256e01701ae390 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:34 -0800 Subject: [PATCH 0642/1299] scsi: lpfc: Rename LPFC_MAX_EQ_DELAY to LPFC_MAX_EQ_DELAY_EQID_CNT Without apriori understanding of what the define is, the name gives a very different impression of what it is (a max delay value for an EQ). Rename the define so it reflects what it is: the number of EQ IDs that can be set in one instance of the MODIFY_EQ_DELAY mbx command. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_attr.c | 3 ++- drivers/scsi/lpfc/lpfc_hw4.h | 4 ++-- drivers/scsi/lpfc/lpfc_init.c | 7 ++----- drivers/scsi/lpfc/lpfc_sli.c | 5 ++++- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index b741dcb8ee64..fbd3a563be53 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -4457,7 +4457,8 @@ lpfc_fcp_imax_store(struct device *dev, struct device_attribute *attr, return -EINVAL; phba->cfg_fcp_imax = (uint32_t)val; - for (i = 0; i < phba->io_channel_irqs; i++) + + for (i = 0; i < phba->io_channel_irqs; i += LPFC_MAX_EQ_DELAY_EQID_CNT) lpfc_modify_hba_eq_delay(phba, i); return strlen(buf); diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index cfdb068a3bfc..15277705cb6b 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -1001,7 +1001,7 @@ struct eq_delay_info { uint32_t phase; uint32_t delay_multi; }; -#define LPFC_MAX_EQ_DELAY 8 +#define LPFC_MAX_EQ_DELAY_EQID_CNT 8 struct sgl_page_pairs { uint32_t sgl_pg0_addr_lo; @@ -1070,7 +1070,7 @@ struct lpfc_mbx_modify_eq_delay { union { struct { uint32_t num_eq; - struct eq_delay_info eq[LPFC_MAX_EQ_DELAY]; + struct eq_delay_info eq[LPFC_MAX_EQ_DELAY_EQID_CNT]; } request; struct { uint32_t word0; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 4fa21a9fd883..c883110178ea 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -8756,12 +8756,9 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) } } - /* - * Configure EQ delay multipier for interrupt coalescing using - * MODIFY_EQ_DELAY for all EQs created, LPFC_MAX_EQ_DELAY at a time. - */ - for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY) + for (qidx = 0; qidx < io_channel; qidx += LPFC_MAX_EQ_DELAY_EQID_CNT) lpfc_modify_hba_eq_delay(phba, qidx); + return 0; out_destroy: diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 8a606d0d2c79..618cdacf13dd 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -1,3 +1,4 @@ + /******************************************************************* * This file is part of the Emulex Linux Device Driver for * * Fibre Channel Host Bus Adapters. * @@ -13874,6 +13875,8 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) * @startq: The starting FCP EQ to modify * * This function sends an MODIFY_EQ_DELAY mailbox command to the HBA. + * The command allows up to LPFC_MAX_EQ_DELAY_EQID_CNT EQ ID's to be + * updated in one mailbox command. * * The @phba struct is used to send mailbox command to HBA. The @startq * is used to get the starting FCP EQ to change. @@ -13926,7 +13929,7 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq) eq_delay->u.request.eq[cnt].phase = 0; eq_delay->u.request.eq[cnt].delay_multi = dmult; cnt++; - if (cnt >= LPFC_MAX_EQ_DELAY) + if (cnt >= LPFC_MAX_EQ_DELAY_EQID_CNT) break; } eq_delay->u.request.num_eq = cnt; From da6b044a28fe603fe2c3fd908cda8150aa0abe74 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:35 -0800 Subject: [PATCH 0643/1299] scsi: lpfc: correct double print Correct a merge error that had debug data printed twice for the same element Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_debugfs.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 9f4798e9d938..913eed822cb8 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -3653,17 +3653,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.nvmels_cq; goto pass_check; } - /* NVME LS complete queue */ - if (phba->sli4_hba.nvmels_cq && - phba->sli4_hba.nvmels_cq->queue_id == queid) { - /* Sanity check */ - rc = lpfc_idiag_que_param_check( - phba->sli4_hba.nvmels_cq, index, count); - if (rc) - goto error_out; - idiag.ptr_private = phba->sli4_hba.nvmels_cq; - goto pass_check; - } /* FCP complete queue */ if (phba->sli4_hba.fcp_cq) { for (qidx = 0; qidx < phba->cfg_fcp_io_channel; @@ -3738,17 +3727,6 @@ lpfc_idiag_queacc_write(struct file *file, const char __user *buf, idiag.ptr_private = phba->sli4_hba.nvmels_wq; goto pass_check; } - /* NVME LS work queue */ - if (phba->sli4_hba.nvmels_wq && - phba->sli4_hba.nvmels_wq->queue_id == queid) { - /* Sanity check */ - rc = lpfc_idiag_que_param_check( - phba->sli4_hba.nvmels_wq, index, count); - if (rc) - goto error_out; - idiag.ptr_private = phba->sli4_hba.nvmels_wq; - goto pass_check; - } /* FCP work queue */ if (phba->sli4_hba.fcp_wq) { for (qidx = 0; qidx < phba->cfg_fcp_io_channel; From ba3bd6e2a9a2753439a1fd1fe39e8d5162fb3aa9 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:36 -0800 Subject: [PATCH 0644/1299] scsi: lpfc: remove dead sli3 nvme code Remove nvme teardown calls that should not be there on sli3 devices Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index c883110178ea..661bd25a404a 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -10446,12 +10446,7 @@ lpfc_pci_remove_one_s3(struct pci_dev *pdev) fc_remove_host(shost); scsi_remove_host(shost); - /* Perform ndlp cleanup on the physical port. The nvme and nvmet - * localports are destroyed after to cleanup all transport memory. - */ lpfc_cleanup(vport); - lpfc_nvmet_destroy_targetport(phba); - lpfc_nvme_destroy_localport(vport); /* * Bring down the SLI Layer. This step disable all interrupts, From cd46cdedb3238f1f878c42650ec05c6344c7083d Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:37 -0800 Subject: [PATCH 0645/1299] scsi: lpfc: correct rdp diag portnames NVME merge reverted diag port names to the physical port. They should be the vport. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_els.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index d260a1391baf..d9c61d030034 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -5177,15 +5177,15 @@ lpfc_rdp_res_speed(struct fc_rdp_port_speed_desc *desc, struct lpfc_hba *phba) static uint32_t lpfc_rdp_res_diag_port_names(struct fc_rdp_port_name_desc *desc, - struct lpfc_hba *phba) + struct lpfc_vport *vport) { desc->tag = cpu_to_be32(RDP_PORT_NAMES_DESC_TAG); - memcpy(desc->port_names.wwnn, phba->wwnn, + memcpy(desc->port_names.wwnn, &vport->fc_nodename, sizeof(desc->port_names.wwnn)); - memcpy(desc->port_names.wwpn, phba->wwpn, + memcpy(desc->port_names.wwpn, &vport->fc_portname, sizeof(desc->port_names.wwpn)); desc->length = cpu_to_be32(sizeof(desc->port_names)); @@ -5279,7 +5279,7 @@ lpfc_els_rdp_cmpl(struct lpfc_hba *phba, struct lpfc_rdp_context *rdp_context, len += lpfc_rdp_res_link_error((struct fc_rdp_link_error_status_desc *) (len + pcmd), &rdp_context->link_stat); len += lpfc_rdp_res_diag_port_names((struct fc_rdp_port_name_desc *) - (len + pcmd), phba); + (len + pcmd), vport); len += lpfc_rdp_res_attach_port_names((struct fc_rdp_port_name_desc *) (len + pcmd), vport, ndlp); len += lpfc_rdp_res_fec_desc((struct fc_fec_rdp_desc *)(len + pcmd), From 2ade92ae6d6572858acb2bde6d3664af3ad592e2 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:38 -0800 Subject: [PATCH 0646/1299] scsi: lpfc: code cleanups in NVME initiator base This patch addresses the smatch issues identified by Dan Carpenter in http://www.spinics.net/lists/linux-scsi/msg105663.html The issues are: drivers/scsi/lpfc/lpfc_hbadisc.c:316 lpfc_dev_loss_tmo_handler() warn: we tested 'vport->load_flag & 2' before and it was 'false' Action: removed item from test drivers/scsi/lpfc/lpfc_hbadisc.c:701 lpfc_work_done() warn: test_bit() takes a bit number Action: changed definition so bit number drivers/scsi/lpfc/lpfc_hbadisc.c:2206 lpfc_mbx_cmpl_fcf_scan_read_fcf_rec() error: uninitialized symbol 'vlan_id'. drivers/scsi/lpfc/lpfc_hbadisc.c:2582 lpfc_mbx_cmpl_fcf_rr_read_fcf_rec() error: uninitialized symbol 'vlan_id'. drivers/scsi/lpfc/lpfc_hbadisc.c:2683 lpfc_mbx_cmpl_read_fcf_rec() error: uninitialized symbol 'vlan_id'. Action: initilized value drivers/scsi/lpfc/lpfc_hbadisc.c:4025 lpfc_register_remote_port() error: we previously assumed 'rdata' could be null (see line 4023) Action: refactored check block drivers/scsi/lpfc/lpfc_hbadisc.c:4613 lpfc_sli4_dequeue_nport_iocbs() error: double unlock 'irq:' Action: removed inner irq reference Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc.h | 2 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 763f32dd2d23..257bbdd0f0b8 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -105,7 +105,7 @@ struct lpfc_sli2_slim; #define LPFC_MSIX_VECTORS 2 /* lpfc wait event data ready flag */ -#define LPFC_DATA_READY (1<<0) +#define LPFC_DATA_READY 0 /* bit 0 */ /* queue dump line buffer size */ #define LPFC_LBUF_SZ 128 diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index bd8635d303d2..180b072beef6 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -313,8 +313,7 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp) ndlp->nlp_state, ndlp->nlp_rpi); } - if (!(vport->load_flag & FC_UNLOADING) && - !(ndlp->nlp_flag & NLP_DELAY_TMO) && + if (!(ndlp->nlp_flag & NLP_DELAY_TMO) && !(ndlp->nlp_flag & NLP_NPR_2B_DISC) && (ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && (ndlp->nlp_state != NLP_STE_REG_LOGIN_ISSUE) && @@ -2175,7 +2174,7 @@ lpfc_mbx_cmpl_fcf_scan_read_fcf_rec(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq) uint32_t boot_flag, addr_mode; uint16_t fcf_index, next_fcf_index; struct lpfc_fcf_rec *fcf_rec = NULL; - uint16_t vlan_id; + uint16_t vlan_id = LPFC_FCOE_NULL_VID; bool select_new_fcf; int rc; @@ -4022,9 +4021,11 @@ lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) rdata = rport->dd_data; /* break the link before dropping the ref */ ndlp->rport = NULL; - if (rdata && rdata->pnode == ndlp) - lpfc_nlp_put(ndlp); - rdata->pnode = NULL; + if (rdata) { + if (rdata->pnode == ndlp) + lpfc_nlp_put(ndlp); + rdata->pnode = NULL; + } /* drop reference for earlier registeration */ put_device(&rport->dev); } @@ -4607,9 +4608,9 @@ lpfc_sli4_dequeue_nport_iocbs(struct lpfc_hba *phba, pring = qp->pring; if (!pring) continue; - spin_lock_irq(&pring->ring_lock); + spin_lock(&pring->ring_lock); __lpfc_dequeue_nport_iocbs(phba, ndlp, pring, dequeue_list); - spin_unlock_irq(&pring->ring_lock); + spin_unlock(&pring->ring_lock); } spin_unlock_irq(&phba->hbalock); } From b5ccc7d61c76006f839b41c6f15876342b46cb02 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:39 -0800 Subject: [PATCH 0647/1299] scsi: lpfc: code cleanups in NVME initiator discovery This patch addresses the smatch issues identified by Dan Carpenter in http://www.spinics.net/lists/linux-scsi/msg105665.html The issues are: drivers/scsi/lpfc/lpfc_ct.c:943 lpfc_cmpl_ct_cmd_gft_id() error: we previously assumed 'ndlp' could be null (see line 928) Action: moved under if check drivers/scsi/lpfc/lpfc_nvmet.c:1694 lpfc_nvmet_unsol_issue_abort() error: we previously assumed 'ndlp' could be null (see line 1690) Action: conditionalized arg in printf stmt drivers/scsi/lpfc/lpfc_nvmet.c:1792 lpfc_nvmet_sol_fcp_issue_abort() error: we previously assumed 'ndlp' could be null (see line 1788) Action: conditionalized arg in printf stmt Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_ct.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index c22bb3f887e1..d3e9af983015 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -939,8 +939,8 @@ lpfc_cmpl_ct_cmd_gft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, "FC4 x%08x, Data: x%08x x%08x\n", ndlp, did, ndlp->nlp_fc4_type, FC_TYPE_FCP, FC_TYPE_NVME); + ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; } - ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE; lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE); lpfc_issue_els_prli(vport, ndlp, 0); } else diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index a69ca6ea1d6c..b7739a554fe0 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1720,7 +1720,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, "6134 Drop ABTS - wrong NDLP state x%x.\n", - ndlp->nlp_state); + (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE); /* No failure to an ABTS request. */ return 0; @@ -1818,7 +1818,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, "6160 Drop ABTS - wrong NDLP state x%x.\n", - ndlp->nlp_state); + (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE); /* No failure to an ABTS request. */ return 0; From eeb810849a20e32aa1b60335e46ea5446ba07e1f Mon Sep 17 00:00:00 2001 From: James Smart Date: Sat, 4 Mar 2017 09:30:40 -0800 Subject: [PATCH 0648/1299] scsi: lpfc: revise version number to 11.2.0.10 Revise lpfc version number to 11.2.0.10 Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index 86c6c9b26b82..d4e95e28f4e3 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "11.2.0.7" +#define LPFC_DRIVER_VERSION "11.2.0.10" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ From 69d468f4775b0da504140d0ed5e8f25c6ac44e7a Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 21 Feb 2017 10:05:58 +1000 Subject: [PATCH 0649/1299] drm/nouveau/priv: punt messages to debug level Ideally we'd be able to keep these at a more obvious error level, as they're a good indication of us doing something wrong. However, NVIDIA's FECS/GPCCS firmware touches registers that trigger priv ring faults, and we can't do anything to fix that ourselves due to the need for them to be signed by NVIDIA. This issue was reported a while back, but hasn't been fixed, so, for now we will hide the messages to prevent spamming Optimus users with messages whenever the NVIDIA GPU is powered off and on again. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c | 6 +++--- drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c index 2c6b374f1420..d80dbc8f09b2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c @@ -30,7 +30,7 @@ gf100_ibus_intr_hub(struct nvkm_subdev *ibus, int i) u32 addr = nvkm_rd32(device, 0x122120 + (i * 0x0400)); u32 data = nvkm_rd32(device, 0x122124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x122128 + (i * 0x0400)); - nvkm_error(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat); + nvkm_debug(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat); nvkm_mask(device, 0x122128 + (i * 0x0400), 0x00000200, 0x00000000); } @@ -41,7 +41,7 @@ gf100_ibus_intr_rop(struct nvkm_subdev *ibus, int i) u32 addr = nvkm_rd32(device, 0x124120 + (i * 0x0400)); u32 data = nvkm_rd32(device, 0x124124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x124128 + (i * 0x0400)); - nvkm_error(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat); + nvkm_debug(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat); nvkm_mask(device, 0x124128 + (i * 0x0400), 0x00000200, 0x00000000); } @@ -52,7 +52,7 @@ gf100_ibus_intr_gpc(struct nvkm_subdev *ibus, int i) u32 addr = nvkm_rd32(device, 0x128120 + (i * 0x0400)); u32 data = nvkm_rd32(device, 0x128124 + (i * 0x0400)); u32 stat = nvkm_rd32(device, 0x128128 + (i * 0x0400)); - nvkm_error(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat); + nvkm_debug(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat); nvkm_mask(device, 0x128128 + (i * 0x0400), 0x00000200, 0x00000000); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c index c673853f3213..9025ed1bd2a9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk104.c @@ -30,7 +30,7 @@ gk104_ibus_intr_hub(struct nvkm_subdev *ibus, int i) u32 addr = nvkm_rd32(device, 0x122120 + (i * 0x0800)); u32 data = nvkm_rd32(device, 0x122124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x122128 + (i * 0x0800)); - nvkm_error(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat); + nvkm_debug(ibus, "HUB%d: %06x %08x (%08x)\n", i, addr, data, stat); nvkm_mask(device, 0x122128 + (i * 0x0800), 0x00000200, 0x00000000); } @@ -41,7 +41,7 @@ gk104_ibus_intr_rop(struct nvkm_subdev *ibus, int i) u32 addr = nvkm_rd32(device, 0x124120 + (i * 0x0800)); u32 data = nvkm_rd32(device, 0x124124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x124128 + (i * 0x0800)); - nvkm_error(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat); + nvkm_debug(ibus, "ROP%d: %06x %08x (%08x)\n", i, addr, data, stat); nvkm_mask(device, 0x124128 + (i * 0x0800), 0x00000200, 0x00000000); } @@ -52,7 +52,7 @@ gk104_ibus_intr_gpc(struct nvkm_subdev *ibus, int i) u32 addr = nvkm_rd32(device, 0x128120 + (i * 0x0800)); u32 data = nvkm_rd32(device, 0x128124 + (i * 0x0800)); u32 stat = nvkm_rd32(device, 0x128128 + (i * 0x0800)); - nvkm_error(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat); + nvkm_debug(ibus, "GPC%d: %06x %08x (%08x)\n", i, addr, data, stat); nvkm_mask(device, 0x128128 + (i * 0x0800), 0x00000200, 0x00000000); } From ba735d061dcbbb061721f302f1ef519c547d5bab Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 15 Dec 2016 14:40:25 +0900 Subject: [PATCH 0650/1299] drm/nouveau/secboot: make nvkm_secboot_falcon_name visible Make nvkm_secboot_falcon_name publicly visible as other subdevs will need to use it for debug messages. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h | 2 ++ drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h index 5dbd8aa4f8c2..d03a1b086723 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h @@ -34,6 +34,8 @@ enum nvkm_secboot_falcon { NVKM_SECBOOT_FALCON_INVALID = 0xffffffff, }; +extern const char *nvkm_secboot_falcon_name[]; + /** * @wpr_set: whether the WPR region is currently set */ diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h index 936a65f5658c..430b88ec74cf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h @@ -33,8 +33,6 @@ struct nvkm_secboot_func { int (*run_blob)(struct nvkm_secboot *, struct nvkm_gpuobj *); }; -extern const char *nvkm_secboot_falcon_name[]; - int nvkm_secboot_ctor(const struct nvkm_secboot_func *, struct nvkm_acr *, struct nvkm_device *, int, struct nvkm_secboot *); int nvkm_secboot_falcon_reset(struct nvkm_secboot *); From 485a20eff20a2cbc55c9061724356cae3df01781 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 17 Nov 2016 16:37:29 +0900 Subject: [PATCH 0651/1299] drm/nouveau/pmu: make sure the reset hook exists before running it Some PMU implementations (in particular the ones managed by secure boot) may not have a reset() hook. Make sure we don't crash in that case. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c index a73f690eb4b5..ad4ac4e8b29e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c @@ -85,7 +85,8 @@ nvkm_pmu_reset(struct nvkm_pmu *pmu) ); /* Reset. */ - pmu->func->reset(pmu); + if (pmu->func->reset) + pmu->func->reset(pmu); /* Wait for IMEM/DMEM scrubbing to be complete. */ nvkm_msec(device, 2000, From 6bd4b5233d2f0c14344d5a1ae4be8961c4f0edfa Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 26 Jan 2017 14:59:56 +0900 Subject: [PATCH 0652/1299] drm/nouveau/falcon: add missing context binding memory target This is not used currently, but is added for the sake of completeness. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/falcon/v1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c index b537f111f39c..e11fafd02aa1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c @@ -138,6 +138,7 @@ nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx) /* Set context */ switch (nvkm_memory_target(ctx->memory)) { case NVKM_MEM_TARGET_VRAM: inst_loc = 0; break; + case NVKM_MEM_TARGET_HOST: inst_loc = 2; break; case NVKM_MEM_TARGET_NCOH: inst_loc = 3; break; default: WARN_ON(1); From e444de56bc60ce0a388e328bcf86dcd0baabc32e Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jan 2017 12:11:15 +0900 Subject: [PATCH 0653/1299] drm/nouveau/falcon: protect against concurrent DMEM accesses The falcon library may be used concurrently, especially after the introduction of the msgqueue interface. Make it safe to use it that way. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h | 1 + drivers/gpu/drm/nouveau/nvkm/falcon/base.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h index 7e498e65b1e8..9384dff81494 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h @@ -19,6 +19,7 @@ struct nvkm_falcon { u32 addr; struct mutex mutex; + struct mutex dmem_mutex; const struct nvkm_subdev *user; u8 version; diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c index 4852f313762f..ee25fdc21e1f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c @@ -41,14 +41,22 @@ void nvkm_falcon_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start, u32 size, u8 port) { + mutex_lock(&falcon->dmem_mutex); + falcon->func->load_dmem(falcon, data, start, size, port); + + mutex_unlock(&falcon->dmem_mutex); } void nvkm_falcon_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, u8 port, void *data) { + mutex_lock(&falcon->dmem_mutex); + falcon->func->read_dmem(falcon, start, size, port, data); + + mutex_unlock(&falcon->dmem_mutex); } void @@ -166,6 +174,7 @@ nvkm_falcon_ctor(const struct nvkm_falcon_func *func, falcon->name = name; falcon->addr = addr; mutex_init(&falcon->mutex); + mutex_init(&falcon->dmem_mutex); reg = nvkm_falcon_rd32(falcon, 0x12c); falcon->version = reg & 0xf; From ca179c852a8befe76221d64dc47105726f64f065 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 14 Feb 2017 15:52:54 +0900 Subject: [PATCH 0654/1299] drm/nouveau/falcon: fix port offset for DMEM register DMEM registers are replicated with a stride of 8 bytes. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/falcon/v1.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c index e11fafd02aa1..0f5f0f6213b2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c @@ -72,18 +72,19 @@ nvkm_falcon_v1_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start, size -= rem; - nvkm_falcon_wr32(falcon, 0x1c0 + (port * 16), start | (0x1 << 24)); + nvkm_falcon_wr32(falcon, 0x1c0 + (port * 8), start | (0x1 << 24)); for (i = 0; i < size / 4; i++) - nvkm_falcon_wr32(falcon, 0x1c4, ((u32 *)data)[i]); + nvkm_falcon_wr32(falcon, 0x1c4 + (port * 8), ((u32 *)data)[i]); /* - * If size is not a multiple of 4, mask the last work to ensure garbage - * does not get read + * If size is not a multiple of 4, mask the last word to ensure garbage + * does not get written */ if (rem) { u32 extra = ((u32 *)data)[i]; - nvkm_falcon_wr32(falcon, 0x1c4, extra & (BIT(rem * 8) - 1)); + nvkm_falcon_wr32(falcon, 0x1c4 + (port * 8), + extra & (BIT(rem * 8) - 1)); } } @@ -96,16 +97,16 @@ nvkm_falcon_v1_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, size -= rem; - nvkm_falcon_wr32(falcon, 0x1c0 + (port * 16), start | (0x1 << 25)); + nvkm_falcon_wr32(falcon, 0x1c0 + (port * 8), start | (0x1 << 25)); for (i = 0; i < size / 4; i++) - ((u32 *)data)[i] = nvkm_falcon_rd32(falcon, 0x1c4); + ((u32 *)data)[i] = nvkm_falcon_rd32(falcon, 0x1c4 + (port * 8)); /* - * If size is not a multiple of 4, mask the last work to ensure garbage + * If size is not a multiple of 4, mask the last word to ensure garbage * does not get read */ if (rem) { - u32 extra = nvkm_falcon_rd32(falcon, 0x1c4); + u32 extra = nvkm_falcon_rd32(falcon, 0x1c4 + (port * 8)); for (i = size; i < size + rem; i++) { ((u8 *)data)[i] = (u8)(extra & 0xff); From 17c602e3760da48697ce8e1fb7d992629fa7e876 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 14 Feb 2017 15:55:23 +0900 Subject: [PATCH 0655/1299] drm/nouveau/falcon: fix IMEM port access All IMEM registers are duplicated per port. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/falcon/v1.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c index 0f5f0f6213b2..7085432dc861 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c @@ -40,8 +40,8 @@ nvkm_falcon_v1_load_imem(struct nvkm_falcon *falcon, void *data, u32 start, for (i = 0; i < size / 4; i++) { /* write new tag every 256B */ if ((i & 0x3f) == 0) - nvkm_falcon_wr32(falcon, 0x188, tag++); - nvkm_falcon_wr32(falcon, 0x184, ((u32 *)data)[i]); + nvkm_falcon_wr32(falcon, 0x188 + (port * 16), tag++); + nvkm_falcon_wr32(falcon, 0x184 + (port * 16), ((u32 *)data)[i]); } /* @@ -53,14 +53,15 @@ nvkm_falcon_v1_load_imem(struct nvkm_falcon *falcon, void *data, u32 start, /* write new tag every 256B */ if ((i & 0x3f) == 0) - nvkm_falcon_wr32(falcon, 0x188, tag++); - nvkm_falcon_wr32(falcon, 0x184, extra & (BIT(rem * 8) - 1)); + nvkm_falcon_wr32(falcon, 0x188 + (port * 16), tag++); + nvkm_falcon_wr32(falcon, 0x184 + (port * 16), + extra & (BIT(rem * 8) - 1)); ++i; } /* code must be padded to 0x40 words */ for (; i & 0x3f; i++) - nvkm_falcon_wr32(falcon, 0x184, 0); + nvkm_falcon_wr32(falcon, 0x184 + (port * 16), 0); } static void From 489a5fe868f1d1b09d1c1337424c09e686437c3c Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 16 Dec 2016 18:00:44 +0900 Subject: [PATCH 0656/1299] drm/nouveau/secboot: remove unused hook Remove a leftover that became obsolete with the falcon interface. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/nvkm/subdev/secboot/acr.h | 2 -- .../nouveau/nvkm/subdev/secboot/acr_r352.c | 27 ------------------- 2 files changed, 29 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h index 97795b342b6f..58eb20a69e9c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h @@ -41,8 +41,6 @@ struct nvkm_acr_func { struct nvkm_gpuobj *, u64); int (*reset)(struct nvkm_acr *, struct nvkm_secboot *, enum nvkm_secboot_falcon); - int (*start)(struct nvkm_acr *, struct nvkm_secboot *, - enum nvkm_secboot_falcon); }; /** diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 1aa37ea18580..b1e565237575 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -823,32 +823,6 @@ acr_r352_reset(struct nvkm_acr *_acr, struct nvkm_secboot *sb, return 0; } -static int -acr_r352_start(struct nvkm_acr *_acr, struct nvkm_secboot *sb, - enum nvkm_secboot_falcon falcon) -{ - struct acr_r352 *acr = acr_r352(_acr); - const struct nvkm_subdev *subdev = &sb->subdev; - int base; - - switch (falcon) { - case NVKM_SECBOOT_FALCON_FECS: - base = 0x409000; - break; - case NVKM_SECBOOT_FALCON_GPCCS: - base = 0x41a000; - break; - default: - nvkm_error(subdev, "cannot start unhandled falcon!\n"); - return -EINVAL; - } - - nvkm_wr32(subdev->device, base + 0x130, 0x00000002); - acr->falcon_state[falcon] = RUNNING; - - return 0; -} - static int acr_r352_fini(struct nvkm_acr *_acr, struct nvkm_secboot *sb, bool suspend) { @@ -906,7 +880,6 @@ acr_r352_base_func = { .fini = acr_r352_fini, .load = acr_r352_load, .reset = acr_r352_reset, - .start = acr_r352_start, }; struct nvkm_acr * From a335f078df6006bd7694a1480e7dfbcf7e0de00c Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 23 Jan 2017 12:48:09 +0900 Subject: [PATCH 0657/1299] drm/nouveau/secboot: make sure requested falcons are supported Check at contruction time that we have support for all the LS firmwares asked by the caller. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index b1e565237575..372d29521fd8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -888,6 +888,13 @@ acr_r352_new_(const struct acr_r352_func *func, unsigned long managed_falcons) { struct acr_r352 *acr; + int i; + + /* Check that all requested falcons are supported */ + for_each_set_bit(i, &managed_falcons, NVKM_SECBOOT_FALCON_END) { + if (!func->ls_func[i]) + return ERR_PTR(-ENOTSUPP); + } acr = kzalloc(sizeof(*acr), GFP_KERNEL); if (!acr) From 3e8fbe3191d1fd94d4b25f72d383c4db7309ca4a Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 31 Jan 2017 18:16:08 +0900 Subject: [PATCH 0658/1299] drm/nouveau/secboot: fix WPR address to be 64-bit The WPR address parameter of the ls_write_wpr hook was defined as a u32, which will very likely overflow on boards with more than 4GB VRAM. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 372d29521fd8..32de99196ec0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -341,7 +341,7 @@ acr_r352_ls_fill_headers(struct acr_r352 *acr, struct list_head *imgs) */ int acr_r352_ls_write_wpr(struct acr_r352 *acr, struct list_head *imgs, - struct nvkm_gpuobj *wpr_blob, u32 wpr_addr) + struct nvkm_gpuobj *wpr_blob, u64 wpr_addr) { struct ls_ucode_img *_img; u32 pos = 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h index ad5923b0fd3c..a5ec71aecaa2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h @@ -185,7 +185,7 @@ struct acr_r352_func { enum nvkm_secboot_falcon); int (*ls_fill_headers)(struct acr_r352 *, struct list_head *); int (*ls_write_wpr)(struct acr_r352 *, struct list_head *, - struct nvkm_gpuobj *, u32); + struct nvkm_gpuobj *, u64); const struct acr_r352_ls_func *ls_func[NVKM_SECBOOT_FALCON_END]; }; @@ -245,6 +245,6 @@ struct ls_ucode_img *acr_r352_ls_ucode_img_load(const struct acr_r352 *, enum nvkm_secboot_falcon); int acr_r352_ls_fill_headers(struct acr_r352 *, struct list_head *); int acr_r352_ls_write_wpr(struct acr_r352 *, struct list_head *, - struct nvkm_gpuobj *, u32); + struct nvkm_gpuobj *, u64); #endif From 098ee77224bc76026e736e05407ff46c78f13d22 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 26 Jan 2017 14:55:56 +0900 Subject: [PATCH 0659/1299] drm/nouveau/secboot: fix WPR region alignment A WPR region smaller than 256K will result in secure boot failure. Adjust the minimal size. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 32de99196ec0..205bf453ad47 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -381,8 +381,8 @@ acr_r352_ls_write_wpr(struct acr_r352 *acr, struct list_head *imgs, return 0; } -/* Both size and address of WPR need to be 128K-aligned */ -#define WPR_ALIGNMENT 0x20000 +/* Both size and address of WPR need to be 256K-aligned */ +#define WPR_ALIGNMENT 0x40000 /** * acr_r352_prepare_ls_blob() - prepare the LS blob * From 913b97f94478db1c93cc5a1f07fa03ee4e2d5f8b Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 24 Jan 2017 19:29:39 +0900 Subject: [PATCH 0660/1299] drm/nouveau/secboot: prevent address trimming Using 32-bit integers would trim the WPR address if it is allocated above 4GB. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 205bf453ad47..424179589bc4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -502,8 +502,8 @@ acr_r352_fixup_hs_desc(struct acr_r352 *acr, struct nvkm_secboot *sb, /* WPR region information if WPR is not fixed */ if (sb->wpr_size == 0) { - u32 wpr_start = ls_blob->addr; - u32 wpr_end = wpr_start + ls_blob->size; + u64 wpr_start = ls_blob->addr; + u64 wpr_end = wpr_start + ls_blob->size; desc->wpr_region_id = 1; desc->regions.no_regions = 2; From 5c4e0602d692fee7310cf468157d6056f61196eb Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 15 Nov 2016 16:29:02 +0900 Subject: [PATCH 0661/1299] drm/nouveau/secboot: fix usage of hsf_load_header Offsets were not properly computed. This went unnoticed because we are only using one app for now. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../nouveau/nvkm/subdev/secboot/acr_r352.c | 6 ++-- .../nouveau/nvkm/subdev/secboot/acr_r352.h | 33 ++++++++++++++----- .../nouveau/nvkm/subdev/secboot/acr_r361.c | 4 +-- 3 files changed, 30 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 424179589bc4..f7946ed01acb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -533,8 +533,8 @@ acr_r352_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc, bl_desc->code_dma_base = lower_32_bits(addr_code); bl_desc->non_sec_code_off = hdr->non_sec_code_off; bl_desc->non_sec_code_size = hdr->non_sec_code_size; - bl_desc->sec_code_off = hdr->app[0].sec_code_off; - bl_desc->sec_code_size = hdr->app[0].sec_code_size; + bl_desc->sec_code_off = hsf_load_header_app_off(hdr, 0); + bl_desc->sec_code_size = hsf_load_header_app_size(hdr, 0); bl_desc->code_entry_point = 0; bl_desc->data_dma_base = lower_32_bits(addr_data); bl_desc->data_size = hdr->data_size; @@ -589,7 +589,7 @@ acr_r352_prepare_hs_blob(struct acr_r352 *acr, struct nvkm_secboot *sb, goto cleanup; } memcpy(load_header, load_hdr, sizeof(*load_header) + - (sizeof(load_hdr->app[0]) * load_hdr->num_apps)); + (sizeof(load_hdr->apps[0]) * 2 * load_hdr->num_apps)); /* Create ACR blob and copy HS data to it */ ret = nvkm_gpuobj_new(subdev->device, ALIGN(hsbin_hdr->data_size, 256), diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h index a5ec71aecaa2..564413c96f55 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h @@ -133,11 +133,6 @@ struct ls_ucode_img_r352 { * HS blob structures */ -struct hsf_load_header_app { - u32 sec_code_off; - u32 sec_code_size; -}; - /** * struct hsf_load_header - HS firmware load header */ @@ -147,9 +142,31 @@ struct hsf_load_header { u32 data_dma_base; u32 data_size; u32 num_apps; - struct hsf_load_header_app app[0]; + /* + * Organized as follows: + * - app0_code_off + * - app1_code_off + * - ... + * - appn_code_off + * - app0_code_size + * - app1_code_size + * - ... + */ + u32 apps[0]; }; +static inline u32 +hsf_load_header_app_off(const struct hsf_load_header *hdr, u32 app) +{ + return hdr->apps[app]; +} + +static inline u32 +hsf_load_header_app_size(const struct hsf_load_header *hdr, u32 app) +{ + return hdr->apps[hdr->num_apps + app]; +} + /** * struct acr_r352_ls_func - manages a single LS firmware * @@ -204,14 +221,14 @@ struct acr_r352 { struct nvkm_gpuobj *load_blob; struct { struct hsf_load_header load_bl_header; - struct hsf_load_header_app __load_apps[ACR_R352_MAX_APPS]; + u32 __load_apps[ACR_R352_MAX_APPS * 2]; }; /* HS FW - unlock WPR region (dGPU only) */ struct nvkm_gpuobj *unload_blob; struct { struct hsf_load_header unload_bl_header; - struct hsf_load_header_app __unload_apps[ACR_R352_MAX_APPS]; + u32 __unload_apps[ACR_R352_MAX_APPS * 2]; }; /* HS bootloader */ diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c index f0aff1d98474..ebb185e0b4a8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c @@ -94,8 +94,8 @@ acr_r361_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc, bl_desc->code_dma_base = u64_to_flcn64(offset); bl_desc->non_sec_code_off = hdr->non_sec_code_off; bl_desc->non_sec_code_size = hdr->non_sec_code_size; - bl_desc->sec_code_off = hdr->app[0].sec_code_off; - bl_desc->sec_code_size = hdr->app[0].sec_code_size; + bl_desc->sec_code_off = hsf_load_header_app_off(hdr, 0); + bl_desc->sec_code_size = hsf_load_header_app_size(hdr, 0); bl_desc->code_entry_point = 0; bl_desc->data_dma_base = u64_to_flcn64(offset + hdr->data_dma_base); bl_desc->data_size = hdr->data_size; From 48eee549da0cd95456f23489fe9830a69c3565df Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 15 Nov 2016 15:02:27 +0900 Subject: [PATCH 0662/1299] drm/nouveau/secboot: store ucode offset in base image structure This allows the bootloader descriptor generation code to not rely on specialized ls_ucode_img structures, making it reusable in other instances. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c | 9 ++++----- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c | 7 +++---- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h | 2 ++ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index f7946ed01acb..8f32d113f5af 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -95,15 +95,14 @@ struct acr_r352_flcn_bl_desc { */ static void acr_r352_generate_flcn_bl_desc(const struct nvkm_acr *acr, - const struct ls_ucode_img *_img, u64 wpr_addr, + const struct ls_ucode_img *img, u64 wpr_addr, void *_desc) { - struct ls_ucode_img_r352 *img = ls_ucode_img_r352(_img); struct acr_r352_flcn_bl_desc *desc = _desc; - const struct ls_ucode_img_desc *pdesc = &_img->ucode_desc; + const struct ls_ucode_img_desc *pdesc = &img->ucode_desc; u64 base, addr_code, addr_data; - base = wpr_addr + img->lsb_header.ucode_off + pdesc->app_start_offset; + base = wpr_addr + img->ucode_off + pdesc->app_start_offset; addr_code = (base + pdesc->app_resident_code_offset) >> 8; addr_data = (base + pdesc->app_resident_data_offset) >> 8; @@ -255,7 +254,7 @@ acr_r352_ls_img_fill_headers(struct acr_r352 *acr, * image size */ offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN); - lhdr->ucode_off = offset; + _img->ucode_off = lhdr->ucode_off = offset; offset += _img->ucode_size; /* diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c index ebb185e0b4a8..ea6230cfb870 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c @@ -63,15 +63,14 @@ struct acr_r361_flcn_bl_desc { static void acr_r361_generate_flcn_bl_desc(const struct nvkm_acr *acr, - const struct ls_ucode_img *_img, u64 wpr_addr, + const struct ls_ucode_img *img, u64 wpr_addr, void *_desc) { - struct ls_ucode_img_r352 *img = ls_ucode_img_r352(_img); struct acr_r361_flcn_bl_desc *desc = _desc; - const struct ls_ucode_img_desc *pdesc = &img->base.ucode_desc; + const struct ls_ucode_img_desc *pdesc = &img->ucode_desc; u64 base, addr_code, addr_data; - base = wpr_addr + img->lsb_header.ucode_off + pdesc->app_start_offset; + base = wpr_addr + img->ucode_off + pdesc->app_start_offset; addr_code = base + pdesc->app_resident_code_offset; addr_data = base + pdesc->app_resident_data_offset; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h index 00886cee57eb..181c0d80d194 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h @@ -83,6 +83,7 @@ struct ls_ucode_img_desc { * @ucode_desc: loaded or generated map of ucode_data * @ucode_data: firmware payload (code and data) * @ucode_size: size in bytes of data in ucode_data + * @ucode_off: offset of the ucode in ucode_data * @sig: signature for this firmware * @sig:size: size of the signature in bytes * @@ -97,6 +98,7 @@ struct ls_ucode_img { struct ls_ucode_img_desc ucode_desc; u8 *ucode_data; u32 ucode_size; + u32 ucode_off; u8 *sig; u32 sig_size; From f7152a232c6f72c196ec6c64ae5ebb040396ec31 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 20 Jan 2017 11:59:23 +0900 Subject: [PATCH 0663/1299] drm/nouveau/secboot: make specialized ls_ucode_img struct private This structure does not need to be shared anymore. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../nouveau/nvkm/subdev/secboot/acr_r352.c | 90 +++++++++++++++++ .../nouveau/nvkm/subdev/secboot/acr_r352.h | 96 ------------------- 2 files changed, 90 insertions(+), 96 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 8f32d113f5af..60c7b9e0fc47 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -165,6 +165,96 @@ struct hsflcn_acr_desc { * Low-secure blob creation */ +/** + * struct acr_r352_lsf_lsb_header - LS firmware header + * @signature: signature to verify the firmware against + * @ucode_off: offset of the ucode blob in the WPR region. The ucode + * blob contains the bootloader, code and data of the + * LS falcon + * @ucode_size: size of the ucode blob, including bootloader + * @data_size: size of the ucode blob data + * @bl_code_size: size of the bootloader code + * @bl_imem_off: offset in imem of the bootloader + * @bl_data_off: offset of the bootloader data in WPR region + * @bl_data_size: size of the bootloader data + * @app_code_off: offset of the app code relative to ucode_off + * @app_code_size: size of the app code + * @app_data_off: offset of the app data relative to ucode_off + * @app_data_size: size of the app data + * @flags: flags for the secure bootloader + * + * This structure is written into the WPR region for each managed falcon. Each + * instance is referenced by the lsb_offset member of the corresponding + * lsf_wpr_header. + */ +struct acr_r352_lsf_lsb_header { + /** + * LS falcon signatures + * @prd_keys: signature to use in production mode + * @dgb_keys: signature to use in debug mode + * @b_prd_present: whether the production key is present + * @b_dgb_present: whether the debug key is present + * @falcon_id: ID of the falcon the ucode applies to + */ + struct { + u8 prd_keys[2][16]; + u8 dbg_keys[2][16]; + u32 b_prd_present; + u32 b_dbg_present; + u32 falcon_id; + } signature; + u32 ucode_off; + u32 ucode_size; + u32 data_size; + u32 bl_code_size; + u32 bl_imem_off; + u32 bl_data_off; + u32 bl_data_size; + u32 app_code_off; + u32 app_code_size; + u32 app_data_off; + u32 app_data_size; + u32 flags; +}; + +/** + * struct acr_r352_lsf_wpr_header - LS blob WPR Header + * @falcon_id: LS falcon ID + * @lsb_offset: offset of the lsb_lsf_header in the WPR region + * @bootstrap_owner: secure falcon reponsible for bootstrapping the LS falcon + * @lazy_bootstrap: skip bootstrapping by ACR + * @status: bootstrapping status + * + * An array of these is written at the beginning of the WPR region, one for + * each managed falcon. The array is terminated by an instance which falcon_id + * is LSF_FALCON_ID_INVALID. + */ +struct acr_r352_lsf_wpr_header { + u32 falcon_id; + u32 lsb_offset; + u32 bootstrap_owner; + u32 lazy_bootstrap; + u32 status; +#define LSF_IMAGE_STATUS_NONE 0 +#define LSF_IMAGE_STATUS_COPY 1 +#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED 2 +#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED 3 +#define LSF_IMAGE_STATUS_VALIDATION_DONE 4 +#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED 5 +#define LSF_IMAGE_STATUS_BOOTSTRAP_READY 6 +}; + +/** + * struct ls_ucode_img_r352 - ucode image augmented with r352 headers + */ +struct ls_ucode_img_r352 { + struct ls_ucode_img base; + + struct acr_r352_lsf_wpr_header wpr_header; + struct acr_r352_lsf_lsb_header lsb_header; +}; +#define ls_ucode_img_r352(i) container_of(i, struct ls_ucode_img_r352, base) + /** * ls_ucode_img_load() - create a lsf_ucode_img and load it */ diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h index 564413c96f55..6634fb0d982d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h @@ -29,105 +29,9 @@ struct ls_ucode_img; #define ACR_R352_MAX_APPS 8 -/* - * - * LS blob structures - * - */ - -/** - * struct acr_r352_lsf_lsb_header - LS firmware header - * @signature: signature to verify the firmware against - * @ucode_off: offset of the ucode blob in the WPR region. The ucode - * blob contains the bootloader, code and data of the - * LS falcon - * @ucode_size: size of the ucode blob, including bootloader - * @data_size: size of the ucode blob data - * @bl_code_size: size of the bootloader code - * @bl_imem_off: offset in imem of the bootloader - * @bl_data_off: offset of the bootloader data in WPR region - * @bl_data_size: size of the bootloader data - * @app_code_off: offset of the app code relative to ucode_off - * @app_code_size: size of the app code - * @app_data_off: offset of the app data relative to ucode_off - * @app_data_size: size of the app data - * @flags: flags for the secure bootloader - * - * This structure is written into the WPR region for each managed falcon. Each - * instance is referenced by the lsb_offset member of the corresponding - * lsf_wpr_header. - */ -struct acr_r352_lsf_lsb_header { - /** - * LS falcon signatures - * @prd_keys: signature to use in production mode - * @dgb_keys: signature to use in debug mode - * @b_prd_present: whether the production key is present - * @b_dgb_present: whether the debug key is present - * @falcon_id: ID of the falcon the ucode applies to - */ - struct { - u8 prd_keys[2][16]; - u8 dbg_keys[2][16]; - u32 b_prd_present; - u32 b_dbg_present; - u32 falcon_id; - } signature; - u32 ucode_off; - u32 ucode_size; - u32 data_size; - u32 bl_code_size; - u32 bl_imem_off; - u32 bl_data_off; - u32 bl_data_size; - u32 app_code_off; - u32 app_code_size; - u32 app_data_off; - u32 app_data_size; - u32 flags; #define LSF_FLAG_LOAD_CODE_AT_0 1 #define LSF_FLAG_DMACTL_REQ_CTX 4 #define LSF_FLAG_FORCE_PRIV_LOAD 8 -}; - -/** - * struct acr_r352_lsf_wpr_header - LS blob WPR Header - * @falcon_id: LS falcon ID - * @lsb_offset: offset of the lsb_lsf_header in the WPR region - * @bootstrap_owner: secure falcon reponsible for bootstrapping the LS falcon - * @lazy_bootstrap: skip bootstrapping by ACR - * @status: bootstrapping status - * - * An array of these is written at the beginning of the WPR region, one for - * each managed falcon. The array is terminated by an instance which falcon_id - * is LSF_FALCON_ID_INVALID. - */ -struct acr_r352_lsf_wpr_header { - u32 falcon_id; - u32 lsb_offset; - u32 bootstrap_owner; - u32 lazy_bootstrap; - u32 status; -#define LSF_IMAGE_STATUS_NONE 0 -#define LSF_IMAGE_STATUS_COPY 1 -#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED 2 -#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED 3 -#define LSF_IMAGE_STATUS_VALIDATION_DONE 4 -#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED 5 -#define LSF_IMAGE_STATUS_BOOTSTRAP_READY 6 -}; - -/** - * struct ls_ucode_img_r352 - ucode image augmented with r352 headers - */ -struct ls_ucode_img_r352 { - struct ls_ucode_img base; - - struct acr_r352_lsf_wpr_header wpr_header; - struct acr_r352_lsf_lsb_header lsb_header; -}; -#define ls_ucode_img_r352(i) container_of(i, struct ls_ucode_img_r352, base) - /* * HS blob structures From 9bb55bb79f480d6422db55f0ed5052d3b0a2d22e Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 1 Nov 2016 15:05:03 +0900 Subject: [PATCH 0664/1299] drm/nouveau/secboot: abstract fixup_hs_desc function As different firmare versions use different HS descriptor formats, we need to abstract this part as well. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c | 8 +++++--- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h | 3 +++ drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 60c7b9e0fc47..e6edde0bdf90 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -583,10 +583,11 @@ acr_r352_hsf_patch_signature(struct nvkm_secboot *sb, void *acr_image) memcpy(hs_data + fw_hdr->patch_loc, sig + fw_hdr->patch_sig, sig_size); } -static void +void acr_r352_fixup_hs_desc(struct acr_r352 *acr, struct nvkm_secboot *sb, - struct hsflcn_acr_desc *desc) + void *_desc) { + struct hsflcn_acr_desc *desc = _desc; struct nvkm_gpuobj *ls_blob = acr->ls_blob; /* WPR region information if WPR is not fixed */ @@ -668,7 +669,7 @@ acr_r352_prepare_hs_blob(struct acr_r352 *acr, struct nvkm_secboot *sb, struct hsflcn_acr_desc *desc; desc = acr_data + load_hdr->data_dma_base; - acr_r352_fixup_hs_desc(acr, sb, desc); + acr->func->fixup_hs_desc(acr, sb, desc); } if (load_hdr->num_apps > ACR_R352_MAX_APPS) { @@ -952,6 +953,7 @@ acr_r352_ls_gpccs_func = { const struct acr_r352_func acr_r352_func = { + .fixup_hs_desc = acr_r352_fixup_hs_desc, .generate_hs_bl_desc = acr_r352_generate_hs_bl_desc, .hs_bl_desc_size = sizeof(struct acr_r352_flcn_bl_desc), .ls_ucode_img_load = acr_r352_ls_ucode_img_load, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h index 6634fb0d982d..3e86d45eabd9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h @@ -100,6 +100,7 @@ struct acr_r352; struct acr_r352_func { void (*generate_hs_bl_desc)(const struct hsf_load_header *, void *, u64); + void (*fixup_hs_desc)(struct acr_r352 *, struct nvkm_secboot *, void *); u32 hs_bl_desc_size; struct ls_ucode_img *(*ls_ucode_img_load)(const struct acr_r352 *, @@ -168,4 +169,6 @@ int acr_r352_ls_fill_headers(struct acr_r352 *, struct list_head *); int acr_r352_ls_write_wpr(struct acr_r352 *, struct list_head *, struct nvkm_gpuobj *, u64); +void acr_r352_fixup_hs_desc(struct acr_r352 *, struct nvkm_secboot *, void *); + #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c index ea6230cfb870..8561037d4cb7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c @@ -118,6 +118,7 @@ acr_r361_ls_gpccs_func = { const struct acr_r352_func acr_r361_func = { + .fixup_hs_desc = acr_r352_fixup_hs_desc, .generate_hs_bl_desc = acr_r361_generate_hs_bl_desc, .hs_bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc), .ls_ucode_img_load = acr_r352_ls_ucode_img_load, From 0117b3369f669e6f6ab9cc408358ba745dfc6d26 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 26 Oct 2016 15:15:42 +0900 Subject: [PATCH 0665/1299] drm/nouveau/secboot: add LS firmware post-run hooks Add the ability for LS firmwares to declare a post-run hook that is invoked right after the HS firmware is executed. This allows them to e.g. write some initialization data into the falcon's DMEM. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c | 11 +++++++++++ .../gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index e6edde0bdf90..0a35d35e2f46 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -852,6 +852,8 @@ acr_r352_shutdown(struct acr_r352 *acr, struct nvkm_secboot *sb) static int acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) { + unsigned long managed_falcons = acr->base.managed_falcons; + int falcon_id; int ret; if (sb->wpr_set) @@ -872,6 +874,15 @@ acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) sb->wpr_set = true; + /* Run LS firmwares post_run hooks */ + for_each_set_bit(falcon_id, &managed_falcons, NVKM_SECBOOT_FALCON_END) { + const struct acr_r352_ls_func *func = + acr->func->ls_func[falcon_id]; + + if (func->post_run) + func->post_run(&acr->base, sb); + } + return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h index 3e86d45eabd9..9ca23c1e3aa9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h @@ -78,6 +78,7 @@ hsf_load_header_app_size(const struct hsf_load_header *hdr, u32 app) * @generate_bl_desc: function called on a block of bl_desc_size to generate the * proper bootloader descriptor for this LS firmware * @bl_desc_size: size of the bootloader descriptor + * @post_run: hook called right after the ACR is executed * @lhdr_flags: LS flags */ struct acr_r352_ls_func { @@ -85,6 +86,7 @@ struct acr_r352_ls_func { void (*generate_bl_desc)(const struct nvkm_acr *, const struct ls_ucode_img *, u64, void *); u32 bl_desc_size; + void (*post_run)(const struct nvkm_acr *, const struct nvkm_secboot *); u32 lhdr_flags; }; From 9b536e9d525191cab931c997556af6b9b4637164 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jan 2017 12:52:50 +0900 Subject: [PATCH 0666/1299] drm/nouveau/falcon: add msgqueue interface A message queue firmware implements a specific protocol allowing the host to send "commands" to a falcon, and the falcon to reply using "messages". This patch implements the common part of this protocol and defines the interface that the host can use. Due to the way the firmware is developped internally at NVIDIA (where kernel driver and firmware evolve in lockstep), firmwares taken at different points in time can have frustratingly subtle differences that must be taken into account. This code is architectured to make implementing such differences as easy as possible. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/core/msgqueue.h | 47 ++ drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/falcon/msgqueue.c | 544 ++++++++++++++++++ .../gpu/drm/nouveau/nvkm/falcon/msgqueue.h | 204 +++++++ 4 files changed, 796 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/include/nvkm/core/msgqueue.h create mode 100644 drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.h diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/msgqueue.h b/drivers/gpu/drm/nouveau/include/nvkm/core/msgqueue.h new file mode 100644 index 000000000000..fac0824197f1 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/msgqueue.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NVKM_CORE_MSGQUEUE_H +#define __NVKM_CORE_MSGQUEUE_H + +#include + +struct nvkm_falcon; +struct nvkm_msgqueue; +enum nvkm_secboot_falcon; + +/* Hopefully we will never have firmware arguments larger than that... */ +#define NVKM_MSGQUEUE_CMDLINE_SIZE 0x100 + +int nvkm_msgqueue_new(u32, struct nvkm_falcon *, struct nvkm_msgqueue **); +void nvkm_msgqueue_del(struct nvkm_msgqueue **); +void nvkm_msgqueue_recv(struct nvkm_msgqueue *); +int nvkm_msgqueue_reinit(struct nvkm_msgqueue *); + +/* useful if we run a NVIDIA-signed firmware */ +void nvkm_msgqueue_write_cmdline(struct nvkm_msgqueue *, void *); + +/* interface to ACR unit running on falcon (NVIDIA signed firmware) */ +int nvkm_msgqueue_acr_boot_falcon(struct nvkm_msgqueue *, + enum nvkm_secboot_falcon); + +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild index 584863db9bfc..305b4391aeea 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild @@ -1,2 +1,3 @@ nvkm-y += nvkm/falcon/base.o nvkm-y += nvkm/falcon/v1.o +nvkm-y += nvkm/falcon/msgqueue.o diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c new file mode 100644 index 000000000000..b2de33fc7d04 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c @@ -0,0 +1,544 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "msgqueue.h" +#include + +#include + + +#define HDR_SIZE sizeof(struct nvkm_msgqueue_hdr) +#define QUEUE_ALIGNMENT 4 +/* max size of the messages we can receive */ +#define MSG_BUF_SIZE 128 + +static int +msg_queue_open(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_queue *queue) +{ + struct nvkm_falcon *falcon = priv->falcon; + + mutex_lock(&queue->mutex); + + queue->position = nvkm_falcon_rd32(falcon, queue->tail_reg); + + return 0; +} + +static void +msg_queue_close(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_queue *queue, + bool commit) +{ + struct nvkm_falcon *falcon = priv->falcon; + + if (commit) + nvkm_falcon_wr32(falcon, queue->tail_reg, queue->position); + + mutex_unlock(&queue->mutex); +} + +static bool +msg_queue_empty(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_queue *queue) +{ + struct nvkm_falcon *falcon = priv->falcon; + u32 head, tail; + + head = nvkm_falcon_rd32(falcon, queue->head_reg); + tail = nvkm_falcon_rd32(falcon, queue->tail_reg); + + return head == tail; +} + +static int +msg_queue_pop(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_queue *queue, + void *data, u32 size) +{ + struct nvkm_falcon *falcon = priv->falcon; + const struct nvkm_subdev *subdev = priv->falcon->owner; + u32 head, tail, available; + + head = nvkm_falcon_rd32(falcon, queue->head_reg); + /* has the buffer looped? */ + if (head < queue->position) + queue->position = queue->offset; + + tail = queue->position; + + available = head - tail; + + if (available == 0) { + nvkm_warn(subdev, "no message data available\n"); + return 0; + } + + if (size > available) { + nvkm_warn(subdev, "message data smaller than read request\n"); + size = available; + } + + nvkm_falcon_read_dmem(priv->falcon, tail, size, 0, data); + queue->position += ALIGN(size, QUEUE_ALIGNMENT); + + return size; +} + +static int +msg_queue_read(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_queue *queue, + struct nvkm_msgqueue_hdr *hdr) +{ + const struct nvkm_subdev *subdev = priv->falcon->owner; + int err; + + err = msg_queue_open(priv, queue); + if (err) { + nvkm_error(subdev, "fail to open queue %d\n", queue->index); + return err; + } + + if (msg_queue_empty(priv, queue)) { + err = 0; + goto close; + } + + err = msg_queue_pop(priv, queue, hdr, HDR_SIZE); + if (err >= 0 && err != HDR_SIZE) + err = -EINVAL; + if (err < 0) { + nvkm_error(subdev, "failed to read message header: %d\n", err); + goto close; + } + + if (hdr->size > MSG_BUF_SIZE) { + nvkm_error(subdev, "message too big (%d bytes)\n", hdr->size); + err = -ENOSPC; + goto close; + } + + if (hdr->size > HDR_SIZE) { + u32 read_size = hdr->size - HDR_SIZE; + + err = msg_queue_pop(priv, queue, (hdr + 1), read_size); + if (err >= 0 && err != read_size) + err = -EINVAL; + if (err < 0) { + nvkm_error(subdev, "failed to read message: %d\n", err); + goto close; + } + } + +close: + msg_queue_close(priv, queue, (err >= 0)); + + return err; +} + +static bool +cmd_queue_has_room(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_queue *queue, + u32 size, bool *rewind) +{ + struct nvkm_falcon *falcon = priv->falcon; + u32 head, tail, free; + + size = ALIGN(size, QUEUE_ALIGNMENT); + + head = nvkm_falcon_rd32(falcon, queue->head_reg); + tail = nvkm_falcon_rd32(falcon, queue->tail_reg); + + if (head >= tail) { + free = queue->offset + queue->size - head; + free -= HDR_SIZE; + + if (size > free) { + *rewind = true; + head = queue->offset; + } + } + + if (head < tail) + free = tail - head - 1; + + return size <= free; +} + +static int +cmd_queue_push(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_queue *queue, + void *data, u32 size) +{ + nvkm_falcon_load_dmem(priv->falcon, data, queue->position, size, 0); + queue->position += ALIGN(size, QUEUE_ALIGNMENT); + + return 0; +} + +/* REWIND unit is always 0x00 */ +#define MSGQUEUE_UNIT_REWIND 0x00 + +static void +cmd_queue_rewind(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_queue *queue) +{ + const struct nvkm_subdev *subdev = priv->falcon->owner; + struct nvkm_msgqueue_hdr cmd; + int err; + + cmd.unit_id = MSGQUEUE_UNIT_REWIND; + cmd.size = sizeof(cmd); + err = cmd_queue_push(priv, queue, &cmd, cmd.size); + if (err) + nvkm_error(subdev, "queue %d rewind failed\n", queue->index); + else + nvkm_error(subdev, "queue %d rewinded\n", queue->index); + + queue->position = queue->offset; +} + +static int +cmd_queue_open(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_queue *queue, + u32 size) +{ + struct nvkm_falcon *falcon = priv->falcon; + const struct nvkm_subdev *subdev = priv->falcon->owner; + bool rewind = false; + + mutex_lock(&queue->mutex); + + if (!cmd_queue_has_room(priv, queue, size, &rewind)) { + nvkm_error(subdev, "queue full\n"); + mutex_unlock(&queue->mutex); + return -EAGAIN; + } + + queue->position = nvkm_falcon_rd32(falcon, queue->head_reg); + + if (rewind) + cmd_queue_rewind(priv, queue); + + return 0; +} + +static void +cmd_queue_close(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_queue *queue, + bool commit) +{ + struct nvkm_falcon *falcon = priv->falcon; + + if (commit) + nvkm_falcon_wr32(falcon, queue->head_reg, queue->position); + + mutex_unlock(&queue->mutex); +} + +static int +cmd_write(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_hdr *cmd, + struct nvkm_msgqueue_queue *queue) +{ + const struct nvkm_subdev *subdev = priv->falcon->owner; + static unsigned long timeout = ~0; + unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout); + int ret = -EAGAIN; + bool commit = true; + + while (ret == -EAGAIN && time_before(jiffies, end_jiffies)) + ret = cmd_queue_open(priv, queue, cmd->size); + if (ret) { + nvkm_error(subdev, "pmu_queue_open_write failed\n"); + return ret; + } + + ret = cmd_queue_push(priv, queue, cmd, cmd->size); + if (ret) { + nvkm_error(subdev, "pmu_queue_push failed\n"); + commit = false; + } + + cmd_queue_close(priv, queue, commit); + + return ret; +} + +static struct nvkm_msgqueue_seq * +msgqueue_seq_acquire(struct nvkm_msgqueue *priv) +{ + const struct nvkm_subdev *subdev = priv->falcon->owner; + struct nvkm_msgqueue_seq *seq; + u32 index; + + mutex_lock(&priv->seq_lock); + + index = find_first_zero_bit(priv->seq_tbl, NVKM_MSGQUEUE_NUM_SEQUENCES); + + if (index >= NVKM_MSGQUEUE_NUM_SEQUENCES) { + nvkm_error(subdev, "no free sequence available\n"); + mutex_unlock(&priv->seq_lock); + return ERR_PTR(-EAGAIN); + } + + set_bit(index, priv->seq_tbl); + + mutex_unlock(&priv->seq_lock); + + seq = &priv->seq[index]; + seq->state = SEQ_STATE_PENDING; + + return seq; +} + +static void +msgqueue_seq_release(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_seq *seq) +{ + /* no need to acquire seq_lock since clear_bit is atomic */ + seq->state = SEQ_STATE_FREE; + seq->callback = NULL; + seq->completion = NULL; + clear_bit(seq->id, priv->seq_tbl); +} + +/* specifies that we want to know the command status in the answer message */ +#define CMD_FLAGS_STATUS BIT(0) +/* specifies that we want an interrupt when the answer message is queued */ +#define CMD_FLAGS_INTR BIT(1) + +int +nvkm_msgqueue_post(struct nvkm_msgqueue *priv, enum msgqueue_msg_priority prio, + struct nvkm_msgqueue_hdr *cmd, nvkm_msgqueue_callback cb, + struct completion *completion, bool wait_init) +{ + struct nvkm_msgqueue_seq *seq; + struct nvkm_msgqueue_queue *queue; + int ret; + + if (wait_init && !wait_for_completion_timeout(&priv->init_done, + msecs_to_jiffies(1000))) + return -ETIMEDOUT; + + queue = priv->func->cmd_queue(priv, prio); + if (IS_ERR(queue)) + return PTR_ERR(queue); + + seq = msgqueue_seq_acquire(priv); + if (IS_ERR(seq)) + return PTR_ERR(seq); + + cmd->seq_id = seq->id; + cmd->ctrl_flags = CMD_FLAGS_STATUS | CMD_FLAGS_INTR; + + seq->callback = cb; + seq->state = SEQ_STATE_USED; + seq->completion = completion; + + ret = cmd_write(priv, cmd, queue); + if (ret) { + seq->state = SEQ_STATE_PENDING; + msgqueue_seq_release(priv, seq); + } + + return ret; +} + +static int +msgqueue_msg_handle(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_hdr *hdr) +{ + const struct nvkm_subdev *subdev = priv->falcon->owner; + struct nvkm_msgqueue_seq *seq; + + seq = &priv->seq[hdr->seq_id]; + if (seq->state != SEQ_STATE_USED && seq->state != SEQ_STATE_CANCELLED) { + nvkm_error(subdev, "msg for unknown sequence %d", seq->id); + return -EINVAL; + } + + if (seq->state == SEQ_STATE_USED) { + if (seq->callback) + seq->callback(priv, hdr); + } + + if (seq->completion) + complete(seq->completion); + + msgqueue_seq_release(priv, seq); + + return 0; +} + +static int +msgqueue_handle_init_msg(struct nvkm_msgqueue *priv, + struct nvkm_msgqueue_hdr *hdr) +{ + struct nvkm_falcon *falcon = priv->falcon; + const struct nvkm_subdev *subdev = falcon->owner; + u32 tail; + u32 tail_reg; + int ret; + + /* + * Of course the message queue registers vary depending on the falcon + * used... + */ + switch (falcon->owner->index) { + case NVKM_SUBDEV_PMU: + tail_reg = 0x4cc; + break; + default: + nvkm_error(subdev, "falcon %s unsupported for msgqueue!\n", + nvkm_subdev_name[falcon->owner->index]); + return -EINVAL; + } + + /* + * Read the message - queues are not initialized yet so we cannot rely + * on msg_queue_read() + */ + tail = nvkm_falcon_rd32(falcon, tail_reg); + nvkm_falcon_read_dmem(falcon, tail, HDR_SIZE, 0, hdr); + + if (hdr->size > MSG_BUF_SIZE) { + nvkm_error(subdev, "message too big (%d bytes)\n", hdr->size); + return -ENOSPC; + } + + nvkm_falcon_read_dmem(falcon, tail + HDR_SIZE, hdr->size - HDR_SIZE, 0, + (hdr + 1)); + + tail += ALIGN(hdr->size, QUEUE_ALIGNMENT); + nvkm_falcon_wr32(falcon, tail_reg, tail); + + ret = priv->func->init_func->init_callback(priv, hdr); + if (ret) + return ret; + + return 0; +} + +void +nvkm_msgqueue_process_msgs(struct nvkm_msgqueue *priv, + struct nvkm_msgqueue_queue *queue) +{ + /* + * We are invoked from a worker thread, so normally we have plenty of + * stack space to work with. + */ + u8 msg_buffer[MSG_BUF_SIZE]; + struct nvkm_msgqueue_hdr *hdr = (void *)msg_buffer; + int ret; + + /* the first message we receive must be the init message */ + if ((!priv->init_msg_received)) { + ret = msgqueue_handle_init_msg(priv, hdr); + if (!ret) + priv->init_msg_received = true; + } else { + while (msg_queue_read(priv, queue, hdr) > 0) + msgqueue_msg_handle(priv, hdr); + } +} + +void +nvkm_msgqueue_write_cmdline(struct nvkm_msgqueue *queue, void *buf) +{ + if (!queue || !queue->func || !queue->func->init_func) + return; + + queue->func->init_func->gen_cmdline(queue, buf); +} + +int +nvkm_msgqueue_acr_boot_falcon(struct nvkm_msgqueue *queue, enum nvkm_secboot_falcon falcon) +{ + if (!queue || !queue->func->acr_func || !queue->func->acr_func->boot_falcon) + return -ENODEV; + + return queue->func->acr_func->boot_falcon(queue, falcon); +} + +int +nvkm_msgqueue_new(u32 version, struct nvkm_falcon *falcon, struct nvkm_msgqueue **queue) +{ + const struct nvkm_subdev *subdev = falcon->owner; + int ret = -EINVAL; + + switch (version) { + default: + nvkm_error(subdev, "unhandled firmware version 0x%08x\n", + version); + break; + } + + if (ret == 0) { + nvkm_debug(subdev, "firmware version: 0x%08x\n", version); + (*queue)->fw_version = version; + } + + return ret; +} + +void +nvkm_msgqueue_del(struct nvkm_msgqueue **queue) +{ + if (*queue) { + (*queue)->func->dtor(*queue); + *queue = NULL; + } +} + +void +nvkm_msgqueue_recv(struct nvkm_msgqueue *queue) +{ + if (!queue || !queue->func || !queue->func->recv) { + const struct nvkm_subdev *subdev = queue->falcon->owner; + + nvkm_warn(subdev, + "cmdqueue recv function called while no firmware set!\n"); + return; + } + + queue->func->recv(queue); +} + +int +nvkm_msgqueue_reinit(struct nvkm_msgqueue *queue) +{ + /* firmware not set yet... */ + if (!queue) + return 0; + + queue->init_msg_received = false; + reinit_completion(&queue->init_done); + + return 0; +} + +void +nvkm_msgqueue_ctor(const struct nvkm_msgqueue_func *func, + struct nvkm_falcon *falcon, + struct nvkm_msgqueue *queue) +{ + int i; + + queue->func = func; + queue->falcon = falcon; + mutex_init(&queue->seq_lock); + for (i = 0; i < NVKM_MSGQUEUE_NUM_SEQUENCES; i++) + queue->seq[i].id = i; + + init_completion(&queue->init_done); + + +} diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.h b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.h new file mode 100644 index 000000000000..1f555bc1852d --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NVKM_CORE_FALCON_MSGQUEUE_H +#define __NVKM_CORE_FALCON_MSGQUEUE_H + +#include + +/* + * The struct nvkm_msgqueue (named so for lack of better candidate) manages + * a firmware (typically, NVIDIA signed firmware) running under a given falcon. + * + * Such firmwares expect to receive commands (through one or several command + * queues) and will reply to such command by sending messages (using one + * message queue). + * + * Each firmware can support one or several units - ACR for managing secure + * falcons, PMU for power management, etc. A unit can be seen as a class to + * which command can be sent. + * + * One usage example would be to send a command to the SEC falcon to ask it to + * reset a secure falcon. The SEC falcon will receive the command, process it, + * and send a message to signal success or failure. Only when the corresponding + * message is received can the requester assume the request has been processed. + * + * Since we expect many variations between the firmwares NVIDIA will release + * across GPU generations, this library is built in a very modular way. Message + * formats and queues details (such as number of usage) are left to + * specializations of struct nvkm_msgqueue, while the functions in msgqueue.c + * take care of posting commands and processing messages in a fashion that is + * universal. + * + */ + +enum msgqueue_msg_priority { + MSGQUEUE_MSG_PRIORITY_HIGH, + MSGQUEUE_MSG_PRIORITY_LOW, +}; + +/** + * struct nvkm_msgqueue_hdr - header for all commands/messages + * @unit_id: id of firmware using receiving the command/sending the message + * @size: total size of command/message + * @ctrl_flags: type of command/message + * @seq_id: used to match a message from its corresponding command + */ +struct nvkm_msgqueue_hdr { + u8 unit_id; + u8 size; + u8 ctrl_flags; + u8 seq_id; +}; + +/** + * struct nvkm_msgqueue_msg - base message. + * + * This is just a header and a message (or command) type. Useful when + * building command-specific structures. + */ +struct nvkm_msgqueue_msg { + struct nvkm_msgqueue_hdr hdr; + u8 msg_type; +}; + +struct nvkm_msgqueue; +typedef void +(*nvkm_msgqueue_callback)(struct nvkm_msgqueue *, struct nvkm_msgqueue_hdr *); + +/** + * struct nvkm_msgqueue_init_func - msgqueue functions related to initialization + * + * @gen_cmdline: build the commandline into a pre-allocated buffer + * @init_callback: called to process the init message + */ +struct nvkm_msgqueue_init_func { + void (*gen_cmdline)(struct nvkm_msgqueue *, void *); + int (*init_callback)(struct nvkm_msgqueue *, struct nvkm_msgqueue_hdr *); +}; + +/** + * struct nvkm_msgqueue_acr_func - msgqueue functions related to ACR + * + * @boot_falcon: build and send the command to reset a given falcon + */ +struct nvkm_msgqueue_acr_func { + int (*boot_falcon)(struct nvkm_msgqueue *, enum nvkm_secboot_falcon); +}; + +struct nvkm_msgqueue_func { + const struct nvkm_msgqueue_init_func *init_func; + const struct nvkm_msgqueue_acr_func *acr_func; + void (*dtor)(struct nvkm_msgqueue *); + struct nvkm_msgqueue_queue *(*cmd_queue)(struct nvkm_msgqueue *, + enum msgqueue_msg_priority); + void (*recv)(struct nvkm_msgqueue *queue); +}; + +/** + * struct nvkm_msgqueue_queue - information about a command or message queue + * + * The number of queues is firmware-dependent. All queues must have their + * information filled by the init message handler. + * + * @mutex_lock: to be acquired when the queue is being used + * @index: physical queue index + * @offset: DMEM offset where this queue begins + * @size: size allocated to this queue in DMEM (in bytes) + * @position: current write position + * @head_reg: address of the HEAD register for this queue + * @tail_reg: address of the TAIL register for this queue + */ +struct nvkm_msgqueue_queue { + struct mutex mutex; + u32 index; + u32 offset; + u32 size; + u32 position; + + u32 head_reg; + u32 tail_reg; +}; + +/** + * struct nvkm_msgqueue_seq - keep track of ongoing commands + * + * Every time a command is sent, a sequence is assigned to it so the + * corresponding message can be matched. Upon receiving the message, a callback + * can be called and/or a completion signaled. + * + * @id: sequence ID + * @state: current state + * @callback: callback to call upon receiving matching message + * @completion: completion to signal after callback is called + */ +struct nvkm_msgqueue_seq { + u16 id; + enum { + SEQ_STATE_FREE = 0, + SEQ_STATE_PENDING, + SEQ_STATE_USED, + SEQ_STATE_CANCELLED + } state; + nvkm_msgqueue_callback callback; + struct completion *completion; +}; + +/* + * We can have an arbitrary number of sequences, but realistically we will + * probably not use that much simultaneously. + */ +#define NVKM_MSGQUEUE_NUM_SEQUENCES 16 + +/** + * struct nvkm_msgqueue - manage a command/message based FW on a falcon + * + * @falcon: falcon to be managed + * @func: implementation of the firmware to use + * @init_msg_received: whether the init message has already been received + * @init_done: whether all init is complete and commands can be processed + * @seq_lock: protects seq and seq_tbl + * @seq: sequences to match commands and messages + * @seq_tbl: bitmap of sequences currently in use + */ +struct nvkm_msgqueue { + struct nvkm_falcon *falcon; + const struct nvkm_msgqueue_func *func; + u32 fw_version; + bool init_msg_received; + struct completion init_done; + + struct mutex seq_lock; + struct nvkm_msgqueue_seq seq[NVKM_MSGQUEUE_NUM_SEQUENCES]; + unsigned long seq_tbl[BITS_TO_LONGS(NVKM_MSGQUEUE_NUM_SEQUENCES)]; +}; + +void nvkm_msgqueue_ctor(const struct nvkm_msgqueue_func *, struct nvkm_falcon *, + struct nvkm_msgqueue *); +int nvkm_msgqueue_post(struct nvkm_msgqueue *, enum msgqueue_msg_priority, + struct nvkm_msgqueue_hdr *, nvkm_msgqueue_callback, + struct completion *, bool); +void nvkm_msgqueue_process_msgs(struct nvkm_msgqueue *, + struct nvkm_msgqueue_queue *); + +#endif From 42847d8a1f8fa38f11ab54b4a186fdb8b7b26a1d Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 16 Feb 2017 17:25:59 +0900 Subject: [PATCH 0667/1299] drm/nouveau/falcon: support for gm20b msgqueue Add support for the msgqueue firmware used to process PMU commands for gm20b. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/falcon/msgqueue.c | 3 + .../gpu/drm/nouveau/nvkm/falcon/msgqueue.h | 2 + .../nouveau/nvkm/falcon/msgqueue_0137c63d.c | 323 ++++++++++++++++++ 4 files changed, 329 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue_0137c63d.c diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild index 305b4391aeea..a2e9ae3db6dd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild @@ -1,3 +1,4 @@ nvkm-y += nvkm/falcon/base.o nvkm-y += nvkm/falcon/v1.o nvkm-y += nvkm/falcon/msgqueue.o +nvkm-y += nvkm/falcon/msgqueue_0137c63d.o diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c index b2de33fc7d04..8583fe1c2183 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c @@ -475,6 +475,9 @@ nvkm_msgqueue_new(u32 version, struct nvkm_falcon *falcon, struct nvkm_msgqueue int ret = -EINVAL; switch (version) { + case 0x0137c63d: + ret = msgqueue_0137c63d_new(falcon, queue); + break; default: nvkm_error(subdev, "unhandled firmware version 0x%08x\n", version); diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.h b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.h index 1f555bc1852d..2f537e0ec9b4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.h +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.h @@ -201,4 +201,6 @@ int nvkm_msgqueue_post(struct nvkm_msgqueue *, enum msgqueue_msg_priority, void nvkm_msgqueue_process_msgs(struct nvkm_msgqueue *, struct nvkm_msgqueue_queue *); +int msgqueue_0137c63d_new(struct nvkm_falcon *, struct nvkm_msgqueue **); + #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue_0137c63d.c b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue_0137c63d.c new file mode 100644 index 000000000000..bba91207fb18 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue_0137c63d.c @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "msgqueue.h" +#include +#include + +/* Queues identifiers */ +enum { + /* High Priority Command Queue for Host -> PMU communication */ + MSGQUEUE_0137C63D_COMMAND_QUEUE_HPQ = 0, + /* Low Priority Command Queue for Host -> PMU communication */ + MSGQUEUE_0137C63D_COMMAND_QUEUE_LPQ = 1, + /* Message queue for PMU -> Host communication */ + MSGQUEUE_0137C63D_MESSAGE_QUEUE = 4, + MSGQUEUE_0137C63D_NUM_QUEUES = 5, +}; + +struct msgqueue_0137c63d { + struct nvkm_msgqueue base; + + struct nvkm_msgqueue_queue queue[MSGQUEUE_0137C63D_NUM_QUEUES]; +}; +#define msgqueue_0137c63d(q) \ + container_of(q, struct msgqueue_0137c63d, base) + +static struct nvkm_msgqueue_queue * +msgqueue_0137c63d_cmd_queue(struct nvkm_msgqueue *queue, + enum msgqueue_msg_priority priority) +{ + struct msgqueue_0137c63d *priv = msgqueue_0137c63d(queue); + const struct nvkm_subdev *subdev = priv->base.falcon->owner; + + switch (priority) { + case MSGQUEUE_MSG_PRIORITY_HIGH: + return &priv->queue[MSGQUEUE_0137C63D_COMMAND_QUEUE_HPQ]; + case MSGQUEUE_MSG_PRIORITY_LOW: + return &priv->queue[MSGQUEUE_0137C63D_COMMAND_QUEUE_LPQ]; + default: + nvkm_error(subdev, "invalid command queue!\n"); + return ERR_PTR(-EINVAL); + } +} + +static void +msgqueue_0137c63d_process_msgs(struct nvkm_msgqueue *queue) +{ + struct msgqueue_0137c63d *priv = msgqueue_0137c63d(queue); + struct nvkm_msgqueue_queue *q_queue = + &priv->queue[MSGQUEUE_0137C63D_MESSAGE_QUEUE]; + + nvkm_msgqueue_process_msgs(&priv->base, q_queue); +} + +/* Init unit */ +#define MSGQUEUE_0137C63D_UNIT_INIT 0x07 + +enum { + INIT_MSG_INIT = 0x0, +}; + +static void +init_gen_cmdline(struct nvkm_msgqueue *queue, void *buf) +{ + struct { + u32 reserved; + u32 freq_hz; + u32 trace_size; + u32 trace_dma_base; + u16 trace_dma_base1; + u8 trace_dma_offset; + u32 trace_dma_idx; + bool secure_mode; + bool raise_priv_sec; + struct { + u32 dma_base; + u16 dma_base1; + u8 dma_offset; + u16 fb_size; + u8 dma_idx; + } gc6_ctx; + u8 pad; + } *args = buf; + + args->secure_mode = 1; +} + +/* forward declaration */ +static int acr_init_wpr(struct nvkm_msgqueue *queue); + +static int +init_callback(struct nvkm_msgqueue *_queue, struct nvkm_msgqueue_hdr *hdr) +{ + struct msgqueue_0137c63d *priv = msgqueue_0137c63d(_queue); + struct { + struct nvkm_msgqueue_msg base; + + u8 pad; + u16 os_debug_entry_point; + + struct { + u16 size; + u16 offset; + u8 index; + u8 pad; + } queue_info[MSGQUEUE_0137C63D_NUM_QUEUES]; + + u16 sw_managed_area_offset; + u16 sw_managed_area_size; + } *init = (void *)hdr; + const struct nvkm_subdev *subdev = _queue->falcon->owner; + int i; + + if (init->base.hdr.unit_id != MSGQUEUE_0137C63D_UNIT_INIT) { + nvkm_error(subdev, "expected message from init unit\n"); + return -EINVAL; + } + + if (init->base.msg_type != INIT_MSG_INIT) { + nvkm_error(subdev, "expected PMU init msg\n"); + return -EINVAL; + } + + for (i = 0; i < MSGQUEUE_0137C63D_NUM_QUEUES; i++) { + struct nvkm_msgqueue_queue *queue = &priv->queue[i]; + + mutex_init(&queue->mutex); + + queue->index = init->queue_info[i].index; + queue->offset = init->queue_info[i].offset; + queue->size = init->queue_info[i].size; + + if (i != MSGQUEUE_0137C63D_MESSAGE_QUEUE) { + queue->head_reg = 0x4a0 + (queue->index * 4); + queue->tail_reg = 0x4b0 + (queue->index * 4); + } else { + queue->head_reg = 0x4c8; + queue->tail_reg = 0x4cc; + } + + nvkm_debug(subdev, + "queue %d: index %d, offset 0x%08x, size 0x%08x\n", + i, queue->index, queue->offset, queue->size); + } + + /* Complete initialization by initializing WPR region */ + return acr_init_wpr(&priv->base); +} + +static const struct nvkm_msgqueue_init_func +msgqueue_0137c63d_init_func = { + .gen_cmdline = init_gen_cmdline, + .init_callback = init_callback, +}; + + + +/* ACR unit */ +#define MSGQUEUE_0137C63D_UNIT_ACR 0x0a + +enum { + ACR_CMD_INIT_WPR_REGION = 0x00, + ACR_CMD_BOOTSTRAP_FALCON = 0x01, +}; + +static void +acr_init_wpr_callback(struct nvkm_msgqueue *queue, + struct nvkm_msgqueue_hdr *hdr) +{ + struct { + struct nvkm_msgqueue_msg base; + u32 error_code; + } *msg = (void *)hdr; + const struct nvkm_subdev *subdev = queue->falcon->owner; + + if (msg->error_code) { + nvkm_error(subdev, "ACR WPR init failure: %d\n", + msg->error_code); + return; + } + + nvkm_debug(subdev, "ACR WPR init complete\n"); + complete_all(&queue->init_done); +} + +static int +acr_init_wpr(struct nvkm_msgqueue *queue) +{ + /* + * region_id: region ID in WPR region + * wpr_offset: offset in WPR region + */ + struct { + struct nvkm_msgqueue_hdr hdr; + u8 cmd_type; + u32 region_id; + u32 wpr_offset; + } cmd; + memset(&cmd, 0, sizeof(cmd)); + + cmd.hdr.unit_id = MSGQUEUE_0137C63D_UNIT_ACR; + cmd.hdr.size = sizeof(cmd); + cmd.cmd_type = ACR_CMD_INIT_WPR_REGION; + cmd.region_id = 0x01; + cmd.wpr_offset = 0x00; + + nvkm_msgqueue_post(queue, MSGQUEUE_MSG_PRIORITY_HIGH, &cmd.hdr, + acr_init_wpr_callback, NULL, false); + + return 0; +} + + +static void +acr_boot_falcon_callback(struct nvkm_msgqueue *priv, + struct nvkm_msgqueue_hdr *hdr) +{ + struct acr_bootstrap_falcon_msg { + struct nvkm_msgqueue_msg base; + + u32 falcon_id; + } *msg = (void *)hdr; + const struct nvkm_subdev *subdev = priv->falcon->owner; + u32 falcon_id = msg->falcon_id; + + if (falcon_id >= NVKM_SECBOOT_FALCON_END) { + nvkm_error(subdev, "in bootstrap falcon callback:\n"); + nvkm_error(subdev, "invalid falcon ID 0x%x\n", falcon_id); + return; + } + nvkm_debug(subdev, "%s booted\n", nvkm_secboot_falcon_name[falcon_id]); +} + +enum { + ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES = 0, + ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_NO = 1, +}; + +static int +acr_boot_falcon(struct nvkm_msgqueue *priv, enum nvkm_secboot_falcon falcon) +{ + DECLARE_COMPLETION_ONSTACK(completed); + /* + * flags - Flag specifying RESET or no RESET. + * falcon id - Falcon id specifying falcon to bootstrap. + */ + struct { + struct nvkm_msgqueue_hdr hdr; + u8 cmd_type; + u32 flags; + u32 falcon_id; + } cmd; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.hdr.unit_id = MSGQUEUE_0137C63D_UNIT_ACR; + cmd.hdr.size = sizeof(cmd); + cmd.cmd_type = ACR_CMD_BOOTSTRAP_FALCON; + cmd.flags = ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES; + cmd.falcon_id = falcon; + nvkm_msgqueue_post(priv, MSGQUEUE_MSG_PRIORITY_HIGH, &cmd.hdr, + acr_boot_falcon_callback, &completed, true); + + if (!wait_for_completion_timeout(&completed, msecs_to_jiffies(1000))) + return -ETIMEDOUT; + + return 0; +} + +static const struct nvkm_msgqueue_acr_func +msgqueue_0137c63d_acr_func = { + .boot_falcon = acr_boot_falcon, +}; + +static void +msgqueue_0137c63d_dtor(struct nvkm_msgqueue *queue) +{ + kfree(msgqueue_0137c63d(queue)); +} + +static const struct nvkm_msgqueue_func +msgqueue_0137c63d_func = { + .init_func = &msgqueue_0137c63d_init_func, + .acr_func = &msgqueue_0137c63d_acr_func, + .cmd_queue = msgqueue_0137c63d_cmd_queue, + .recv = msgqueue_0137c63d_process_msgs, + .dtor = msgqueue_0137c63d_dtor, +}; + +int +msgqueue_0137c63d_new(struct nvkm_falcon *falcon, struct nvkm_msgqueue **queue) +{ + struct msgqueue_0137c63d *ret; + + ret = kzalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return -ENOMEM; + + *queue = &ret->base; + + nvkm_msgqueue_ctor(&msgqueue_0137c63d_func, falcon, &ret->base); + + return 0; +} From 9ce480fead6cf57640a2bde8024578e61acb77a3 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 19 Jan 2017 13:16:40 +0900 Subject: [PATCH 0668/1299] drm/nouveau/pmu: add msgqueue member NVIDIA-provided PMU firmware is controlled by a msgqueue. Add a member to the PMU structure as well as the required cleanup code if this feature is used. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h index 179b6ed3f595..e7f04732a425 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h @@ -7,6 +7,7 @@ struct nvkm_pmu { const struct nvkm_pmu_func *func; struct nvkm_subdev subdev; struct nvkm_falcon *falcon; + struct nvkm_msgqueue *queue; struct { u32 base; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c index ad4ac4e8b29e..1871a92e8f2f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c @@ -23,6 +23,7 @@ */ #include "priv.h" +#include #include void @@ -118,6 +119,7 @@ static void * nvkm_pmu_dtor(struct nvkm_subdev *subdev) { struct nvkm_pmu *pmu = nvkm_pmu(subdev); + nvkm_msgqueue_del(&pmu->queue); nvkm_falcon_del(&pmu->falcon); return nvkm_pmu(subdev); } From eabe4ea6a418a60b4df666154f2b0d8cd7d5ba29 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 27 Oct 2016 14:22:28 +0900 Subject: [PATCH 0669/1299] drm/nouveau/secboot: support for loading LS PMU firmware Allow secboot to load a LS PMU firmware. LS PMU is one instance of firmwares based on the message queue mechanism, which is also used for other firmwares like SEC, so name its source file accordingly. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/nvkm/subdev/secboot/Kbuild | 1 + .../nouveau/nvkm/subdev/secboot/ls_ucode.h | 4 +- .../nvkm/subdev/secboot/ls_ucode_msgqueue.c | 117 ++++++++++++++++++ 3 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild index 5076d1500f47..4e0c671e19dd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild @@ -1,5 +1,6 @@ nvkm-y += nvkm/subdev/secboot/base.o nvkm-y += nvkm/subdev/secboot/ls_ucode_gr.o +nvkm-y += nvkm/subdev/secboot/ls_ucode_msgqueue.o nvkm-y += nvkm/subdev/secboot/acr.o nvkm-y += nvkm/subdev/secboot/acr_r352.o nvkm-y += nvkm/subdev/secboot/acr_r361.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h index 181c0d80d194..9ea295d41861 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h @@ -27,6 +27,7 @@ #include #include +struct nvkm_acr; /** * struct ls_ucode_img_desc - descriptor of firmware image @@ -148,6 +149,7 @@ struct fw_bl_desc { int acr_ls_ucode_load_fecs(const struct nvkm_subdev *, struct ls_ucode_img *); int acr_ls_ucode_load_gpccs(const struct nvkm_subdev *, struct ls_ucode_img *); - +int acr_ls_ucode_load_pmu(const struct nvkm_subdev *, struct ls_ucode_img *); +void acr_ls_pmu_post_run(const struct nvkm_acr *, const struct nvkm_secboot *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c new file mode 100644 index 000000000000..333dd2068a8a --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#include "ls_ucode.h" +#include "acr.h" + +#include +#include +#include + +/** + * acr_ls_ucode_load_msgqueue - load and prepare a ucode img for a msgqueue fw + * + * Load the LS microcode, desc and signature and pack them into a single + * blob. + */ +static int +acr_ls_ucode_load_msgqueue(const struct nvkm_subdev *subdev, const char *name, + struct ls_ucode_img *img) +{ + const struct firmware *image, *desc, *sig; + char f[64]; + int ret; + + snprintf(f, sizeof(f), "%s/image", name); + ret = nvkm_firmware_get(subdev->device, f, &image); + if (ret) + return ret; + img->ucode_data = kmemdup(image->data, image->size, GFP_KERNEL); + nvkm_firmware_put(image); + if (!img->ucode_data) + return -ENOMEM; + + snprintf(f, sizeof(f), "%s/desc", name); + ret = nvkm_firmware_get(subdev->device, f, &desc); + if (ret) + return ret; + memcpy(&img->ucode_desc, desc->data, sizeof(img->ucode_desc)); + img->ucode_size = ALIGN(img->ucode_desc.app_start_offset + img->ucode_desc.app_size, 256); + nvkm_firmware_put(desc); + + snprintf(f, sizeof(f), "%s/sig", name); + ret = nvkm_firmware_get(subdev->device, f, &sig); + if (ret) + return ret; + img->sig_size = sig->size; + img->sig = kmemdup(sig->data, sig->size, GFP_KERNEL); + nvkm_firmware_put(sig); + if (!img->sig) + return -ENOMEM; + + return 0; +} + +static void +acr_ls_msgqueue_post_run(struct nvkm_msgqueue *queue, + struct nvkm_falcon *falcon, u32 addr_args) +{ + u32 cmdline_size = NVKM_MSGQUEUE_CMDLINE_SIZE; + u8 buf[cmdline_size]; + + memset(buf, 0, cmdline_size); + nvkm_msgqueue_write_cmdline(queue, buf); + nvkm_falcon_load_dmem(falcon, buf, addr_args, cmdline_size, 0); + /* rearm the queue so it will wait for the init message */ + nvkm_msgqueue_reinit(queue); +} + +int +acr_ls_ucode_load_pmu(const struct nvkm_subdev *subdev, + struct ls_ucode_img *img) +{ + struct nvkm_pmu *pmu = subdev->device->pmu; + int ret; + + ret = acr_ls_ucode_load_msgqueue(subdev, "pmu", img); + if (ret) + return ret; + + /* Allocate the PMU queue corresponding to the FW version */ + ret = nvkm_msgqueue_new(img->ucode_desc.app_version, pmu->falcon, + &pmu->queue); + if (ret) + return ret; + + return 0; +} + +void +acr_ls_pmu_post_run(const struct nvkm_acr *acr, const struct nvkm_secboot *sb) +{ + struct nvkm_device *device = sb->subdev.device; + struct nvkm_pmu *pmu = device->pmu; + u32 addr_args = pmu->falcon->data.limit - NVKM_MSGQUEUE_CMDLINE_SIZE; + + acr_ls_msgqueue_post_run(pmu->queue, pmu->falcon, addr_args); +} From 7579e22f38ffe0acf0c4daea0932847023450ac7 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 27 Oct 2016 14:24:34 +0900 Subject: [PATCH 0670/1299] drm/nouveau/secboot: base support for PMU falcon Adapt secboot's behavior if a PMU firmware is present, in particular the way LS falcons are reset. Without PMU firmware, secboot needs to be performed again from scratch so all LS falcons are reset. With PMU firmware, we can ask the PMU's ACR unit to reset a specific falcon through a PMU message. As we must preserve the old behavior to avoid breaking user-space, add a few conditionals to the way falcons are reset. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../nouveau/nvkm/subdev/secboot/acr_r352.c | 98 ++++++++++++++++--- 1 file changed, 82 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 0a35d35e2f46..a937c34bc28f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -25,6 +25,9 @@ #include #include #include +#include +#include +#include /** * struct hsf_fw_header - HS firmware descriptor @@ -852,6 +855,7 @@ acr_r352_shutdown(struct acr_r352 *acr, struct nvkm_secboot *sb) static int acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) { + const struct nvkm_subdev *subdev = &sb->subdev; unsigned long managed_falcons = acr->base.managed_falcons; int falcon_id; int ret; @@ -864,13 +868,13 @@ acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) if (ret) return ret; - nvkm_debug(&sb->subdev, "running HS load blob\n"); + nvkm_debug(subdev, "running HS load blob\n"); ret = sb->func->run_blob(sb, acr->load_blob); /* clear halt interrupt */ nvkm_falcon_clear_interrupt(sb->boot_falcon, 0x10); if (ret) return ret; - nvkm_debug(&sb->subdev, "HS load blob completed\n"); + nvkm_debug(subdev, "HS load blob completed\n"); sb->wpr_set = true; @@ -883,6 +887,50 @@ acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) func->post_run(&acr->base, sb); } + /* Re-start ourselves if we are managed */ + if (!nvkm_secboot_is_managed(sb, acr->base.boot_falcon)) + return 0; + + /* Enable interrupts */ + nvkm_falcon_wr32(sb->boot_falcon, 0x10, 0xff); + nvkm_mc_intr_mask(subdev->device, sb->boot_falcon->owner->index, true); + + /* Start PMU */ + nvkm_falcon_start(sb->boot_falcon); + nvkm_debug(subdev, "PMU started\n"); + + return 0; +} + +/** + * acr_r352_reset_nopmu - dummy reset method when no PMU firmware is loaded + * + * Reset is done by re-executing secure boot from scratch, with lazy bootstrap + * disabled. This has the effect of making all managed falcons ready-to-run. + */ +static int +acr_r352_reset_nopmu(struct acr_r352 *acr, struct nvkm_secboot *sb, + enum nvkm_secboot_falcon falcon) +{ + int ret; + + /* + * Perform secure boot each time we are called on FECS. Since only FECS + * and GPCCS are managed and started together, this ought to be safe. + */ + if (falcon != NVKM_SECBOOT_FALCON_FECS) + goto end; + + ret = acr_r352_shutdown(acr, sb); + if (ret) + return ret; + + ret = acr_r352_bootstrap(acr, sb); + if (ret) + return ret; + +end: + acr->falcon_state[falcon] = RESET; return 0; } @@ -898,29 +946,30 @@ acr_r352_reset(struct nvkm_acr *_acr, struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon) { struct acr_r352 *acr = acr_r352(_acr); + struct nvkm_pmu *pmu = sb->subdev.device->pmu; + const char *fname = nvkm_secboot_falcon_name[falcon]; int ret; + /* Not self-managed? Redo secure boot entirely */ + if (!nvkm_secboot_is_managed(sb, _acr->boot_falcon)) + return acr_r352_reset_nopmu(acr, sb, falcon); + /* - * Dummy GM200 implementation: perform secure boot each time we are - * called on FECS. Since only FECS and GPCCS are managed and started - * together, this ought to be safe. - * - * Once we have proper PMU firmware and support, this will be changed - * to a proper call to the PMU method. + * Otherwise ensure secure boot is done, and command the PMU to reset + * the desired falcon. */ - if (falcon != NVKM_SECBOOT_FALCON_FECS) - goto end; - - ret = acr_r352_shutdown(acr, sb); + ret = acr_r352_bootstrap(acr, sb); if (ret) return ret; - acr_r352_bootstrap(acr, sb); - if (ret) + nvkm_debug(&sb->subdev, "resetting %s falcon\n", fname); + ret = nvkm_msgqueue_acr_boot_falcon(pmu->queue, falcon); + if (ret) { + nvkm_error(&sb->subdev, "cannot boot %s falcon\n", fname); return ret; + } + nvkm_debug(&sb->subdev, "falcon %s reset\n", fname); -end: - acr->falcon_state[falcon] = RESET; return 0; } @@ -1007,6 +1056,23 @@ acr_r352_new_(const struct acr_r352_func *func, acr->base.func = &acr_r352_base_func; acr->func = func; + /* + * If we have a PMU firmware, let it manage the bootstrap of other + * falcons. + */ + if (func->ls_func[NVKM_SECBOOT_FALCON_PMU] && + (managed_falcons & BIT(NVKM_SECBOOT_FALCON_PMU))) { + int i; + + for (i = 0; i < NVKM_SECBOOT_FALCON_END; i++) { + if (i == NVKM_SECBOOT_FALCON_PMU) + continue; + + if (func->ls_func[i]) + acr->lazy_bootstrap |= BIT(i); + } + } + return &acr->base; } From bb5ec9c9dd1cc71bbae37a31229d72ffab9b6df4 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 27 Oct 2016 14:25:02 +0900 Subject: [PATCH 0671/1299] drm/nouveau/secboot: support PMU LS firmware Add the PMU bootloader generator and PMU LS ops that will enable proper PMU operation if the PMU falcon is designated as managed. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../nouveau/nvkm/subdev/secboot/acr_r352.c | 80 +++++++++++++++++++ .../nouveau/nvkm/subdev/secboot/acr_r361.c | 54 +++++++++++++ 2 files changed, 134 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index a937c34bc28f..a5d881438c1f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -1011,6 +1011,85 @@ acr_r352_ls_gpccs_func = { .lhdr_flags = LSF_FLAG_FORCE_PRIV_LOAD, }; + + +/** + * struct acr_r352_pmu_bl_desc - PMU DMEM bootloader descriptor + * @dma_idx: DMA context to be used by BL while loading code/data + * @code_dma_base: 256B-aligned Physical FB Address where code is located + * @total_code_size: total size of the code part in the ucode + * @code_size_to_load: size of the code part to load in PMU IMEM. + * @code_entry_point: entry point in the code. + * @data_dma_base: Physical FB address where data part of ucode is located + * @data_size: Total size of the data portion. + * @overlay_dma_base: Physical Fb address for resident code present in ucode + * @argc: Total number of args + * @argv: offset where args are copied into PMU's DMEM. + * + * Structure used by the PMU bootloader to load the rest of the code + */ +struct acr_r352_pmu_bl_desc { + u32 dma_idx; + u32 code_dma_base; + u32 code_size_total; + u32 code_size_to_load; + u32 code_entry_point; + u32 data_dma_base; + u32 data_size; + u32 overlay_dma_base; + u32 argc; + u32 argv; + u16 code_dma_base1; + u16 data_dma_base1; + u16 overlay_dma_base1; +}; + +/** + * acr_r352_generate_pmu_bl_desc() - populate a DMEM BL descriptor for PMU LS image + * + */ +static void +acr_r352_generate_pmu_bl_desc(const struct nvkm_acr *acr, + const struct ls_ucode_img *img, u64 wpr_addr, + void *_desc) +{ + const struct ls_ucode_img_desc *pdesc = &img->ucode_desc; + const struct nvkm_pmu *pmu = acr->subdev->device->pmu; + struct acr_r352_pmu_bl_desc *desc = _desc; + u64 base; + u64 addr_code; + u64 addr_data; + u32 addr_args; + + base = wpr_addr + img->ucode_off + pdesc->app_start_offset; + addr_code = (base + pdesc->app_resident_code_offset) >> 8; + addr_data = (base + pdesc->app_resident_data_offset) >> 8; + addr_args = pmu->falcon->data.limit; + addr_args -= NVKM_MSGQUEUE_CMDLINE_SIZE; + + desc->dma_idx = FALCON_DMAIDX_UCODE; + desc->code_dma_base = lower_32_bits(addr_code); + desc->code_dma_base1 = upper_32_bits(addr_code); + desc->code_size_total = pdesc->app_size; + desc->code_size_to_load = pdesc->app_resident_code_size; + desc->code_entry_point = pdesc->app_imem_entry; + desc->data_dma_base = lower_32_bits(addr_data); + desc->data_dma_base1 = upper_32_bits(addr_data); + desc->data_size = pdesc->app_resident_data_size; + desc->overlay_dma_base = lower_32_bits(addr_code); + desc->overlay_dma_base1 = upper_32_bits(addr_code); + desc->argc = 1; + desc->argv = addr_args; +} + +static const struct acr_r352_ls_func +acr_r352_ls_pmu_func = { + .load = acr_ls_ucode_load_pmu, + .generate_bl_desc = acr_r352_generate_pmu_bl_desc, + .bl_desc_size = sizeof(struct acr_r352_pmu_bl_desc), + .post_run = acr_ls_pmu_post_run, +}; + const struct acr_r352_func acr_r352_func = { .fixup_hs_desc = acr_r352_fixup_hs_desc, @@ -1022,6 +1101,7 @@ acr_r352_func = { .ls_func = { [NVKM_SECBOOT_FALCON_FECS] = &acr_r352_ls_fecs_func, [NVKM_SECBOOT_FALCON_GPCCS] = &acr_r352_ls_gpccs_func, + [NVKM_SECBOOT_FALCON_PMU] = &acr_r352_ls_pmu_func, }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c index 8561037d4cb7..1ed23e77e4e9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c @@ -23,6 +23,8 @@ #include "acr_r352.h" #include +#include +#include /** * struct acr_r361_flcn_bl_desc - DMEM bootloader descriptor @@ -116,6 +118,57 @@ acr_r361_ls_gpccs_func = { .lhdr_flags = LSF_FLAG_FORCE_PRIV_LOAD, }; +struct acr_r361_pmu_bl_desc { + u32 reserved; + u32 dma_idx; + struct flcn_u64 code_dma_base; + u32 total_code_size; + u32 code_size_to_load; + u32 code_entry_point; + struct flcn_u64 data_dma_base; + u32 data_size; + struct flcn_u64 overlay_dma_base; + u32 argc; + u32 argv; +}; + +static void +acr_r361_generate_pmu_bl_desc(const struct nvkm_acr *acr, + const struct ls_ucode_img *img, u64 wpr_addr, + void *_desc) +{ + const struct ls_ucode_img_desc *pdesc = &img->ucode_desc; + const struct nvkm_pmu *pmu = acr->subdev->device->pmu; + struct acr_r361_pmu_bl_desc *desc = _desc; + u64 base, addr_code, addr_data; + u32 addr_args; + + base = wpr_addr + img->ucode_off + pdesc->app_start_offset; + addr_code = base + pdesc->app_resident_code_offset; + addr_data = base + pdesc->app_resident_data_offset; + addr_args = pmu->falcon->data.limit; + addr_args -= NVKM_MSGQUEUE_CMDLINE_SIZE; + + desc->dma_idx = FALCON_DMAIDX_UCODE; + desc->code_dma_base = u64_to_flcn64(addr_code); + desc->total_code_size = pdesc->app_size; + desc->code_size_to_load = pdesc->app_resident_code_size; + desc->code_entry_point = pdesc->app_imem_entry; + desc->data_dma_base = u64_to_flcn64(addr_data); + desc->data_size = pdesc->app_resident_data_size; + desc->overlay_dma_base = u64_to_flcn64(addr_code); + desc->argc = 1; + desc->argv = addr_args; +} + +const struct acr_r352_ls_func +acr_r361_ls_pmu_func = { + .load = acr_ls_ucode_load_pmu, + .generate_bl_desc = acr_r361_generate_pmu_bl_desc, + .bl_desc_size = sizeof(struct acr_r361_pmu_bl_desc), + .post_run = acr_ls_pmu_post_run, +}; + const struct acr_r352_func acr_r361_func = { .fixup_hs_desc = acr_r352_fixup_hs_desc, @@ -127,6 +180,7 @@ acr_r361_func = { .ls_func = { [NVKM_SECBOOT_FALCON_FECS] = &acr_r361_ls_fecs_func, [NVKM_SECBOOT_FALCON_GPCCS] = &acr_r361_ls_gpccs_func, + [NVKM_SECBOOT_FALCON_PMU] = &acr_r361_ls_pmu_func, }, }; From 7775d0dcb215ce0eb639940d201fb9f557823bd7 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 18 Nov 2016 17:47:08 +0900 Subject: [PATCH 0672/1299] drm/nouveau/secboot: support optional falcons PMU support has been enabled for r352 ACR, but it must remain optional if we want to preserve existing user-space that do not include it. Allow ACR to be instanciated with a list of optional LS falcons, that will not produce a fatal error if their firmware is not loaded. Also change the secure boot bootstrap logic to be able to fall back to legacy behavior if it turns out the boot falcon's LS firmware cannot be loaded. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/nvkm/subdev/secboot/acr.h | 2 + .../nouveau/nvkm/subdev/secboot/acr_r352.c | 62 +++++++++++-------- 2 files changed, 39 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h index 58eb20a69e9c..580036d623b2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h @@ -48,6 +48,7 @@ struct nvkm_acr_func { * * @boot_falcon: ID of the falcon that will perform secure boot * @managed_falcons: bitfield of falcons managed by this ACR + * @optional_falcons: bitfield of falcons we can live without * @start_address: virtual start address of the HS bootloader */ struct nvkm_acr { @@ -56,6 +57,7 @@ struct nvkm_acr { enum nvkm_secboot_falcon boot_falcon; unsigned long managed_falcons; + unsigned long optional_falcons; u32 start_address; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index a5d881438c1f..40c6568bfc31 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -503,6 +503,12 @@ acr_r352_prepare_ls_blob(struct acr_r352 *acr, u64 wpr_addr, u32 wpr_size) img = acr->func->ls_ucode_img_load(acr, falcon_id); if (IS_ERR(img)) { + if (acr->base.optional_falcons & BIT(falcon_id)) { + managed_falcons &= ~BIT(falcon_id); + nvkm_info(subdev, "skipping %s falcon...\n", + nvkm_secboot_falcon_name[falcon_id]); + continue; + } ret = PTR_ERR(img); goto cleanup; } @@ -511,6 +517,24 @@ acr_r352_prepare_ls_blob(struct acr_r352 *acr, u64 wpr_addr, u32 wpr_size) managed_count++; } + /* Commit the actual list of falcons we will manage from now on */ + acr->base.managed_falcons = managed_falcons; + + /* + * If the boot falcon has a firmare, let it manage the bootstrap of other + * falcons. + */ + if (acr->func->ls_func[acr->base.boot_falcon] && + (managed_falcons & BIT(acr->base.boot_falcon))) { + for_each_set_bit(falcon_id, &managed_falcons, + NVKM_SECBOOT_FALCON_END) { + if (falcon_id == acr->base.boot_falcon) + continue; + + acr->lazy_bootstrap |= BIT(falcon_id); + } + } + /* * Fill the WPR and LSF headers with the right offsets and compute * required WPR size @@ -948,20 +972,25 @@ acr_r352_reset(struct nvkm_acr *_acr, struct nvkm_secboot *sb, struct acr_r352 *acr = acr_r352(_acr); struct nvkm_pmu *pmu = sb->subdev.device->pmu; const char *fname = nvkm_secboot_falcon_name[falcon]; + bool wpr_already_set = sb->wpr_set; int ret; - /* Not self-managed? Redo secure boot entirely */ - if (!nvkm_secboot_is_managed(sb, _acr->boot_falcon)) - return acr_r352_reset_nopmu(acr, sb, falcon); - - /* - * Otherwise ensure secure boot is done, and command the PMU to reset - * the desired falcon. - */ + /* Make sure secure boot is performed */ ret = acr_r352_bootstrap(acr, sb); if (ret) return ret; + /* No PMU interface? */ + if (!nvkm_secboot_is_managed(sb, _acr->boot_falcon)) { + /* Redo secure boot entirely if it was already done */ + if (wpr_already_set) + return acr_r352_reset_nopmu(acr, sb, falcon); + /* Else return the result of the initial invokation */ + else + return ret; + } + + /* Otherwise just ask the PMU to reset the falcon */ nvkm_debug(&sb->subdev, "resetting %s falcon\n", fname); ret = nvkm_msgqueue_acr_boot_falcon(pmu->queue, falcon); if (ret) { @@ -1136,23 +1165,6 @@ acr_r352_new_(const struct acr_r352_func *func, acr->base.func = &acr_r352_base_func; acr->func = func; - /* - * If we have a PMU firmware, let it manage the bootstrap of other - * falcons. - */ - if (func->ls_func[NVKM_SECBOOT_FALCON_PMU] && - (managed_falcons & BIT(NVKM_SECBOOT_FALCON_PMU))) { - int i; - - for (i = 0; i < NVKM_SECBOOT_FALCON_END; i++) { - if (i == NVKM_SECBOOT_FALCON_PMU) - continue; - - if (func->ls_func[i]) - acr->lazy_bootstrap |= BIT(i); - } - } - return &acr->base; } From fc12745717392b5a4421a72c98cfce2c1f287d70 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 15 Nov 2016 16:26:09 +0900 Subject: [PATCH 0673/1299] drm/nouveau/secboot: check that WPR region is properly set The ACR firmware may return no error but fail nonetheless. Such cases can be detected by verifying that the WPR region has been properly set in FB. If this is not the case, this is an error, but the unload firmware should still not be run. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../nouveau/nvkm/subdev/secboot/acr_r352.c | 41 ++++++++++++++++++- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 40c6568bfc31..9e64b7c8b0b2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -876,6 +876,37 @@ acr_r352_shutdown(struct acr_r352 *acr, struct nvkm_secboot *sb) return 0; } +/** + * Check if the WPR region has been indeed set by the ACR firmware, and + * matches where it should be. + */ +static bool +acr_r352_wpr_is_set(const struct acr_r352 *acr, const struct nvkm_secboot *sb) +{ + const struct nvkm_subdev *subdev = &sb->subdev; + const struct nvkm_device *device = subdev->device; + u64 wpr_lo, wpr_hi; + u64 wpr_range_lo, wpr_range_hi; + + nvkm_wr32(device, 0x100cd4, 0x2); + wpr_lo = (nvkm_rd32(device, 0x100cd4) & ~0xff); + wpr_lo <<= 8; + nvkm_wr32(device, 0x100cd4, 0x3); + wpr_hi = (nvkm_rd32(device, 0x100cd4) & ~0xff); + wpr_hi <<= 8; + + if (sb->wpr_size != 0) { + wpr_range_lo = sb->wpr_addr; + wpr_range_hi = wpr_range_lo + sb->wpr_size; + } else { + wpr_range_lo = acr->ls_blob->addr; + wpr_range_hi = wpr_range_lo + acr->ls_blob->size; + } + + return (wpr_lo >= wpr_range_lo && wpr_lo < wpr_range_hi && + wpr_hi > wpr_range_lo && wpr_hi <= wpr_range_hi); +} + static int acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) { @@ -896,11 +927,17 @@ acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) ret = sb->func->run_blob(sb, acr->load_blob); /* clear halt interrupt */ nvkm_falcon_clear_interrupt(sb->boot_falcon, 0x10); + sb->wpr_set = acr_r352_wpr_is_set(acr, sb); if (ret) return ret; nvkm_debug(subdev, "HS load blob completed\n"); - - sb->wpr_set = true; + if (ret) + return ret; + /* WPR must be set at this point */ + if (!sb->wpr_set) { + nvkm_error(subdev, "ACR blob completed but WPR not set!\n"); + return -EINVAL; + } /* Run LS firmwares post_run hooks */ for_each_set_bit(falcon_id, &managed_falcons, NVKM_SECBOOT_FALCON_END) { From 937deb06d0604b87f44f1e45b9f6e15bb6fb316f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Fri, 10 Feb 2017 16:29:01 +0900 Subject: [PATCH 0674/1299] drm/nouveau/pmu/gm20b: add msgqueue support gm20b PMU firmware is driven by a msgqueue, so connect relevant PMU hooks to their msgqueue counterparts. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c index 0b8a1cc4a0ee..48ae02d45656 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c @@ -20,15 +20,30 @@ * DEALINGS IN THE SOFTWARE. */ +#include +#include #include "priv.h" +static void +gm20b_pmu_recv(struct nvkm_pmu *pmu) +{ + nvkm_msgqueue_recv(pmu->queue); +} + static const struct nvkm_pmu_func gm20b_pmu = { - .reset = gt215_pmu_reset, + .intr = gt215_pmu_intr, + .recv = gm20b_pmu_recv, }; int gm20b_pmu_new(struct nvkm_device *device, int index, struct nvkm_pmu **ppmu) { - return nvkm_pmu_new_(&gm20b_pmu, device, index, ppmu); + int ret; + + ret = nvkm_pmu_new_(&gm20b_pmu, device, index, ppmu); + if (ret) + return ret; + + return 0; } From 1106459e9f691e0655d3559734f2ff2084711474 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 26 Oct 2016 13:08:14 +0900 Subject: [PATCH 0675/1299] drm/nouveau/secboot/gm20b: enable PMU firmware Enable the PMU firmware in gm20b, managed by secure boot. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c index 6707b8edc086..29e6f73dfd7e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c @@ -107,9 +107,12 @@ gm20b_secboot_new(struct nvkm_device *device, int index, struct gm200_secboot *gsb; struct nvkm_acr *acr; - acr = acr_r352_new(BIT(NVKM_SECBOOT_FALCON_FECS)); + acr = acr_r352_new(BIT(NVKM_SECBOOT_FALCON_FECS) | + BIT(NVKM_SECBOOT_FALCON_PMU)); if (IS_ERR(acr)) return PTR_ERR(acr); + /* Support the initial GM20B firmware release without PMU */ + acr->optional_falcons = BIT(NVKM_SECBOOT_FALCON_PMU); gsb = kzalloc(sizeof(*gsb), GFP_KERNEL); if (!gsb) { @@ -137,3 +140,6 @@ MODULE_FIRMWARE("nvidia/gm20b/gr/sw_ctx.bin"); MODULE_FIRMWARE("nvidia/gm20b/gr/sw_nonctx.bin"); MODULE_FIRMWARE("nvidia/gm20b/gr/sw_bundle_init.bin"); MODULE_FIRMWARE("nvidia/gm20b/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/gm20b/pmu/desc.bin"); +MODULE_FIRMWARE("nvidia/gm20b/pmu/image.bin"); +MODULE_FIRMWARE("nvidia/gm20b/pmu/sig.bin"); From 65d9376b74ff651a8d4c6736d32a3bb3aa47bf09 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 26 Jan 2017 15:00:45 +0900 Subject: [PATCH 0676/1299] drm/nouveau/falcon: use NXTCTX register instead of NEW_INSTBLK Both registers allow to bind a new context, but NXTCTX will work on all falcons, while legacy NEW_INSTBLK is reserved to PMU. After setting NXTCTX we trigger a context switch by writing 0x090 and 0x0a4. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/falcon/v1.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c index 7085432dc861..c8283e2cc1f4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c @@ -149,9 +149,12 @@ nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx) /* Enable context */ nvkm_falcon_mask(falcon, 0x048, 0x1, 0x1); - nvkm_falcon_wr32(falcon, 0x480, + nvkm_falcon_wr32(falcon, 0x054, ((ctx->addr >> 12) & 0xfffffff) | (inst_loc << 28) | (1 << 30)); + + nvkm_falcon_mask(falcon, 0x090, 0x10000, 0x10000); + nvkm_falcon_mask(falcon, 0x0a4, 0x8, 0x8); } static void From 9e4397579fa4cd5cc411d47815eb805e337d0203 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 22 Feb 2017 20:48:30 +0900 Subject: [PATCH 0677/1299] drm/nouveau/falcon: delay construction of falcons to oneinit() Reading registers at device construction time can be harmful, as there is no guarantee the underlying engine will be up, or in its runtime configuration. Defer register reading to the oneinit() hook and update users accordingly. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/engine/falcon.h | 6 ++++ .../gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 29 +++++++++---------- drivers/gpu/drm/nouveau/nvkm/falcon/base.c | 3 ++ .../gpu/drm/nouveau/nvkm/subdev/pmu/base.c | 10 ++++++- 4 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h index 9384dff81494..b04d3a0e82e6 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h @@ -46,8 +46,14 @@ struct nvkm_falcon { struct nvkm_engine engine; }; +/* This constructor must be called from the owner's oneinit() hook and + * *not* its constructor. This is to ensure that DEVINIT has been + * completed, and that the device is correctly enabled before we touch + * falcon registers. + */ int nvkm_falcon_v1_new(struct nvkm_subdev *owner, const char *name, u32 addr, struct nvkm_falcon **); + void nvkm_falcon_del(struct nvkm_falcon **); int nvkm_falcon_get(struct nvkm_falcon *, const struct nvkm_subdev *); void nvkm_falcon_put(struct nvkm_falcon *, const struct nvkm_subdev *); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index f9acb8a944d2..a4410ef19db5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -1647,8 +1647,18 @@ static int gf100_gr_oneinit(struct nvkm_gr *base) { struct gf100_gr *gr = gf100_gr(base); - struct nvkm_device *device = gr->base.engine.subdev.device; + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; int i, j; + int ret; + + ret = nvkm_falcon_v1_new(subdev, "FECS", 0x409000, &gr->fecs); + if (ret) + return ret; + + ret = nvkm_falcon_v1_new(subdev, "GPCCS", 0x41a000, &gr->gpccs); + if (ret) + return ret; nvkm_pmu_pgob(device->pmu, false); @@ -1856,24 +1866,13 @@ int gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device, int index, struct gf100_gr *gr) { - struct nvkm_subdev *subdev = &gr->base.engine.subdev; - int ret; - gr->func = func; gr->firmware = nvkm_boolopt(device->cfgopt, "NvGrUseFW", func->fecs.ucode == NULL); - ret = nvkm_gr_ctor(&gf100_gr_, device, index, - gr->firmware || func->fecs.ucode != NULL, - &gr->base); - if (ret) - return ret; - - ret = nvkm_falcon_v1_new(subdev, "FECS", 0x409000, &gr->fecs); - if (ret) - return ret; - - return nvkm_falcon_v1_new(subdev, "GPCCS", 0x41a000, &gr->gpccs); + return nvkm_gr_ctor(&gf100_gr_, device, index, + gr->firmware || func->fecs.ucode != NULL, + &gr->base); } int diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c index ee25fdc21e1f..19b9d44ae1a9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c @@ -137,6 +137,9 @@ nvkm_falcon_clear_interrupt(struct nvkm_falcon *falcon, u32 mask) void nvkm_falcon_put(struct nvkm_falcon *falcon, const struct nvkm_subdev *user) { + if (unlikely(!falcon)) + return; + mutex_lock(&falcon->mutex); if (falcon->user == user) { nvkm_debug(falcon->user, "released %s falcon\n", falcon->name); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c index 1871a92e8f2f..3306f9fe7140 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c @@ -115,6 +115,13 @@ nvkm_pmu_init(struct nvkm_subdev *subdev) return ret; } +static int +nvkm_pmu_oneinit(struct nvkm_subdev *subdev) +{ + struct nvkm_pmu *pmu = nvkm_pmu(subdev); + return nvkm_falcon_v1_new(&pmu->subdev, "PMU", 0x10a000, &pmu->falcon); +} + static void * nvkm_pmu_dtor(struct nvkm_subdev *subdev) { @@ -128,6 +135,7 @@ static const struct nvkm_subdev_func nvkm_pmu = { .dtor = nvkm_pmu_dtor, .preinit = nvkm_pmu_preinit, + .oneinit = nvkm_pmu_oneinit, .init = nvkm_pmu_init, .fini = nvkm_pmu_fini, .intr = nvkm_pmu_intr, @@ -141,7 +149,7 @@ nvkm_pmu_ctor(const struct nvkm_pmu_func *func, struct nvkm_device *device, pmu->func = func; INIT_WORK(&pmu->recv.work, nvkm_pmu_recv); init_waitqueue_head(&pmu->recv.wait); - return nvkm_falcon_v1_new(&pmu->subdev, "PMU", 0x10a000, &pmu->falcon); + return 0; } int From 16307b5d720af3fec8d2153a68e23066e20c481f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 26 Jan 2017 15:16:32 +0900 Subject: [PATCH 0678/1299] drm/nouveau/nvdec: add gp102 support gp10x' secure boot requires a blob to be run on NVDEC. Expose the falcon through a dummy device. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/core/device.h | 4 +- .../drm/nouveau/include/nvkm/engine/nvdec.h | 8 +++ .../gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- .../gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild | 3 +- .../gpu/drm/nouveau/nvkm/engine/nvdec/base.c | 59 +++++++++++++++++++ .../gpu/drm/nouveau/nvkm/engine/nvdec/gp102.c | 30 ++++++++++ .../gpu/drm/nouveau/nvkm/engine/nvdec/priv.h | 6 ++ 7 files changed, 108 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gp102.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index d426b86e2712..6d6e7e71067b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -155,7 +155,7 @@ struct nvkm_device { struct nvkm_engine *msppp; struct nvkm_engine *msvld; struct nvkm_engine *nvenc[3]; - struct nvkm_engine *nvdec; + struct nvkm_nvdec *nvdec; struct nvkm_pm *pm; struct nvkm_engine *sec; struct nvkm_sw *sw; @@ -225,7 +225,7 @@ struct nvkm_device_chip { int (*msppp )(struct nvkm_device *, int idx, struct nvkm_engine **); int (*msvld )(struct nvkm_device *, int idx, struct nvkm_engine **); int (*nvenc[3])(struct nvkm_device *, int idx, struct nvkm_engine **); - int (*nvdec )(struct nvkm_device *, int idx, struct nvkm_engine **); + int (*nvdec )(struct nvkm_device *, int idx, struct nvkm_nvdec **); int (*pm )(struct nvkm_device *, int idx, struct nvkm_pm **); int (*sec )(struct nvkm_device *, int idx, struct nvkm_engine **); int (*sw )(struct nvkm_device *, int idx, struct nvkm_sw **); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h index 30b76d13fdcb..00b2b227ff41 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h @@ -1,4 +1,12 @@ #ifndef __NVKM_NVDEC_H__ #define __NVKM_NVDEC_H__ +#define nvkm_nvdec(p) container_of((p), struct nvkm_nvdec, engine) #include + +struct nvkm_nvdec { + struct nvkm_engine engine; + struct nvkm_falcon *falcon; +}; + +int gp102_nvdec_new(struct nvkm_device *, int, struct nvkm_nvdec **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 273562dd6bbd..883e6e474d1b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2362,7 +2362,7 @@ nvkm_device_engine(struct nvkm_device *device, int index) _(NVENC0 , device->nvenc[0], device->nvenc[0]); _(NVENC1 , device->nvenc[1], device->nvenc[1]); _(NVENC2 , device->nvenc[2], device->nvenc[2]); - _(NVDEC , device->nvdec , device->nvdec); + _(NVDEC , device->nvdec , &device->nvdec->engine); _(PM , device->pm , &device->pm->engine); _(SEC , device->sec , device->sec); _(SW , device->sw , &device->sw->engine); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild index 13b7c71ff900..98477beb823a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild @@ -1 +1,2 @@ -#nvkm-y += nvkm/engine/nvdec/base.o +nvkm-y += nvkm/engine/nvdec/base.o +nvkm-y += nvkm/engine/nvdec/gp102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c new file mode 100644 index 000000000000..4807021fd990 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +#include + +static int +nvkm_nvdec_oneinit(struct nvkm_engine *engine) +{ + struct nvkm_nvdec *nvdec = nvkm_nvdec(engine); + return nvkm_falcon_v1_new(&nvdec->engine.subdev, "NVDEC", 0x84000, + &nvdec->falcon); +} + +static void * +nvkm_nvdec_dtor(struct nvkm_engine *engine) +{ + struct nvkm_nvdec *nvdec = nvkm_nvdec(engine); + nvkm_falcon_del(&nvdec->falcon); + return nvdec; +} + +static const struct nvkm_engine_func +nvkm_nvdec = { + .dtor = nvkm_nvdec_dtor, + .oneinit = nvkm_nvdec_oneinit, +}; + +int +nvkm_nvdec_new_(struct nvkm_device *device, int index, + struct nvkm_nvdec **pnvdec) +{ + struct nvkm_nvdec *nvdec; + + if (!(nvdec = *pnvdec = kzalloc(sizeof(*nvdec), GFP_KERNEL))) + return -ENOMEM; + + return nvkm_engine_ctor(&nvkm_nvdec, device, index, true, + &nvdec->engine); +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gp102.c new file mode 100644 index 000000000000..fde6328c6d71 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gp102.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "priv.h" + +int +gp102_nvdec_new(struct nvkm_device *device, int index, + struct nvkm_nvdec **pnvdec) +{ + return nvkm_nvdec_new_(device, index, pnvdec); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h new file mode 100644 index 000000000000..353b94f51205 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h @@ -0,0 +1,6 @@ +#ifndef __NVKM_NVDEC_PRIV_H__ +#define __NVKM_NVDEC_PRIV_H__ +#include + +int nvkm_nvdec_new_(struct nvkm_device *, int, struct nvkm_nvdec **); +#endif From b62880f7966781eb0b3b1e50f63d6aac362476d3 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 23 Feb 2017 18:41:41 +0900 Subject: [PATCH 0679/1299] drm/nouveau/core: add SEC2 engine SEC2 is the name given by NVIDIA to the SEC engine post-Fermi (reasons unknown). Even though it shares the same address range as SEC, its usage is quite different and this justifies a new engine. Add this engine and make TOP use it all post-TOP devices should use this implementation and not the older SEC. Also quickly add the short gp102 implementation which will be used for falcon booting purposes. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/core/device.h | 3 + .../drm/nouveau/include/nvkm/engine/sec2.h | 13 +++ drivers/gpu/drm/nouveau/nvkm/core/subdev.c | 1 + drivers/gpu/drm/nouveau/nvkm/engine/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 2 + .../gpu/drm/nouveau/nvkm/engine/device/priv.h | 1 + .../gpu/drm/nouveau/nvkm/engine/sec2/Kbuild | 2 + .../gpu/drm/nouveau/nvkm/engine/sec2/base.c | 101 ++++++++++++++++++ .../gpu/drm/nouveau/nvkm/engine/sec2/gp102.c | 30 ++++++ .../gpu/drm/nouveau/nvkm/engine/sec2/priv.h | 9 ++ .../gpu/drm/nouveau/nvkm/subdev/top/gk104.c | 2 +- 11 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index 6d6e7e71067b..bb4c214f1046 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -59,6 +59,7 @@ enum nvkm_devidx { NVKM_ENGINE_NVDEC, NVKM_ENGINE_PM, NVKM_ENGINE_SEC, + NVKM_ENGINE_SEC2, NVKM_ENGINE_SW, NVKM_ENGINE_VIC, NVKM_ENGINE_VP, @@ -158,6 +159,7 @@ struct nvkm_device { struct nvkm_nvdec *nvdec; struct nvkm_pm *pm; struct nvkm_engine *sec; + struct nvkm_sec2 *sec2; struct nvkm_sw *sw; struct nvkm_engine *vic; struct nvkm_engine *vp; @@ -228,6 +230,7 @@ struct nvkm_device_chip { int (*nvdec )(struct nvkm_device *, int idx, struct nvkm_nvdec **); int (*pm )(struct nvkm_device *, int idx, struct nvkm_pm **); int (*sec )(struct nvkm_device *, int idx, struct nvkm_engine **); + int (*sec2 )(struct nvkm_device *, int idx, struct nvkm_sec2 **); int (*sw )(struct nvkm_device *, int idx, struct nvkm_sw **); int (*vic )(struct nvkm_device *, int idx, struct nvkm_engine **); int (*vp )(struct nvkm_device *, int idx, struct nvkm_engine **); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h new file mode 100644 index 000000000000..d3db1b1e75c4 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h @@ -0,0 +1,13 @@ +#ifndef __NVKM_SEC2_H__ +#define __NVKM_SEC2_H__ +#include + +struct nvkm_sec2 { + struct nvkm_engine engine; + struct nvkm_falcon *falcon; + struct nvkm_msgqueue *queue; + struct work_struct work; +}; + +int gp102_sec2_new(struct nvkm_device *, int, struct nvkm_sec2 **); +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c index 19044aba265e..a134d225f958 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c @@ -78,6 +78,7 @@ nvkm_subdev_name[NVKM_SUBDEV_NR] = { [NVKM_ENGINE_NVDEC ] = "nvdec", [NVKM_ENGINE_PM ] = "pm", [NVKM_ENGINE_SEC ] = "sec", + [NVKM_ENGINE_SEC2 ] = "sec2", [NVKM_ENGINE_SW ] = "sw", [NVKM_ENGINE_VIC ] = "vic", [NVKM_ENGINE_VP ] = "vp", diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/Kbuild index c2c8d2ac01b8..78571e8b01c5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/Kbuild @@ -18,6 +18,7 @@ include $(src)/nvkm/engine/nvenc/Kbuild include $(src)/nvkm/engine/nvdec/Kbuild include $(src)/nvkm/engine/pm/Kbuild include $(src)/nvkm/engine/sec/Kbuild +include $(src)/nvkm/engine/sec2/Kbuild include $(src)/nvkm/engine/sw/Kbuild include $(src)/nvkm/engine/vic/Kbuild include $(src)/nvkm/engine/vp/Kbuild diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index 883e6e474d1b..cf76d6c657ba 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2365,6 +2365,7 @@ nvkm_device_engine(struct nvkm_device *device, int index) _(NVDEC , device->nvdec , &device->nvdec->engine); _(PM , device->pm , &device->pm->engine); _(SEC , device->sec , device->sec); + _(SEC2 , device->sec2 , &device->sec2->engine); _(SW , device->sw , &device->sw->engine); _(VIC , device->vic , device->vic); _(VP , device->vp , device->vp); @@ -2812,6 +2813,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, _(NVKM_ENGINE_NVDEC , nvdec); _(NVKM_ENGINE_PM , pm); _(NVKM_ENGINE_SEC , sec); + _(NVKM_ENGINE_SEC2 , sec2); _(NVKM_ENGINE_SW , sw); _(NVKM_ENGINE_VIC , vic); _(NVKM_ENGINE_VP , vp); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h index 1a06ac175f55..6c16f3835f44 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild new file mode 100644 index 000000000000..4b17254cfbd0 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild @@ -0,0 +1,2 @@ +nvkm-y += nvkm/engine/sec2/base.o +nvkm-y += nvkm/engine/sec2/gp102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c new file mode 100644 index 000000000000..814daf35e21f --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +#include +#include + +static void * +nvkm_sec2_dtor(struct nvkm_engine *engine) +{ + struct nvkm_sec2 *sec2 = nvkm_sec2(engine); + nvkm_msgqueue_del(&sec2->queue); + nvkm_falcon_del(&sec2->falcon); + return sec2; +} + +static void +nvkm_sec2_intr(struct nvkm_engine *engine) +{ + struct nvkm_sec2 *sec2 = nvkm_sec2(engine); + struct nvkm_subdev *subdev = &engine->subdev; + struct nvkm_device *device = subdev->device; + u32 disp = nvkm_rd32(device, 0x8701c); + u32 intr = nvkm_rd32(device, 0x87008) & disp & ~(disp >> 16); + + if (intr & 0x00000040) { + schedule_work(&sec2->work); + nvkm_wr32(device, 0x87004, 0x00000040); + intr &= ~0x00000040; + } + + if (intr) { + nvkm_error(subdev, "unhandled intr %08x\n", intr); + nvkm_wr32(device, 0x87004, intr); + + } +} + +static void +nvkm_sec2_recv(struct work_struct *work) +{ + struct nvkm_sec2 *sec2 = container_of(work, typeof(*sec2), work); + nvkm_msgqueue_recv(sec2->queue); +} + + +static int +nvkm_sec2_oneinit(struct nvkm_engine *engine) +{ + struct nvkm_sec2 *sec2 = nvkm_sec2(engine); + return nvkm_falcon_v1_new(&sec2->engine.subdev, "SEC2", 0x87000, + &sec2->falcon); +} + +static int +nvkm_sec2_fini(struct nvkm_engine *engine, bool suspend) +{ + struct nvkm_sec2 *sec2 = nvkm_sec2(engine); + flush_work(&sec2->work); + return 0; +} + +static const struct nvkm_engine_func +nvkm_sec2 = { + .dtor = nvkm_sec2_dtor, + .oneinit = nvkm_sec2_oneinit, + .fini = nvkm_sec2_fini, + .intr = nvkm_sec2_intr, +}; + +int +nvkm_sec2_new_(struct nvkm_device *device, int index, + struct nvkm_sec2 **psec2) +{ + struct nvkm_sec2 *sec2; + + if (!(sec2 = *psec2 = kzalloc(sizeof(*sec2), GFP_KERNEL))) + return -ENOMEM; + INIT_WORK(&sec2->work, nvkm_sec2_recv); + + return nvkm_engine_ctor(&nvkm_sec2, device, index, true, &sec2->engine); +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c new file mode 100644 index 000000000000..9be1524c08f5 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "priv.h" + +int +gp102_sec2_new(struct nvkm_device *device, int index, + struct nvkm_sec2 **psec2) +{ + return nvkm_sec2_new_(device, index, psec2); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h new file mode 100644 index 000000000000..7ecc9d4724dc --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h @@ -0,0 +1,9 @@ +#ifndef __NVKM_SEC2_PRIV_H__ +#define __NVKM_SEC2_PRIV_H__ +#include + +#define nvkm_sec2(p) container_of((p), struct nvkm_sec2, engine) + +int nvkm_sec2_new_(struct nvkm_device *, int, struct nvkm_sec2 **); + +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c index efac3402f9dd..fea4957291da 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c @@ -82,7 +82,7 @@ gk104_top_oneinit(struct nvkm_top *top) case 0x0000000a: A_(MSVLD ); break; case 0x0000000b: A_(MSENC ); break; case 0x0000000c: A_(VIC ); break; - case 0x0000000d: A_(SEC ); break; + case 0x0000000d: A_(SEC2 ); break; case 0x0000000e: B_(NVENC ); break; case 0x0000000f: A_(NVENC1); break; case 0x00000010: A_(NVDEC ); break; From ad147b7f57547a5597ed338f2c46f03809d7792e Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Wed, 22 Feb 2017 20:50:09 +0900 Subject: [PATCH 0680/1299] drm/nouveau/falcon: better detection of debug register Not all falcons have a debug register, and it is not always found at the same offset. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/falcon/base.c | 27 ++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c index 19b9d44ae1a9..a4c6ee6ffde6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c @@ -170,6 +170,7 @@ nvkm_falcon_ctor(const struct nvkm_falcon_func *func, struct nvkm_subdev *subdev, const char *name, u32 addr, struct nvkm_falcon *falcon) { + u32 debug_reg; u32 reg; falcon->func = func; @@ -189,8 +190,30 @@ nvkm_falcon_ctor(const struct nvkm_falcon_func *func, falcon->code.limit = (reg & 0x1ff) << 8; falcon->data.limit = (reg & 0x3fe00) >> 1; - reg = nvkm_falcon_rd32(falcon, 0xc08); - falcon->debug = (reg >> 20) & 0x1; + switch (subdev->index) { + case NVKM_ENGINE_GR: + debug_reg = 0x0; + break; + case NVKM_SUBDEV_PMU: + debug_reg = 0xc08; + break; + case NVKM_ENGINE_NVDEC: + debug_reg = 0xd00; + break; + case NVKM_ENGINE_SEC2: + debug_reg = 0x408; + break; + default: + nvkm_warn(subdev, "unsupported falcon %s!\n", + nvkm_subdev_name[subdev->index]); + debug_reg = 0; + break; + } + + if (debug_reg) { + u32 val = nvkm_falcon_rd32(falcon, debug_reg); + falcon->debug = (val >> 20) & 0x1; + } } void From cfd044b02873b02236bcd93ff398504d489ddc13 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 26 Jan 2017 16:49:43 +0900 Subject: [PATCH 0681/1299] drm/nouveau/falcon: fix base address of FBIF registers All falcons have their FBIF registers starting at offset 0x600, with the exception of the PMU and NVENC engines. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/falcon/v1.c | 25 +++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c index c8283e2cc1f4..2a3c8bfb084a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c @@ -120,6 +120,7 @@ static void nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx) { u32 inst_loc; + u32 fbif; /* disable instance block binding */ if (ctx == NULL) { @@ -127,15 +128,29 @@ nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx) return; } + switch (falcon->owner->index) { + case NVKM_ENGINE_NVENC0: + case NVKM_ENGINE_NVENC1: + case NVKM_ENGINE_NVENC2: + fbif = 0x800; + break; + case NVKM_SUBDEV_PMU: + fbif = 0xe00; + break; + default: + fbif = 0x600; + break; + } + nvkm_falcon_wr32(falcon, 0x10c, 0x1); /* setup apertures - virtual */ - nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_UCODE, 0x4); - nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_VIRT, 0x0); + nvkm_falcon_wr32(falcon, fbif + 4 * FALCON_DMAIDX_UCODE, 0x4); + nvkm_falcon_wr32(falcon, fbif + 4 * FALCON_DMAIDX_VIRT, 0x0); /* setup apertures - physical */ - nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_VID, 0x4); - nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_SYS_COH, 0x5); - nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_SYS_NCOH, 0x6); + nvkm_falcon_wr32(falcon, fbif + 4 * FALCON_DMAIDX_PHYS_VID, 0x4); + nvkm_falcon_wr32(falcon, fbif + 4 * FALCON_DMAIDX_PHYS_SYS_COH, 0x5); + nvkm_falcon_wr32(falcon, fbif + 4 * FALCON_DMAIDX_PHYS_SYS_NCOH, 0x6); /* Set context */ switch (nvkm_memory_target(ctx->memory)) { From 6ac2cc209e0096dabc8e902a8620d45f41b9fc0b Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 14 Feb 2017 15:56:10 +0900 Subject: [PATCH 0682/1299] drm/nouveau/falcon: support for EMEM On SEC, DMEM is unaccessible by the CPU when the falcon is running in LS mode. This makes communication with the firmware using DMEM impossible. For this purpose, a new kind of memory (EMEM) has been added. It works similarly to DMEM, with the difference that its address space starts at 0x1000000. For this reason, it makes sense to treat it like a special case of DMEM. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/engine/falcon.h | 1 + drivers/gpu/drm/nouveau/nvkm/falcon/base.c | 1 + drivers/gpu/drm/nouveau/nvkm/falcon/v1.c | 63 +++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h index b04d3a0e82e6..9f84ef24a8b3 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h @@ -25,6 +25,7 @@ struct nvkm_falcon { u8 version; u8 secret; bool debug; + bool has_emem; struct nvkm_memory *core; bool external; diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c index a4c6ee6ffde6..1b7f48efd8b1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c @@ -202,6 +202,7 @@ nvkm_falcon_ctor(const struct nvkm_falcon_func *func, break; case NVKM_ENGINE_SEC2: debug_reg = 0x408; + falcon->has_emem = true; break; default: nvkm_warn(subdev, "unsupported falcon %s!\n", diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c index 2a3c8bfb084a..669c24028470 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c @@ -64,6 +64,33 @@ nvkm_falcon_v1_load_imem(struct nvkm_falcon *falcon, void *data, u32 start, nvkm_falcon_wr32(falcon, 0x184 + (port * 16), 0); } +static void +nvkm_falcon_v1_load_emem(struct nvkm_falcon *falcon, void *data, u32 start, + u32 size, u8 port) +{ + u8 rem = size % 4; + int i; + + size -= rem; + + nvkm_falcon_wr32(falcon, 0xac0 + (port * 8), start | (0x1 << 24)); + for (i = 0; i < size / 4; i++) + nvkm_falcon_wr32(falcon, 0xac4 + (port * 8), ((u32 *)data)[i]); + + /* + * If size is not a multiple of 4, mask the last word to ensure garbage + * does not get written + */ + if (rem) { + u32 extra = ((u32 *)data)[i]; + + nvkm_falcon_wr32(falcon, 0xac4 + (port * 8), + extra & (BIT(rem * 8) - 1)); + } +} + +static const u32 EMEM_START_ADDR = 0x1000000; + static void nvkm_falcon_v1_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start, u32 size, u8 port) @@ -71,6 +98,11 @@ nvkm_falcon_v1_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start, u8 rem = size % 4; int i; + if (start >= EMEM_START_ADDR && falcon->has_emem) + return nvkm_falcon_v1_load_emem(falcon, data, + start - EMEM_START_ADDR, size, + port); + size -= rem; nvkm_falcon_wr32(falcon, 0x1c0 + (port * 8), start | (0x1 << 24)); @@ -89,6 +121,33 @@ nvkm_falcon_v1_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start, } } +static void +nvkm_falcon_v1_read_emem(struct nvkm_falcon *falcon, u32 start, u32 size, + u8 port, void *data) +{ + u8 rem = size % 4; + int i; + + size -= rem; + + nvkm_falcon_wr32(falcon, 0xac0 + (port * 8), start | (0x1 << 25)); + for (i = 0; i < size / 4; i++) + ((u32 *)data)[i] = nvkm_falcon_rd32(falcon, 0xac4 + (port * 8)); + + /* + * If size is not a multiple of 4, mask the last word to ensure garbage + * does not get read + */ + if (rem) { + u32 extra = nvkm_falcon_rd32(falcon, 0xac4 + (port * 8)); + + for (i = size; i < size + rem; i++) { + ((u8 *)data)[i] = (u8)(extra & 0xff); + extra >>= 8; + } + } +} + static void nvkm_falcon_v1_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, u8 port, void *data) @@ -96,6 +155,10 @@ nvkm_falcon_v1_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, u8 rem = size % 4; int i; + if (start >= EMEM_START_ADDR && falcon->has_emem) + return nvkm_falcon_v1_read_emem(falcon, start - EMEM_START_ADDR, + size, port, data); + size -= rem; nvkm_falcon_wr32(falcon, 0x1c0 + (port * 8), start | (0x1 << 25)); From 9706d8f9d1ebc499192f8974abba03acdcf1094a Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 23 Feb 2017 18:14:30 +0900 Subject: [PATCH 0683/1299] drm/nouveau/falcon/msgqueue: add SEC2 support Add support for running a msgqueue on the SEC2 falcon. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c index 8583fe1c2183..59ed1c45bd65 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c @@ -396,6 +396,9 @@ msgqueue_handle_init_msg(struct nvkm_msgqueue *priv, case NVKM_SUBDEV_PMU: tail_reg = 0x4cc; break; + case NVKM_ENGINE_SEC2: + tail_reg = 0xa34; + break; default: nvkm_error(subdev, "falcon %s unsupported for msgqueue!\n", nvkm_subdev_name[falcon->owner->index]); From e9462417f1fedec381dd148cbc1d29d21bec4500 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 15 Nov 2016 16:28:37 +0900 Subject: [PATCH 0684/1299] drm/nouveau/secboot: add shadow blob argument ACR firmware from r364 on need a shadow region for the ACR to copy the WPR region into. Add a flag to indicate that a shadow region is required and manage memory allocations accordingly. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/nvkm/subdev/secboot/acr_r352.c | 16 ++++++++++++++-- .../drm/nouveau/nvkm/subdev/secboot/acr_r352.h | 1 + 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 9e64b7c8b0b2..814f9f2a17ef 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -491,7 +491,7 @@ acr_r352_prepare_ls_blob(struct acr_r352 *acr, u64 wpr_addr, u32 wpr_size) struct ls_ucode_img *img, *t; unsigned long managed_falcons = acr->base.managed_falcons; int managed_count = 0; - u32 image_wpr_size; + u32 image_wpr_size, ls_blob_size; int falcon_id; int ret; @@ -542,8 +542,17 @@ acr_r352_prepare_ls_blob(struct acr_r352 *acr, u64 wpr_addr, u32 wpr_size) image_wpr_size = acr->func->ls_fill_headers(acr, &imgs); image_wpr_size = ALIGN(image_wpr_size, WPR_ALIGNMENT); + ls_blob_size = image_wpr_size; + + /* + * If we need a shadow area, allocate twice the size and use the + * upper half as WPR + */ + if (wpr_size == 0 && acr->func->shadow_blob) + ls_blob_size *= 2; + /* Allocate GPU object that will contain the WPR region */ - ret = nvkm_gpuobj_new(subdev->device, image_wpr_size, WPR_ALIGNMENT, + ret = nvkm_gpuobj_new(subdev->device, ls_blob_size, WPR_ALIGNMENT, false, NULL, &acr->ls_blob); if (ret) goto cleanup; @@ -554,6 +563,9 @@ acr_r352_prepare_ls_blob(struct acr_r352 *acr, u64 wpr_addr, u32 wpr_size) /* If WPR address and size are not fixed, set them to fit the LS blob */ if (wpr_size == 0) { wpr_addr = acr->ls_blob->addr; + if (acr->func->shadow_blob) + wpr_addr += acr->ls_blob->size / 2; + wpr_size = image_wpr_size; /* * But if the WPR region is set by the bootloader, it is illegal for diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h index 9ca23c1e3aa9..1b9f5c825c8a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h @@ -104,6 +104,7 @@ struct acr_r352_func { u64); void (*fixup_hs_desc)(struct acr_r352 *, struct nvkm_secboot *, void *); u32 hs_bl_desc_size; + bool shadow_blob; struct ls_ucode_img *(*ls_ucode_img_load)(const struct acr_r352 *, enum nvkm_secboot_falcon); From c3433603cab756bcad0d8629c291f43b4f591b7d Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 26 Jan 2017 16:04:14 +0900 Subject: [PATCH 0685/1299] drm/nouveau/secboot: get start address of blob from ACR The start address used for secure blobs is not unique to the ACR, but rather blob-dependent. Remove the unique member stored in the ACR structure and make the load function return the start address for the current blob instead. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h | 2 -- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c | 10 ++++------ drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c | 7 +++++-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h index 580036d623b2..9738340d33a1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h @@ -49,7 +49,6 @@ struct nvkm_acr_func { * @boot_falcon: ID of the falcon that will perform secure boot * @managed_falcons: bitfield of falcons managed by this ACR * @optional_falcons: bitfield of falcons we can live without - * @start_address: virtual start address of the HS bootloader */ struct nvkm_acr { const struct nvkm_acr_func *func; @@ -58,7 +57,6 @@ struct nvkm_acr { enum nvkm_secboot_falcon boot_falcon; unsigned long managed_falcons; unsigned long optional_falcons; - u32 start_address; }; void *nvkm_acr_load_firmware(const struct nvkm_subdev *, const char *, size_t); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 814f9f2a17ef..83395147e855 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -754,9 +754,6 @@ acr_r352_prepare_hsbl_blob(struct acr_r352 *acr) hdr = acr->hsbl_blob; hsbl_desc = acr->hsbl_blob + hdr->header_offset; - /* virtual start address for boot vector */ - acr->base.start_address = hsbl_desc->start_tag << 8; - return 0; } @@ -813,8 +810,9 @@ acr_r352_load_blobs(struct acr_r352 *acr, struct nvkm_secboot *sb) } /** - * acr_r352_load() - prepare HS falcon to run the specified blob, mapped - * at GPU address offset. + * acr_r352_load() - prepare HS falcon to run the specified blob, mapped. + * + * Returns the start address to use, or a negative error value. */ static int acr_r352_load(struct nvkm_acr *_acr, struct nvkm_secboot *sb, @@ -861,7 +859,7 @@ acr_r352_load(struct nvkm_acr *_acr, struct nvkm_secboot *sb, nvkm_falcon_load_dmem(falcon, bl_desc, hsbl_desc->dmem_load_off, bl_desc_size, 0); - return 0; + return hsbl_desc->start_tag << 8; } static int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c index 813c4eb0b25f..b21e91778f86 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c @@ -40,6 +40,7 @@ gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob) struct nvkm_subdev *subdev = &gsb->base.subdev; struct nvkm_falcon *falcon = gsb->base.boot_falcon; struct nvkm_vma vma; + u32 start_address; int ret; ret = nvkm_falcon_get(falcon, subdev); @@ -61,9 +62,11 @@ gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob) /* Load the HS bootloader into the falcon's IMEM/DMEM */ ret = sb->acr->func->load(sb->acr, &gsb->base, blob, vma.offset); - if (ret) + if (ret < 0) goto end; + start_address = ret; + /* Disable interrupts as we will poll for the HALT bit */ nvkm_mc_intr_mask(sb->subdev.device, falcon->owner->index, false); @@ -71,7 +74,7 @@ gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob) nvkm_falcon_wr32(falcon, 0x040, 0xdeada5a5); /* Start the HS bootloader */ - nvkm_falcon_set_start_addr(falcon, sb->acr->start_address); + nvkm_falcon_set_start_addr(falcon, start_address); nvkm_falcon_start(falcon); ret = nvkm_falcon_wait_for_halt(falcon, 100); if (ret) From 48387f0ca5493258add078de7f5520756ddc510a Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 26 Jan 2017 16:56:45 +0900 Subject: [PATCH 0686/1299] drm/nouveau/secboot: support running ACR on SEC Add support for running the ACR binary on the SEC falcon. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/subdev/secboot.h | 3 ++- .../drm/nouveau/nvkm/subdev/secboot/acr_r352.c | 18 +++++++++++++++--- .../gpu/drm/nouveau/nvkm/subdev/secboot/base.c | 8 ++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h index d03a1b086723..eb46fbf26df6 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h @@ -30,7 +30,8 @@ enum nvkm_secboot_falcon { NVKM_SECBOOT_FALCON_RESERVED = 1, NVKM_SECBOOT_FALCON_FECS = 2, NVKM_SECBOOT_FALCON_GPCCS = 3, - NVKM_SECBOOT_FALCON_END = 4, + NVKM_SECBOOT_FALCON_SEC2 = 7, + NVKM_SECBOOT_FALCON_END = 8, NVKM_SECBOOT_FALCON_INVALID = 0xffffffff, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 83395147e855..0b563bc9daf8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -28,6 +28,7 @@ #include #include #include +#include /** * struct hsf_fw_header - HS firmware descriptor @@ -1017,7 +1018,7 @@ acr_r352_reset(struct nvkm_acr *_acr, struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon) { struct acr_r352 *acr = acr_r352(_acr); - struct nvkm_pmu *pmu = sb->subdev.device->pmu; + struct nvkm_msgqueue *queue; const char *fname = nvkm_secboot_falcon_name[falcon]; bool wpr_already_set = sb->wpr_set; int ret; @@ -1037,9 +1038,20 @@ acr_r352_reset(struct nvkm_acr *_acr, struct nvkm_secboot *sb, return ret; } - /* Otherwise just ask the PMU to reset the falcon */ + switch (_acr->boot_falcon) { + case NVKM_SECBOOT_FALCON_PMU: + queue = sb->subdev.device->pmu->queue; + break; + case NVKM_SECBOOT_FALCON_SEC2: + queue = sb->subdev.device->sec2->queue; + break; + default: + return -EINVAL; + } + + /* Otherwise just ask the LS firmware to reset the falcon */ nvkm_debug(&sb->subdev, "resetting %s falcon\n", fname); - ret = nvkm_msgqueue_acr_boot_falcon(pmu->queue, falcon); + ret = nvkm_msgqueue_acr_boot_falcon(queue, falcon); if (ret) { nvkm_error(&sb->subdev, "cannot boot %s falcon\n", fname); return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c index 27c9dfffb9a6..faf94a457670 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c @@ -87,6 +87,7 @@ #include #include #include +#include const char * nvkm_secboot_falcon_name[] = { @@ -94,6 +95,7 @@ nvkm_secboot_falcon_name[] = { [NVKM_SECBOOT_FALCON_RESERVED] = "", [NVKM_SECBOOT_FALCON_FECS] = "FECS", [NVKM_SECBOOT_FALCON_GPCCS] = "GPCCS", + [NVKM_SECBOOT_FALCON_SEC2] = "SEC2", [NVKM_SECBOOT_FALCON_END] = "", }; /** @@ -133,11 +135,17 @@ nvkm_secboot_oneinit(struct nvkm_subdev *subdev) case NVKM_SECBOOT_FALCON_PMU: sb->boot_falcon = subdev->device->pmu->falcon; break; + case NVKM_SECBOOT_FALCON_SEC2: + /* we must keep SEC2 alive forever since ACR will run on it */ + nvkm_engine_ref(&subdev->device->sec2->engine); + sb->boot_falcon = subdev->device->sec2->falcon; + break; default: nvkm_error(subdev, "Unmanaged boot falcon %s!\n", nvkm_secboot_falcon_name[sb->acr->boot_falcon]); return -EINVAL; } + nvkm_debug(subdev, "using %s falcon for ACR\n", sb->boot_falcon->name); /* Call chip-specific init function */ if (sb->func->oneinit) From 114223aa1a7f9926960d2c3d6b2b51317df1aa9e Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 26 Jan 2017 16:57:24 +0900 Subject: [PATCH 0687/1299] drm/nouveau/secboot: add support for SEC LS firmware Support running a message queue firmware on SEC. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/engine/falcon.h | 1 + .../nouveau/nvkm/subdev/secboot/acr_r361.c | 42 +++++++++++++++++++ .../nouveau/nvkm/subdev/secboot/ls_ucode.h | 2 + .../nvkm/subdev/secboot/ls_ucode_msgqueue.c | 32 ++++++++++++++ 4 files changed, 77 insertions(+) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h index 9f84ef24a8b3..e1a854e2ade1 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h @@ -10,6 +10,7 @@ enum nvkm_falcon_dmaidx { FALCON_DMAIDX_PHYS_VID = 2, FALCON_DMAIDX_PHYS_SYS_COH = 3, FALCON_DMAIDX_PHYS_SYS_NCOH = 4, + FALCON_SEC2_DMAIDX_UCODE = 6, }; struct nvkm_falcon { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c index 1ed23e77e4e9..042bd68f6d44 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c @@ -25,6 +25,7 @@ #include #include #include +#include /** * struct acr_r361_flcn_bl_desc - DMEM bootloader descriptor @@ -169,6 +170,46 @@ acr_r361_ls_pmu_func = { .post_run = acr_ls_pmu_post_run, }; +static void +acr_r361_generate_sec2_bl_desc(const struct nvkm_acr *acr, + const struct ls_ucode_img *img, u64 wpr_addr, + void *_desc) +{ + const struct ls_ucode_img_desc *pdesc = &img->ucode_desc; + const struct nvkm_sec2 *sec = acr->subdev->device->sec2; + struct acr_r361_pmu_bl_desc *desc = _desc; + u64 base, addr_code, addr_data; + u32 addr_args; + + base = wpr_addr + img->ucode_off + pdesc->app_start_offset; + /* For some reason we should not add app_resident_code_offset here */ + addr_code = base; + addr_data = base + pdesc->app_resident_data_offset; + addr_args = sec->falcon->data.limit; + addr_args -= NVKM_MSGQUEUE_CMDLINE_SIZE; + + desc->dma_idx = FALCON_SEC2_DMAIDX_UCODE; + desc->code_dma_base = u64_to_flcn64(addr_code); + desc->total_code_size = pdesc->app_size; + desc->code_size_to_load = pdesc->app_resident_code_size; + desc->code_entry_point = pdesc->app_imem_entry; + desc->data_dma_base = u64_to_flcn64(addr_data); + desc->data_size = pdesc->app_resident_data_size; + desc->overlay_dma_base = u64_to_flcn64(addr_code); + desc->argc = 1; + /* args are stored at the beginning of EMEM */ + desc->argv = 0x01000000; +} + +const struct acr_r352_ls_func +acr_r361_ls_sec2_func = { + .load = acr_ls_ucode_load_sec2, + .generate_bl_desc = acr_r361_generate_sec2_bl_desc, + .bl_desc_size = sizeof(struct acr_r361_pmu_bl_desc), + .post_run = acr_ls_sec2_post_run, +}; + + const struct acr_r352_func acr_r361_func = { .fixup_hs_desc = acr_r352_fixup_hs_desc, @@ -181,6 +222,7 @@ acr_r361_func = { [NVKM_SECBOOT_FALCON_FECS] = &acr_r361_ls_fecs_func, [NVKM_SECBOOT_FALCON_GPCCS] = &acr_r361_ls_gpccs_func, [NVKM_SECBOOT_FALCON_PMU] = &acr_r361_ls_pmu_func, + [NVKM_SECBOOT_FALCON_SEC2] = &acr_r361_ls_sec2_func, }, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h index 9ea295d41861..4ff9138a2a83 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h @@ -151,5 +151,7 @@ int acr_ls_ucode_load_fecs(const struct nvkm_subdev *, struct ls_ucode_img *); int acr_ls_ucode_load_gpccs(const struct nvkm_subdev *, struct ls_ucode_img *); int acr_ls_ucode_load_pmu(const struct nvkm_subdev *, struct ls_ucode_img *); void acr_ls_pmu_post_run(const struct nvkm_acr *, const struct nvkm_secboot *); +int acr_ls_ucode_load_sec2(const struct nvkm_subdev *, struct ls_ucode_img *); +void acr_ls_sec2_post_run(const struct nvkm_acr *, const struct nvkm_secboot *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c index 333dd2068a8a..ef0b298b70d7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_msgqueue.c @@ -27,6 +27,7 @@ #include #include #include +#include /** * acr_ls_ucode_load_msgqueue - load and prepare a ucode img for a msgqueue fw @@ -115,3 +116,34 @@ acr_ls_pmu_post_run(const struct nvkm_acr *acr, const struct nvkm_secboot *sb) acr_ls_msgqueue_post_run(pmu->queue, pmu->falcon, addr_args); } + +int +acr_ls_ucode_load_sec2(const struct nvkm_subdev *subdev, + struct ls_ucode_img *img) +{ + struct nvkm_sec2 *sec = subdev->device->sec2; + int ret; + + ret = acr_ls_ucode_load_msgqueue(subdev, "sec2", img); + if (ret) + return ret; + + /* Allocate the PMU queue corresponding to the FW version */ + ret = nvkm_msgqueue_new(img->ucode_desc.app_version, sec->falcon, + &sec->queue); + if (ret) + return ret; + + return 0; +} + +void +acr_ls_sec2_post_run(const struct nvkm_acr *acr, const struct nvkm_secboot *sb) +{ + struct nvkm_device *device = sb->subdev.device; + struct nvkm_sec2 *sec = device->sec2; + /* on SEC arguments are always at the beginning of EMEM */ + u32 addr_args = 0x01000000; + + acr_ls_msgqueue_post_run(sec->queue, sec->falcon, addr_args); +} From a13edd0b21748b76220c60f5c73f49e99591ab0d Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 16 Feb 2017 20:13:59 +0900 Subject: [PATCH 0688/1299] drm/nouveau/secboot: share r361 BL structures and functions Share elements of r361 that will be reused in other ACRs. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../nouveau/nvkm/subdev/secboot/acr_r361.c | 41 +---------- .../nouveau/nvkm/subdev/secboot/acr_r361.h | 72 +++++++++++++++++++ 2 files changed, 74 insertions(+), 39 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.h diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c index 042bd68f6d44..14b36ef93628 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c @@ -20,50 +20,13 @@ * DEALINGS IN THE SOFTWARE. */ -#include "acr_r352.h" +#include "acr_r361.h" #include #include #include #include -/** - * struct acr_r361_flcn_bl_desc - DMEM bootloader descriptor - * @signature: 16B signature for secure code. 0s if no secure code - * @ctx_dma: DMA context to be used by BL while loading code/data - * @code_dma_base: 256B-aligned Physical FB Address where code is located - * (falcon's $xcbase register) - * @non_sec_code_off: offset from code_dma_base where the non-secure code is - * located. The offset must be multiple of 256 to help perf - * @non_sec_code_size: the size of the nonSecure code part. - * @sec_code_off: offset from code_dma_base where the secure code is - * located. The offset must be multiple of 256 to help perf - * @sec_code_size: offset from code_dma_base where the secure code is - * located. The offset must be multiple of 256 to help perf - * @code_entry_point: code entry point which will be invoked by BL after - * code is loaded. - * @data_dma_base: 256B aligned Physical FB Address where data is located. - * (falcon's $xdbase register) - * @data_size: size of data block. Should be multiple of 256B - * - * Structure used by the bootloader to load the rest of the code. This has - * to be filled by host and copied into DMEM at offset provided in the - * hsflcn_bl_desc.bl_desc_dmem_load_off. - */ -struct acr_r361_flcn_bl_desc { - u32 reserved[4]; - u32 signature[4]; - u32 ctx_dma; - struct flcn_u64 code_dma_base; - u32 non_sec_code_off; - u32 non_sec_code_size; - u32 sec_code_off; - u32 sec_code_size; - u32 code_entry_point; - struct flcn_u64 data_dma_base; - u32 data_size; -}; - static void acr_r361_generate_flcn_bl_desc(const struct nvkm_acr *acr, const struct ls_ucode_img *img, u64 wpr_addr, @@ -86,7 +49,7 @@ acr_r361_generate_flcn_bl_desc(const struct nvkm_acr *acr, desc->data_size = pdesc->app_resident_data_size; } -static void +void acr_r361_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc, u64 offset) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.h new file mode 100644 index 000000000000..f9f978daadb9 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NVKM_SECBOOT_ACR_R361_H__ +#define __NVKM_SECBOOT_ACR_R361_H__ + +#include "acr_r352.h" + +/** + * struct acr_r361_flcn_bl_desc - DMEM bootloader descriptor + * @signature: 16B signature for secure code. 0s if no secure code + * @ctx_dma: DMA context to be used by BL while loading code/data + * @code_dma_base: 256B-aligned Physical FB Address where code is located + * (falcon's $xcbase register) + * @non_sec_code_off: offset from code_dma_base where the non-secure code is + * located. The offset must be multiple of 256 to help perf + * @non_sec_code_size: the size of the nonSecure code part. + * @sec_code_off: offset from code_dma_base where the secure code is + * located. The offset must be multiple of 256 to help perf + * @sec_code_size: offset from code_dma_base where the secure code is + * located. The offset must be multiple of 256 to help perf + * @code_entry_point: code entry point which will be invoked by BL after + * code is loaded. + * @data_dma_base: 256B aligned Physical FB Address where data is located. + * (falcon's $xdbase register) + * @data_size: size of data block. Should be multiple of 256B + * + * Structure used by the bootloader to load the rest of the code. This has + * to be filled by host and copied into DMEM at offset provided in the + * hsflcn_bl_desc.bl_desc_dmem_load_off. + */ +struct acr_r361_flcn_bl_desc { + u32 reserved[4]; + u32 signature[4]; + u32 ctx_dma; + struct flcn_u64 code_dma_base; + u32 non_sec_code_off; + u32 non_sec_code_size; + u32 sec_code_off; + u32 sec_code_size; + u32 code_entry_point; + struct flcn_u64 data_dma_base; + u32 data_size; +}; + +void acr_r361_generate_hs_bl_desc(const struct hsf_load_header *, void *, u64); + +extern const struct acr_r352_ls_func acr_r361_ls_fecs_func; +extern const struct acr_r352_ls_func acr_r361_ls_gpccs_func; +extern const struct acr_r352_ls_func acr_r361_ls_pmu_func; +extern const struct acr_r352_ls_func acr_r361_ls_sec2_func; + +#endif From 7defd1daacef6bfae5387e95bcd7b57c9183aaf7 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 26 Jan 2017 17:18:49 +0900 Subject: [PATCH 0689/1299] drm/nouveau/secboot: support for different load and unload falcons On some secure boot instances (e.g. gp10x) the load and unload blobs do not run on the same falcon. Support this case by introducing a new member to the ACR structure and making related functions take the falcon to use as an argument instead of assuming the boot falcon is to be used. The rule is that the load blob can be run on either the SEC or PMU falcons, but the unload blob must be always run on PMU. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/secboot.h | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h | 2 +- .../gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c | 12 ++++++------ drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c | 3 ++- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c | 8 ++++---- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h | 3 ++- drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h | 3 ++- 7 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h index eb46fbf26df6..03da24e2c229 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h @@ -45,6 +45,7 @@ struct nvkm_secboot { struct nvkm_acr *acr; struct nvkm_subdev subdev; struct nvkm_falcon *boot_falcon; + struct nvkm_falcon *halt_falcon; u64 wpr_addr; u32 wpr_size; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h index 9738340d33a1..15aa443658ad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h @@ -37,7 +37,7 @@ struct nvkm_acr_func { void (*dtor)(struct nvkm_acr *); int (*oneinit)(struct nvkm_acr *, struct nvkm_secboot *); int (*fini)(struct nvkm_acr *, struct nvkm_secboot *, bool); - int (*load)(struct nvkm_acr *, struct nvkm_secboot *, + int (*load)(struct nvkm_acr *, struct nvkm_falcon *, struct nvkm_gpuobj *, u64); int (*reset)(struct nvkm_acr *, struct nvkm_secboot *, enum nvkm_secboot_falcon); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 0b563bc9daf8..613ed363a6bf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -816,11 +816,10 @@ acr_r352_load_blobs(struct acr_r352 *acr, struct nvkm_secboot *sb) * Returns the start address to use, or a negative error value. */ static int -acr_r352_load(struct nvkm_acr *_acr, struct nvkm_secboot *sb, +acr_r352_load(struct nvkm_acr *_acr, struct nvkm_falcon *falcon, struct nvkm_gpuobj *blob, u64 offset) { struct acr_r352 *acr = acr_r352(_acr); - struct nvkm_falcon *falcon = sb->boot_falcon; struct fw_bin_header *hdr = acr->hsbl_blob; struct fw_bl_desc *hsbl_desc = acr->hsbl_blob + hdr->header_offset; void *blob_data = acr->hsbl_blob + hdr->data_offset; @@ -873,7 +872,7 @@ acr_r352_shutdown(struct acr_r352 *acr, struct nvkm_secboot *sb) int ret; nvkm_debug(&sb->subdev, "running HS unload blob\n"); - ret = sb->func->run_blob(sb, acr->unload_blob); + ret = sb->func->run_blob(sb, acr->unload_blob, sb->halt_falcon); if (ret) return ret; nvkm_debug(&sb->subdev, "HS unload blob completed\n"); @@ -935,7 +934,7 @@ acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) return ret; nvkm_debug(subdev, "running HS load blob\n"); - ret = sb->func->run_blob(sb, acr->load_blob); + ret = sb->func->run_blob(sb, acr->load_blob, sb->boot_falcon); /* clear halt interrupt */ nvkm_falcon_clear_interrupt(sb->boot_falcon, 0x10); sb->wpr_set = acr_r352_wpr_is_set(acr, sb); @@ -967,9 +966,10 @@ acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) nvkm_falcon_wr32(sb->boot_falcon, 0x10, 0xff); nvkm_mc_intr_mask(subdev->device, sb->boot_falcon->owner->index, true); - /* Start PMU */ + /* Start LS firmware on boot falcon */ nvkm_falcon_start(sb->boot_falcon); - nvkm_debug(subdev, "PMU started\n"); + nvkm_debug(subdev, "%s started\n", + nvkm_secboot_falcon_name[acr->base.boot_falcon]); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c index faf94a457670..5c11e8c50964 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c @@ -133,12 +133,13 @@ nvkm_secboot_oneinit(struct nvkm_subdev *subdev) switch (sb->acr->boot_falcon) { case NVKM_SECBOOT_FALCON_PMU: - sb->boot_falcon = subdev->device->pmu->falcon; + sb->halt_falcon = sb->boot_falcon = subdev->device->pmu->falcon; break; case NVKM_SECBOOT_FALCON_SEC2: /* we must keep SEC2 alive forever since ACR will run on it */ nvkm_engine_ref(&subdev->device->sec2->engine); sb->boot_falcon = subdev->device->sec2->falcon; + sb->halt_falcon = subdev->device->pmu->falcon; break; default: nvkm_error(subdev, "Unmanaged boot falcon %s!\n", diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c index b21e91778f86..2c8dec9fda9a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c @@ -34,11 +34,11 @@ * */ int -gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob) +gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob, + struct nvkm_falcon *falcon) { struct gm200_secboot *gsb = gm200_secboot(sb); struct nvkm_subdev *subdev = &gsb->base.subdev; - struct nvkm_falcon *falcon = gsb->base.boot_falcon; struct nvkm_vma vma; u32 start_address; int ret; @@ -61,7 +61,7 @@ gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob) nvkm_falcon_bind_context(falcon, gsb->inst); /* Load the HS bootloader into the falcon's IMEM/DMEM */ - ret = sb->acr->func->load(sb->acr, &gsb->base, blob, vma.offset); + ret = sb->acr->func->load(sb->acr, falcon, blob, vma.offset); if (ret < 0) goto end; @@ -83,7 +83,7 @@ gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob) /* If mailbox register contains an error code, then ACR has failed */ ret = nvkm_falcon_rd32(falcon, 0x040); if (ret) { - nvkm_error(subdev, "ACR boot failed, ret 0x%08x", ret); + nvkm_error(subdev, "HS blob failed, ret 0x%08x", ret); ret = -EINVAL; goto end; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h index 45adf1a3bc20..6dc9fc384f24 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h @@ -38,6 +38,7 @@ struct gm200_secboot { int gm200_secboot_oneinit(struct nvkm_secboot *); int gm200_secboot_fini(struct nvkm_secboot *, bool); void *gm200_secboot_dtor(struct nvkm_secboot *); -int gm200_secboot_run_blob(struct nvkm_secboot *, struct nvkm_gpuobj *); +int gm200_secboot_run_blob(struct nvkm_secboot *, struct nvkm_gpuobj *, + struct nvkm_falcon *); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h index 430b88ec74cf..885e919a8720 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h @@ -30,7 +30,8 @@ struct nvkm_secboot_func { int (*oneinit)(struct nvkm_secboot *); int (*fini)(struct nvkm_secboot *, bool suspend); void *(*dtor)(struct nvkm_secboot *); - int (*run_blob)(struct nvkm_secboot *, struct nvkm_gpuobj *); + int (*run_blob)(struct nvkm_secboot *, struct nvkm_gpuobj *, + struct nvkm_falcon *); }; int nvkm_secboot_ctor(const struct nvkm_secboot_func *, struct nvkm_acr *, From c93cfe35c4da46502ffb0b8a9ded3a1f1a819b9d Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 23 Feb 2017 13:05:27 +0900 Subject: [PATCH 0690/1299] drm/nouveau/secboot: let callers interpret return value of blobs Since the HS blobs are provided and signed by NVIDIA, we cannot expect always-consistent behavior. In this case, on GP10x the unload blob may return 0x1d even though things have run perfectly well. This behavior has been confirmed by NVIDIA. So let the callers of the run_blob() hook receive the blob return's value (a positive integer) and decide what it means. This allows us to workaround the 0x1d code instead of issuing an error. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../nouveau/nvkm/subdev/secboot/acr_r352.c | 23 ++++++++++++++----- .../drm/nouveau/nvkm/subdev/secboot/gm200.c | 10 ++++---- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 613ed363a6bf..413275d0594b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -865,17 +865,26 @@ acr_r352_load(struct nvkm_acr *_acr, struct nvkm_falcon *falcon, static int acr_r352_shutdown(struct acr_r352 *acr, struct nvkm_secboot *sb) { + struct nvkm_subdev *subdev = &sb->subdev; int i; /* Run the unload blob to unprotect the WPR region */ if (acr->unload_blob && sb->wpr_set) { int ret; - nvkm_debug(&sb->subdev, "running HS unload blob\n"); + nvkm_debug(subdev, "running HS unload blob\n"); ret = sb->func->run_blob(sb, acr->unload_blob, sb->halt_falcon); - if (ret) + if (ret < 0) return ret; - nvkm_debug(&sb->subdev, "HS unload blob completed\n"); + /* + * Unload blob will return this error code - it is not an error + * and the expected behavior on RM as well + */ + if (ret && ret != 0x1d) { + nvkm_error(subdev, "HS unload failed, ret 0x%08x", ret); + return -EINVAL; + } + nvkm_debug(subdev, "HS unload blob completed\n"); } for (i = 0; i < NVKM_SECBOOT_FALCON_END; i++) @@ -938,11 +947,13 @@ acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) /* clear halt interrupt */ nvkm_falcon_clear_interrupt(sb->boot_falcon, 0x10); sb->wpr_set = acr_r352_wpr_is_set(acr, sb); - if (ret) + if (ret < 0) { return ret; + } else if (ret > 0) { + nvkm_error(subdev, "HS load failed, ret 0x%08x", ret); + return -EINVAL; + } nvkm_debug(subdev, "HS load blob completed\n"); - if (ret) - return ret; /* WPR must be set at this point */ if (!sb->wpr_set) { nvkm_error(subdev, "ACR blob completed but WPR not set!\n"); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c index 2c8dec9fda9a..73ca1203281d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c @@ -80,13 +80,11 @@ gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob, if (ret) goto end; - /* If mailbox register contains an error code, then ACR has failed */ + /* + * The mailbox register contains the (positive) error code - return this + * to the caller + */ ret = nvkm_falcon_rd32(falcon, 0x040); - if (ret) { - nvkm_error(subdev, "HS blob failed, ret 0x%08x", ret); - ret = -EINVAL; - goto end; - } end: /* Reenable interrupts */ From b58b41716302ed95df16d8fcefc7edb1ce5cf794 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 26 Jan 2017 17:23:11 +0900 Subject: [PATCH 0691/1299] drm/nouveau/secboot: support for unload blob bootloader If the load and unload falcons are different, then a different bootloader must also be used. Support this case. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../nouveau/nvkm/subdev/secboot/acr_r352.c | 64 ++++++++++--------- .../nouveau/nvkm/subdev/secboot/acr_r352.h | 3 + 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 413275d0594b..77ed8f007776 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -737,27 +737,6 @@ acr_r352_prepare_hs_blob(struct acr_r352 *acr, struct nvkm_secboot *sb, return ret; } -static int -acr_r352_prepare_hsbl_blob(struct acr_r352 *acr) -{ - const struct nvkm_subdev *subdev = acr->base.subdev; - struct fw_bin_header *hdr; - struct fw_bl_desc *hsbl_desc; - - acr->hsbl_blob = nvkm_acr_load_firmware(subdev, "acr/bl", 0); - if (IS_ERR(acr->hsbl_blob)) { - int ret = PTR_ERR(acr->hsbl_blob); - - acr->hsbl_blob = NULL; - return ret; - } - - hdr = acr->hsbl_blob; - hsbl_desc = acr->hsbl_blob + hdr->header_offset; - - return 0; -} - /** * acr_r352_load_blobs - load blobs common to all ACR V1 versions. * @@ -768,6 +747,7 @@ acr_r352_prepare_hsbl_blob(struct acr_r352 *acr) int acr_r352_load_blobs(struct acr_r352 *acr, struct nvkm_secboot *sb) { + struct nvkm_subdev *subdev = &sb->subdev; int ret; /* Firmware already loaded? */ @@ -799,9 +779,24 @@ acr_r352_load_blobs(struct acr_r352 *acr, struct nvkm_secboot *sb) /* Load the HS firmware bootloader */ if (!acr->hsbl_blob) { - ret = acr_r352_prepare_hsbl_blob(acr); - if (ret) + acr->hsbl_blob = nvkm_acr_load_firmware(subdev, "acr/bl", 0); + if (IS_ERR(acr->hsbl_blob)) { + ret = PTR_ERR(acr->hsbl_blob); + acr->hsbl_blob = NULL; return ret; + } + + if (acr->base.boot_falcon != NVKM_SECBOOT_FALCON_PMU) { + acr->hsbl_unload_blob = nvkm_acr_load_firmware(subdev, + "acr/unload_bl", 0); + if (IS_ERR(acr->hsbl_unload_blob)) { + ret = PTR_ERR(acr->hsbl_unload_blob); + acr->hsbl_unload_blob = NULL; + return ret; + } + } else { + acr->hsbl_unload_blob = acr->hsbl_blob; + } } acr->firmware_ok = true; @@ -820,26 +815,33 @@ acr_r352_load(struct nvkm_acr *_acr, struct nvkm_falcon *falcon, struct nvkm_gpuobj *blob, u64 offset) { struct acr_r352 *acr = acr_r352(_acr); - struct fw_bin_header *hdr = acr->hsbl_blob; - struct fw_bl_desc *hsbl_desc = acr->hsbl_blob + hdr->header_offset; - void *blob_data = acr->hsbl_blob + hdr->data_offset; - void *hsbl_code = blob_data + hsbl_desc->code_off; - void *hsbl_data = blob_data + hsbl_desc->data_off; - u32 code_size = ALIGN(hsbl_desc->code_size, 256); - const struct hsf_load_header *load_hdr; const u32 bl_desc_size = acr->func->hs_bl_desc_size; + const struct hsf_load_header *load_hdr; + struct fw_bin_header *bl_hdr; + struct fw_bl_desc *hsbl_desc; + void *bl, *blob_data, *hsbl_code, *hsbl_data; + u32 code_size; u8 bl_desc[bl_desc_size]; /* Find the bootloader descriptor for our blob and copy it */ if (blob == acr->load_blob) { load_hdr = &acr->load_bl_header; + bl = acr->hsbl_blob; } else if (blob == acr->unload_blob) { load_hdr = &acr->unload_bl_header; + bl = acr->hsbl_unload_blob; } else { nvkm_error(_acr->subdev, "invalid secure boot blob!\n"); return -EINVAL; } + bl_hdr = bl; + hsbl_desc = bl + bl_hdr->header_offset; + blob_data = bl + bl_hdr->data_offset; + hsbl_code = blob_data + hsbl_desc->code_off; + hsbl_data = blob_data + hsbl_desc->data_off; + code_size = ALIGN(hsbl_desc->code_size, 256); + /* * Copy HS bootloader data */ @@ -1087,6 +1089,8 @@ acr_r352_dtor(struct nvkm_acr *_acr) nvkm_gpuobj_del(&acr->unload_blob); + if (_acr->boot_falcon != NVKM_SECBOOT_FALCON_PMU) + kfree(acr->hsbl_unload_blob); kfree(acr->hsbl_blob); nvkm_gpuobj_del(&acr->load_blob); nvkm_gpuobj_del(&acr->ls_blob); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h index 1b9f5c825c8a..b610152ea610 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h @@ -142,6 +142,9 @@ struct acr_r352 { /* HS bootloader */ void *hsbl_blob; + /* HS bootloader for unload blob, if using a different falcon */ + void *hsbl_unload_blob; + /* LS FWs, to be loaded by the HS ACR */ struct nvkm_gpuobj *ls_blob; From c5e1fef4875b75df48a7e9828243062799e444dc Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 26 Jan 2017 16:12:10 +0900 Subject: [PATCH 0692/1299] drm/nouveau/secboot: support standard NVIDIA HS binaries I had the brilliant idea to "improve" the binary format by removing a useless indirection in the HS binary files. In the end it just makes things more complicated than they ought to be as NVIDIA-provided files need to be adapted. Since the format used can be identified by the header, support both. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../nouveau/nvkm/subdev/secboot/acr_r352.c | 60 ++++++++++++++++--- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 77ed8f007776..b4e0add13a33 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -599,19 +599,35 @@ acr_r352_prepare_ls_blob(struct acr_r352 *acr, u64 wpr_addr, u32 wpr_size) /** - * acr_r352_hsf_patch_signature() - patch HS blob with correct signature + * acr_r352_hsf_patch_signature() - patch HS blob with correct signature for + * specified falcon. */ static void -acr_r352_hsf_patch_signature(struct nvkm_secboot *sb, void *acr_image) +acr_r352_hsf_patch_signature(const struct nvkm_falcon *falcon, void *acr_image, + bool new_format) { struct fw_bin_header *hsbin_hdr = acr_image; struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset; void *hs_data = acr_image + hsbin_hdr->data_offset; void *sig; u32 sig_size; + u32 patch_loc, patch_sig; + + /* + * I had the brilliant idea to "improve" the binary format by + * removing this useless indirection. However to make NVIDIA files + * directly compatible, let's support both format. + */ + if (new_format) { + patch_loc = fw_hdr->patch_loc; + patch_sig = fw_hdr->patch_sig; + } else { + patch_loc = *(u32 *)(acr_image + fw_hdr->patch_loc); + patch_sig = *(u32 *)(acr_image + fw_hdr->patch_sig); + } /* Falcon in debug or production mode? */ - if (sb->boot_falcon->debug) { + if (falcon->debug) { sig = acr_image + fw_hdr->sig_dbg_offset; sig_size = fw_hdr->sig_dbg_size; } else { @@ -620,7 +636,7 @@ acr_r352_hsf_patch_signature(struct nvkm_secboot *sb, void *acr_image) } /* Patch signature */ - memcpy(hs_data + fw_hdr->patch_loc, sig + fw_hdr->patch_sig, sig_size); + memcpy(hs_data + patch_loc, sig + patch_sig, sig_size); } void @@ -670,6 +686,37 @@ acr_r352_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc, bl_desc->data_size = hdr->data_size; } +void * +acr_r352_load_hs_blob(struct nvkm_secboot *sb, const struct nvkm_falcon *falcon, + const char *fw) +{ + struct nvkm_subdev *subdev = &sb->subdev; + void *acr_image; + bool new_format; + + acr_image = nvkm_acr_load_firmware(subdev, fw, 0); + if (IS_ERR(acr_image)) + return acr_image; + + /* detect the format to define how signature should be patched */ + switch (((u32 *)acr_image)[0]) { + case 0x3b1d14f0: + new_format = true; + break; + case 0x000010de: + new_format = false; + break; + default: + nvkm_error(subdev, "unknown header for HS blob %s\n", fw); + return ERR_PTR(-EINVAL); + } + + /* Patch signature */ + acr_r352_hsf_patch_signature(falcon, acr_image, new_format); + + return acr_image; +} + /** * acr_r352_prepare_hs_blob - load and prepare a HS blob and BL descriptor * @@ -692,7 +739,7 @@ acr_r352_prepare_hs_blob(struct acr_r352 *acr, struct nvkm_secboot *sb, void *acr_data; int ret; - acr_image = nvkm_acr_load_firmware(subdev, fw, 0); + acr_image = acr_r352_load_hs_blob(sb, sb->boot_falcon, fw); if (IS_ERR(acr_image)) return PTR_ERR(acr_image); @@ -701,9 +748,6 @@ acr_r352_prepare_hs_blob(struct acr_r352 *acr, struct nvkm_secboot *sb, load_hdr = acr_image + fw_hdr->hdr_offset; acr_data = acr_image + hsbin_hdr->data_offset; - /* Patch signature */ - acr_r352_hsf_patch_signature(sb, acr_image); - /* Patch descriptor with WPR information? */ if (patch) { struct hsflcn_acr_desc *desc; From ec91cb028511d68193c792f27a1bcd8d3e756280 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 16 Feb 2017 15:13:49 +0900 Subject: [PATCH 0693/1299] drm/nouveau/secboot: workaround bug when starting SEC2 firmware For some unknown reason the LS SEC2 firmware needs to be started twice to operate. Detect and address that condition. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../nouveau/nvkm/subdev/secboot/acr_r352.c | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index b4e0add13a33..72ca537b0b59 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -977,6 +978,7 @@ acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) { const struct nvkm_subdev *subdev = &sb->subdev; unsigned long managed_falcons = acr->base.managed_falcons; + u32 reg; int falcon_id; int ret; @@ -1025,6 +1027,37 @@ acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) /* Start LS firmware on boot falcon */ nvkm_falcon_start(sb->boot_falcon); + + /* + * There is a bug where the LS firmware sometimes require to be started + * twice (this happens only on SEC). Detect and workaround that + * condition. + * + * Once started, the falcon will end up in STOPPED condition (bit 5) + * if successful, or in HALT condition (bit 4) if not. + */ + nvkm_msec(subdev->device, 1, + if ((reg = nvkm_rd32(subdev->device, + sb->boot_falcon->addr + 0x100) + & 0x30) != 0) + break; + ); + if (reg & BIT(4)) { + nvkm_debug(subdev, "applying workaround for start bug..."); + nvkm_falcon_start(sb->boot_falcon); + nvkm_msec(subdev->device, 1, + if ((reg = nvkm_rd32(subdev->device, + sb->boot_falcon->addr + 0x100) + & 0x30) != 0) + break; + ); + if (reg & BIT(4)) { + nvkm_error(subdev, "%s failed to start\n", + nvkm_secboot_falcon_name[acr->base.boot_falcon]); + return -EINVAL; + } + } + nvkm_debug(subdev, "%s started\n", nvkm_secboot_falcon_name[acr->base.boot_falcon]); From 810997ff40783853491babc5d3d82b510704674f Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 15 Nov 2016 15:34:29 +0900 Subject: [PATCH 0694/1299] drm/nouveau/secboot: support for r364 ACR r364 is similar to r361, but uses a different hsflcn_desc structure to introduce the shadow region address (even though it is not yet used by this version). Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/nvkm/subdev/secboot/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/secboot/acr.h | 1 + .../nouveau/nvkm/subdev/secboot/acr_r364.c | 117 ++++++++++++++++++ 3 files changed, 119 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r364.c diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild index 4e0c671e19dd..291700bfb07a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild @@ -4,5 +4,6 @@ nvkm-y += nvkm/subdev/secboot/ls_ucode_msgqueue.o nvkm-y += nvkm/subdev/secboot/acr.o nvkm-y += nvkm/subdev/secboot/acr_r352.o nvkm-y += nvkm/subdev/secboot/acr_r361.o +nvkm-y += nvkm/subdev/secboot/acr_r364.o nvkm-y += nvkm/subdev/secboot/gm200.o nvkm-y += nvkm/subdev/secboot/gm20b.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h index 15aa443658ad..e6fed662781f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h @@ -63,5 +63,6 @@ void *nvkm_acr_load_firmware(const struct nvkm_subdev *, const char *, size_t); struct nvkm_acr *acr_r352_new(unsigned long); struct nvkm_acr *acr_r361_new(unsigned long); +struct nvkm_acr *acr_r364_new(unsigned long); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r364.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r364.c new file mode 100644 index 000000000000..30cf04109991 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r364.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "acr_r361.h" + +#include + +/* + * r364 ACR: hsflcn_desc structure has changed to introduce the shadow_mem + * parameter. + */ + +struct acr_r364_hsflcn_desc { + union { + u8 reserved_dmem[0x200]; + u32 signatures[4]; + } ucode_reserved_space; + u32 wpr_region_id; + u32 wpr_offset; + u32 mmu_memory_range; + struct { + u32 no_regions; + struct { + u32 start_addr; + u32 end_addr; + u32 region_id; + u32 read_mask; + u32 write_mask; + u32 client_mask; + u32 shadow_mem_start_addr; + } region_props[2]; + } regions; + u32 ucode_blob_size; + u64 ucode_blob_base __aligned(8); + struct { + u32 vpr_enabled; + u32 vpr_start; + u32 vpr_end; + u32 hdcp_policies; + } vpr_desc; +}; + +static void +acr_r364_fixup_hs_desc(struct acr_r352 *acr, struct nvkm_secboot *sb, + void *_desc) +{ + struct acr_r364_hsflcn_desc *desc = _desc; + struct nvkm_gpuobj *ls_blob = acr->ls_blob; + + /* WPR region information if WPR is not fixed */ + if (sb->wpr_size == 0) { + u64 wpr_start = ls_blob->addr; + u64 wpr_end = ls_blob->addr + ls_blob->size; + + if (acr->func->shadow_blob) + wpr_start += ls_blob->size / 2; + + desc->wpr_region_id = 1; + desc->regions.no_regions = 2; + desc->regions.region_props[0].start_addr = wpr_start >> 8; + desc->regions.region_props[0].end_addr = wpr_end >> 8; + desc->regions.region_props[0].region_id = 1; + desc->regions.region_props[0].read_mask = 0xf; + desc->regions.region_props[0].write_mask = 0xc; + desc->regions.region_props[0].client_mask = 0x2; + if (acr->func->shadow_blob) + desc->regions.region_props[0].shadow_mem_start_addr = + ls_blob->addr >> 8; + else + desc->regions.region_props[0].shadow_mem_start_addr = 0; + } else { + desc->ucode_blob_base = ls_blob->addr; + desc->ucode_blob_size = ls_blob->size; + } +} + +const struct acr_r352_func +acr_r364_func = { + .fixup_hs_desc = acr_r364_fixup_hs_desc, + .generate_hs_bl_desc = acr_r361_generate_hs_bl_desc, + .hs_bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc), + .ls_ucode_img_load = acr_r352_ls_ucode_img_load, + .ls_fill_headers = acr_r352_ls_fill_headers, + .ls_write_wpr = acr_r352_ls_write_wpr, + .ls_func = { + [NVKM_SECBOOT_FALCON_FECS] = &acr_r361_ls_fecs_func, + [NVKM_SECBOOT_FALCON_GPCCS] = &acr_r361_ls_gpccs_func, + [NVKM_SECBOOT_FALCON_PMU] = &acr_r361_ls_pmu_func, + }, +}; + + +struct nvkm_acr * +acr_r364_new(unsigned long managed_falcons) +{ + return acr_r352_new_(&acr_r364_func, NVKM_SECBOOT_FALCON_PMU, + managed_falcons); +} From 0f8fb2ab1e093c031ff4ce91570951e82fef6ca1 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 15 Nov 2016 16:30:26 +0900 Subject: [PATCH 0695/1299] drm/nouveau/secboot: support for r367 ACR r367 uses a different hsflcn_desc layout and LS firmware signature format, requiring a rewrite of some functions. It also makes use of the shadow region, and uses SEC as the boot falcon. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/nvkm/subdev/secboot/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/secboot/acr.h | 1 + .../nouveau/nvkm/subdev/secboot/acr_r367.c | 388 ++++++++++++++++++ .../nouveau/nvkm/subdev/secboot/acr_r367.h | 35 ++ 4 files changed, 425 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r367.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r367.h diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild index 291700bfb07a..8b2ab23fdfab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild @@ -5,5 +5,6 @@ nvkm-y += nvkm/subdev/secboot/acr.o nvkm-y += nvkm/subdev/secboot/acr_r352.o nvkm-y += nvkm/subdev/secboot/acr_r361.o nvkm-y += nvkm/subdev/secboot/acr_r364.o +nvkm-y += nvkm/subdev/secboot/acr_r367.o nvkm-y += nvkm/subdev/secboot/gm200.o nvkm-y += nvkm/subdev/secboot/gm20b.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h index e6fed662781f..e4f1c45d3350 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h @@ -64,5 +64,6 @@ void *nvkm_acr_load_firmware(const struct nvkm_subdev *, const char *, size_t); struct nvkm_acr *acr_r352_new(unsigned long); struct nvkm_acr *acr_r361_new(unsigned long); struct nvkm_acr *acr_r364_new(unsigned long); +struct nvkm_acr *acr_r367_new(enum nvkm_secboot_falcon, unsigned long); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r367.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r367.c new file mode 100644 index 000000000000..f860713642f1 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r367.c @@ -0,0 +1,388 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "acr_r367.h" +#include "acr_r361.h" + +#include + +/* + * r367 ACR: new LS signature format requires a rewrite of LS firmware and + * blob creation functions. Also the hsflcn_desc layout has changed slightly. + */ + +#define LSF_LSB_DEPMAP_SIZE 11 + +/** + * struct acr_r367_lsf_lsb_header - LS firmware header + * + * See also struct acr_r352_lsf_lsb_header for documentation. + */ +struct acr_r367_lsf_lsb_header { + /** + * LS falcon signatures + * @prd_keys: signature to use in production mode + * @dgb_keys: signature to use in debug mode + * @b_prd_present: whether the production key is present + * @b_dgb_present: whether the debug key is present + * @falcon_id: ID of the falcon the ucode applies to + */ + struct { + u8 prd_keys[2][16]; + u8 dbg_keys[2][16]; + u32 b_prd_present; + u32 b_dbg_present; + u32 falcon_id; + u32 supports_versioning; + u32 version; + u32 depmap_count; + u8 depmap[LSF_LSB_DEPMAP_SIZE * 2 * 4]; + u8 kdf[16]; + } signature; + u32 ucode_off; + u32 ucode_size; + u32 data_size; + u32 bl_code_size; + u32 bl_imem_off; + u32 bl_data_off; + u32 bl_data_size; + u32 app_code_off; + u32 app_code_size; + u32 app_data_off; + u32 app_data_size; + u32 flags; +}; + +/** + * struct acr_r367_lsf_wpr_header - LS blob WPR Header + * + * See also struct acr_r352_lsf_wpr_header for documentation. + */ +struct acr_r367_lsf_wpr_header { + u32 falcon_id; + u32 lsb_offset; + u32 bootstrap_owner; + u32 lazy_bootstrap; + u32 bin_version; + u32 status; +#define LSF_IMAGE_STATUS_NONE 0 +#define LSF_IMAGE_STATUS_COPY 1 +#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED 2 +#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED 3 +#define LSF_IMAGE_STATUS_VALIDATION_DONE 4 +#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED 5 +#define LSF_IMAGE_STATUS_BOOTSTRAP_READY 6 +#define LSF_IMAGE_STATUS_REVOCATION_CHECK_FAILED 7 +}; + +/** + * struct ls_ucode_img_r367 - ucode image augmented with r367 headers + */ +struct ls_ucode_img_r367 { + struct ls_ucode_img base; + + struct acr_r367_lsf_wpr_header wpr_header; + struct acr_r367_lsf_lsb_header lsb_header; +}; +#define ls_ucode_img_r367(i) container_of(i, struct ls_ucode_img_r367, base) + +struct ls_ucode_img * +acr_r367_ls_ucode_img_load(const struct acr_r352 *acr, + enum nvkm_secboot_falcon falcon_id) +{ + const struct nvkm_subdev *subdev = acr->base.subdev; + struct ls_ucode_img_r367 *img; + int ret; + + img = kzalloc(sizeof(*img), GFP_KERNEL); + if (!img) + return ERR_PTR(-ENOMEM); + + img->base.falcon_id = falcon_id; + + ret = acr->func->ls_func[falcon_id]->load(subdev, &img->base); + if (ret) { + kfree(img->base.ucode_data); + kfree(img->base.sig); + kfree(img); + return ERR_PTR(ret); + } + + /* Check that the signature size matches our expectations... */ + if (img->base.sig_size != sizeof(img->lsb_header.signature)) { + nvkm_error(subdev, "invalid signature size for %s falcon!\n", + nvkm_secboot_falcon_name[falcon_id]); + return ERR_PTR(-EINVAL); + } + + /* Copy signature to the right place */ + memcpy(&img->lsb_header.signature, img->base.sig, img->base.sig_size); + + /* not needed? the signature should already have the right value */ + img->lsb_header.signature.falcon_id = falcon_id; + + return &img->base; +} + +#define LSF_LSB_HEADER_ALIGN 256 +#define LSF_BL_DATA_ALIGN 256 +#define LSF_BL_DATA_SIZE_ALIGN 256 +#define LSF_BL_CODE_SIZE_ALIGN 256 +#define LSF_UCODE_DATA_ALIGN 4096 + +static u32 +acr_r367_ls_img_fill_headers(struct acr_r352 *acr, + struct ls_ucode_img_r367 *img, u32 offset) +{ + struct ls_ucode_img *_img = &img->base; + struct acr_r367_lsf_wpr_header *whdr = &img->wpr_header; + struct acr_r367_lsf_lsb_header *lhdr = &img->lsb_header; + struct ls_ucode_img_desc *desc = &_img->ucode_desc; + const struct acr_r352_ls_func *func = + acr->func->ls_func[_img->falcon_id]; + + /* Fill WPR header */ + whdr->falcon_id = _img->falcon_id; + whdr->bootstrap_owner = acr->base.boot_falcon; + whdr->bin_version = lhdr->signature.version; + whdr->status = LSF_IMAGE_STATUS_COPY; + + /* Skip bootstrapping falcons started by someone else than ACR */ + if (acr->lazy_bootstrap & BIT(_img->falcon_id)) + whdr->lazy_bootstrap = 1; + + /* Align, save off, and include an LSB header size */ + offset = ALIGN(offset, LSF_LSB_HEADER_ALIGN); + whdr->lsb_offset = offset; + offset += sizeof(*lhdr); + + /* + * Align, save off, and include the original (static) ucode + * image size + */ + offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN); + _img->ucode_off = lhdr->ucode_off = offset; + offset += _img->ucode_size; + + /* + * For falcons that use a boot loader (BL), we append a loader + * desc structure on the end of the ucode image and consider + * this the boot loader data. The host will then copy the loader + * desc args to this space within the WPR region (before locking + * down) and the HS bin will then copy them to DMEM 0 for the + * loader. + */ + lhdr->bl_code_size = ALIGN(desc->bootloader_size, + LSF_BL_CODE_SIZE_ALIGN); + lhdr->ucode_size = ALIGN(desc->app_resident_data_offset, + LSF_BL_CODE_SIZE_ALIGN) + lhdr->bl_code_size; + lhdr->data_size = ALIGN(desc->app_size, LSF_BL_CODE_SIZE_ALIGN) + + lhdr->bl_code_size - lhdr->ucode_size; + /* + * Though the BL is located at 0th offset of the image, the VA + * is different to make sure that it doesn't collide the actual + * OS VA range + */ + lhdr->bl_imem_off = desc->bootloader_imem_offset; + lhdr->app_code_off = desc->app_start_offset + + desc->app_resident_code_offset; + lhdr->app_code_size = desc->app_resident_code_size; + lhdr->app_data_off = desc->app_start_offset + + desc->app_resident_data_offset; + lhdr->app_data_size = desc->app_resident_data_size; + + lhdr->flags = func->lhdr_flags; + if (_img->falcon_id == acr->base.boot_falcon) + lhdr->flags |= LSF_FLAG_DMACTL_REQ_CTX; + + /* Align and save off BL descriptor size */ + lhdr->bl_data_size = ALIGN(func->bl_desc_size, LSF_BL_DATA_SIZE_ALIGN); + + /* + * Align, save off, and include the additional BL data + */ + offset = ALIGN(offset, LSF_BL_DATA_ALIGN); + lhdr->bl_data_off = offset; + offset += lhdr->bl_data_size; + + return offset; +} + +int +acr_r367_ls_fill_headers(struct acr_r352 *acr, struct list_head *imgs) +{ + struct ls_ucode_img_r367 *img; + struct list_head *l; + u32 count = 0; + u32 offset; + + /* Count the number of images to manage */ + list_for_each(l, imgs) + count++; + + /* + * Start with an array of WPR headers at the base of the WPR. + * The expectation here is that the secure falcon will do a single DMA + * read of this array and cache it internally so it's ok to pack these. + * Also, we add 1 to the falcon count to indicate the end of the array. + */ + offset = sizeof(img->wpr_header) * (count + 1); + + /* + * Walk the managed falcons, accounting for the LSB structs + * as well as the ucode images. + */ + list_for_each_entry(img, imgs, base.node) { + offset = acr_r367_ls_img_fill_headers(acr, img, offset); + } + + return offset; +} + +int +acr_r367_ls_write_wpr(struct acr_r352 *acr, struct list_head *imgs, + struct nvkm_gpuobj *wpr_blob, u64 wpr_addr) +{ + struct ls_ucode_img *_img; + u32 pos = 0; + + nvkm_kmap(wpr_blob); + + list_for_each_entry(_img, imgs, node) { + struct ls_ucode_img_r367 *img = ls_ucode_img_r367(_img); + const struct acr_r352_ls_func *ls_func = + acr->func->ls_func[_img->falcon_id]; + u8 gdesc[ls_func->bl_desc_size]; + + nvkm_gpuobj_memcpy_to(wpr_blob, pos, &img->wpr_header, + sizeof(img->wpr_header)); + + nvkm_gpuobj_memcpy_to(wpr_blob, img->wpr_header.lsb_offset, + &img->lsb_header, sizeof(img->lsb_header)); + + /* Generate and write BL descriptor */ + memset(gdesc, 0, ls_func->bl_desc_size); + ls_func->generate_bl_desc(&acr->base, _img, wpr_addr, gdesc); + + nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.bl_data_off, + gdesc, ls_func->bl_desc_size); + + /* Copy ucode */ + nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.ucode_off, + _img->ucode_data, _img->ucode_size); + + pos += sizeof(img->wpr_header); + } + + nvkm_wo32(wpr_blob, pos, NVKM_SECBOOT_FALCON_INVALID); + + nvkm_done(wpr_blob); + + return 0; +} + +struct acr_r367_hsflcn_desc { + u8 reserved_dmem[0x200]; + u32 signatures[4]; + u32 wpr_region_id; + u32 wpr_offset; + u32 mmu_memory_range; +#define FLCN_ACR_MAX_REGIONS 2 + struct { + u32 no_regions; + struct { + u32 start_addr; + u32 end_addr; + u32 region_id; + u32 read_mask; + u32 write_mask; + u32 client_mask; + u32 shadow_mem_start_addr; + } region_props[FLCN_ACR_MAX_REGIONS]; + } regions; + u32 ucode_blob_size; + u64 ucode_blob_base __aligned(8); + struct { + u32 vpr_enabled; + u32 vpr_start; + u32 vpr_end; + u32 hdcp_policies; + } vpr_desc; +}; + +void +acr_r367_fixup_hs_desc(struct acr_r352 *acr, struct nvkm_secboot *sb, + void *_desc) +{ + struct acr_r367_hsflcn_desc *desc = _desc; + struct nvkm_gpuobj *ls_blob = acr->ls_blob; + + /* WPR region information if WPR is not fixed */ + if (sb->wpr_size == 0) { + u64 wpr_start = ls_blob->addr; + u64 wpr_end = ls_blob->addr + ls_blob->size; + + if (acr->func->shadow_blob) + wpr_start += ls_blob->size / 2; + + desc->wpr_region_id = 1; + desc->regions.no_regions = 2; + desc->regions.region_props[0].start_addr = wpr_start >> 8; + desc->regions.region_props[0].end_addr = wpr_end >> 8; + desc->regions.region_props[0].region_id = 1; + desc->regions.region_props[0].read_mask = 0xf; + desc->regions.region_props[0].write_mask = 0xc; + desc->regions.region_props[0].client_mask = 0x2; + if (acr->func->shadow_blob) + desc->regions.region_props[0].shadow_mem_start_addr = + ls_blob->addr >> 8; + else + desc->regions.region_props[0].shadow_mem_start_addr = 0; + } else { + desc->ucode_blob_base = ls_blob->addr; + desc->ucode_blob_size = ls_blob->size; + } +} + +const struct acr_r352_func +acr_r367_func = { + .fixup_hs_desc = acr_r367_fixup_hs_desc, + .generate_hs_bl_desc = acr_r361_generate_hs_bl_desc, + .hs_bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc), + .shadow_blob = true, + .ls_ucode_img_load = acr_r367_ls_ucode_img_load, + .ls_fill_headers = acr_r367_ls_fill_headers, + .ls_write_wpr = acr_r367_ls_write_wpr, + .ls_func = { + [NVKM_SECBOOT_FALCON_FECS] = &acr_r361_ls_fecs_func, + [NVKM_SECBOOT_FALCON_GPCCS] = &acr_r361_ls_gpccs_func, + [NVKM_SECBOOT_FALCON_PMU] = &acr_r361_ls_pmu_func, + [NVKM_SECBOOT_FALCON_SEC2] = &acr_r361_ls_sec2_func, + }, +}; + +struct nvkm_acr * +acr_r367_new(enum nvkm_secboot_falcon boot_falcon, + unsigned long managed_falcons) +{ + return acr_r352_new_(&acr_r367_func, boot_falcon, managed_falcons); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r367.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r367.h new file mode 100644 index 000000000000..ec6a71ca36be --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r367.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NVKM_SECBOOT_ACR_R367_H__ +#define __NVKM_SECBOOT_ACR_R367_H__ + +#include "acr_r352.h" + +void acr_r367_fixup_hs_desc(struct acr_r352 *, struct nvkm_secboot *, void *); + +struct ls_ucode_img *acr_r367_ls_ucode_img_load(const struct acr_r352 *, + enum nvkm_secboot_falcon); +int acr_r367_ls_fill_headers(struct acr_r352 *, struct list_head *); +int acr_r367_ls_write_wpr(struct acr_r352 *, struct list_head *, + struct nvkm_gpuobj *, u64); +#endif From 717bad8273530ca94ae3ff27ff80c90b3721d163 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 15 Nov 2016 16:30:52 +0900 Subject: [PATCH 0696/1299] drm/nouveau/secboot: support for r375 ACR r375 ACR uses a unified bootloader descriptor for the GR and PMU firmwares. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/nvkm/subdev/secboot/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/secboot/acr.h | 1 + .../nouveau/nvkm/subdev/secboot/acr_r375.c | 165 ++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r375.c diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild index 8b2ab23fdfab..62ee784a48f8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild @@ -6,5 +6,6 @@ nvkm-y += nvkm/subdev/secboot/acr_r352.o nvkm-y += nvkm/subdev/secboot/acr_r361.o nvkm-y += nvkm/subdev/secboot/acr_r364.o nvkm-y += nvkm/subdev/secboot/acr_r367.o +nvkm-y += nvkm/subdev/secboot/acr_r375.o nvkm-y += nvkm/subdev/secboot/gm200.o nvkm-y += nvkm/subdev/secboot/gm20b.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h index e4f1c45d3350..93d804652d44 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h @@ -65,5 +65,6 @@ struct nvkm_acr *acr_r352_new(unsigned long); struct nvkm_acr *acr_r361_new(unsigned long); struct nvkm_acr *acr_r364_new(unsigned long); struct nvkm_acr *acr_r367_new(enum nvkm_secboot_falcon, unsigned long); +struct nvkm_acr *acr_r375_new(enum nvkm_secboot_falcon, unsigned long); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r375.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r375.c new file mode 100644 index 000000000000..ddb795bb007b --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r375.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "acr_r367.h" + +#include +#include +#include + +/* + * r375 ACR: similar to r367, but with a unified bootloader descriptor + * structure for GR and PMU falcons. + */ + +/* Same as acr_r361_flcn_bl_desc, plus argc/argv */ +struct acr_r375_flcn_bl_desc { + u32 reserved[4]; + u32 signature[4]; + u32 ctx_dma; + struct flcn_u64 code_dma_base; + u32 non_sec_code_off; + u32 non_sec_code_size; + u32 sec_code_off; + u32 sec_code_size; + u32 code_entry_point; + struct flcn_u64 data_dma_base; + u32 data_size; + u32 argc; + u32 argv; +}; + +static void +acr_r375_generate_flcn_bl_desc(const struct nvkm_acr *acr, + const struct ls_ucode_img *img, u64 wpr_addr, + void *_desc) +{ + struct acr_r375_flcn_bl_desc *desc = _desc; + const struct ls_ucode_img_desc *pdesc = &img->ucode_desc; + u64 base, addr_code, addr_data; + + base = wpr_addr + img->ucode_off + pdesc->app_start_offset; + addr_code = base + pdesc->app_resident_code_offset; + addr_data = base + pdesc->app_resident_data_offset; + + desc->ctx_dma = FALCON_DMAIDX_UCODE; + desc->code_dma_base = u64_to_flcn64(addr_code); + desc->non_sec_code_off = pdesc->app_resident_code_offset; + desc->non_sec_code_size = pdesc->app_resident_code_size; + desc->code_entry_point = pdesc->app_imem_entry; + desc->data_dma_base = u64_to_flcn64(addr_data); + desc->data_size = pdesc->app_resident_data_size; +} + +static void +acr_r375_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc, + u64 offset) +{ + struct acr_r375_flcn_bl_desc *bl_desc = _bl_desc; + + bl_desc->ctx_dma = FALCON_DMAIDX_VIRT; + bl_desc->non_sec_code_off = hdr->non_sec_code_off; + bl_desc->non_sec_code_size = hdr->non_sec_code_size; + bl_desc->sec_code_off = hsf_load_header_app_off(hdr, 0); + bl_desc->sec_code_size = hsf_load_header_app_size(hdr, 0); + bl_desc->code_entry_point = 0; + bl_desc->code_dma_base = u64_to_flcn64(offset); + bl_desc->data_dma_base = u64_to_flcn64(offset + hdr->data_dma_base); + bl_desc->data_size = hdr->data_size; +} + +const struct acr_r352_ls_func +acr_r375_ls_fecs_func = { + .load = acr_ls_ucode_load_fecs, + .generate_bl_desc = acr_r375_generate_flcn_bl_desc, + .bl_desc_size = sizeof(struct acr_r375_flcn_bl_desc), +}; + +const struct acr_r352_ls_func +acr_r375_ls_gpccs_func = { + .load = acr_ls_ucode_load_gpccs, + .generate_bl_desc = acr_r375_generate_flcn_bl_desc, + .bl_desc_size = sizeof(struct acr_r375_flcn_bl_desc), + /* GPCCS will be loaded using PRI */ + .lhdr_flags = LSF_FLAG_FORCE_PRIV_LOAD, +}; + + +static void +acr_r375_generate_pmu_bl_desc(const struct nvkm_acr *acr, + const struct ls_ucode_img *img, u64 wpr_addr, + void *_desc) +{ + const struct ls_ucode_img_desc *pdesc = &img->ucode_desc; + const struct nvkm_pmu *pmu = acr->subdev->device->pmu; + struct acr_r375_flcn_bl_desc *desc = _desc; + u64 base, addr_code, addr_data; + u32 addr_args; + + base = wpr_addr + img->ucode_off + pdesc->app_start_offset; + addr_code = base + pdesc->app_resident_code_offset; + addr_data = base + pdesc->app_resident_data_offset; + addr_args = pmu->falcon->data.limit; + addr_args -= NVKM_MSGQUEUE_CMDLINE_SIZE; + + desc->ctx_dma = FALCON_DMAIDX_UCODE; + desc->code_dma_base = u64_to_flcn64(addr_code); + desc->non_sec_code_off = pdesc->app_resident_code_offset; + desc->non_sec_code_size = pdesc->app_resident_code_size; + desc->code_entry_point = pdesc->app_imem_entry; + desc->data_dma_base = u64_to_flcn64(addr_data); + desc->data_size = pdesc->app_resident_data_size; + desc->argc = 1; + desc->argv = addr_args; +} + +const struct acr_r352_ls_func +acr_r375_ls_pmu_func = { + .load = acr_ls_ucode_load_pmu, + .generate_bl_desc = acr_r375_generate_pmu_bl_desc, + .bl_desc_size = sizeof(struct acr_r375_flcn_bl_desc), + .post_run = acr_ls_pmu_post_run, +}; + + +const struct acr_r352_func +acr_r375_func = { + .fixup_hs_desc = acr_r367_fixup_hs_desc, + .generate_hs_bl_desc = acr_r375_generate_hs_bl_desc, + .hs_bl_desc_size = sizeof(struct acr_r375_flcn_bl_desc), + .shadow_blob = true, + .ls_ucode_img_load = acr_r367_ls_ucode_img_load, + .ls_fill_headers = acr_r367_ls_fill_headers, + .ls_write_wpr = acr_r367_ls_write_wpr, + .ls_func = { + [NVKM_SECBOOT_FALCON_FECS] = &acr_r375_ls_fecs_func, + [NVKM_SECBOOT_FALCON_GPCCS] = &acr_r375_ls_gpccs_func, + [NVKM_SECBOOT_FALCON_PMU] = &acr_r375_ls_pmu_func, + }, +}; + +struct nvkm_acr * +acr_r375_new(enum nvkm_secboot_falcon boot_falcon, + unsigned long managed_falcons) +{ + return acr_r352_new_(&acr_r375_func, boot_falcon, managed_falcons); +} From 84074e5b10acc7154d5aff8630f9379870e71be6 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 16 Feb 2017 18:57:03 +0900 Subject: [PATCH 0697/1299] drm/nouveau/secboot: put HS code loading code into own file We will also need to load HS blobs outside of acr_r352 (for instance, to run the NVDEC VPR scrubber), so make this code reusable. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/nvkm/subdev/secboot/Kbuild | 1 + .../nouveau/nvkm/subdev/secboot/acr_r352.c | 100 +----------------- .../nouveau/nvkm/subdev/secboot/acr_r352.h | 27 +---- .../nouveau/nvkm/subdev/secboot/hs_ucode.c | 97 +++++++++++++++++ .../nouveau/nvkm/subdev/secboot/hs_ucode.h | 81 ++++++++++++++ 5 files changed, 182 insertions(+), 124 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/secboot/hs_ucode.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/secboot/hs_ucode.h diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild index 62ee784a48f8..566c8593a099 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild @@ -1,4 +1,5 @@ nvkm-y += nvkm/subdev/secboot/base.o +nvkm-y += nvkm/subdev/secboot/hs_ucode.o nvkm-y += nvkm/subdev/secboot/ls_ucode_gr.o nvkm-y += nvkm/subdev/secboot/ls_ucode_msgqueue.o nvkm-y += nvkm/subdev/secboot/acr.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 72ca537b0b59..993a38eb3ed5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -21,6 +21,7 @@ */ #include "acr_r352.h" +#include "hs_ucode.h" #include #include @@ -31,31 +32,6 @@ #include #include -/** - * struct hsf_fw_header - HS firmware descriptor - * @sig_dbg_offset: offset of the debug signature - * @sig_dbg_size: size of the debug signature - * @sig_prod_offset: offset of the production signature - * @sig_prod_size: size of the production signature - * @patch_loc: offset of the offset (sic) of where the signature is - * @patch_sig: offset of the offset (sic) to add to sig_*_offset - * @hdr_offset: offset of the load header (see struct hs_load_header) - * @hdr_size: size of above header - * - * This structure is embedded in the HS firmware image at - * hs_bin_hdr.header_offset. - */ -struct hsf_fw_header { - u32 sig_dbg_offset; - u32 sig_dbg_size; - u32 sig_prod_offset; - u32 sig_prod_size; - u32 patch_loc; - u32 patch_sig; - u32 hdr_offset; - u32 hdr_size; -}; - /** * struct acr_r352_flcn_bl_desc - DMEM bootloader descriptor * @signature: 16B signature for secure code. 0s if no secure code @@ -599,47 +575,6 @@ acr_r352_prepare_ls_blob(struct acr_r352 *acr, u64 wpr_addr, u32 wpr_size) -/** - * acr_r352_hsf_patch_signature() - patch HS blob with correct signature for - * specified falcon. - */ -static void -acr_r352_hsf_patch_signature(const struct nvkm_falcon *falcon, void *acr_image, - bool new_format) -{ - struct fw_bin_header *hsbin_hdr = acr_image; - struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset; - void *hs_data = acr_image + hsbin_hdr->data_offset; - void *sig; - u32 sig_size; - u32 patch_loc, patch_sig; - - /* - * I had the brilliant idea to "improve" the binary format by - * removing this useless indirection. However to make NVIDIA files - * directly compatible, let's support both format. - */ - if (new_format) { - patch_loc = fw_hdr->patch_loc; - patch_sig = fw_hdr->patch_sig; - } else { - patch_loc = *(u32 *)(acr_image + fw_hdr->patch_loc); - patch_sig = *(u32 *)(acr_image + fw_hdr->patch_sig); - } - - /* Falcon in debug or production mode? */ - if (falcon->debug) { - sig = acr_image + fw_hdr->sig_dbg_offset; - sig_size = fw_hdr->sig_dbg_size; - } else { - sig = acr_image + fw_hdr->sig_prod_offset; - sig_size = fw_hdr->sig_prod_size; - } - - /* Patch signature */ - memcpy(hs_data + patch_loc, sig + patch_sig, sig_size); -} - void acr_r352_fixup_hs_desc(struct acr_r352 *acr, struct nvkm_secboot *sb, void *_desc) @@ -687,37 +622,6 @@ acr_r352_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc, bl_desc->data_size = hdr->data_size; } -void * -acr_r352_load_hs_blob(struct nvkm_secboot *sb, const struct nvkm_falcon *falcon, - const char *fw) -{ - struct nvkm_subdev *subdev = &sb->subdev; - void *acr_image; - bool new_format; - - acr_image = nvkm_acr_load_firmware(subdev, fw, 0); - if (IS_ERR(acr_image)) - return acr_image; - - /* detect the format to define how signature should be patched */ - switch (((u32 *)acr_image)[0]) { - case 0x3b1d14f0: - new_format = true; - break; - case 0x000010de: - new_format = false; - break; - default: - nvkm_error(subdev, "unknown header for HS blob %s\n", fw); - return ERR_PTR(-EINVAL); - } - - /* Patch signature */ - acr_r352_hsf_patch_signature(falcon, acr_image, new_format); - - return acr_image; -} - /** * acr_r352_prepare_hs_blob - load and prepare a HS blob and BL descriptor * @@ -740,7 +644,7 @@ acr_r352_prepare_hs_blob(struct acr_r352 *acr, struct nvkm_secboot *sb, void *acr_data; int ret; - acr_image = acr_r352_load_hs_blob(sb, sb->boot_falcon, fw); + acr_image = hs_ucode_load_blob(subdev, sb->boot_falcon, fw); if (IS_ERR(acr_image)) return PTR_ERR(acr_image); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h index b610152ea610..6e88520566c9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h @@ -24,6 +24,7 @@ #include "acr.h" #include "ls_ucode.h" +#include "hs_ucode.h" struct ls_ucode_img; @@ -33,32 +34,6 @@ struct ls_ucode_img; #define LSF_FLAG_DMACTL_REQ_CTX 4 #define LSF_FLAG_FORCE_PRIV_LOAD 8 -/* - * HS blob structures - */ - -/** - * struct hsf_load_header - HS firmware load header - */ -struct hsf_load_header { - u32 non_sec_code_off; - u32 non_sec_code_size; - u32 data_dma_base; - u32 data_size; - u32 num_apps; - /* - * Organized as follows: - * - app0_code_off - * - app1_code_off - * - ... - * - appn_code_off - * - app0_code_size - * - app1_code_size - * - ... - */ - u32 apps[0]; -}; - static inline u32 hsf_load_header_app_off(const struct hsf_load_header *hdr, u32 app) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/hs_ucode.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/hs_ucode.c new file mode 100644 index 000000000000..6b33182ddc2f --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/hs_ucode.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "hs_ucode.h" +#include "ls_ucode.h" +#include "acr.h" + +#include + +/** + * hs_ucode_patch_signature() - patch HS blob with correct signature for + * specified falcon. + */ +static void +hs_ucode_patch_signature(const struct nvkm_falcon *falcon, void *acr_image, + bool new_format) +{ + struct fw_bin_header *hsbin_hdr = acr_image; + struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset; + void *hs_data = acr_image + hsbin_hdr->data_offset; + void *sig; + u32 sig_size; + u32 patch_loc, patch_sig; + + /* + * I had the brilliant idea to "improve" the binary format by + * removing this useless indirection. However to make NVIDIA files + * directly compatible, let's support both format. + */ + if (new_format) { + patch_loc = fw_hdr->patch_loc; + patch_sig = fw_hdr->patch_sig; + } else { + patch_loc = *(u32 *)(acr_image + fw_hdr->patch_loc); + patch_sig = *(u32 *)(acr_image + fw_hdr->patch_sig); + } + + /* Falcon in debug or production mode? */ + if (falcon->debug) { + sig = acr_image + fw_hdr->sig_dbg_offset; + sig_size = fw_hdr->sig_dbg_size; + } else { + sig = acr_image + fw_hdr->sig_prod_offset; + sig_size = fw_hdr->sig_prod_size; + } + + /* Patch signature */ + memcpy(hs_data + patch_loc, sig + patch_sig, sig_size); +} + +void * +hs_ucode_load_blob(struct nvkm_subdev *subdev, const struct nvkm_falcon *falcon, + const char *fw) +{ + void *acr_image; + bool new_format; + + acr_image = nvkm_acr_load_firmware(subdev, fw, 0); + if (IS_ERR(acr_image)) + return acr_image; + + /* detect the format to define how signature should be patched */ + switch (((u32 *)acr_image)[0]) { + case 0x3b1d14f0: + new_format = true; + break; + case 0x000010de: + new_format = false; + break; + default: + nvkm_error(subdev, "unknown header for HS blob %s\n", fw); + return ERR_PTR(-EINVAL); + } + + hs_ucode_patch_signature(falcon, acr_image, new_format); + + return acr_image; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/hs_ucode.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/hs_ucode.h new file mode 100644 index 000000000000..d8cfc6f7752a --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/hs_ucode.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NVKM_SECBOOT_HS_UCODE_H__ +#define __NVKM_SECBOOT_HS_UCODE_H__ + +#include +#include + +struct nvkm_falcon; + +/** + * struct hsf_fw_header - HS firmware descriptor + * @sig_dbg_offset: offset of the debug signature + * @sig_dbg_size: size of the debug signature + * @sig_prod_offset: offset of the production signature + * @sig_prod_size: size of the production signature + * @patch_loc: offset of the offset (sic) of where the signature is + * @patch_sig: offset of the offset (sic) to add to sig_*_offset + * @hdr_offset: offset of the load header (see struct hs_load_header) + * @hdr_size: size of above header + * + * This structure is embedded in the HS firmware image at + * hs_bin_hdr.header_offset. + */ +struct hsf_fw_header { + u32 sig_dbg_offset; + u32 sig_dbg_size; + u32 sig_prod_offset; + u32 sig_prod_size; + u32 patch_loc; + u32 patch_sig; + u32 hdr_offset; + u32 hdr_size; +}; + +/** + * struct hsf_load_header - HS firmware load header + */ +struct hsf_load_header { + u32 non_sec_code_off; + u32 non_sec_code_size; + u32 data_dma_base; + u32 data_size; + u32 num_apps; + /* + * Organized as follows: + * - app0_code_off + * - app1_code_off + * - ... + * - appn_code_off + * - app0_code_size + * - app1_code_size + * - ... + */ + u32 apps[0]; +}; + +void *hs_ucode_load_blob(struct nvkm_subdev *, const struct nvkm_falcon *, + const char *); + +#endif From 5429f82f341524deb9f66193892a69dea2f862a3 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 26 Jan 2017 15:18:25 +0900 Subject: [PATCH 0698/1299] drm/nouveau/secboot: add gp102/gp104/gp106/gp107 support These gp10x chips are supporting using (roughly) the same firmware. Compared to previous secure chips, ACR runs on SEC2 and so does the low-secure msgqueue. ACR for these chips is based on r367. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- .../drm/nouveau/include/nvkm/subdev/secboot.h | 1 + .../drm/nouveau/nvkm/subdev/secboot/Kbuild | 1 + .../drm/nouveau/nvkm/subdev/secboot/gp102.c | 251 ++++++++++++++++++ 3 files changed, 253 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h index 03da24e2c229..d6a4bdb6573b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h @@ -59,5 +59,6 @@ int nvkm_secboot_reset(struct nvkm_secboot *, enum nvkm_secboot_falcon); int gm200_secboot_new(struct nvkm_device *, int, struct nvkm_secboot **); int gm20b_secboot_new(struct nvkm_device *, int, struct nvkm_secboot **); +int gp102_secboot_new(struct nvkm_device *, int, struct nvkm_secboot **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild index 566c8593a099..ac7f50ae53c6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild @@ -10,3 +10,4 @@ nvkm-y += nvkm/subdev/secboot/acr_r367.o nvkm-y += nvkm/subdev/secboot/acr_r375.o nvkm-y += nvkm/subdev/secboot/gm200.o nvkm-y += nvkm/subdev/secboot/gm20b.o +nvkm-y += nvkm/subdev/secboot/gp102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c new file mode 100644 index 000000000000..8570c84c8a29 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "acr.h" +#include "gm200.h" + +#include "ls_ucode.h" +#include "hs_ucode.h" +#include +#include +#include +#include + +static bool +gp102_secboot_scrub_required(struct nvkm_secboot *sb) +{ + struct nvkm_subdev *subdev = &sb->subdev; + struct nvkm_device *device = subdev->device; + u32 reg; + + nvkm_wr32(device, 0x100cd0, 0x2); + reg = nvkm_rd32(device, 0x100cd0); + + return (reg & BIT(4)); +} + +static int +gp102_run_secure_scrub(struct nvkm_secboot *sb) +{ + struct nvkm_subdev *subdev = &sb->subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_engine *engine; + struct nvkm_falcon *falcon; + void *scrub_image; + struct fw_bin_header *hsbin_hdr; + struct hsf_fw_header *fw_hdr; + struct hsf_load_header *lhdr; + void *scrub_data; + int ret; + + nvkm_debug(subdev, "running VPR scrubber binary on NVDEC...\n"); + + if (!(engine = nvkm_engine_ref(&device->nvdec->engine))) + return PTR_ERR(engine); + falcon = device->nvdec->falcon; + + nvkm_falcon_get(falcon, &sb->subdev); + + scrub_image = hs_ucode_load_blob(subdev, falcon, "nvdec/scrubber"); + if (IS_ERR(scrub_image)) + return PTR_ERR(scrub_image); + + nvkm_falcon_reset(falcon); + nvkm_falcon_bind_context(falcon, NULL); + + hsbin_hdr = scrub_image; + fw_hdr = scrub_image + hsbin_hdr->header_offset; + lhdr = scrub_image + fw_hdr->hdr_offset; + scrub_data = scrub_image + hsbin_hdr->data_offset; + + nvkm_falcon_load_imem(falcon, scrub_data, lhdr->non_sec_code_off, + lhdr->non_sec_code_size, + lhdr->non_sec_code_off >> 8, 0, false); + nvkm_falcon_load_imem(falcon, scrub_data + lhdr->apps[0], + ALIGN(lhdr->apps[0], 0x100), + lhdr->apps[1], + lhdr->apps[0] >> 8, 0, true); + nvkm_falcon_load_dmem(falcon, scrub_data + lhdr->data_dma_base, 0, + lhdr->data_size, 0); + + kfree(scrub_image); + + nvkm_falcon_set_start_addr(falcon, 0x0); + nvkm_falcon_start(falcon); + + ret = nvkm_falcon_wait_for_halt(falcon, 500); + if (ret < 0) { + nvkm_error(subdev, "failed to run VPR scrubber binary!\n"); + ret = -ETIMEDOUT; + goto end; + } + + /* put nvdec in clean state - without reset it will remain in HS mode */ + nvkm_falcon_reset(falcon); + + if (gp102_secboot_scrub_required(sb)) { + nvkm_error(subdev, "VPR scrubber binary failed!\n"); + ret = -EINVAL; + goto end; + } + + nvkm_debug(subdev, "VPR scrub successfully completed\n"); + +end: + nvkm_falcon_put(falcon, &sb->subdev); + nvkm_engine_unref(&engine); + return ret; +} + +static int +gp102_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob, + struct nvkm_falcon *falcon) +{ + int ret; + + /* make sure the VPR region is unlocked */ + if (gp102_secboot_scrub_required(sb)) { + ret = gp102_run_secure_scrub(sb); + if (ret) + return ret; + } + + return gm200_secboot_run_blob(sb, blob, falcon); +} + +static const struct nvkm_secboot_func +gp102_secboot = { + .dtor = gm200_secboot_dtor, + .oneinit = gm200_secboot_oneinit, + .fini = gm200_secboot_fini, + .run_blob = gp102_secboot_run_blob, +}; + +int +gp102_secboot_new(struct nvkm_device *device, int index, + struct nvkm_secboot **psb) +{ + int ret; + struct gm200_secboot *gsb; + struct nvkm_acr *acr; + + acr = acr_r367_new(NVKM_SECBOOT_FALCON_SEC2, + BIT(NVKM_SECBOOT_FALCON_FECS) | + BIT(NVKM_SECBOOT_FALCON_GPCCS) | + BIT(NVKM_SECBOOT_FALCON_SEC2)); + if (IS_ERR(acr)) + return PTR_ERR(acr); + + gsb = kzalloc(sizeof(*gsb), GFP_KERNEL); + if (!gsb) { + psb = NULL; + return -ENOMEM; + } + *psb = &gsb->base; + + ret = nvkm_secboot_ctor(&gp102_secboot, acr, device, index, &gsb->base); + if (ret) + return ret; + + return 0; +} + +MODULE_FIRMWARE("nvidia/gp102/acr/bl.bin"); +MODULE_FIRMWARE("nvidia/gp102/acr/unload_bl.bin"); +MODULE_FIRMWARE("nvidia/gp102/acr/ucode_load.bin"); +MODULE_FIRMWARE("nvidia/gp102/acr/ucode_unload.bin"); +MODULE_FIRMWARE("nvidia/gp102/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/gp102/gr/fecs_inst.bin"); +MODULE_FIRMWARE("nvidia/gp102/gr/fecs_data.bin"); +MODULE_FIRMWARE("nvidia/gp102/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/gp102/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/gp102/gr/gpccs_inst.bin"); +MODULE_FIRMWARE("nvidia/gp102/gr/gpccs_data.bin"); +MODULE_FIRMWARE("nvidia/gp102/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/gp102/gr/sw_ctx.bin"); +MODULE_FIRMWARE("nvidia/gp102/gr/sw_nonctx.bin"); +MODULE_FIRMWARE("nvidia/gp102/gr/sw_bundle_init.bin"); +MODULE_FIRMWARE("nvidia/gp102/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/gp102/nvdec/scrubber.bin"); +MODULE_FIRMWARE("nvidia/gp102/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/gp102/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/gp102/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/gp104/acr/bl.bin"); +MODULE_FIRMWARE("nvidia/gp104/acr/unload_bl.bin"); +MODULE_FIRMWARE("nvidia/gp104/acr/ucode_load.bin"); +MODULE_FIRMWARE("nvidia/gp104/acr/ucode_unload.bin"); +MODULE_FIRMWARE("nvidia/gp104/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/gp104/gr/fecs_inst.bin"); +MODULE_FIRMWARE("nvidia/gp104/gr/fecs_data.bin"); +MODULE_FIRMWARE("nvidia/gp104/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/gp104/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/gp104/gr/gpccs_inst.bin"); +MODULE_FIRMWARE("nvidia/gp104/gr/gpccs_data.bin"); +MODULE_FIRMWARE("nvidia/gp104/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/gp104/gr/sw_ctx.bin"); +MODULE_FIRMWARE("nvidia/gp104/gr/sw_nonctx.bin"); +MODULE_FIRMWARE("nvidia/gp104/gr/sw_bundle_init.bin"); +MODULE_FIRMWARE("nvidia/gp104/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/gp104/nvdec/scrubber.bin"); +MODULE_FIRMWARE("nvidia/gp104/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/gp104/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/gp104/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/gp106/acr/bl.bin"); +MODULE_FIRMWARE("nvidia/gp106/acr/unload_bl.bin"); +MODULE_FIRMWARE("nvidia/gp106/acr/ucode_load.bin"); +MODULE_FIRMWARE("nvidia/gp106/acr/ucode_unload.bin"); +MODULE_FIRMWARE("nvidia/gp106/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/gp106/gr/fecs_inst.bin"); +MODULE_FIRMWARE("nvidia/gp106/gr/fecs_data.bin"); +MODULE_FIRMWARE("nvidia/gp106/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/gp106/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/gp106/gr/gpccs_inst.bin"); +MODULE_FIRMWARE("nvidia/gp106/gr/gpccs_data.bin"); +MODULE_FIRMWARE("nvidia/gp106/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/gp106/gr/sw_ctx.bin"); +MODULE_FIRMWARE("nvidia/gp106/gr/sw_nonctx.bin"); +MODULE_FIRMWARE("nvidia/gp106/gr/sw_bundle_init.bin"); +MODULE_FIRMWARE("nvidia/gp106/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/gp106/nvdec/scrubber.bin"); +MODULE_FIRMWARE("nvidia/gp106/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/gp106/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/gp106/sec2/sig.bin"); +MODULE_FIRMWARE("nvidia/gp107/acr/bl.bin"); +MODULE_FIRMWARE("nvidia/gp107/acr/unload_bl.bin"); +MODULE_FIRMWARE("nvidia/gp107/acr/ucode_load.bin"); +MODULE_FIRMWARE("nvidia/gp107/acr/ucode_unload.bin"); +MODULE_FIRMWARE("nvidia/gp107/gr/fecs_bl.bin"); +MODULE_FIRMWARE("nvidia/gp107/gr/fecs_inst.bin"); +MODULE_FIRMWARE("nvidia/gp107/gr/fecs_data.bin"); +MODULE_FIRMWARE("nvidia/gp107/gr/fecs_sig.bin"); +MODULE_FIRMWARE("nvidia/gp107/gr/gpccs_bl.bin"); +MODULE_FIRMWARE("nvidia/gp107/gr/gpccs_inst.bin"); +MODULE_FIRMWARE("nvidia/gp107/gr/gpccs_data.bin"); +MODULE_FIRMWARE("nvidia/gp107/gr/gpccs_sig.bin"); +MODULE_FIRMWARE("nvidia/gp107/gr/sw_ctx.bin"); +MODULE_FIRMWARE("nvidia/gp107/gr/sw_nonctx.bin"); +MODULE_FIRMWARE("nvidia/gp107/gr/sw_bundle_init.bin"); +MODULE_FIRMWARE("nvidia/gp107/gr/sw_method_init.bin"); +MODULE_FIRMWARE("nvidia/gp107/nvdec/scrubber.bin"); +MODULE_FIRMWARE("nvidia/gp107/sec2/desc.bin"); +MODULE_FIRMWARE("nvidia/gp107/sec2/image.bin"); +MODULE_FIRMWARE("nvidia/gp107/sec2/sig.bin"); From 4eb3390e345e4dc5b17673f64660098eaa21522d Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 16 Feb 2017 15:46:25 +0900 Subject: [PATCH 0699/1299] drm/nouveau/falcon: support for gp10x msgqueue Add support for the msgqueue firmware used to process SEC2 commands for gp10x chips. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/falcon/msgqueue.c | 3 + .../gpu/drm/nouveau/nvkm/falcon/msgqueue.h | 1 + .../nouveau/nvkm/falcon/msgqueue_0148cdec.c | 263 ++++++++++++++++++ 4 files changed, 268 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue_0148cdec.c diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild index a2e9ae3db6dd..2aa040ba39e5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild @@ -2,3 +2,4 @@ nvkm-y += nvkm/falcon/base.o nvkm-y += nvkm/falcon/v1.o nvkm-y += nvkm/falcon/msgqueue.o nvkm-y += nvkm/falcon/msgqueue_0137c63d.o +nvkm-y += nvkm/falcon/msgqueue_0148cdec.o diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c index 59ed1c45bd65..a063fb823117 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c @@ -481,6 +481,9 @@ nvkm_msgqueue_new(u32 version, struct nvkm_falcon *falcon, struct nvkm_msgqueue case 0x0137c63d: ret = msgqueue_0137c63d_new(falcon, queue); break; + case 0x0148cdec: + ret = msgqueue_0148cdec_new(falcon, queue); + break; default: nvkm_error(subdev, "unhandled firmware version 0x%08x\n", version); diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.h b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.h index 2f537e0ec9b4..f37afe963d3e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.h +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.h @@ -202,5 +202,6 @@ void nvkm_msgqueue_process_msgs(struct nvkm_msgqueue *, struct nvkm_msgqueue_queue *); int msgqueue_0137c63d_new(struct nvkm_falcon *, struct nvkm_msgqueue **); +int msgqueue_0148cdec_new(struct nvkm_falcon *, struct nvkm_msgqueue **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue_0148cdec.c b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue_0148cdec.c new file mode 100644 index 000000000000..ed5d0da4f4e9 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue_0148cdec.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "msgqueue.h" +#include +#include + +/* + * This firmware runs on the SEC falcon. It only has one command and one + * message queue, and uses a different command line and init message. + */ + +enum { + MSGQUEUE_0148CDEC_COMMAND_QUEUE = 0, + MSGQUEUE_0148CDEC_MESSAGE_QUEUE = 1, + MSGQUEUE_0148CDEC_NUM_QUEUES, +}; + +struct msgqueue_0148cdec { + struct nvkm_msgqueue base; + + struct nvkm_msgqueue_queue queue[MSGQUEUE_0148CDEC_NUM_QUEUES]; +}; +#define msgqueue_0148cdec(q) \ + container_of(q, struct msgqueue_0148cdec, base) + +static struct nvkm_msgqueue_queue * +msgqueue_0148cdec_cmd_queue(struct nvkm_msgqueue *queue, + enum msgqueue_msg_priority priority) +{ + struct msgqueue_0148cdec *priv = msgqueue_0148cdec(queue); + + return &priv->queue[MSGQUEUE_0148CDEC_COMMAND_QUEUE]; +} + +static void +msgqueue_0148cdec_process_msgs(struct nvkm_msgqueue *queue) +{ + struct msgqueue_0148cdec *priv = msgqueue_0148cdec(queue); + struct nvkm_msgqueue_queue *q_queue = + &priv->queue[MSGQUEUE_0148CDEC_MESSAGE_QUEUE]; + + nvkm_msgqueue_process_msgs(&priv->base, q_queue); +} + + +/* Init unit */ +#define MSGQUEUE_0148CDEC_UNIT_INIT 0x01 + +enum { + INIT_MSG_INIT = 0x0, +}; + +static void +init_gen_cmdline(struct nvkm_msgqueue *queue, void *buf) +{ + struct { + u32 freq_hz; + u32 falc_trace_size; + u32 falc_trace_dma_base; + u32 falc_trace_dma_idx; + bool secure_mode; + } *args = buf; + + args->secure_mode = false; +} + +static int +init_callback(struct nvkm_msgqueue *_queue, struct nvkm_msgqueue_hdr *hdr) +{ + struct msgqueue_0148cdec *priv = msgqueue_0148cdec(_queue); + struct { + struct nvkm_msgqueue_msg base; + + u8 num_queues; + u16 os_debug_entry_point; + + struct { + u32 offset; + u16 size; + u8 index; + u8 id; + } queue_info[MSGQUEUE_0148CDEC_NUM_QUEUES]; + + u16 sw_managed_area_offset; + u16 sw_managed_area_size; + } *init = (void *)hdr; + const struct nvkm_subdev *subdev = _queue->falcon->owner; + int i; + + if (init->base.hdr.unit_id != MSGQUEUE_0148CDEC_UNIT_INIT) { + nvkm_error(subdev, "expected message from init unit\n"); + return -EINVAL; + } + + if (init->base.msg_type != INIT_MSG_INIT) { + nvkm_error(subdev, "expected SEC init msg\n"); + return -EINVAL; + } + + for (i = 0; i < MSGQUEUE_0148CDEC_NUM_QUEUES; i++) { + u8 id = init->queue_info[i].id; + struct nvkm_msgqueue_queue *queue = &priv->queue[id]; + + mutex_init(&queue->mutex); + + queue->index = init->queue_info[i].index; + queue->offset = init->queue_info[i].offset; + queue->size = init->queue_info[i].size; + + if (id == MSGQUEUE_0148CDEC_MESSAGE_QUEUE) { + queue->head_reg = 0xa30 + (queue->index * 8); + queue->tail_reg = 0xa34 + (queue->index * 8); + } else { + queue->head_reg = 0xa00 + (queue->index * 8); + queue->tail_reg = 0xa04 + (queue->index * 8); + } + + nvkm_debug(subdev, + "queue %d: index %d, offset 0x%08x, size 0x%08x\n", + id, queue->index, queue->offset, queue->size); + } + + complete_all(&_queue->init_done); + + return 0; +} + +static const struct nvkm_msgqueue_init_func +msgqueue_0148cdec_init_func = { + .gen_cmdline = init_gen_cmdline, + .init_callback = init_callback, +}; + + + +/* ACR unit */ +#define MSGQUEUE_0148CDEC_UNIT_ACR 0x08 + +enum { + ACR_CMD_BOOTSTRAP_FALCON = 0x00, +}; + +static void +acr_boot_falcon_callback(struct nvkm_msgqueue *priv, + struct nvkm_msgqueue_hdr *hdr) +{ + struct acr_bootstrap_falcon_msg { + struct nvkm_msgqueue_msg base; + + u32 error_code; + u32 falcon_id; + } *msg = (void *)hdr; + const struct nvkm_subdev *subdev = priv->falcon->owner; + u32 falcon_id = msg->falcon_id; + + if (msg->error_code) { + nvkm_error(subdev, "in bootstrap falcon callback:\n"); + nvkm_error(subdev, "expected error code 0x%x\n", + msg->error_code); + return; + } + + if (falcon_id >= NVKM_SECBOOT_FALCON_END) { + nvkm_error(subdev, "in bootstrap falcon callback:\n"); + nvkm_error(subdev, "invalid falcon ID 0x%x\n", falcon_id); + return; + } + + nvkm_debug(subdev, "%s booted\n", nvkm_secboot_falcon_name[falcon_id]); +} + +enum { + ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES = 0, + ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_NO = 1, +}; + +static int +acr_boot_falcon(struct nvkm_msgqueue *priv, enum nvkm_secboot_falcon falcon) +{ + DECLARE_COMPLETION_ONSTACK(completed); + /* + * flags - Flag specifying RESET or no RESET. + * falcon id - Falcon id specifying falcon to bootstrap. + */ + struct { + struct nvkm_msgqueue_hdr hdr; + u8 cmd_type; + u32 flags; + u32 falcon_id; + } cmd; + + memset(&cmd, 0, sizeof(cmd)); + + cmd.hdr.unit_id = MSGQUEUE_0148CDEC_UNIT_ACR; + cmd.hdr.size = sizeof(cmd); + cmd.cmd_type = ACR_CMD_BOOTSTRAP_FALCON; + cmd.flags = ACR_CMD_BOOTSTRAP_FALCON_FLAGS_RESET_YES; + cmd.falcon_id = falcon; + nvkm_msgqueue_post(priv, MSGQUEUE_MSG_PRIORITY_HIGH, &cmd.hdr, + acr_boot_falcon_callback, &completed, true); + + if (!wait_for_completion_timeout(&completed, msecs_to_jiffies(1000))) + return -ETIMEDOUT; + + return 0; +} + +const struct nvkm_msgqueue_acr_func +msgqueue_0148cdec_acr_func = { + .boot_falcon = acr_boot_falcon, +}; + +static void +msgqueue_0148cdec_dtor(struct nvkm_msgqueue *queue) +{ + kfree(msgqueue_0148cdec(queue)); +} + +const struct nvkm_msgqueue_func +msgqueue_0148cdec_func = { + .init_func = &msgqueue_0148cdec_init_func, + .acr_func = &msgqueue_0148cdec_acr_func, + .cmd_queue = msgqueue_0148cdec_cmd_queue, + .recv = msgqueue_0148cdec_process_msgs, + .dtor = msgqueue_0148cdec_dtor, +}; + +int +msgqueue_0148cdec_new(struct nvkm_falcon *falcon, struct nvkm_msgqueue **queue) +{ + struct msgqueue_0148cdec *ret; + + ret = kzalloc(sizeof(*ret), GFP_KERNEL); + if (!ret) + return -ENOMEM; + + *queue = &ret->base; + + nvkm_msgqueue_ctor(&msgqueue_0148cdec_func, falcon, &ret->base); + + return 0; +} From 424321befd2102dc1609e3ae280a28eab61c522b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 30 Nov 2016 15:48:00 +1000 Subject: [PATCH 0700/1299] drm/nouveau/gr/gp102: initial support Differences from GP100: - 3 PPCs/GPC. - Another random reg to calculate/write. - Attrib CB setup a little different. - PascalB - PascalComputeB Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvif/class.h | 2 + .../gpu/drm/nouveau/include/nvkm/engine/gr.h | 1 + drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild | 2 + .../gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h | 4 + .../gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c | 4 +- .../gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c | 98 +++++++++++++++++++ .../gpu/drm/nouveau/nvkm/engine/gr/gf100.h | 4 + .../gpu/drm/nouveau/nvkm/engine/gr/gp100.c | 6 +- .../gpu/drm/nouveau/nvkm/engine/gr/gp102.c | 66 +++++++++++++ 9 files changed, 183 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index 3a2c0137d4b4..d08da82ba7ed 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -125,6 +125,7 @@ #define MAXWELL_B /* cl9097.h */ 0x0000b197 #define PASCAL_A /* cl9097.h */ 0x0000c097 +#define PASCAL_B /* cl9097.h */ 0x0000c197 #define NV74_BSP 0x000074b0 @@ -163,6 +164,7 @@ #define MAXWELL_COMPUTE_A 0x0000b0c0 #define MAXWELL_COMPUTE_B 0x0000b1c0 #define PASCAL_COMPUTE_A 0x0000c0c0 +#define PASCAL_COMPUTE_B 0x0000c1c0 #define NV74_CIPHER 0x000074c1 #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h index 89cf99307828..0a636833e0eb 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h @@ -43,4 +43,5 @@ int gm107_gr_new(struct nvkm_device *, int, struct nvkm_gr **); int gm200_gr_new(struct nvkm_device *, int, struct nvkm_gr **); int gm20b_gr_new(struct nvkm_device *, int, struct nvkm_gr **); int gp100_gr_new(struct nvkm_device *, int, struct nvkm_gr **); +int gp102_gr_new(struct nvkm_device *, int, struct nvkm_gr **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild index f1c494182248..2938ad5aca40 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild @@ -32,6 +32,7 @@ nvkm-y += nvkm/engine/gr/gm107.o nvkm-y += nvkm/engine/gr/gm200.o nvkm-y += nvkm/engine/gr/gm20b.o nvkm-y += nvkm/engine/gr/gp100.o +nvkm-y += nvkm/engine/gr/gp102.o nvkm-y += nvkm/engine/gr/ctxnv40.o nvkm-y += nvkm/engine/gr/ctxnv50.o @@ -50,3 +51,4 @@ nvkm-y += nvkm/engine/gr/ctxgm107.o nvkm-y += nvkm/engine/gr/ctxgm200.o nvkm-y += nvkm/engine/gr/ctxgm20b.o nvkm-y += nvkm/engine/gr/ctxgp100.o +nvkm-y += nvkm/engine/gr/ctxgp102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h index 52048b5a5274..0ae032fa2909 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -102,6 +102,10 @@ void gm200_grctx_generate_405b60(struct gf100_gr *); extern const struct gf100_grctx_func gm20b_grctx; extern const struct gf100_grctx_func gp100_grctx; +void gp100_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *); +void gp100_grctx_generate_pagepool(struct gf100_grctx *); + +extern const struct gf100_grctx_func gp102_grctx; /* context init value lists */ diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c index 3d1ae7ddf7dd..7833bc777a29 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c @@ -29,7 +29,7 @@ * PGRAPH context implementation ******************************************************************************/ -static void +void gp100_grctx_generate_pagepool(struct gf100_grctx *info) { const struct gf100_grctx_func *grctx = info->gr->func->grctx; @@ -123,7 +123,7 @@ gp100_grctx_generate_405b60(struct gf100_gr *gr) nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]); } -static void +void gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { struct nvkm_device *device = gr->base.engine.subdev.device; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c new file mode 100644 index 000000000000..ee26d64af73a --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c @@ -0,0 +1,98 @@ +/* + * Copyright 2016 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "ctxgf100.h" + +#include + +/******************************************************************************* + * PGRAPH context implementation + ******************************************************************************/ + +static void +gp102_grctx_generate_attrib(struct gf100_grctx *info) +{ + struct gf100_gr *gr = info->gr; + const struct gf100_grctx_func *grctx = gr->func->grctx; + const u32 alpha = grctx->alpha_nr; + const u32 attrib = grctx->attrib_nr; + const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); + const u32 size = roundup(gr->tpc_total * pertpc, 0x80); + const u32 access = NV_MEM_ACCESS_RW; + const int s = 12; + const int b = mmio_vram(info, size, (1 << s), access); + const int max_batches = 0xffff; + u32 ao = 0; + u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total; + int gpc, ppc, n = 0; + + mmio_refn(info, 0x418810, 0x80000000, s, b); + mmio_refn(info, 0x419848, 0x10000000, s, b); + mmio_refn(info, 0x419c2c, 0x10000000, s, b); + mmio_refn(info, 0x419b00, 0x00000000, s, b); + mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7); + mmio_wr32(info, 0x405830, attrib); + mmio_wr32(info, 0x40585c, alpha); + mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { + const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; + const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc]; + const u32 u = 0x418ea0 + (n * 0x04); + const u32 o = PPC_UNIT(gpc, ppc, 0); + const u32 p = GPC_UNIT(gpc, 0xc44 + (ppc * 4)); + if (!(gr->ppc_mask[gpc] & (1 << ppc))) + continue; + mmio_wr32(info, o + 0xc0, bs); + mmio_wr32(info, p, bs); + mmio_wr32(info, o + 0xf4, bo); + mmio_wr32(info, o + 0xf0, bs); + bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; + mmio_wr32(info, o + 0xe4, as); + mmio_wr32(info, o + 0xf8, ao); + ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; + mmio_wr32(info, u, bs); + } + } + + mmio_wr32(info, 0x4181e4, 0x00000100); + mmio_wr32(info, 0x41befc, 0x00000100); +} + +const struct gf100_grctx_func +gp102_grctx = { + .main = gp100_grctx_generate_main, + .unkn = gk104_grctx_generate_unkn, + .bundle = gm107_grctx_generate_bundle, + .bundle_size = 0x3000, + .bundle_min_gpm_fifo_depth = 0x180, + .bundle_token_limit = 0x900, + .pagepool = gp100_grctx_generate_pagepool, + .pagepool_size = 0x20000, + .attrib = gp102_grctx_generate_attrib, + .attrib_nr_max = 0x5d4, + .attrib_nr = 0x320, + .alpha_nr_max = 0xc00, + .alpha_nr = 0x800, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index db6ee3b06841..1d2101af2a87 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -124,6 +124,7 @@ struct gf100_gr_func { void (*init_gpc_mmu)(struct gf100_gr *); void (*init_rop_active_fbps)(struct gf100_gr *); void (*init_ppc_exceptions)(struct gf100_gr *); + void (*init_swdx_pes_mask)(struct gf100_gr *); void (*set_hww_esr_report_mask)(struct gf100_gr *); const struct gf100_gr_pack *mmio; struct { @@ -150,6 +151,9 @@ int gk20a_gr_init(struct gf100_gr *); int gm200_gr_init(struct gf100_gr *); int gm200_gr_rops(struct gf100_gr *); +int gp100_gr_init(struct gf100_gr *); +void gp100_gr_init_rop_active_fbps(struct gf100_gr *); + #define gf100_gr_chan(p) container_of((p), struct gf100_gr_chan, object) struct gf100_gr_chan { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c index 26ad79def0ff..94ed7debb714 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c @@ -30,7 +30,7 @@ * PGRAPH engine/subdev functions ******************************************************************************/ -static void +void gp100_gr_init_rop_active_fbps(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -40,7 +40,7 @@ gp100_gr_init_rop_active_fbps(struct gf100_gr *gr) nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */ } -static int +int gp100_gr_init(struct gf100_gr *gr) { struct nvkm_device *device = gr->base.engine.subdev.device; @@ -85,6 +85,8 @@ gp100_gr_init(struct gf100_gr *gr) nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804)); gr->func->init_rop_active_fbps(gr); + if (gr->func->init_swdx_pes_mask) + gr->func->init_swdx_pes_mask(gr); nvkm_wr32(device, 0x400500, 0x00010001); nvkm_wr32(device, 0x400100, 0xffffffff); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c new file mode 100644 index 000000000000..1d5117a16299 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c @@ -0,0 +1,66 @@ +/* + * Copyright 2016 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "gf100.h" +#include "ctxgf100.h" + +#include + +static void +gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 mask = 0, data, gpc; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + data = nvkm_rd32(device, GPC_UNIT(gpc, 0x0c50)) & 0x0000000f; + mask |= data << (gpc * 4); + } + + nvkm_wr32(device, 0x4181d0, mask); +} + +static const struct gf100_gr_func +gp102_gr = { + .init = gp100_gr_init, + .init_gpc_mmu = gm200_gr_init_gpc_mmu, + .init_rop_active_fbps = gp100_gr_init_rop_active_fbps, + .init_ppc_exceptions = gk104_gr_init_ppc_exceptions, + .init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask, + .rops = gm200_gr_rops, + .ppc_nr = 3, + .grctx = &gp102_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, PASCAL_B, &gf100_fermi }, + { -1, -1, PASCAL_COMPUTE_B }, + {} + } +}; + +int +gp102_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gm200_gr_new_(&gp102_gr, device, index, pgr); +} From 36510adde3d9c073a538e441c38515b7741d1732 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Thu, 16 Feb 2017 15:50:27 +0900 Subject: [PATCH 0701/1299] drm/nouveau/gp10x: enable secboot and GR All the bricks are in place for secure boot to be enabled. This in turn makes GR usable so enable them all. Signed-off-by: Alexandre Courbot Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index cf76d6c657ba..f0a77c5fd16a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2200,6 +2200,9 @@ nv132_chipset = { .ltc = gp100_ltc_new, .mc = gp100_mc_new, .mmu = gf100_mmu_new, + .secboot = gp102_secboot_new, + .sec2 = gp102_sec2_new, + .nvdec = gp102_nvdec_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, .timer = gk20a_timer_new, @@ -2211,6 +2214,8 @@ nv132_chipset = { .disp = gp102_disp_new, .dma = gf119_dma_new, .fifo = gp100_fifo_new, + .gr = gp102_gr_new, + .sw = gf100_sw_new, }; static const struct nvkm_device_chip @@ -2229,6 +2234,9 @@ nv134_chipset = { .ltc = gp100_ltc_new, .mc = gp100_mc_new, .mmu = gf100_mmu_new, + .secboot = gp102_secboot_new, + .sec2 = gp102_sec2_new, + .nvdec = gp102_nvdec_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, .timer = gk20a_timer_new, @@ -2240,6 +2248,8 @@ nv134_chipset = { .disp = gp102_disp_new, .dma = gf119_dma_new, .fifo = gp100_fifo_new, + .gr = gp102_gr_new, + .sw = gf100_sw_new, }; static const struct nvkm_device_chip @@ -2258,6 +2268,9 @@ nv136_chipset = { .ltc = gp100_ltc_new, .mc = gp100_mc_new, .mmu = gf100_mmu_new, + .secboot = gp102_secboot_new, + .sec2 = gp102_sec2_new, + .nvdec = gp102_nvdec_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, .timer = gk20a_timer_new, @@ -2269,6 +2282,8 @@ nv136_chipset = { .disp = gp102_disp_new, .dma = gf119_dma_new, .fifo = gp100_fifo_new, + .gr = gp102_gr_new, + .sw = gf100_sw_new, }; static int From 1af5c410cc0cae4808fd25e5cd88de303b8975fd Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 1 Mar 2017 09:01:08 +1000 Subject: [PATCH 0702/1299] drm/nouveau/i2c: modify aux interface to return length actually transferred Apparently sinks are allows to respond with ACK even if they didn't fully complete a transaction... It seems like a missed opportunity for DEFER to me, but what do I know :) Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h | 8 +++++--- drivers/gpu/drm/nouveau/nouveau_connector.c | 5 +++-- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/anx9805.c | 14 +++++++------- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c | 4 ++-- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h | 4 ++-- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c | 10 +++++----- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c | 10 +++++----- 7 files changed, 29 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h index a63c5ac69f66..ce23cc6c672e 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h @@ -64,7 +64,7 @@ void nvkm_i2c_aux_monitor(struct nvkm_i2c_aux *, bool monitor); int nvkm_i2c_aux_acquire(struct nvkm_i2c_aux *); void nvkm_i2c_aux_release(struct nvkm_i2c_aux *); int nvkm_i2c_aux_xfer(struct nvkm_i2c_aux *, bool retry, u8 type, - u32 addr, u8 *data, u8 size); + u32 addr, u8 *data, u8 *size); int nvkm_i2c_aux_lnk_ctl(struct nvkm_i2c_aux *, int link_nr, int link_bw, bool enhanced_framing); @@ -162,9 +162,11 @@ nvkm_probe_i2c(struct i2c_adapter *adap, u8 addr) static inline int nvkm_rdaux(struct nvkm_i2c_aux *aux, u32 addr, u8 *data, u8 size) { + const u8 xfer = size; int ret = nvkm_i2c_aux_acquire(aux); if (ret == 0) { - ret = nvkm_i2c_aux_xfer(aux, true, 9, addr, data, size); + ret = nvkm_i2c_aux_xfer(aux, true, 9, addr, data, &size); + WARN_ON(!ret && size != xfer); nvkm_i2c_aux_release(aux); } return ret; @@ -175,7 +177,7 @@ nvkm_wraux(struct nvkm_i2c_aux *aux, u32 addr, u8 *data, u8 size) { int ret = nvkm_i2c_aux_acquire(aux); if (ret == 0) { - ret = nvkm_i2c_aux_xfer(aux, true, 8, addr, data, size); + ret = nvkm_i2c_aux_xfer(aux, true, 8, addr, data, &size); nvkm_i2c_aux_release(aux); } return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index f5add64c093f..f802bcd94457 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1147,6 +1147,7 @@ nouveau_connector_aux_xfer(struct drm_dp_aux *obj, struct drm_dp_aux_msg *msg) container_of(obj, typeof(*nv_connector), aux); struct nouveau_encoder *nv_encoder; struct nvkm_i2c_aux *aux; + u8 size = msg->size; int ret; nv_encoder = find_encoder(&nv_connector->base, DCB_OUTPUT_DP); @@ -1162,11 +1163,11 @@ nouveau_connector_aux_xfer(struct drm_dp_aux *obj, struct drm_dp_aux_msg *msg) return ret; ret = nvkm_i2c_aux_xfer(aux, false, msg->request, msg->address, - msg->buffer, msg->size); + msg->buffer, &size); nvkm_i2c_aux_release(aux); if (ret >= 0) { msg->reply = ret; - return msg->size; + return size; } return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/anx9805.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/anx9805.c index b7b01c3f7037..dd391809fef7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/anx9805.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/anx9805.c @@ -134,7 +134,7 @@ struct anx9805_aux { static int anx9805_aux_xfer(struct nvkm_i2c_aux *base, bool retry, - u8 type, u32 addr, u8 *data, u8 size) + u8 type, u32 addr, u8 *data, u8 *size) { struct anx9805_aux *aux = anx9805_aux(base); struct anx9805_pad *pad = aux->pad; @@ -143,7 +143,7 @@ anx9805_aux_xfer(struct nvkm_i2c_aux *base, bool retry, u8 buf[16] = {}; u8 tmp; - AUX_DBG(&aux->base, "%02x %05x %d", type, addr, size); + AUX_DBG(&aux->base, "%02x %05x %d", type, addr, *size); tmp = nvkm_rdi2cr(adap, pad->addr, 0x07) & ~0x04; nvkm_wri2cr(adap, pad->addr, 0x07, tmp | 0x04); @@ -152,12 +152,12 @@ anx9805_aux_xfer(struct nvkm_i2c_aux *base, bool retry, nvkm_wri2cr(adap, aux->addr, 0xe4, 0x80); if (!(type & 1)) { - memcpy(buf, data, size); + memcpy(buf, data, *size); AUX_DBG(&aux->base, "%16ph", buf); - for (i = 0; i < size; i++) + for (i = 0; i < *size; i++) nvkm_wri2cr(adap, aux->addr, 0xf0 + i, buf[i]); } - nvkm_wri2cr(adap, aux->addr, 0xe5, ((size - 1) << 4) | type); + nvkm_wri2cr(adap, aux->addr, 0xe5, ((*size - 1) << 4) | type); nvkm_wri2cr(adap, aux->addr, 0xe6, (addr & 0x000ff) >> 0); nvkm_wri2cr(adap, aux->addr, 0xe7, (addr & 0x0ff00) >> 8); nvkm_wri2cr(adap, aux->addr, 0xe8, (addr & 0xf0000) >> 16); @@ -176,10 +176,10 @@ anx9805_aux_xfer(struct nvkm_i2c_aux *base, bool retry, } if (type & 1) { - for (i = 0; i < size; i++) + for (i = 0; i < *size; i++) buf[i] = nvkm_rdi2cr(adap, aux->addr, 0xf0 + i); AUX_DBG(&aux->base, "%16ph", buf); - memcpy(data, buf, size); + memcpy(data, buf, *size); } ret = 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c index 01d5c5a56e2e..d172e42dd228 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c @@ -51,7 +51,7 @@ nvkm_i2c_aux_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) if (mcnt || remaining > 16) cmd |= 4; /* MOT */ - ret = aux->func->xfer(aux, true, cmd, msg->addr, ptr, cnt); + ret = aux->func->xfer(aux, true, cmd, msg->addr, ptr, &cnt); if (ret < 0) { nvkm_i2c_aux_release(aux); return ret; @@ -115,7 +115,7 @@ nvkm_i2c_aux_acquire(struct nvkm_i2c_aux *aux) int nvkm_i2c_aux_xfer(struct nvkm_i2c_aux *aux, bool retry, u8 type, - u32 addr, u8 *data, u8 size) + u32 addr, u8 *data, u8 *size) { return aux->func->xfer(aux, retry, type, addr, data, size); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h index fc6b162fa0b1..27a4a39c87f0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.h @@ -4,7 +4,7 @@ struct nvkm_i2c_aux_func { int (*xfer)(struct nvkm_i2c_aux *, bool retry, u8 type, - u32 addr, u8 *data, u8 size); + u32 addr, u8 *data, u8 *size); int (*lnk_ctl)(struct nvkm_i2c_aux *, int link_nr, int link_bw, bool enhanced_framing); }; @@ -15,7 +15,7 @@ int nvkm_i2c_aux_new_(const struct nvkm_i2c_aux_func *, struct nvkm_i2c_pad *, int id, struct nvkm_i2c_aux **); void nvkm_i2c_aux_del(struct nvkm_i2c_aux **); int nvkm_i2c_aux_xfer(struct nvkm_i2c_aux *, bool retry, u8 type, - u32 addr, u8 *data, u8 size); + u32 addr, u8 *data, u8 *size); int g94_i2c_aux_new(struct nvkm_i2c_pad *, int, u8, struct nvkm_i2c_aux **); int gm200_i2c_aux_new(struct nvkm_i2c_pad *, int, u8, struct nvkm_i2c_aux **); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c index b80236a4eeac..dc96afb7793c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c @@ -74,7 +74,7 @@ g94_i2c_aux_init(struct g94_i2c_aux *aux) static int g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, - u8 type, u32 addr, u8 *data, u8 size) + u8 type, u32 addr, u8 *data, u8 *size) { struct g94_i2c_aux *aux = g94_i2c_aux(obj); struct nvkm_device *device = aux->base.pad->i2c->subdev.device; @@ -83,7 +83,7 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, u32 xbuf[4] = {}; int ret, i; - AUX_TRACE(&aux->base, "%d: %08x %d", type, addr, size); + AUX_TRACE(&aux->base, "%d: %08x %d", type, addr, *size); ret = g94_i2c_aux_init(aux); if (ret < 0) @@ -97,7 +97,7 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, } if (!(type & 1)) { - memcpy(xbuf, data, size); + memcpy(xbuf, data, *size); for (i = 0; i < 16; i += 4) { AUX_TRACE(&aux->base, "wr %08x", xbuf[i / 4]); nvkm_wr32(device, 0x00e4c0 + base + i, xbuf[i / 4]); @@ -107,7 +107,7 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, ctrl = nvkm_rd32(device, 0x00e4e4 + base); ctrl &= ~0x0001f0ff; ctrl |= type << 12; - ctrl |= size - 1; + ctrl |= *size - 1; nvkm_wr32(device, 0x00e4e0 + base, addr); /* (maybe) retry transaction a number of times on failure... */ @@ -151,7 +151,7 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, xbuf[i / 4] = nvkm_rd32(device, 0x00e4d0 + base + i); AUX_TRACE(&aux->base, "rd %08x", xbuf[i / 4]); } - memcpy(data, xbuf, size); + memcpy(data, xbuf, *size); } out: diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c index ed458c7f056b..50e79a9106a0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c @@ -74,7 +74,7 @@ gm200_i2c_aux_init(struct gm200_i2c_aux *aux) static int gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, - u8 type, u32 addr, u8 *data, u8 size) + u8 type, u32 addr, u8 *data, u8 *size) { struct gm200_i2c_aux *aux = gm200_i2c_aux(obj); struct nvkm_device *device = aux->base.pad->i2c->subdev.device; @@ -83,7 +83,7 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, u32 xbuf[4] = {}; int ret, i; - AUX_TRACE(&aux->base, "%d: %08x %d", type, addr, size); + AUX_TRACE(&aux->base, "%d: %08x %d", type, addr, *size); ret = gm200_i2c_aux_init(aux); if (ret < 0) @@ -97,7 +97,7 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, } if (!(type & 1)) { - memcpy(xbuf, data, size); + memcpy(xbuf, data, *size); for (i = 0; i < 16; i += 4) { AUX_TRACE(&aux->base, "wr %08x", xbuf[i / 4]); nvkm_wr32(device, 0x00d930 + base + i, xbuf[i / 4]); @@ -107,7 +107,7 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, ctrl = nvkm_rd32(device, 0x00d954 + base); ctrl &= ~0x0001f0ff; ctrl |= type << 12; - ctrl |= size - 1; + ctrl |= *size - 1; nvkm_wr32(device, 0x00d950 + base, addr); /* (maybe) retry transaction a number of times on failure... */ @@ -151,7 +151,7 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, xbuf[i / 4] = nvkm_rd32(device, 0x00d940 + base + i); AUX_TRACE(&aux->base, "rd %08x", xbuf[i / 4]); } - memcpy(data, xbuf, size); + memcpy(data, xbuf, *size); } out: From 5c68d91ee072fcaae8c6c8dfa5a50a4ebf14bc96 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 1 Mar 2017 09:38:29 +1000 Subject: [PATCH 0703/1299] drm/nouveau/i2c/g94-: return REPLY_M value on reads This value represents the actual number of bytes recieved on the AUX channel as the result of a read transaction. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c | 1 + drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c index dc96afb7793c..ab8cb196c34e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c @@ -152,6 +152,7 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, AUX_TRACE(&aux->base, "rd %08x", xbuf[i / 4]); } memcpy(data, xbuf, *size); + *size = stat & 0x0000001f; } out: diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c index 50e79a9106a0..ee091fa79628 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c @@ -152,6 +152,7 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry, AUX_TRACE(&aux->base, "rd %08x", xbuf[i / 4]); } memcpy(data, xbuf, *size); + *size = stat & 0x0000001f; } out: From df8dc97cd17269474344d73cc02739532c468d04 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 1 Mar 2017 09:42:04 +1000 Subject: [PATCH 0704/1299] drm/nouveau/kms/nv50: use drm core i2c-over-aux algorithm I'm not entirely sure NVKM needs to support this now, but I haven't removed it as of yet just in case it's needed from DEVINIT scripts where DRM isn't available. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv50_display.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 0b4440ffbeae..b4a99aba4d65 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -3691,7 +3691,7 @@ nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe) struct nvkm_i2c_aux *aux = nvkm_i2c_aux_find(i2c, dcbe->i2c_index); if (aux) { - nv_encoder->i2c = &aux->i2c; + nv_encoder->i2c = &nv_connector->aux.ddc; nv_encoder->aux = aux; } @@ -3841,6 +3841,7 @@ nv50_pior_func = { static int nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe) { + struct nouveau_connector *nv_connector = nouveau_connector(connector); struct nouveau_drm *drm = nouveau_drm(connector->dev); struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device); struct nvkm_i2c_bus *bus = NULL; @@ -3858,7 +3859,7 @@ nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe) break; case DCB_OUTPUT_DP: aux = nvkm_i2c_aux_find(i2c, NVKM_I2C_AUX_EXT(dcbe->extdev)); - ddc = aux ? &aux->i2c : NULL; + ddc = aux ? &nv_connector->aux.ddc : NULL; type = DRM_MODE_ENCODER_TMDS; break; default: From fcb371a1d5fc7def715227f74dfcb9be73a597e8 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 2 Mar 2017 14:34:16 +1000 Subject: [PATCH 0705/1299] drm/nouveau/fb/gf100-: modify constructors to allow more customisation GF108/GM107 implementations will want slightly different functions for the upcoming RAM detection improvements. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h | 15 ++++- .../gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c | 39 ++++++----- .../gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c | 66 +++++++++---------- .../gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c | 13 +++- 4 files changed, 81 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h index b60068b7d8f9..9eded69163e3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h @@ -19,13 +19,24 @@ int nv50_ram_get(struct nvkm_ram *, u64, u32, u32, u32, struct nvkm_mem **); void nv50_ram_put(struct nvkm_ram *, struct nvkm_mem **); void __nv50_ram_put(struct nvkm_ram *, struct nvkm_mem *); +int gf100_ram_new_(const struct nvkm_ram_func *, struct nvkm_fb *, + struct nvkm_ram **); int gf100_ram_ctor(const struct nvkm_ram_func *, struct nvkm_fb *, u32, struct nvkm_ram *); int gf100_ram_get(struct nvkm_ram *, u64, u32, u32, u32, struct nvkm_mem **); void gf100_ram_put(struct nvkm_ram *, struct nvkm_mem **); +int gf100_ram_init(struct nvkm_ram *); +int gf100_ram_calc(struct nvkm_ram *, u32); +int gf100_ram_prog(struct nvkm_ram *); +void gf100_ram_tidy(struct nvkm_ram *); -int gk104_ram_ctor(struct nvkm_fb *, struct nvkm_ram **, u32); -int gk104_ram_init(struct nvkm_ram *ram); +int gk104_ram_new_(const struct nvkm_ram_func *, struct nvkm_fb *, + struct nvkm_ram **, u32); +void *gk104_ram_dtor(struct nvkm_ram *); +int gk104_ram_init(struct nvkm_ram *); +int gk104_ram_calc(struct nvkm_ram *, u32); +int gk104_ram_prog(struct nvkm_ram *); +void gk104_ram_tidy(struct nvkm_ram *); /* RAM type-specific MR calculation routines */ int nvkm_sddr2_calc(struct nvkm_ram *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c index 6758da93a3a1..ef28514af56f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c @@ -124,7 +124,7 @@ gf100_ram_train(struct gf100_ramfuc *fuc, u32 magic) } } -static int +int gf100_ram_calc(struct nvkm_ram *base, u32 freq) { struct gf100_ram *ram = gf100_ram(base); @@ -404,7 +404,7 @@ gf100_ram_calc(struct nvkm_ram *base, u32 freq) return 0; } -static int +int gf100_ram_prog(struct nvkm_ram *base) { struct gf100_ram *ram = gf100_ram(base); @@ -413,7 +413,7 @@ gf100_ram_prog(struct nvkm_ram *base) return 0; } -static void +void gf100_ram_tidy(struct nvkm_ram *base) { struct gf100_ram *ram = gf100_ram(base); @@ -500,7 +500,7 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin, return 0; } -static int +int gf100_ram_init(struct nvkm_ram *base) { static const u8 train0[] = { @@ -543,16 +543,6 @@ gf100_ram_init(struct nvkm_ram *base) return 0; } -static const struct nvkm_ram_func -gf100_ram_func = { - .init = gf100_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, - .calc = gf100_ram_calc, - .prog = gf100_ram_prog, - .tidy = gf100_ram_tidy, -}; - int gf100_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb, u32 maskaddr, struct nvkm_ram *ram) @@ -624,7 +614,8 @@ gf100_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb, } int -gf100_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) +gf100_ram_new_(const struct nvkm_ram_func *func, + struct nvkm_fb *fb, struct nvkm_ram **pram) { struct nvkm_subdev *subdev = &fb->subdev; struct nvkm_bios *bios = subdev->device->bios; @@ -635,7 +626,7 @@ gf100_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) return -ENOMEM; *pram = &ram->base; - ret = gf100_ram_ctor(&gf100_ram_func, fb, 0x022554, &ram->base); + ret = gf100_ram_ctor(func, fb, 0x022554, &ram->base); if (ret) return ret; @@ -711,3 +702,19 @@ gf100_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) ram->fuc.r_0x13d8f4 = ramfuc_reg(0x13d8f4); return 0; } + +static const struct nvkm_ram_func +gf100_ram = { + .init = gf100_ram_init, + .get = gf100_ram_get, + .put = gf100_ram_put, + .calc = gf100_ram_calc, + .prog = gf100_ram_prog, + .tidy = gf100_ram_tidy, +}; + +int +gf100_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) +{ + return gf100_ram_new_(&gf100_ram, fb, pram); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c index fb8a1239743d..2575d6c370a8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c @@ -1108,7 +1108,7 @@ gk104_ram_calc_xits(struct gk104_ram *ram, struct nvkm_ram_data *next) return ret; } -static int +int gk104_ram_calc(struct nvkm_ram *base, u32 freq) { struct gk104_ram *ram = gk104_ram(base); @@ -1227,7 +1227,7 @@ gk104_ram_prog_0(struct gk104_ram *ram, u32 freq) nvkm_mask(device, 0x10f444, mask, data); } -static int +int gk104_ram_prog(struct nvkm_ram *base) { struct gk104_ram *ram = gk104_ram(base); @@ -1247,7 +1247,7 @@ gk104_ram_prog(struct nvkm_ram *base) return (ram->base.next == &ram->base.xition); } -static void +void gk104_ram_tidy(struct nvkm_ram *base) { struct gk104_ram *ram = gk104_ram(base); @@ -1509,7 +1509,7 @@ gk104_ram_ctor_data(struct gk104_ram *ram, u8 ramcfg, int i) return ret; } -static void * +void * gk104_ram_dtor(struct nvkm_ram *base) { struct gk104_ram *ram = gk104_ram(base); @@ -1522,31 +1522,14 @@ gk104_ram_dtor(struct nvkm_ram *base) return ram; } -static const struct nvkm_ram_func -gk104_ram_func = { - .dtor = gk104_ram_dtor, - .init = gk104_ram_init, - .get = gf100_ram_get, - .put = gf100_ram_put, - .calc = gk104_ram_calc, - .prog = gk104_ram_prog, - .tidy = gk104_ram_tidy, -}; - int -gk104_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) -{ - return gk104_ram_ctor(fb, pram, 0x022554); -} - -int -gk104_ram_ctor(struct nvkm_fb *fb, struct nvkm_ram **pram, u32 maskaddr) +gk104_ram_new_(const struct nvkm_ram_func *func, struct nvkm_fb *fb, + struct nvkm_ram **pram, u32 maskaddr) { struct nvkm_subdev *subdev = &fb->subdev; struct nvkm_device *device = subdev->device; struct nvkm_bios *bios = device->bios; - struct nvkm_gpio *gpio = device->gpio; - struct dcb_gpio_func func; + struct dcb_gpio_func gpio; struct gk104_ram *ram; int ret, i; u8 ramcfg = nvbios_ramcfg_index(subdev); @@ -1556,7 +1539,7 @@ gk104_ram_ctor(struct nvkm_fb *fb, struct nvkm_ram **pram, u32 maskaddr) return -ENOMEM; *pram = &ram->base; - ret = gf100_ram_ctor(&gk104_ram_func, fb, maskaddr, &ram->base); + ret = gf100_ram_ctor(func, fb, maskaddr, &ram->base); if (ret) return ret; @@ -1614,18 +1597,18 @@ gk104_ram_ctor(struct nvkm_fb *fb, struct nvkm_ram **pram, u32 maskaddr) } /* lookup memory voltage gpios */ - ret = nvkm_gpio_find(gpio, 0, 0x18, DCB_GPIO_UNUSED, &func); + ret = nvkm_gpio_find(device->gpio, 0, 0x18, DCB_GPIO_UNUSED, &gpio); if (ret == 0) { - ram->fuc.r_gpioMV = ramfuc_reg(0x00d610 + (func.line * 0x04)); - ram->fuc.r_funcMV[0] = (func.log[0] ^ 2) << 12; - ram->fuc.r_funcMV[1] = (func.log[1] ^ 2) << 12; + ram->fuc.r_gpioMV = ramfuc_reg(0x00d610 + (gpio.line * 0x04)); + ram->fuc.r_funcMV[0] = (gpio.log[0] ^ 2) << 12; + ram->fuc.r_funcMV[1] = (gpio.log[1] ^ 2) << 12; } - ret = nvkm_gpio_find(gpio, 0, 0x2e, DCB_GPIO_UNUSED, &func); + ret = nvkm_gpio_find(device->gpio, 0, 0x2e, DCB_GPIO_UNUSED, &gpio); if (ret == 0) { - ram->fuc.r_gpio2E = ramfuc_reg(0x00d610 + (func.line * 0x04)); - ram->fuc.r_func2E[0] = (func.log[0] ^ 2) << 12; - ram->fuc.r_func2E[1] = (func.log[1] ^ 2) << 12; + ram->fuc.r_gpio2E = ramfuc_reg(0x00d610 + (gpio.line * 0x04)); + ram->fuc.r_func2E[0] = (gpio.log[0] ^ 2) << 12; + ram->fuc.r_func2E[1] = (gpio.log[1] ^ 2) << 12; } ram->fuc.r_gpiotrig = ramfuc_reg(0x00d604); @@ -1717,3 +1700,20 @@ gk104_ram_ctor(struct nvkm_fb *fb, struct nvkm_ram **pram, u32 maskaddr) ram->fuc.r_0x100750 = ramfuc_reg(0x100750); return 0; } + +static const struct nvkm_ram_func +gk104_ram = { + .dtor = gk104_ram_dtor, + .init = gk104_ram_init, + .get = gf100_ram_get, + .put = gf100_ram_put, + .calc = gk104_ram_calc, + .prog = gk104_ram_prog, + .tidy = gk104_ram_tidy, +}; + +int +gk104_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) +{ + return gk104_ram_new_(&gk104_ram, fb, pram, 0x022554); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c index ac862d1d77bd..af5a97a9061e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c @@ -23,8 +23,19 @@ */ #include "ram.h" +static const struct nvkm_ram_func +gm107_ram = { + .dtor = gk104_ram_dtor, + .init = gk104_ram_init, + .get = gf100_ram_get, + .put = gf100_ram_put, + .calc = gk104_ram_calc, + .prog = gk104_ram_prog, + .tidy = gk104_ram_tidy, +}; + int gm107_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) { - return gk104_ram_ctor(fb, pram, 0x021c14); + return gk104_ram_new_(&gm107_ram, fb, pram, 0x021c14); } From 904e703c8065f57665e21cdd170ea2b6b809da79 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 3 Mar 2017 08:36:48 +1000 Subject: [PATCH 0706/1299] drm/nouveau/fb/gf108: split implementation from gf100 Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/fb.h | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild | 2 + .../gpu/drm/nouveau/nvkm/subdev/fb/gf108.c | 42 +++++++++++++++++++ drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h | 1 + .../gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c | 40 ++++++++++++++++++ 6 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h index 0b26a4c860ec..6876b269a447 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h @@ -89,6 +89,7 @@ int gt215_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int mcp77_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int mcp89_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gf100_fb_new(struct nvkm_device *, int, struct nvkm_fb **); +int gf108_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gk104_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gk20a_fb_new(struct nvkm_device *, int, struct nvkm_fb **); int gm107_fb_new(struct nvkm_device *, int, struct nvkm_fb **); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index f0a77c5fd16a..1076949b802a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1379,7 +1379,7 @@ nvc1_chipset = { .bus = gf100_bus_new, .clk = gf100_clk_new, .devinit = gf100_devinit_new, - .fb = gf100_fb_new, + .fb = gf108_fb_new, .fuse = gf100_fuse_new, .gpio = g94_gpio_new, .i2c = g94_i2c_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild index 63566ba12fbb..8c7cce2451bc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild @@ -20,6 +20,7 @@ nvkm-y += nvkm/subdev/fb/gt215.o nvkm-y += nvkm/subdev/fb/mcp77.o nvkm-y += nvkm/subdev/fb/mcp89.o nvkm-y += nvkm/subdev/fb/gf100.o +nvkm-y += nvkm/subdev/fb/gf108.o nvkm-y += nvkm/subdev/fb/gk104.o nvkm-y += nvkm/subdev/fb/gk20a.o nvkm-y += nvkm/subdev/fb/gm107.o @@ -42,6 +43,7 @@ nvkm-y += nvkm/subdev/fb/ramnv50.o nvkm-y += nvkm/subdev/fb/ramgt215.o nvkm-y += nvkm/subdev/fb/rammcp77.o nvkm-y += nvkm/subdev/fb/ramgf100.o +nvkm-y += nvkm/subdev/fb/ramgf108.o nvkm-y += nvkm/subdev/fb/ramgk104.o nvkm-y += nvkm/subdev/fb/ramgm107.o nvkm-y += nvkm/subdev/fb/ramgp100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c new file mode 100644 index 000000000000..56af84aa333b --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf108.c @@ -0,0 +1,42 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "gf100.h" +#include "ram.h" + +static const struct nvkm_fb_func +gf108_fb = { + .dtor = gf100_fb_dtor, + .oneinit = gf100_fb_oneinit, + .init = gf100_fb_init, + .init_page = gf100_fb_init_page, + .intr = gf100_fb_intr, + .ram_new = gf108_ram_new, + .memtype_valid = gf100_fb_memtype_valid, +}; + +int +gf108_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb) +{ + return gf100_fb_new_(&gf108_fb, device, index, pfb); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h index 9eded69163e3..c6c05aac0ff6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h @@ -57,6 +57,7 @@ int nv50_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gt215_ram_new(struct nvkm_fb *, struct nvkm_ram **); int mcp77_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gf100_ram_new(struct nvkm_fb *, struct nvkm_ram **); +int gf108_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gk104_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gm107_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gp100_ram_new(struct nvkm_fb *, struct nvkm_ram **); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c new file mode 100644 index 000000000000..ddab0db3a7a5 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c @@ -0,0 +1,40 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "ram.h" + +static const struct nvkm_ram_func +gf108_ram = { + .init = gf100_ram_init, + .get = gf100_ram_get, + .put = gf100_ram_put, + .calc = gf100_ram_calc, + .prog = gf100_ram_prog, + .tidy = gf100_ram_tidy, +}; + +int +gf108_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) +{ + return gf100_ram_new_(&gf108_ram, fb, pram); +} From ba4c063d47562d28aed3d440959fb6b13802e921 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 3 Mar 2017 08:44:01 +1000 Subject: [PATCH 0707/1299] drm/nouveau/fb/gm200: split ram implementation from gm107 Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/fb/gm200.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h | 1 + .../gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c | 41 +++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild index 8c7cce2451bc..1c5e5ba487a8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild @@ -46,6 +46,7 @@ nvkm-y += nvkm/subdev/fb/ramgf100.o nvkm-y += nvkm/subdev/fb/ramgf108.o nvkm-y += nvkm/subdev/fb/ramgk104.o nvkm-y += nvkm/subdev/fb/ramgm107.o +nvkm-y += nvkm/subdev/fb/ramgm200.o nvkm-y += nvkm/subdev/fb/ramgp100.o nvkm-y += nvkm/subdev/fb/sddr2.o nvkm-y += nvkm/subdev/fb/sddr3.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c index fe5886013ac0..d83da5ddbc1e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c @@ -68,7 +68,7 @@ gm200_fb = { .init = gm200_fb_init, .init_page = gm200_fb_init_page, .intr = gf100_fb_intr, - .ram_new = gm107_ram_new, + .ram_new = gm200_ram_new, .memtype_valid = gf100_fb_memtype_valid, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h index c6c05aac0ff6..893fbb957181 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h @@ -60,5 +60,6 @@ int gf100_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gf108_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gk104_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gm107_ram_new(struct nvkm_fb *, struct nvkm_ram **); +int gm200_ram_new(struct nvkm_fb *, struct nvkm_ram **); int gp100_ram_new(struct nvkm_fb *, struct nvkm_ram **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c new file mode 100644 index 000000000000..04ae81f0db10 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c @@ -0,0 +1,41 @@ +/* + * Copyright 2017 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "ram.h" + +static const struct nvkm_ram_func +gm200_ram = { + .dtor = gk104_ram_dtor, + .init = gk104_ram_init, + .get = gf100_ram_get, + .put = gf100_ram_put, + .calc = gk104_ram_calc, + .prog = gk104_ram_prog, + .tidy = gk104_ram_tidy, +}; + +int +gm200_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) +{ + return gk104_ram_new_(&gm200_ram, fb, pram, 0x021c14); +} From 97e5268d57bb2ec9c82cf8758fa97a2f04ea9d1b Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 2 Mar 2017 13:53:05 +1000 Subject: [PATCH 0708/1299] drm/nouveau/fb/gf100-: rework ram detection This commit reworks the RAM detection algorithm, using RAM-per-LTC to determine whether a board has a mixed-memory configuration instead of using RAM-per-FBPA. I'm not certain the algorithm is perfect, but it should handle all currently known configurations in the very least. This should fix GTX 970 boards with 4GiB of RAM where the last 512MiB isn't fully accessible, as well as only detecting half the VRAM on GF108 boards. As a nice side-effect, GP10x memory detection now reuses the majority of the code from earlier chipsets. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/include/nvkm/subdev/fb.h | 6 + drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h | 18 ++- .../gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c | 113 +++++++++++------- .../gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c | 22 ++++ .../gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c | 10 +- .../gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c | 14 ++- .../gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c | 29 ++++- .../gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c | 68 +++-------- 8 files changed, 179 insertions(+), 101 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h index 6876b269a447..891497a0fe3b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h @@ -147,6 +147,12 @@ struct nvkm_ram { }; struct nvkm_ram_func { + u64 upper; + u32 (*probe_fbp)(const struct nvkm_ram_func *, struct nvkm_device *, + int fbp, int *pltcs); + u32 (*probe_fbp_amount)(const struct nvkm_ram_func *, u32 fbpao, + struct nvkm_device *, int fbp, int *pltcs); + u32 (*probe_fbpa_amount)(struct nvkm_device *, int fbpa); void *(*dtor)(struct nvkm_ram *); int (*init)(struct nvkm_ram *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h index 893fbb957181..fac7e73c3ddf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h @@ -22,7 +22,12 @@ void __nv50_ram_put(struct nvkm_ram *, struct nvkm_mem *); int gf100_ram_new_(const struct nvkm_ram_func *, struct nvkm_fb *, struct nvkm_ram **); int gf100_ram_ctor(const struct nvkm_ram_func *, struct nvkm_fb *, - u32, struct nvkm_ram *); + struct nvkm_ram *); +u32 gf100_ram_probe_fbp(const struct nvkm_ram_func *, + struct nvkm_device *, int, int *); +u32 gf100_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32, + struct nvkm_device *, int, int *); +u32 gf100_ram_probe_fbpa_amount(struct nvkm_device *, int); int gf100_ram_get(struct nvkm_ram *, u64, u32, u32, u32, struct nvkm_mem **); void gf100_ram_put(struct nvkm_ram *, struct nvkm_mem **); int gf100_ram_init(struct nvkm_ram *); @@ -30,14 +35,23 @@ int gf100_ram_calc(struct nvkm_ram *, u32); int gf100_ram_prog(struct nvkm_ram *); void gf100_ram_tidy(struct nvkm_ram *); +u32 gf108_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32, + struct nvkm_device *, int, int *); + int gk104_ram_new_(const struct nvkm_ram_func *, struct nvkm_fb *, - struct nvkm_ram **, u32); + struct nvkm_ram **); void *gk104_ram_dtor(struct nvkm_ram *); int gk104_ram_init(struct nvkm_ram *); int gk104_ram_calc(struct nvkm_ram *, u32); int gk104_ram_prog(struct nvkm_ram *); void gk104_ram_tidy(struct nvkm_ram *); +u32 gm107_ram_probe_fbp(const struct nvkm_ram_func *, + struct nvkm_device *, int, int *); + +u32 gm200_ram_probe_fbp_amount(const struct nvkm_ram_func *, u32, + struct nvkm_device *, int, int *); + /* RAM type-specific MR calculation routines */ int nvkm_sddr2_calc(struct nvkm_ram *); int nvkm_sddr3_calc(struct nvkm_ram *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c index ef28514af56f..53c32fc694e9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c @@ -543,67 +543,96 @@ gf100_ram_init(struct nvkm_ram *base) return 0; } +u32 +gf100_ram_probe_fbpa_amount(struct nvkm_device *device, int fbpa) +{ + return nvkm_rd32(device, 0x11020c + (fbpa * 0x1000)); +} + +u32 +gf100_ram_probe_fbp_amount(const struct nvkm_ram_func *func, u32 fbpao, + struct nvkm_device *device, int fbp, int *pltcs) +{ + if (!(fbpao & BIT(fbp))) { + *pltcs = 1; + return func->probe_fbpa_amount(device, fbp); + } + return 0; +} + +u32 +gf100_ram_probe_fbp(const struct nvkm_ram_func *func, + struct nvkm_device *device, int fbp, int *pltcs) +{ + u32 fbpao = nvkm_rd32(device, 0x022554); + return func->probe_fbp_amount(func, fbpao, device, fbp, pltcs); +} + int gf100_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb, - u32 maskaddr, struct nvkm_ram *ram) + struct nvkm_ram *ram) { struct nvkm_subdev *subdev = &fb->subdev; struct nvkm_device *device = subdev->device; struct nvkm_bios *bios = device->bios; const u32 rsvd_head = ( 256 * 1024); /* vga memory */ const u32 rsvd_tail = (1024 * 1024); /* vbios etc */ - u32 parts = nvkm_rd32(device, 0x022438); - u32 pmask = nvkm_rd32(device, maskaddr); - u64 bsize = (u64)nvkm_rd32(device, 0x10f20c) << 20; - u64 psize, size = 0; enum nvkm_ram_type type = nvkm_fb_bios_memtype(bios); - bool uniform = true; - int ret, i; + u32 fbps = nvkm_rd32(device, 0x022438); + u64 total = 0, lcomm = ~0, lower, ubase, usize; + int ret, fbp, ltcs, ltcn = 0; - nvkm_debug(subdev, "100800: %08x\n", nvkm_rd32(device, 0x100800)); - nvkm_debug(subdev, "parts %08x mask %08x\n", parts, pmask); - - /* read amount of vram attached to each memory controller */ - for (i = 0; i < parts; i++) { - if (pmask & (1 << i)) - continue; - - psize = (u64)nvkm_rd32(device, 0x11020c + (i * 0x1000)) << 20; - if (psize != bsize) { - if (psize < bsize) - bsize = psize; - uniform = false; + nvkm_debug(subdev, "%d FBP(s)\n", fbps); + for (fbp = 0; fbp < fbps; fbp++) { + u32 size = func->probe_fbp(func, device, fbp, <cs); + if (size) { + nvkm_debug(subdev, "FBP %d: %4d MiB, %d LTC(s)\n", + fbp, size, ltcs); + lcomm = min(lcomm, (u64)(size / ltcs) << 20); + total += size << 20; + ltcn += ltcs; + } else { + nvkm_debug(subdev, "FBP %d: disabled\n", fbp); } - - nvkm_debug(subdev, "%d: %d MiB\n", i, (u32)(psize >> 20)); - size += psize; } - ret = nvkm_ram_ctor(func, fb, type, size, 0, ram); + lower = lcomm * ltcn; + ubase = lcomm + func->upper; + usize = total - lower; + + nvkm_debug(subdev, "Lower: %4lld MiB @ %010llx\n", lower >> 20, 0ULL); + nvkm_debug(subdev, "Upper: %4lld MiB @ %010llx\n", usize >> 20, ubase); + nvkm_debug(subdev, "Total: %4lld MiB\n", total >> 20); + + ret = nvkm_ram_ctor(func, fb, type, total, 0, ram); if (ret) return ret; nvkm_mm_fini(&ram->vram); - /* if all controllers have the same amount attached, there's no holes */ - if (uniform) { + /* Some GPUs are in what's known as a "mixed memory" configuration. + * + * This is either where some FBPs have more memory than the others, + * or where LTCs have been disabled on a FBP. + */ + if (lower != total) { + /* The common memory amount is addressed normally. */ ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT, - (size - rsvd_head - rsvd_tail) >> - NVKM_RAM_MM_SHIFT, 1); - if (ret) - return ret; - } else { - /* otherwise, address lowest common amount from 0GiB */ - ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT, - ((bsize * parts) - rsvd_head) >> - NVKM_RAM_MM_SHIFT, 1); + (lower - rsvd_head) >> NVKM_RAM_MM_SHIFT, 1); if (ret) return ret; - /* and the rest starting from (8GiB + common_size) */ - ret = nvkm_mm_init(&ram->vram, (0x0200000000ULL + bsize) >> - NVKM_RAM_MM_SHIFT, - (size - (bsize * parts) - rsvd_tail) >> + /* And the rest is much higher in the physical address + * space, and may not be usable for certain operations. + */ + ret = nvkm_mm_init(&ram->vram, ubase >> NVKM_RAM_MM_SHIFT, + (usize - rsvd_tail) >> NVKM_RAM_MM_SHIFT, 1); + if (ret) + return ret; + } else { + /* GPUs without mixed-memory are a lot nicer... */ + ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT, + (total - rsvd_head - rsvd_tail) >> NVKM_RAM_MM_SHIFT, 1); if (ret) return ret; @@ -626,7 +655,7 @@ gf100_ram_new_(const struct nvkm_ram_func *func, return -ENOMEM; *pram = &ram->base; - ret = gf100_ram_ctor(func, fb, 0x022554, &ram->base); + ret = gf100_ram_ctor(func, fb, &ram->base); if (ret) return ret; @@ -705,6 +734,10 @@ gf100_ram_new_(const struct nvkm_ram_func *func, static const struct nvkm_ram_func gf100_ram = { + .upper = 0x0200000000, + .probe_fbp = gf100_ram_probe_fbp, + .probe_fbp_amount = gf100_ram_probe_fbp_amount, + .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .init = gf100_ram_init, .get = gf100_ram_get, .put = gf100_ram_put, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c index ddab0db3a7a5..985ec64cf369 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf108.c @@ -23,8 +23,30 @@ */ #include "ram.h" +u32 +gf108_ram_probe_fbp_amount(const struct nvkm_ram_func *func, u32 fbpao, + struct nvkm_device *device, int fbp, int *pltcs) +{ + u32 fbpt = nvkm_rd32(device, 0x022438); + u32 fbpat = nvkm_rd32(device, 0x02243c); + u32 fbpas = fbpat / fbpt; + u32 fbpa = fbp * fbpas; + u32 size = 0; + while (fbpas--) { + if (!(fbpao & BIT(fbpa))) + size += func->probe_fbpa_amount(device, fbpa); + fbpa++; + } + *pltcs = 1; + return size; +} + static const struct nvkm_ram_func gf108_ram = { + .upper = 0x0200000000, + .probe_fbp = gf100_ram_probe_fbp, + .probe_fbp_amount = gf108_ram_probe_fbp_amount, + .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .init = gf100_ram_init, .get = gf100_ram_get, .put = gf100_ram_put, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c index 2575d6c370a8..f6c00791722c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c @@ -1524,7 +1524,7 @@ gk104_ram_dtor(struct nvkm_ram *base) int gk104_ram_new_(const struct nvkm_ram_func *func, struct nvkm_fb *fb, - struct nvkm_ram **pram, u32 maskaddr) + struct nvkm_ram **pram) { struct nvkm_subdev *subdev = &fb->subdev; struct nvkm_device *device = subdev->device; @@ -1539,7 +1539,7 @@ gk104_ram_new_(const struct nvkm_ram_func *func, struct nvkm_fb *fb, return -ENOMEM; *pram = &ram->base; - ret = gf100_ram_ctor(func, fb, maskaddr, &ram->base); + ret = gf100_ram_ctor(func, fb, &ram->base); if (ret) return ret; @@ -1703,6 +1703,10 @@ gk104_ram_new_(const struct nvkm_ram_func *func, struct nvkm_fb *fb, static const struct nvkm_ram_func gk104_ram = { + .upper = 0x0200000000, + .probe_fbp = gf100_ram_probe_fbp, + .probe_fbp_amount = gf108_ram_probe_fbp_amount, + .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .dtor = gk104_ram_dtor, .init = gk104_ram_init, .get = gf100_ram_get, @@ -1715,5 +1719,5 @@ gk104_ram = { int gk104_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) { - return gk104_ram_new_(&gk104_ram, fb, pram, 0x022554); + return gk104_ram_new_(&gk104_ram, fb, pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c index af5a97a9061e..3f0b56347291 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c @@ -23,8 +23,20 @@ */ #include "ram.h" +u32 +gm107_ram_probe_fbp(const struct nvkm_ram_func *func, + struct nvkm_device *device, int fbp, int *pltcs) +{ + u32 fbpao = nvkm_rd32(device, 0x021c14); + return func->probe_fbp_amount(func, fbpao, device, fbp, pltcs); +} + static const struct nvkm_ram_func gm107_ram = { + .upper = 0x1000000000, + .probe_fbp = gm107_ram_probe_fbp, + .probe_fbp_amount = gf108_ram_probe_fbp_amount, + .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .dtor = gk104_ram_dtor, .init = gk104_ram_init, .get = gf100_ram_get, @@ -37,5 +49,5 @@ gm107_ram = { int gm107_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) { - return gk104_ram_new_(&gm107_ram, fb, pram, 0x021c14); + return gk104_ram_new_(&gm107_ram, fb, pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c index 04ae81f0db10..fd8facf90476 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm200.c @@ -23,8 +23,35 @@ */ #include "ram.h" +u32 +gm200_ram_probe_fbp_amount(const struct nvkm_ram_func *func, u32 fbpao, + struct nvkm_device *device, int fbp, int *pltcs) +{ + u32 ltcs = nvkm_rd32(device, 0x022450); + u32 fbpas = nvkm_rd32(device, 0x022458); + u32 fbpa = fbp * fbpas; + u32 size = 0; + if (!(nvkm_rd32(device, 0x021d38) & BIT(fbp))) { + u32 ltco = nvkm_rd32(device, 0x021d70 + (fbp * 4)); + u32 ltcm = ~ltco & ((1 << ltcs) - 1); + + while (fbpas--) { + if (!(fbpao & (1 << fbpa))) + size += func->probe_fbpa_amount(device, fbpa); + fbpa++; + } + + *pltcs = hweight32(ltcm); + } + return size; +} + static const struct nvkm_ram_func gm200_ram = { + .upper = 0x1000000000, + .probe_fbp = gm107_ram_probe_fbp, + .probe_fbp_amount = gm200_ram_probe_fbp_amount, + .probe_fbpa_amount = gf100_ram_probe_fbpa_amount, .dtor = gk104_ram_dtor, .init = gk104_ram_init, .get = gf100_ram_get, @@ -37,5 +64,5 @@ gm200_ram = { int gm200_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) { - return gk104_ram_new_(&gm200_ram, fb, pram, 0x021c14); + return gk104_ram_new_(&gm200_ram, fb, pram); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c index 405faabe8dcd..cac70047ad5a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c @@ -76,8 +76,18 @@ gp100_ram_init(struct nvkm_ram *ram) return 0; } +static u32 +gp100_ram_probe_fbpa(struct nvkm_device *device, int fbpa) +{ + return nvkm_rd32(device, 0x90020c + (fbpa * 0x4000)); +} + static const struct nvkm_ram_func -gp100_ram_func = { +gp100_ram = { + .upper = 0x1000000000, + .probe_fbp = gm107_ram_probe_fbp, + .probe_fbp_amount = gm200_ram_probe_fbp_amount, + .probe_fbpa_amount = gp100_ram_probe_fbpa, .init = gp100_ram_init, .get = gf100_ram_get, .put = gf100_ram_put, @@ -87,60 +97,10 @@ int gp100_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram) { struct nvkm_ram *ram; - struct nvkm_subdev *subdev = &fb->subdev; - struct nvkm_device *device = subdev->device; - enum nvkm_ram_type type = nvkm_fb_bios_memtype(device->bios); - const u32 rsvd_head = ( 256 * 1024); /* vga memory */ - const u32 rsvd_tail = (1024 * 1024); /* vbios etc */ - u32 fbpa_num = nvkm_rd32(device, 0x02243c), fbpa; - u32 fbio_opt = nvkm_rd32(device, 0x021c14); - u64 part, size = 0, comm = ~0ULL; - bool mixed = false; - int ret; - nvkm_debug(subdev, "02243c: %08x\n", fbpa_num); - nvkm_debug(subdev, "021c14: %08x\n", fbio_opt); - for (fbpa = 0; fbpa < fbpa_num; fbpa++) { - if (!(fbio_opt & (1 << fbpa))) { - part = nvkm_rd32(device, 0x90020c + (fbpa * 0x4000)); - nvkm_debug(subdev, "fbpa %02x: %lld MiB\n", fbpa, part); - part = part << 20; - if (part != comm) { - if (comm != ~0ULL) - mixed = true; - comm = min(comm, part); - } - size = size + part; - } - } + if (!(ram = *pram = kzalloc(sizeof(*ram), GFP_KERNEL))) + return -ENOMEM; - ret = nvkm_ram_new_(&gp100_ram_func, fb, type, size, 0, &ram); - *pram = ram; - if (ret) - return ret; + return gf100_ram_ctor(&gp100_ram, fb, ram); - nvkm_mm_fini(&ram->vram); - - if (mixed) { - ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT, - ((comm * fbpa_num) - rsvd_head) >> - NVKM_RAM_MM_SHIFT, 1); - if (ret) - return ret; - - ret = nvkm_mm_init(&ram->vram, (0x1000000000ULL + comm) >> - NVKM_RAM_MM_SHIFT, - (size - (comm * fbpa_num) - rsvd_tail) >> - NVKM_RAM_MM_SHIFT, 1); - if (ret) - return ret; - } else { - ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT, - (size - rsvd_head - rsvd_tail) >> - NVKM_RAM_MM_SHIFT, 1); - if (ret) - return ret; - } - - return 0; } From 2c33b5410de595d8eaa64f6ac47db96ce4008503 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 6 Mar 2017 15:03:19 +0000 Subject: [PATCH 0709/1299] drm/i915: No need to save/restore irq status in __i915_request_irq_complete It is always called from thread context. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 56e569f536ec..937534897d21 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4075,7 +4075,6 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) if (engine->irq_seqno_barrier && test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; /* The ordering of irq_posted versus applying the barrier * is crucial. The clearing of the current irq_posted must @@ -4097,7 +4096,7 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * the seqno before we believe it coherent since they see * irq_posted == false but we are still running). */ - spin_lock_irqsave(&b->irq_lock, flags); + spin_lock_irq(&b->irq_lock); if (b->irq_wait && b->irq_wait->tsk != current) /* Note that if the bottom-half is changed as we * are sending the wake-up, the new bottom-half will @@ -4106,7 +4105,7 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * ourself. */ wake_up_process(b->irq_wait->tsk); - spin_unlock_irqrestore(&b->irq_lock, flags); + spin_unlock_irq(&b->irq_lock); if (__i915_gem_request_completed(req, seqno)) return true; From a9e64931eec57b17d1e7c71c93f8aea57a3435b2 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 6 Mar 2017 15:03:20 +0000 Subject: [PATCH 0710/1299] drm/i915: No need to save/restore irq status in intel_breadcrumbs_fake_irq Timer callback is a known context so it is correct to always disable interrupts. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index ba73dc5b0328..1500a1d0e8aa 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -109,7 +109,6 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; /* * The timer persists in case we cannot enable interrupts, @@ -119,10 +118,10 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) * coherent seqno check. */ - spin_lock_irqsave(&b->irq_lock, flags); + spin_lock_irq(&b->irq_lock); if (!__intel_breadcrumbs_wakeup(b)) __intel_engine_disarm_breadcrumbs(engine); - spin_unlock_irqrestore(&b->irq_lock, flags); + spin_unlock_irq(&b->irq_lock); if (!b->irq_armed) return; From cdc3a45390341e579421957257e3607c017a0785 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 6 Mar 2017 15:03:21 +0000 Subject: [PATCH 0711/1299] drm/i915: No need to save/restore irq status in intel_engine_wakeup It is called from either the process or timer context so it is correct to always disable interrupts. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170306150321.29024-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 1500a1d0e8aa..f96d0f368f59 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -46,12 +46,11 @@ static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - unsigned long flags; unsigned int result; - spin_lock_irqsave(&b->irq_lock, flags); + spin_lock_irq(&b->irq_lock); result = __intel_breadcrumbs_wakeup(b); - spin_unlock_irqrestore(&b->irq_lock, flags); + spin_unlock_irq(&b->irq_lock); return result; } From 1fbdbcea80056acfc8c8506594744f5ec52208c1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 5 Mar 2017 17:05:57 -0800 Subject: [PATCH 0712/1299] avr32: Fix build error caused by include file reshuffling Various avr32 builds fail: arch/avr32/oprofile/backtrace.c:58: error: dereferencing pointer to incomplete type arch/avr32/oprofile/backtrace.c:60: error: implicit declaration of function 'user_mode' Signed-off-by: Guenter Roeck Acked-by: Hans-Christian Noren Egtvedt Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Robert Richter Cc: Thomas Gleixner Cc: oprofile-list@lists.sf.net Fixes: f780d89a0e82 ("sched/headers: Remove from ...") Link: http://lkml.kernel.org/r/1488762357-4500-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar --- arch/avr32/oprofile/backtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/avr32/oprofile/backtrace.c b/arch/avr32/oprofile/backtrace.c index 75d9ad6f99cf..29cf2f191bfd 100644 --- a/arch/avr32/oprofile/backtrace.c +++ b/arch/avr32/oprofile/backtrace.c @@ -14,7 +14,7 @@ */ #include -#include +#include #include /* The first two words of each frame on the stack look like this if we have From 80aa1a54f054a1c71cc88e0cad6cfa0d20a10f23 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 5 Mar 2017 10:27:14 -0800 Subject: [PATCH 0713/1299] h8300: Fix build breakage caused by header file changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following h8300 build failures: arch/h8300/kernel/ptrace_h.c: In function ‘trace_trap’: arch/h8300/kernel/ptrace_h.c:253:3: error: implicit declaration of function ‘force_sig’ Signed-off-by: Guenter Roeck Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yoshinori Sato Cc: uclinux-h8-devel@lists.sourceforge.jp Fixes: c3edc4010e9d ("sched/headers: Move task_struct::signal and ...") Link: http://lkml.kernel.org/r/1488738434-3504-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar --- arch/h8300/kernel/ptrace_h.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/h8300/kernel/ptrace_h.c b/arch/h8300/kernel/ptrace_h.c index fe3b5673baba..f5ff3b794c85 100644 --- a/arch/h8300/kernel/ptrace_h.c +++ b/arch/h8300/kernel/ptrace_h.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #define BREAKINST 0x5730 /* trapa #3 */ From bb35e4515411396219431fa235bf21bf9c2794e9 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 5 Mar 2017 17:13:31 -0800 Subject: [PATCH 0714/1299] drivers/char/nwbutton: Fix build breakage caused by include file reshuffling Fix: drivers/char/nwbutton.c: In function 'button_sequence_finished': drivers/char/nwbutton.c:134:3: error: implicit declaration of function 'kill_cad_pid' The declaration has been moved from one include file to another. Signed-off-by: Guenter Roeck Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: c3edc4010e9d102 ("sched/headers: Move task_struct::signal and ...") Link: http://lkml.kernel.org/r/1488762811-9022-1-git-send-email-linux@roeck-us.net Signed-off-by: Ingo Molnar --- drivers/char/nwbutton.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c index a5b1eb276c0b..e6d0d271c58c 100644 --- a/drivers/char/nwbutton.c +++ b/drivers/char/nwbutton.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include From 5c51f4ae84df0f9df33ac08aa5be50061a8b4242 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 2 Mar 2017 16:57:23 -0600 Subject: [PATCH 0715/1299] objtool: Fix another GCC jump table detection issue Arnd Bergmann reported a (false positive) objtool warning: drivers/infiniband/sw/rxe/rxe_resp.o: warning: objtool: rxe_responder()+0xfe: sibling call from callable instruction with changed frame pointer The issue is in find_switch_table(). It tries to find a switch statement's jump table by walking backwards from an indirect jump instruction, looking for a relocation to the .rodata section. In this case it stopped walking prematurely: the first .rodata relocation it encountered was for a variable (resp_state_name) instead of a jump table, so it just assumed there wasn't a jump table. The fix is to ignore any .rodata relocation which refers to an ELF object symbol. This works because the jump tables are anonymous and have no symbols associated with them. Reported-by: Arnd Bergmann Tested-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 3732710ff6f2 ("objtool: Improve rare switch jump table pattern detection") Link: http://lkml.kernel.org/r/20170302225723.3ndbsnl4hkqbne7a@treble Signed-off-by: Ingo Molnar --- tools/objtool/builtin-check.c | 15 ++++++++++++--- tools/objtool/elf.c | 12 ++++++++++++ tools/objtool/elf.h | 1 + 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 4cfdbb5b6967..066086dd59a8 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -805,11 +805,20 @@ static struct rela *find_switch_table(struct objtool_file *file, insn->jump_dest->offset > orig_insn->offset)) break; + /* look for a relocation which references .rodata */ text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); - if (text_rela && text_rela->sym == file->rodata->sym) - return find_rela_by_dest(file->rodata, - text_rela->addend); + if (!text_rela || text_rela->sym != file->rodata->sym) + continue; + + /* + * Make sure the .rodata address isn't associated with a + * symbol. gcc jump tables are anonymous data. + */ + if (find_symbol_containing(file->rodata, text_rela->addend)) + continue; + + return find_rela_by_dest(file->rodata, text_rela->addend); } return NULL; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 0d7983ac63ef..d897702ce742 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -85,6 +85,18 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) return NULL; } +struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) +{ + struct symbol *sym; + + list_for_each_entry(sym, &sec->symbol_list, list) + if (sym->type != STT_SECTION && + offset >= sym->offset && offset < sym->offset + sym->len) + return sym; + + return NULL; +} + struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, unsigned int len) { diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index aa1ff6596684..731973e1a3f5 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -79,6 +79,7 @@ struct elf { struct elf *elf_open(const char *name); struct section *find_section_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); +struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, unsigned int len); From aebfd1d37194e00d4c417e7be97efeb736cd9c04 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Wed, 22 Feb 2017 11:55:36 -0800 Subject: [PATCH 0716/1299] drm/i915/: DMC 1.04 for Geminilake There is a nre version of DMC available for GLK. The release notes mentions: This FW has the fix to remove the hang conditions due to some debug related issues. Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Reviewed-by: Ander Conselvan de Oliveira Signed-off-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1487793336-31857-1-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_csr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 02a90257161b..36832257cc9b 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -34,8 +34,8 @@ * low-power state and comes back to normal. */ -#define I915_CSR_GLK "i915/glk_dmc_ver1_03.bin" -#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 3) +#define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin" +#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" MODULE_FIRMWARE(I915_CSR_KBL); From fa3aa7a54fe6d3abf128f13cd4bbd40eaa48fed2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 7 Mar 2017 10:55:34 +0100 Subject: [PATCH 0717/1299] jiffies: Revert bogus conversion of NSEC_PER_SEC to TICK_NSEC commit 93825f2ec736 converted NSEC_PER_SEC to TICK_NSEC because the author confused NSEC_PER_JIFFY with NSEC_PER_SEC. As a result, the calculation of refined jiffies got broken, triggering lockups. Fixes: 93825f2ec736 ("jiffies: Reuse TICK_NSEC instead of NSEC_PER_JIFFY") Reported-and-tested-by: Meelis Roos Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1488880534-3777-1-git-send-email-fweisbec@gmail.com Signed-off-by: Thomas Gleixner --- kernel/time/jiffies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 7906b3f0c41a..497719127bf9 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -125,7 +125,7 @@ int register_refined_jiffies(long cycles_per_second) shift_hz += cycles_per_tick/2; do_div(shift_hz, cycles_per_tick); /* Calculate nsec_per_tick using shift_hz */ - nsec_per_tick = (u64)TICK_NSEC << 8; + nsec_per_tick = (u64)NSEC_PER_SEC << 8; nsec_per_tick += (u32)shift_hz/2; do_div(nsec_per_tick, (u32)shift_hz); From d2fa80a50a366bc87c8c0bd73d531dec974d20ae Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Mar 2017 15:46:44 +0000 Subject: [PATCH 0718/1299] drm/i915: Avoid clearing the base drm_crtc_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To prevent having to preserve the drm_crtc_state as we clear the intel_crtc_state, only memset our extended state. Fixes: drivers/gpu/drm/i915/intel_display.c: In function ‘clear_intel_crtc_state’: drivers/gpu/drm/i915/intel_display.c:11301:1: error: the frame size of 1056 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] v2: Add a comment and BUILD_BUG_ON to explain the memset() Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170303154644.6709-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b2b0661ccbed..e44160f35afd 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11272,7 +11272,6 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - struct drm_crtc_state tmp_state; struct intel_crtc_scaler_state scaler_state; struct intel_dpll_hw_state dpll_hw_state; struct intel_shared_dpll *shared_dpll; @@ -11284,7 +11283,6 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) * fixed, so that the crtc_state can be safely duplicated. For now, * only fields that are know to not cause problems are preserved. */ - tmp_state = crtc_state->base; scaler_state = crtc_state->scaler_state; shared_dpll = crtc_state->shared_dpll; dpll_hw_state = crtc_state->dpll_hw_state; @@ -11292,9 +11290,11 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) wm_state = crtc_state->wm; - memset(crtc_state, 0, sizeof *crtc_state); + /* Keep base drm_crtc_state intact, only clear our extended struct */ + BUILD_BUG_ON(offsetof(struct intel_crtc_state, base)); + memset(&crtc_state->base + 1, 0, + sizeof(*crtc_state) - sizeof(crtc_state->base)); - crtc_state->base = tmp_state; crtc_state->scaler_state = scaler_state; crtc_state->shared_dpll = shared_dpll; crtc_state->dpll_hw_state = dpll_hw_state; From 7967ef6a02c7e6a85d780ab20b38ece4f210243a Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 6 Mar 2017 16:31:24 +0200 Subject: [PATCH 0719/1299] drm/i915/dsi: remove support for more than one panel driver Fact is, there are no other panel drivers except the VBT based one. Simplify the code and maintenance. No functional changes. Reviewed-by: Hans de Goede Reviewed-by: Bob Paauwe Reviewed-by: Daniel Vetter Cc: Madhav Chauhan Cc: Hans de Goede Cc: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/7dfd041dd25e8e930150ede09589bb232f6248d5.1488810382.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 19 +------------------ drivers/gpu/drm/i915/intel_dsi.h | 2 +- drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 2 +- 3 files changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 323fc097c3ee..316a0d6511fb 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -36,16 +36,6 @@ #include "intel_drv.h" #include "intel_dsi.h" -static const struct { - u16 panel_id; - struct drm_panel * (*init)(struct intel_dsi *intel_dsi, u16 panel_id); -} intel_dsi_drivers[] = { - { - .panel_id = MIPI_DSI_GENERIC_PANEL_ID, - .init = vbt_panel_init, - }, -}; - /* return pixels in terms of txbyteclkhs */ static u16 txbyteclkhs(u16 pixels, int bpp, int lane_count, u16 burst_mode_ratio) @@ -1709,7 +1699,6 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) struct drm_connector *connector; struct drm_display_mode *scan, *fixed_mode = NULL; enum port port; - unsigned int i; DRM_DEBUG_KMS("\n"); @@ -1816,13 +1805,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) intel_dsi->dsi_hosts[port] = host; } - for (i = 0; i < ARRAY_SIZE(intel_dsi_drivers); i++) { - intel_dsi->panel = intel_dsi_drivers[i].init(intel_dsi, - intel_dsi_drivers[i].panel_id); - if (intel_dsi->panel) - break; - } - + intel_dsi->panel = intel_dsi_vbt_init(intel_dsi, MIPI_DSI_GENERIC_PANEL_ID); if (!intel_dsi->panel) { DRM_DEBUG_KMS("no device found\n"); goto err; diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index 548649158abd..ac2f9e481f9d 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -146,7 +146,7 @@ u32 intel_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, void intel_dsi_reset_clocks(struct intel_encoder *encoder, enum port port); -struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id); +struct drm_panel *intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id); enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt); #endif /* _INTEL_DSI_H */ diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index ab922b6eb36a..67d5092fd830 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -518,7 +518,7 @@ static const struct drm_panel_funcs vbt_panel_funcs = { .get_modes = vbt_panel_get_modes, }; -struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) +struct drm_panel *intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) { struct drm_device *dev = intel_dsi->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); From b9e56754ec797da839cbf103222084228a67c65e Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 6 Mar 2017 16:31:25 +0200 Subject: [PATCH 0720/1299] drm/i915/dsi: call vbt_panel_get_modes directly instead of via drm_panel Commit 18a00095a5f3 ("drm/i915/dsi: Make intel_dsi_enable/disable directly exec VBT sequences") started calling the VBT sequence functions directly instead of using the drm_panel hooks. Remove the last drm_panel hook by calling vbt_panel_get_modes() directly. No functional changes. Reviewed-by: Hans de Goede Reviewed-by: Bob Paauwe Reviewed-by: Daniel Vetter Cc: Madhav Chauhan Cc: Hans de Goede Cc: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/63d0d41f29583507f5968b42b5f52e6574a1f245.1488810382.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 2 +- drivers/gpu/drm/i915/intel_dsi.h | 1 + drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 7 +------ 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 316a0d6511fb..5a38112c8619 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1843,7 +1843,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) drm_panel_attach(intel_dsi->panel, connector); mutex_lock(&dev->mode_config.mutex); - drm_panel_get_modes(intel_dsi->panel); + intel_dsi_vbt_get_modes(intel_dsi->panel); list_for_each_entry(scan, &connector->probed_modes, head) { if ((scan->type & DRM_MODE_TYPE_PREFERRED)) { fixed_mode = drm_mode_duplicate(dev, scan); diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index ac2f9e481f9d..bd3ec02daecb 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -147,6 +147,7 @@ void intel_dsi_reset_clocks(struct intel_encoder *encoder, enum port port); struct drm_panel *intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id); +int intel_dsi_vbt_get_modes(struct drm_panel *panel); enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt); #endif /* _INTEL_DSI_H */ diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 67d5092fd830..efe4845c56ac 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -492,7 +492,7 @@ void intel_dsi_exec_vbt_sequence(struct intel_dsi *intel_dsi, } } -static int vbt_panel_get_modes(struct drm_panel *panel) +int intel_dsi_vbt_get_modes(struct drm_panel *panel) { struct vbt_panel *vbt_panel = to_vbt_panel(panel); struct intel_dsi *intel_dsi = vbt_panel->intel_dsi; @@ -514,10 +514,6 @@ static int vbt_panel_get_modes(struct drm_panel *panel) return 1; } -static const struct drm_panel_funcs vbt_panel_funcs = { - .get_modes = vbt_panel_get_modes, -}; - struct drm_panel *intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) { struct drm_device *dev = intel_dsi->base.base.dev; @@ -816,7 +812,6 @@ struct drm_panel *intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) vbt_panel->intel_dsi = intel_dsi; drm_panel_init(&vbt_panel->panel); - vbt_panel->panel.funcs = &vbt_panel_funcs; drm_panel_add(&vbt_panel->panel); /* a regular driver would get the device in probe */ From 3f751d6517d2108acd292c028f350a4494945538 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 6 Mar 2017 16:31:26 +0200 Subject: [PATCH 0721/1299] drm/i915/dsi: stop using the drm_panel framework completely Now that we've stopped using the drm_panel hooks, there aren't any benefits left with using the drm_panel framework. Remove the rest of the drm_panel use. No functional changes. Reviewed-by: Hans de Goede Reviewed-by: Bob Paauwe Reviewed-by: Daniel Vetter Cc: Madhav Chauhan Cc: Hans de Goede Cc: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/6602e36641451952065092401bd6e6cfbe93e208.1488810382.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 14 ++------ drivers/gpu/drm/i915/intel_dsi.h | 5 ++- drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 39 ++++------------------ 3 files changed, 11 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 5a38112c8619..b9ba2d55e4e1 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -1642,12 +1641,6 @@ static void intel_dsi_encoder_destroy(struct drm_encoder *encoder) { struct intel_dsi *intel_dsi = enc_to_intel_dsi(encoder); - if (intel_dsi->panel) { - drm_panel_detach(intel_dsi->panel); - /* XXX: Logically this call belongs in the panel driver. */ - drm_panel_remove(intel_dsi->panel); - } - /* dispose of the gpios */ if (intel_dsi->gpio_panel) gpiod_put(intel_dsi->gpio_panel); @@ -1805,8 +1798,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) intel_dsi->dsi_hosts[port] = host; } - intel_dsi->panel = intel_dsi_vbt_init(intel_dsi, MIPI_DSI_GENERIC_PANEL_ID); - if (!intel_dsi->panel) { + if (!intel_dsi_vbt_init(intel_dsi, MIPI_DSI_GENERIC_PANEL_ID)) { DRM_DEBUG_KMS("no device found\n"); goto err; } @@ -1840,10 +1832,8 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) intel_connector_attach_encoder(intel_connector, intel_encoder); - drm_panel_attach(intel_dsi->panel, connector); - mutex_lock(&dev->mode_config.mutex); - intel_dsi_vbt_get_modes(intel_dsi->panel); + intel_dsi_vbt_get_modes(intel_dsi); list_for_each_entry(scan, &connector->probed_modes, head) { if ((scan->type & DRM_MODE_TYPE_PREFERRED)) { fixed_mode = drm_mode_duplicate(dev, scan); diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index bd3ec02daecb..0a3f133e8888 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -39,7 +39,6 @@ struct intel_dsi_host; struct intel_dsi { struct intel_encoder base; - struct drm_panel *panel; struct intel_dsi_host *dsi_hosts[I915_MAX_PORTS]; /* GPIO Desc for CRC based Panel control */ @@ -146,8 +145,8 @@ u32 intel_dsi_get_pclk(struct intel_encoder *encoder, int pipe_bpp, void intel_dsi_reset_clocks(struct intel_encoder *encoder, enum port port); -struct drm_panel *intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id); -int intel_dsi_vbt_get_modes(struct drm_panel *panel); +bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id); +int intel_dsi_vbt_get_modes(struct intel_dsi *intel_dsi); enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt); #endif /* _INTEL_DSI_H */ diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index efe4845c56ac..af1783f94d23 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include