-rw-r--r--  Documentation/DocBook/drm.tmpl | 7
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Makefile | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 308
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 886
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h | 193
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | 168
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h | 294
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 48
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 6
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 46
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h | 6
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h | 290
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 24
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 18
-rw-r--r--  drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 21
-rw-r--r--  drivers/gpu/drm/drm_mm.c | 4
-rw-r--r--  drivers/gpu/drm/i2c/adv7511.c | 2
-rw-r--r--  drivers/gpu/drm/i2c/tda998x_drv.c | 3
-rw-r--r--  drivers/gpu/drm/i915/i915_debugfs.c | 81
-rw-r--r--  drivers/gpu/drm/i915/i915_dma.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.c | 24
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.h | 98
-rw-r--r--  drivers/gpu/drm/i915/i915_gem.c | 642
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_context.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_debug.c | 92
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_gtt.c | 9
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_userptr.c | 11
-rw-r--r--  drivers/gpu/drm/i915/i915_gpu_error.c | 19
-rw-r--r--  drivers/gpu/drm/i915/i915_irq.c | 47
-rw-r--r--  drivers/gpu/drm/i915/i915_reg.h | 32
-rw-r--r--  drivers/gpu/drm/i915/intel_bios.c | 150
-rw-r--r--  drivers/gpu/drm/i915/intel_crt.c | 15
-rw-r--r--  drivers/gpu/drm/i915/intel_csr.c | 53
-rw-r--r--  drivers/gpu/drm/i915/intel_ddi.c | 120
-rw-r--r--  drivers/gpu/drm/i915/intel_display.c | 612
-rw-r--r--  drivers/gpu/drm/i915/intel_dp.c | 229
-rw-r--r--  drivers/gpu/drm/i915/intel_drv.h | 25
-rw-r--r--  drivers/gpu/drm/i915/intel_dsi.c | 8
-rw-r--r--  drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 4
-rw-r--r--  drivers/gpu/drm/i915/intel_dsi_pll.c | 80
-rw-r--r--  drivers/gpu/drm/i915/intel_fbdev.c | 28
-rw-r--r--  drivers/gpu/drm/i915/intel_hdmi.c | 116
-rw-r--r--  drivers/gpu/drm/i915/intel_i2c.c | 21
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c | 36
-rw-r--r--  drivers/gpu/drm/i915/intel_overlay.c | 2
-rw-r--r--  drivers/gpu/drm/i915/intel_pm.c | 252
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c | 50
-rw-r--r--  drivers/gpu/drm/i915/intel_runtime_pm.c | 104
-rw-r--r--  drivers/gpu/drm/i915/intel_sdvo.c | 74
-rw-r--r--  drivers/gpu/drm/i915/intel_sideband.c | 18
-rw-r--r--  drivers/gpu/drm/i915/intel_sprite.c | 70
-rw-r--r--  drivers/gpu/drm/radeon/atombios_dp.c | 8
-rw-r--r--  drivers/gpu/drm/radeon/cik.c | 25
-rw-r--r--  drivers/gpu/drm/radeon/cik_reg.h | 56
-rw-r--r--  drivers/gpu/drm/radeon/cikd.h | 9
-rw-r--r--  drivers/gpu/drm/radeon/evergreen.c | 69
-rw-r--r--  drivers/gpu/drm/radeon/ni.c | 102
-rw-r--r--  drivers/gpu/drm/radeon/nid.h | 7
-rw-r--r--  drivers/gpu/drm/radeon/r100.c | 22
-rw-r--r--  drivers/gpu/drm/radeon/r300.c | 25
-rw-r--r--  drivers/gpu/drm/radeon/r600.c | 47
-rw-r--r--  drivers/gpu/drm/radeon/radeon.h | 242
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.c | 21
-rw-r--r--  drivers/gpu/drm/radeon/radeon_asic.h | 6
-rw-r--r--  drivers/gpu/drm/radeon/radeon_audio.c | 18
-rw-r--r--  drivers/gpu/drm/radeon/radeon_audio.h | 2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_device.c | 2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_drv.c | 3
-rw-r--r--  drivers/gpu/drm/radeon/radeon_irq_kms.c | 10
-rw-r--r--  drivers/gpu/drm/radeon/radeon_kfd.c | 151
-rw-r--r--  drivers/gpu/drm/radeon/radeon_kms.c | 3
-rw-r--r--  drivers/gpu/drm/radeon/radeon_mode.h | 2
-rw-r--r--  drivers/gpu/drm/radeon/radeon_vce.c | 28
-rw-r--r--  drivers/gpu/drm/radeon/radeon_vm.c | 53
-rw-r--r--  drivers/gpu/drm/radeon/si.c | 167
-rw-r--r--  drivers/gpu/drm/radeon/si_dpm.c | 109
-rw-r--r--  drivers/gpu/drm/radeon/sid.h | 29
-rw-r--r--  drivers/gpu/drm/radeon/trinity_dpm.c | 83
-rw-r--r--  drivers/gpu/drm/radeon/vce_v1_0.c | 197
-rw-r--r--  drivers/gpu/drm/radeon/vce_v2_0.c | 16
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 61
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_crtc.h | 14
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_drv.h | 6
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_group.c | 6
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_group.h | 8
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_kms.c | 105
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_plane.c | 71
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_plane.h | 21
-rw-r--r--  drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 9
-rw-r--r--  include/drm/i915_pciids.h | 6
-rw-r--r--  include/uapi/drm/i915_drm.h | 8
-rw-r--r--  include/uapi/drm/radeon_drm.h | 1
-rw-r--r--  include/uapi/linux/kfd_ioctl.h | 43
96 files changed, 5697 insertions, 1653 deletions
diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 5d9d851d8623..109fde8b4a28 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -4165,6 +4165,12 @@ int num_ioctls;</synopsis>
</tgroup>
</table>
</sect2>
+
+ <sect2>
+ <title>CSR firmware support for DMC</title>
+!Pdrivers/gpu/drm/i915/intel_csr.c csr support for dmc
+!Idrivers/gpu/drm/i915/intel_csr.c
+ </sect2>
</sect1>
<sect1>
@@ -4216,7 +4222,6 @@ int num_ioctls;</synopsis>
!Idrivers/gpu/drm/i915/i915_gem_shrinker.c
</sect2>
</sect1>
-
<sect1>
<title> Tracing </title>
<para>
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 652d25478fd5..28551153ec6d 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -12,6 +12,7 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_kernel_queue_vi.o kfd_packet_manager.o \
kfd_process_queue_manager.o kfd_device_queue_manager.o \
kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
- kfd_interrupt.o kfd_events.o cik_event_interrupt.o
+ kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
+ kfd_dbgdev.o kfd_dbgmgr.o
obj-$(CONFIG_HSA_AMD) += amdkfd.o
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index b2c6109bd7af..96c904b3acb7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -35,6 +35,7 @@
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
+#include "kfd_dbgmgr.h"
static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
@@ -432,6 +433,301 @@ out:
return err;
}
+static int kfd_ioctl_dbg_register(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_dbg_register_args *args = data;
+ struct kfd_dev *dev;
+ struct kfd_dbgmgr *dbgmgr_ptr;
+ struct kfd_process_device *pdd;
+ bool create_ok;
+ long status = 0;
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ if (dev->device_info->asic_family == CHIP_CARRIZO) {
+ pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(kfd_get_dbgmgr_mutex());
+ mutex_lock(&p->mutex);
+
+	/*
+	 * make sure that we have a pdd, in case this is the first queue
+	 * created for this process
+	 */
+ pdd = kfd_bind_process_to_device(dev, p);
+ if (IS_ERR(pdd)) {
+ mutex_unlock(&p->mutex);
+ mutex_unlock(kfd_get_dbgmgr_mutex());
+ return PTR_ERR(pdd);
+ }
+
+ if (dev->dbgmgr == NULL) {
+ /* In case of a legal call, we have no dbgmgr yet */
+ create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
+ if (create_ok) {
+ status = kfd_dbgmgr_register(dbgmgr_ptr, p);
+ if (status != 0)
+ kfd_dbgmgr_destroy(dbgmgr_ptr);
+ else
+ dev->dbgmgr = dbgmgr_ptr;
+ }
+ } else {
+ pr_debug("debugger already registered\n");
+ status = -EINVAL;
+ }
+
+ mutex_unlock(&p->mutex);
+ mutex_unlock(kfd_get_dbgmgr_mutex());
+
+ return status;
+}
+
+static int kfd_ioctl_dbg_unregister(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_dbg_unregister_args *args = data;
+ struct kfd_dev *dev;
+ long status;
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ if (dev->device_info->asic_family == CHIP_CARRIZO) {
+		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(kfd_get_dbgmgr_mutex());
+
+ status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
+ if (status == 0) {
+ kfd_dbgmgr_destroy(dev->dbgmgr);
+ dev->dbgmgr = NULL;
+ }
+
+ mutex_unlock(kfd_get_dbgmgr_mutex());
+
+ return status;
+}
+
+/*
+ * Parse and generate a variable size data structure for address watch.
+ * The total buffer size and the number of watch points are limited in
+ * order to prevent kernel abuse. (This has no bearing on the much smaller
+ * HW limitation, which is enforced by the dbgdev module.)
+ * Please also note that the watch addresses themselves are not individually
+ * "copied from user", since they are user mode values that are programmed
+ * into the HW as-is.
+ */
+static int kfd_ioctl_dbg_address_watch(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_dbg_address_watch_args *args = data;
+ struct kfd_dev *dev;
+ struct dbg_address_watch_info aw_info;
+ unsigned char *args_buff;
+ long status;
+ void __user *cmd_from_user;
+ uint64_t watch_mask_value = 0;
+ unsigned int args_idx = 0;
+
+ memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ if (dev->device_info->asic_family == CHIP_CARRIZO) {
+ pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
+ return -EINVAL;
+ }
+
+ cmd_from_user = (void __user *) args->content_ptr;
+
+ /* Validate arguments */
+
+ if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
+ (args->buf_size_in_bytes <= sizeof(*args)) ||
+ (cmd_from_user == NULL))
+ return -EINVAL;
+
+ /* this is the actual buffer to work with */
+
+ args_buff = kmalloc(args->buf_size_in_bytes -
+ sizeof(*args), GFP_KERNEL);
+ if (args_buff == NULL)
+ return -ENOMEM;
+
+ status = copy_from_user(args_buff, cmd_from_user,
+ args->buf_size_in_bytes - sizeof(*args));
+
+ if (status != 0) {
+ pr_debug("Failed to copy address watch user data\n");
+ kfree(args_buff);
+ return -EINVAL;
+ }
+
+ aw_info.process = p;
+
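+	/*
+	 * The copied payload is laid out as: num_watch_points, an array of
+	 * watch modes, an array of watch addresses and an optional array of
+	 * watch masks. It is parsed field by field below.
+	 */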
+ aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
+ args_idx += sizeof(aw_info.num_watch_points);
+
+ aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
+ args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
+
+ /*
+ * set watch address base pointer to point on the array base
+ * within args_buff
+ */
+ aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
+
+ /* skip over the addresses buffer */
+ args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
+
+ if (args_idx >= args->buf_size_in_bytes) {
+ kfree(args_buff);
+ return -EINVAL;
+ }
+
+ watch_mask_value = (uint64_t) args_buff[args_idx];
+
+ if (watch_mask_value > 0) {
+ /*
+ * There is an array of masks.
+ * set watch mask base pointer to point on the array base
+ * within args_buff
+ */
+ aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
+
+ /* skip over the masks buffer */
+ args_idx += sizeof(aw_info.watch_mask) *
+ aw_info.num_watch_points;
+ } else {
+ /* just the NULL mask, set to NULL and skip over it */
+ aw_info.watch_mask = NULL;
+ args_idx += sizeof(aw_info.watch_mask);
+ }
+
+ if (args_idx > args->buf_size_in_bytes) {
+ kfree(args_buff);
+ return -EINVAL;
+ }
+
+ /* Currently HSA Event is not supported for DBG */
+ aw_info.watch_event = NULL;
+
+ mutex_lock(kfd_get_dbgmgr_mutex());
+
+ status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
+
+ mutex_unlock(kfd_get_dbgmgr_mutex());
+
+ kfree(args_buff);
+
+ return status;
+}
+
+/* Parse and generate fixed size data structure for wave control */
+static int kfd_ioctl_dbg_wave_control(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_dbg_wave_control_args *args = data;
+ struct kfd_dev *dev;
+ struct dbg_wave_control_info wac_info;
+ unsigned char *args_buff;
+ uint32_t computed_buff_size;
+ long status;
+ void __user *cmd_from_user;
+ unsigned int args_idx = 0;
+
+ memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
+
+ /* we use compact form, independent of the packing attribute value */
+ computed_buff_size = sizeof(*args) +
+ sizeof(wac_info.mode) +
+ sizeof(wac_info.operand) +
+ sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
+ sizeof(wac_info.dbgWave_msg.MemoryVA) +
+ sizeof(wac_info.trapId);
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (dev == NULL)
+ return -EINVAL;
+
+ if (dev->device_info->asic_family == CHIP_CARRIZO) {
+ pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
+ return -EINVAL;
+ }
+
+ /* input size must match the computed "compact" size */
+ if (args->buf_size_in_bytes != computed_buff_size) {
+		pr_debug("size mismatch, computed : actual %u : %u\n",
+				computed_buff_size, args->buf_size_in_bytes);
+ return -EINVAL;
+ }
+
+ cmd_from_user = (void __user *) args->content_ptr;
+
+ if (cmd_from_user == NULL)
+ return -EINVAL;
+
+ /* this is the actual buffer to work with */
+
+ args_buff = kmalloc(args->buf_size_in_bytes - sizeof(*args),
+ GFP_KERNEL);
+
+ if (args_buff == NULL)
+ return -ENOMEM;
+
+ /* Now copy the entire buffer from user */
+ status = copy_from_user(args_buff, cmd_from_user,
+ args->buf_size_in_bytes - sizeof(*args));
+ if (status != 0) {
+ pr_debug("Failed to copy wave control user data\n");
+ kfree(args_buff);
+ return -EINVAL;
+ }
+
+	/* parse the payload: operand, mode, trap id and wave message value */
+ wac_info.process = p;
+
+ wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
+ args_idx += sizeof(wac_info.operand);
+
+ wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
+ args_idx += sizeof(wac_info.mode);
+
+ wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
+ args_idx += sizeof(wac_info.trapId);
+
+ wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
+ *((uint32_t *)(&args_buff[args_idx]));
+ wac_info.dbgWave_msg.MemoryVA = NULL;
+
+ mutex_lock(kfd_get_dbgmgr_mutex());
+
+ pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
+ wac_info.process, wac_info.operand,
+ wac_info.mode, wac_info.trapId,
+ wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+
+ status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
+
+ pr_debug("Returned status of dbg manager is %ld\n", status);
+
+ mutex_unlock(kfd_get_dbgmgr_mutex());
+
+ kfree(args_buff);
+
+ return status;
+}
+
static int kfd_ioctl_get_clock_counters(struct file *filep,
struct kfd_process *p, void *data)
{
@@ -612,6 +908,18 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
kfd_ioctl_wait_events, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
+ kfd_ioctl_dbg_register, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
+			kfd_ioctl_dbg_unregister, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
+ kfd_ioctl_dbg_address_watch, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
+ kfd_ioctl_dbg_wave_control, 0),
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
new file mode 100644
index 000000000000..96153f28d73f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -0,0 +1,886 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/device.h>
+
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_headers_diq.h"
+#include "kfd_kernel_queue.h"
+#include "kfd_priv.h"
+#include "kfd_pm4_opcodes.h"
+#include "cik_regs.h"
+#include "kfd_dbgmgr.h"
+#include "kfd_dbgdev.h"
+#include "kfd_device_queue_manager.h"
+#include "../../radeon/cik_reg.h"
+
+static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
+{
+ BUG_ON(!dev || !dev->kfd2kgd);
+
+ dev->kfd2kgd->address_watch_disable(dev->kgd);
+}
+
+static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
+ unsigned int pasid, uint64_t vmid0_address,
+ uint32_t *packet_buff, size_t size_in_bytes)
+{
+ struct pm4__release_mem *rm_packet;
+ struct pm4__indirect_buffer_pasid *ib_packet;
+ struct kfd_mem_obj *mem_obj;
+ size_t pq_packets_size_in_bytes;
+ union ULARGE_INTEGER *largep;
+ union ULARGE_INTEGER addr;
+ struct kernel_queue *kq;
+ uint64_t *rm_state;
+ unsigned int *ib_packet_buff;
+ int status;
+
+ BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);
+
+ kq = dbgdev->kq;
+
+ pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) +
+ sizeof(struct pm4__indirect_buffer_pasid);
+
+ /*
+ * We acquire a buffer from DIQ
+ * The receive packet buff will be sitting on the Indirect Buffer
+ * and in the PQ we put the IB packet + sync packet(s).
+ */
+ status = kq->ops.acquire_packet_buffer(kq,
+ pq_packets_size_in_bytes / sizeof(uint32_t),
+ &ib_packet_buff);
+ if (status != 0) {
+ pr_err("amdkfd: acquire_packet_buffer failed\n");
+ return status;
+ }
+
+ memset(ib_packet_buff, 0, pq_packets_size_in_bytes);
+
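+	/*
+	 * The PQ buffer holds two packets: an INDIRECT_BUFFER_PASID packet
+	 * pointing at the caller's IB, followed by a RELEASE_MEM packet that
+	 * serves as the GPU-CPU sync fence.
+	 */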
+ ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff);
+
+ ib_packet->header.count = 3;
+ ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID;
+ ib_packet->header.type = PM4_TYPE_3;
+
+ largep = (union ULARGE_INTEGER *) &vmid0_address;
+
+ ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2;
+ ib_packet->bitfields3.ib_base_hi = largep->u.high_part;
+
+ ib_packet->control = (1 << 23) | (1 << 31) |
+ ((size_in_bytes / sizeof(uint32_t)) & 0xfffff);
+
+ ib_packet->bitfields5.pasid = pasid;
+
+	/*
+	 * For now we use RELEASE_MEM for GPU-CPU synchronization
+	 * (consider WaitRegMem + WriteData as a better alternative).
+	 * We get a GART allocation (gpu/cpu mapping) for the sync
+	 * variable, and wait until:
+	 * (a) Sync with HW
+	 * (b) Sync var is written by CP to mem.
+	 */
+ rm_packet = (struct pm4__release_mem *) (ib_packet_buff +
+ (sizeof(struct pm4__indirect_buffer_pasid) /
+ sizeof(unsigned int)));
+
+ status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
+ &mem_obj);
+
+ if (status != 0) {
+ pr_err("amdkfd: Failed to allocate GART memory\n");
+ kq->ops.rollback_packet(kq);
+ return status;
+ }
+
+ rm_state = (uint64_t *) mem_obj->cpu_ptr;
+
+ *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING;
+
+ rm_packet->header.opcode = IT_RELEASE_MEM;
+ rm_packet->header.type = PM4_TYPE_3;
+ rm_packet->header.count = sizeof(struct pm4__release_mem) /
+ sizeof(unsigned int) - 2;
+
+ rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+ rm_packet->bitfields2.event_index =
+ event_index___release_mem__end_of_pipe;
+
+ rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+ rm_packet->bitfields2.atc = 0;
+ rm_packet->bitfields2.tc_wb_action_ena = 1;
+
+ addr.quad_part = mem_obj->gpu_addr;
+
+ rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2;
+ rm_packet->address_hi = addr.u.high_part;
+
+ rm_packet->bitfields3.data_sel =
+ data_sel___release_mem__send_64_bit_data;
+
+ rm_packet->bitfields3.int_sel =
+ int_sel___release_mem__send_data_after_write_confirm;
+
+ rm_packet->bitfields3.dst_sel =
+ dst_sel___release_mem__memory_controller;
+
+ rm_packet->data_lo = QUEUESTATE__ACTIVE;
+
+ kq->ops.submit_packet(kq);
+
+ /* Wait till CP writes sync code: */
+ status = amdkfd_fence_wait_timeout(
+ (unsigned int *) rm_state,
+ QUEUESTATE__ACTIVE, 1500);
+
+ kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+
+ return status;
+}
+
+static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
+{
+ BUG_ON(!dbgdev);
+
+ /*
+ * no action is needed in this case,
+ * just make sure diq will not be used
+ */
+
+ dbgdev->kq = NULL;
+
+ return 0;
+}
+
+static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
+{
+ struct queue_properties properties;
+ unsigned int qid;
+ struct kernel_queue *kq = NULL;
+ int status;
+
+ BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
+
+ status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
+ &properties, 0, KFD_QUEUE_TYPE_DIQ,
+ &qid);
+
+ if (status) {
+ pr_err("amdkfd: Failed to create DIQ\n");
+ return status;
+ }
+
+ pr_debug("DIQ Created with queue id: %d\n", qid);
+
+ kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
+
+ if (kq == NULL) {
+ pr_err("amdkfd: Error getting DIQ\n");
+ pqm_destroy_queue(dbgdev->pqm, qid);
+ return -EFAULT;
+ }
+
+ dbgdev->kq = kq;
+
+ return status;
+}
+
+static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
+{
+ BUG_ON(!dbgdev || !dbgdev->dev);
+
+ /* disable watch address */
+ dbgdev_address_watch_disable_nodiq(dbgdev->dev);
+ return 0;
+}
+
+static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
+{
+ /* todo - disable address watch */
+ int status;
+
+ BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);
+
+ status = pqm_destroy_queue(dbgdev->pqm,
+ dbgdev->kq->queue->properties.queue_id);
+ dbgdev->kq = NULL;
+
+ return status;
+}
+
+static void dbgdev_address_watch_set_registers(
+ const struct dbg_address_watch_info *adw_info,
+ union TCP_WATCH_ADDR_H_BITS *addrHi,
+ union TCP_WATCH_ADDR_L_BITS *addrLo,
+ union TCP_WATCH_CNTL_BITS *cntl,
+ unsigned int index, unsigned int vmid)
+{
+ union ULARGE_INTEGER addr;
+
+ BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);
+
+ addr.quad_part = 0;
+ addrHi->u32All = 0;
+ addrLo->u32All = 0;
+ cntl->u32All = 0;
+
+ if (adw_info->watch_mask != NULL)
+ cntl->bitfields.mask =
+ (uint32_t) (adw_info->watch_mask[index] &
+ ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
+ else
+ cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
+
+ addr.quad_part = (unsigned long long) adw_info->watch_address[index];
+
+ addrHi->bitfields.addr = addr.u.high_part &
+ ADDRESS_WATCH_REG_ADDHIGH_MASK;
+ addrLo->bitfields.addr =
+ (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT);
+
+ cntl->bitfields.mode = adw_info->watch_mode[index];
+ cntl->bitfields.vmid = (uint32_t) vmid;
+ /* for now assume it is an ATC address */
+ cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT;
+
+ pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask);
+ pr_debug("\t\t%20s %08x\n", "set reg add high :",
+ addrHi->bitfields.addr);
+ pr_debug("\t\t%20s %08x\n", "set reg add low :",
+ addrLo->bitfields.addr);
+}
+
+static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
+ struct dbg_address_watch_info *adw_info)
+{
+ union TCP_WATCH_ADDR_H_BITS addrHi;
+ union TCP_WATCH_ADDR_L_BITS addrLo;
+ union TCP_WATCH_CNTL_BITS cntl;
+ struct kfd_process_device *pdd;
+ unsigned int i;
+
+ BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
+
+	/* take the vmid for that process the safe way, using the pdd */
+ pdd = kfd_get_process_device_data(dbgdev->dev,
+ adw_info->process);
+ if (!pdd) {
+ pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
+ return -EFAULT;
+ }
+
+ addrHi.u32All = 0;
+ addrLo.u32All = 0;
+ cntl.u32All = 0;
+
+ if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
+ (adw_info->num_watch_points == 0)) {
+ pr_err("amdkfd: num_watch_points is invalid\n");
+ return -EINVAL;
+ }
+
+ if ((adw_info->watch_mode == NULL) ||
+ (adw_info->watch_address == NULL)) {
+ pr_err("amdkfd: adw_info fields are not valid\n");
+ return -EINVAL;
+ }
+
+ for (i = 0 ; i < adw_info->num_watch_points ; i++) {
+ dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
+ &cntl, i, pdd->qpd.vmid);
+
+ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+ pr_debug("\t\t%20s %08x\n", "register index :", i);
+ pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid);
+ pr_debug("\t\t%20s %08x\n", "Address Low is :",
+ addrLo.bitfields.addr);
+ pr_debug("\t\t%20s %08x\n", "Address high is :",
+ addrHi.bitfields.addr);
+ pr_debug("\t\t%20s %08x\n", "Control Mask is :",
+ cntl.bitfields.mask);
+ pr_debug("\t\t%20s %08x\n", "Control Mode is :",
+ cntl.bitfields.mode);
+ pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
+ cntl.bitfields.vmid);
+ pr_debug("\t\t%20s %08x\n", "Control atc is :",
+ cntl.bitfields.atc);
+ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+
+ pdd->dev->kfd2kgd->address_watch_execute(
+ dbgdev->dev->kgd,
+ i,
+ cntl.u32All,
+ addrHi.u32All,
+ addrLo.u32All);
+ }
+
+ return 0;
+}
+
+static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
+ struct dbg_address_watch_info *adw_info)
+{
+ struct pm4__set_config_reg *packets_vec;
+ union TCP_WATCH_ADDR_H_BITS addrHi;
+ union TCP_WATCH_ADDR_L_BITS addrLo;
+ union TCP_WATCH_CNTL_BITS cntl;
+ struct kfd_mem_obj *mem_obj;
+ unsigned int aw_reg_add_dword;
+ uint32_t *packet_buff_uint;
+ unsigned int i;
+ int status;
+ size_t ib_size = sizeof(struct pm4__set_config_reg) * 4;
+	/* we do not control the vmid in DIQ mode, just a placeholder */
+ unsigned int vmid = 0;
+
+ BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
+
+ addrHi.u32All = 0;
+ addrLo.u32All = 0;
+ cntl.u32All = 0;
+
+ if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
+ (adw_info->num_watch_points == 0)) {
+ pr_err("amdkfd: num_watch_points is invalid\n");
+ return -EINVAL;
+ }
+
+ if ((NULL == adw_info->watch_mode) ||
+ (NULL == adw_info->watch_address)) {
+ pr_err("amdkfd: adw_info fields are not valid\n");
+ return -EINVAL;
+ }
+
+ status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
+
+ if (status != 0) {
+ pr_err("amdkfd: Failed to allocate GART memory\n");
+ return status;
+ }
+
+ packet_buff_uint = mem_obj->cpu_ptr;
+
+ memset(packet_buff_uint, 0, ib_size);
+
+ packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint);
+
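+	/*
+	 * Four SET_CONFIG_REG packets are built per watch point: CNTL (with
+	 * the VMID inserted by the CP), ADDR_HI, ADDR_LO and CNTL again with
+	 * the valid bit set once the addresses are programmed.
+	 */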
+ packets_vec[0].header.count = 1;
+ packets_vec[0].header.opcode = IT_SET_CONFIG_REG;
+ packets_vec[0].header.type = PM4_TYPE_3;
+ packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
+ packets_vec[0].bitfields2.insert_vmid = 1;
+ packets_vec[1].ordinal1 = packets_vec[0].ordinal1;
+ packets_vec[1].bitfields2.insert_vmid = 0;
+ packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
+ packets_vec[2].bitfields2.insert_vmid = 0;
+ packets_vec[3].ordinal1 = packets_vec[0].ordinal1;
+ packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET;
+ packets_vec[3].bitfields2.insert_vmid = 1;
+
+ for (i = 0; i < adw_info->num_watch_points; i++) {
+ dbgdev_address_watch_set_registers(adw_info,
+ &addrHi,
+ &addrLo,
+ &cntl,
+ i,
+ vmid);
+
+ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+ pr_debug("\t\t%20s %08x\n", "register index :", i);
+ pr_debug("\t\t%20s %08x\n", "vmid is :", vmid);
+ pr_debug("\t\t%20s %p\n", "Add ptr is :",
+ adw_info->watch_address);
+ pr_debug("\t\t%20s %08llx\n", "Add is :",
+ adw_info->watch_address[i]);
+ pr_debug("\t\t%20s %08x\n", "Address Low is :",
+ addrLo.bitfields.addr);
+ pr_debug("\t\t%20s %08x\n", "Address high is :",
+ addrHi.bitfields.addr);
+ pr_debug("\t\t%20s %08x\n", "Control Mask is :",
+ cntl.bitfields.mask);
+ pr_debug("\t\t%20s %08x\n", "Control Mode is :",
+ cntl.bitfields.mode);
+ pr_debug("\t\t%20s %08x\n", "Control Vmid is :",
+ cntl.bitfields.vmid);
+ pr_debug("\t\t%20s %08x\n", "Control atc is :",
+ cntl.bitfields.atc);
+ pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");
+
+ aw_reg_add_dword =
+ dbgdev->dev->kfd2kgd->address_watch_get_offset(
+ dbgdev->dev->kgd,
+ i,
+ ADDRESS_WATCH_REG_CNTL);
+
+ aw_reg_add_dword /= sizeof(uint32_t);
+
+ packets_vec[0].bitfields2.reg_offset =
+ aw_reg_add_dword - CONFIG_REG_BASE;
+
+ packets_vec[0].reg_data[0] = cntl.u32All;
+
+ aw_reg_add_dword =
+ dbgdev->dev->kfd2kgd->address_watch_get_offset(
+ dbgdev->dev->kgd,
+ i,
+ ADDRESS_WATCH_REG_ADDR_HI);
+
+ aw_reg_add_dword /= sizeof(uint32_t);
+
+ packets_vec[1].bitfields2.reg_offset =
+ aw_reg_add_dword - CONFIG_REG_BASE;
+ packets_vec[1].reg_data[0] = addrHi.u32All;
+
+ aw_reg_add_dword =
+ dbgdev->dev->kfd2kgd->address_watch_get_offset(
+ dbgdev->dev->kgd,
+ i,
+ ADDRESS_WATCH_REG_ADDR_LO);
+
+ aw_reg_add_dword /= sizeof(uint32_t);
+
+ packets_vec[2].bitfields2.reg_offset =
+ aw_reg_add_dword - CONFIG_REG_BASE;
+ packets_vec[2].reg_data[0] = addrLo.u32All;
+
+		/* enable watch flag if address is not zero */
+ if (adw_info->watch_address[i] > 0)
+ cntl.bitfields.valid = 1;
+ else
+ cntl.bitfields.valid = 0;
+
+ aw_reg_add_dword =
+ dbgdev->dev->kfd2kgd->address_watch_get_offset(
+ dbgdev->dev->kgd,
+ i,
+ ADDRESS_WATCH_REG_CNTL);
+
+ aw_reg_add_dword /= sizeof(uint32_t);
+
+ packets_vec[3].bitfields2.reg_offset =
+ aw_reg_add_dword - CONFIG_REG_BASE;
+ packets_vec[3].reg_data[0] = cntl.u32All;
+
+ status = dbgdev_diq_submit_ib(
+ dbgdev,
+ adw_info->process->pasid,
+ mem_obj->gpu_addr,
+ packet_buff_uint,
+ ib_size);
+
+ if (status != 0) {
+ pr_err("amdkfd: Failed to submit IB to DIQ\n");
+ break;
+ }
+ }
+
+ kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+ return status;
+}
+
+static int dbgdev_wave_control_set_registers(
+ struct dbg_wave_control_info *wac_info,
+ union SQ_CMD_BITS *in_reg_sq_cmd,
+ union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
+{
+	int status = 0;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
+ struct HsaDbgWaveMsgAMDGen2 *pMsg;
+
+ BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);
+
+ reg_sq_cmd.u32All = 0;
+ reg_gfx_index.u32All = 0;
+ pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
+
+ switch (wac_info->mode) {
+ /* Send command to single wave */
+ case HSA_DBG_WAVEMODE_SINGLE:
+ /*
+ * Limit access to the process waves only,
+ * by setting vmid check
+ */
+ reg_sq_cmd.bits.check_vmid = 1;
+ reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD;
+ reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId;
+ reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE;
+
+ reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
+ reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
+ reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
+
+ break;
+
+ /* Send command to all waves with matching VMID */
+ case HSA_DBG_WAVEMODE_BROADCAST_PROCESS:
+
+ reg_gfx_index.bits.sh_broadcast_writes = 1;
+ reg_gfx_index.bits.se_broadcast_writes = 1;
+ reg_gfx_index.bits.instance_broadcast_writes = 1;
+
+ reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+
+ break;
+
+ /* Send command to all CU waves with matching VMID */
+ case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU:
+
+ reg_sq_cmd.bits.check_vmid = 1;
+ reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
+
+ reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray;
+ reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine;
+ reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU;
+
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ switch (wac_info->operand) {
+ case HSA_DBG_WAVEOP_HALT:
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT;
+ break;
+
+ case HSA_DBG_WAVEOP_RESUME:
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME;
+ break;
+
+ case HSA_DBG_WAVEOP_KILL:
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
+ break;
+
+ case HSA_DBG_WAVEOP_DEBUG:
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG;
+ break;
+
+ case HSA_DBG_WAVEOP_TRAP:
+ if (wac_info->trapId < MAX_TRAPID) {
+ reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP;
+ reg_sq_cmd.bits.trap_id = wac_info->trapId;
+ } else {
+ status = -EINVAL;
+ }
+ break;
+
+ default:
+ status = -EINVAL;
+ break;
+ }
+
+ if (status == 0) {
+ *in_reg_sq_cmd = reg_sq_cmd;
+ *in_reg_gfx_index = reg_gfx_index;
+ }
+
+ return status;
+}
+
+static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
+ struct dbg_wave_control_info *wac_info)
+{
+
+ int status;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
+ struct kfd_mem_obj *mem_obj;
+ uint32_t *packet_buff_uint;
+ struct pm4__set_config_reg *packets_vec;
+ size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
+
+ BUG_ON(!dbgdev || !wac_info);
+
+ reg_sq_cmd.u32All = 0;
+
+ status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
+ &reg_gfx_index);
+ if (status) {
+ pr_err("amdkfd: Failed to set wave control registers\n");
+ return status;
+ }
+
+	/* we do not control the VMID in DIQ, so reset it to a known value */
+ reg_sq_cmd.bits.vm_id = 0;
+
+ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+ pr_debug("\t\t mode is: %u\n", wac_info->mode);
+ pr_debug("\t\t operand is: %u\n", wac_info->operand);
+ pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
+ pr_debug("\t\t msg value is: %u\n",
+ wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+ pr_debug("\t\t vmid is: N/A\n");
+
+ pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
+ pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
+ pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
+ pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
+ pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
+ pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
+ pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
+
+ pr_debug("\t\t ibw is : %u\n",
+ reg_gfx_index.bitfields.instance_broadcast_writes);
+ pr_debug("\t\t ii is : %u\n",
+ reg_gfx_index.bitfields.instance_index);
+ pr_debug("\t\t sebw is : %u\n",
+ reg_gfx_index.bitfields.se_broadcast_writes);
+ pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
+ pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
+ pr_debug("\t\t sbw is : %u\n",
+ reg_gfx_index.bitfields.sh_broadcast_writes);
+
+ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+ status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
+
+ if (status != 0) {
+ pr_err("amdkfd: Failed to allocate GART memory\n");
+ return status;
+ }
+
+ packet_buff_uint = mem_obj->cpu_ptr;
+
+ memset(packet_buff_uint, 0, ib_size);
+
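+	/*
+	 * The IB carries three packets: select the target waves through
+	 * GRBM_GFX_INDEX, issue SQ_CMD (with the VMID inserted by the CP),
+	 * then restore GRBM_GFX_INDEX to broadcast mode.
+	 */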
+ packets_vec = (struct pm4__set_config_reg *) packet_buff_uint;
+ packets_vec[0].header.count = 1;
+ packets_vec[0].header.opcode = IT_SET_UCONFIG_REG;
+ packets_vec[0].header.type = PM4_TYPE_3;
+ packets_vec[0].bitfields2.reg_offset =
+ GRBM_GFX_INDEX / (sizeof(uint32_t)) -
+ USERCONFIG_REG_BASE;
+
+ packets_vec[0].bitfields2.insert_vmid = 0;
+ packets_vec[0].reg_data[0] = reg_gfx_index.u32All;
+
+ packets_vec[1].header.count = 1;
+ packets_vec[1].header.opcode = IT_SET_CONFIG_REG;
+ packets_vec[1].header.type = PM4_TYPE_3;
+ packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) -
+ CONFIG_REG_BASE;
+
+ packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET;
+ packets_vec[1].bitfields2.insert_vmid = 1;
+ packets_vec[1].reg_data[0] = reg_sq_cmd.u32All;
+
+ /* Restore the GRBM_GFX_INDEX register */
+
+ reg_gfx_index.u32All = 0;
+ reg_gfx_index.bits.sh_broadcast_writes = 1;
+ reg_gfx_index.bits.instance_broadcast_writes = 1;
+ reg_gfx_index.bits.se_broadcast_writes = 1;
+
+
+ packets_vec[2].ordinal1 = packets_vec[0].ordinal1;
+ packets_vec[2].bitfields2.reg_offset =
+ GRBM_GFX_INDEX / (sizeof(uint32_t)) -
+ USERCONFIG_REG_BASE;
+
+ packets_vec[2].bitfields2.insert_vmid = 0;
+ packets_vec[2].reg_data[0] = reg_gfx_index.u32All;
+
+ status = dbgdev_diq_submit_ib(
+ dbgdev,
+ wac_info->process->pasid,
+ mem_obj->gpu_addr,
+ packet_buff_uint,
+ ib_size);
+
+ if (status != 0)
+ pr_err("amdkfd: Failed to submit IB to DIQ\n");
+
+ kfd_gtt_sa_free(dbgdev->dev, mem_obj);
+
+ return status;
+}
+
+static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
+ struct dbg_wave_control_info *wac_info)
+{
+ int status;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
+ struct kfd_process_device *pdd;
+
+ BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);
+
+ reg_sq_cmd.u32All = 0;
+
+	/* take the VMID for that process the safe way, using the PDD */
+ pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
+
+ if (!pdd) {
+ pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
+ return -EFAULT;
+ }
+ status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
+ &reg_gfx_index);
+ if (status) {
+ pr_err("amdkfd: Failed to set wave control registers\n");
+ return status;
+ }
+
+ /* for non DIQ we need to patch the VMID: */
+
+ reg_sq_cmd.bits.vm_id = pdd->qpd.vmid;
+
+ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+ pr_debug("\t\t mode is: %u\n", wac_info->mode);
+ pr_debug("\t\t operand is: %u\n", wac_info->operand);
+ pr_debug("\t\t trap id is: %u\n", wac_info->trapId);
+ pr_debug("\t\t msg value is: %u\n",
+ wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
+ pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid);
+
+ pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid);
+ pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd);
+ pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id);
+ pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id);
+ pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode);
+ pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id);
+ pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id);
+
+ pr_debug("\t\t ibw is : %u\n",
+ reg_gfx_index.bitfields.instance_broadcast_writes);
+ pr_debug("\t\t ii is : %u\n",
+ reg_gfx_index.bitfields.instance_index);
+ pr_debug("\t\t sebw is : %u\n",
+ reg_gfx_index.bitfields.se_broadcast_writes);
+ pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index);
+ pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index);
+ pr_debug("\t\t sbw is : %u\n",
+ reg_gfx_index.bitfields.sh_broadcast_writes);
+
+ pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
+
+ return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd,
+ reg_gfx_index.u32All,
+ reg_sq_cmd.u32All);
+}
+
+int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
+{
+ int status = 0;
+ unsigned int vmid;
+ union SQ_CMD_BITS reg_sq_cmd;
+ union GRBM_GFX_INDEX_BITS reg_gfx_index;
+ struct kfd_process_device *pdd;
+ struct dbg_wave_control_info wac_info;
+ int temp;
+ int first_vmid_to_scan = 8;
+ int last_vmid_to_scan = 15;
+
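+	/*
+	 * Derive the range of compute VMIDs (8..15 by default) from the
+	 * compute_vmid_bitmap in the shared resources.
+	 */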
+ first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
+ temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
+ last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
+
+ reg_sq_cmd.u32All = 0;
+ status = 0;
+
+ wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS;
+ wac_info.operand = HSA_DBG_WAVEOP_KILL;
+
+ pr_debug("Killing all process wavefronts\n");
+
+ /* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
+ * ATC_VMID15_PASID_MAPPING
+ * to check which VMID the current process is mapped to. */
+
+ for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
+ if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
+ (dev->kgd, vmid)) {
+			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
+					(dev->kgd, vmid) == p->pasid) {
+ pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
+ vmid, p->pasid);
+ break;
+ }
+ }
+ }
+
+ if (vmid > last_vmid_to_scan) {
+		pr_err("amdkfd: didn't find vmid for pasid (%d)\n", p->pasid);
+ return -EFAULT;
+ }
+
+	/* take the VMID for that process the safe way, using the PDD */
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd)
+ return -EFAULT;
+
+ status = dbgdev_wave_control_set_registers(&wac_info, &reg_sq_cmd,
+ &reg_gfx_index);
+ if (status != 0)
+ return -EINVAL;
+
+ /* for non DIQ we need to patch the VMID: */
+ reg_sq_cmd.bits.vm_id = vmid;
+
+ dev->kfd2kgd->wave_control_execute(dev->kgd,
+ reg_gfx_index.u32All,
+ reg_sq_cmd.u32All);
+
+ return 0;
+}
+
+void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
+ enum DBGDEV_TYPE type)
+{
+ BUG_ON(!pdbgdev || !pdev);
+
+ pdbgdev->dev = pdev;
+ pdbgdev->kq = NULL;
+ pdbgdev->type = type;
+ pdbgdev->pqm = NULL;
+
+ switch (type) {
+ case DBGDEV_TYPE_NODIQ:
+ pdbgdev->dbgdev_register = dbgdev_register_nodiq;
+ pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq;
+ pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq;
+ pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq;
+ break;
+ case DBGDEV_TYPE_DIQ:
+ default:
+ pdbgdev->dbgdev_register = dbgdev_register_diq;
+ pdbgdev->dbgdev_unregister = dbgdev_unregister_diq;
+ pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq;
+ pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq;
+ break;
+ }
+
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
new file mode 100644
index 000000000000..4b0dd5aa5306
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef KFD_DBGDEV_H_
+#define KFD_DBGDEV_H_
+
+enum {
+ SQ_CMD_VMID_OFFSET = 28,
+ ADDRESS_WATCH_CNTL_OFFSET = 24
+};
+
+enum {
+ PRIV_QUEUE_SYNC_TIME_MS = 200
+};
+
+/* CONTEXT reg space definition */
+enum {
+ CONTEXT_REG_BASE = 0xA000,
+ CONTEXT_REG_END = 0xA400,
+ CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE
+};
+
+/* USER CONFIG reg space definition */
+enum {
+ USERCONFIG_REG_BASE = 0xC000,
+ USERCONFIG_REG_END = 0x10000,
+ USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE
+};
+
+/* CONFIG reg space definition */
+enum {
+ CONFIG_REG_BASE = 0x2000, /* in dwords */
+ CONFIG_REG_END = 0x2B00,
+ CONFIG_REG_SIZE = CONFIG_REG_END - CONFIG_REG_BASE
+};
+
+/* SH reg space definition */
+enum {
+ SH_REG_BASE = 0x2C00,
+ SH_REG_END = 0x3000,
+ SH_REG_SIZE = SH_REG_END - SH_REG_BASE
+};
+
+enum SQ_IND_CMD_CMD {
+ SQ_IND_CMD_CMD_NULL = 0x00000000,
+ SQ_IND_CMD_CMD_HALT = 0x00000001,
+ SQ_IND_CMD_CMD_RESUME = 0x00000002,
+ SQ_IND_CMD_CMD_KILL = 0x00000003,
+ SQ_IND_CMD_CMD_DEBUG = 0x00000004,
+ SQ_IND_CMD_CMD_TRAP = 0x00000005,
+};
+
+enum SQ_IND_CMD_MODE {
+ SQ_IND_CMD_MODE_SINGLE = 0x00000000,
+ SQ_IND_CMD_MODE_BROADCAST = 0x00000001,
+ SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002,
+ SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003,
+ SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004,
+};
+
+union SQ_IND_INDEX_BITS {
+ struct {
+ uint32_t wave_id:4;
+ uint32_t simd_id:2;
+ uint32_t thread_id:6;
+ uint32_t:1;
+ uint32_t force_read:1;
+ uint32_t read_timeout:1;
+ uint32_t unindexed:1;
+ uint32_t index:16;
+
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union SQ_IND_CMD_BITS {
+ struct {
+ uint32_t data:32;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union SQ_CMD_BITS {
+ struct {
+ uint32_t cmd:3;
+ uint32_t:1;
+ uint32_t mode:3;
+ uint32_t check_vmid:1;
+ uint32_t trap_id:3;
+ uint32_t:5;
+ uint32_t wave_id:4;
+ uint32_t simd_id:2;
+ uint32_t:2;
+ uint32_t queue_id:3;
+ uint32_t:1;
+ uint32_t vm_id:4;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union SQ_IND_DATA_BITS {
+ struct {
+ uint32_t data:32;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union GRBM_GFX_INDEX_BITS {
+ struct {
+ uint32_t instance_index:8;
+ uint32_t sh_index:8;
+ uint32_t se_index:8;
+ uint32_t:5;
+ uint32_t sh_broadcast_writes:1;
+ uint32_t instance_broadcast_writes:1;
+ uint32_t se_broadcast_writes:1;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union TCP_WATCH_ADDR_H_BITS {
+ struct {
+ uint32_t addr:16;
+ uint32_t:16;
+
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+union TCP_WATCH_ADDR_L_BITS {
+ struct {
+ uint32_t:6;
+ uint32_t addr:26;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
+enum {
+ QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */
+ QUEUESTATE__ACTIVE_COMPLETION_PENDING,
+ QUEUESTATE__ACTIVE
+};
+
+union ULARGE_INTEGER {
+ struct {
+ uint32_t low_part;
+ uint32_t high_part;
+ } u;
+ unsigned long long quad_part;
+};
+
+
+#define KFD_CIK_VMID_START_OFFSET (8)
+#define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8))
+
+
+void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
+ enum DBGDEV_TYPE type);
+
+#endif /* KFD_DBGDEV_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
new file mode 100644
index 000000000000..56d676396342
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+
+#include "kfd_priv.h"
+#include "cik_regs.h"
+#include "kfd_pm4_headers.h"
+#include "kfd_pm4_headers_diq.h"
+#include "kfd_dbgmgr.h"
+#include "kfd_dbgdev.h"
+
+static DEFINE_MUTEX(kfd_dbgmgr_mutex);
+
+struct mutex *kfd_get_dbgmgr_mutex(void)
+{
+ return &kfd_dbgmgr_mutex;
+}
+
+
+static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
+{
+ BUG_ON(!pmgr);
+
+ kfree(pmgr->dbgdev);
+
+ pmgr->dbgdev = NULL;
+ pmgr->pasid = 0;
+ pmgr->dev = NULL;
+}
+
+void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
+{
+ if (pmgr != NULL) {
+ kfd_dbgmgr_uninitialize(pmgr);
+ kfree(pmgr);
+ }
+}
+
+bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
+{
+ enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
+ struct kfd_dbgmgr *new_buff;
+
+ BUG_ON(pdev == NULL);
+ BUG_ON(!pdev->init_complete);
+
+ new_buff = kfd_alloc_struct(new_buff);
+ if (!new_buff) {
+ pr_err("amdkfd: Failed to allocate dbgmgr instance\n");
+ return false;
+ }
+
+ new_buff->pasid = 0;
+ new_buff->dev = pdev;
+ new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev);
+ if (!new_buff->dbgdev) {
+ pr_err("amdkfd: Failed to allocate dbgdev instance\n");
+ kfree(new_buff);
+ return false;
+ }
+
+	/* determine the actual dbgdev type: use no-DIQ when HW scheduling is disabled */
+ if (sched_policy == KFD_SCHED_POLICY_NO_HWS)
+ type = DBGDEV_TYPE_NODIQ;
+
+ kfd_dbgdev_init(new_buff->dbgdev, pdev, type);
+ *ppmgr = new_buff;
+
+ return true;
+}
+
+long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
+{
+ BUG_ON(!p || !pmgr || !pmgr->dbgdev);
+
+ if (pmgr->pasid != 0) {
+ pr_debug("H/W debugger is already active using pasid %d\n",
+ pmgr->pasid);
+ return -EBUSY;
+ }
+
+ /* remember pasid */
+ pmgr->pasid = p->pasid;
+
+ /* provide the pqm for diq generation */
+ pmgr->dbgdev->pqm = &p->pqm;
+
+ /* activate the actual registering */
+ pmgr->dbgdev->dbgdev_register(pmgr->dbgdev);
+
+ return 0;
+}
+
+long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
+{
+ BUG_ON(!p || !pmgr || !pmgr->dbgdev);
+
+	/* Is the request coming from the already registered process? */
+ if (pmgr->pasid != p->pasid) {
+ pr_debug("H/W debugger is not registered by calling pasid %d\n",
+ p->pasid);
+ return -EINVAL;
+ }
+
+ pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev);
+
+ pmgr->pasid = 0;
+
+ return 0;
+}
+
+long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
+ struct dbg_wave_control_info *wac_info)
+{
+ BUG_ON(!pmgr || !pmgr->dbgdev || !wac_info);
+
+	/* Is the request coming from the already registered process? */
+ if (pmgr->pasid != wac_info->process->pasid) {
+ pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+ wac_info->process->pasid);
+ return -EINVAL;
+ }
+
+ return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info);
+}
+
+long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
+ struct dbg_address_watch_info *adw_info)
+{
+ BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info);
+
+	/* Is the request coming from the already registered process? */
+ if (pmgr->pasid != adw_info->process->pasid) {
+ pr_debug("H/W debugger support was not registered for requester pasid %d\n",
+ adw_info->process->pasid);
+ return -EINVAL;
+ }
+
+ return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev,
+ adw_info);
+}
+
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
new file mode 100644
index 000000000000..257a745ad0b5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
@@ -0,0 +1,294 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_DBGMGR_H_
+#define KFD_DBGMGR_H_
+
+#include "kfd_priv.h"
+
+/* must align with hsakmttypes definition */
+#pragma pack(push, 4)
+
+enum HSA_DBG_WAVEOP {
+ HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */
+ HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */
+ HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */
+ HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter
+ debug mode */
+ HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take
+ a trap */
+ HSA_DBG_NUM_WAVEOP = 5,
+ HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
+};
+
+enum HSA_DBG_WAVEMODE {
+ /* send command to a single wave */
+ HSA_DBG_WAVEMODE_SINGLE = 0,
+ /*
+ * Broadcast to all wavefronts of all processes is not
+ * supported for HSA user mode
+ */
+
+ /* send to waves within current process */
+ HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2,
+ /* send to waves within current process on CU */
+ HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,
+ HSA_DBG_NUM_WAVEMODE = 3,
+ HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF
+};
+
+enum HSA_DBG_WAVEMSG_TYPE {
+ HSA_DBG_WAVEMSG_AUTO = 0,
+ HSA_DBG_WAVEMSG_USER = 1,
+ HSA_DBG_WAVEMSG_ERROR = 2,
+ HSA_DBG_NUM_WAVEMSG,
+ HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF
+};
+
+enum HSA_DBG_WATCH_MODE {
+ HSA_DBG_WATCH_READ = 0, /* Read operations only */
+ HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */
+ HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */
+ HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */
+ HSA_DBG_WATCH_NUM,
+ HSA_DBG_WATCH_SIZE = 0xFFFFFFFF
+};
+
+/* This structure is hardware specific and may change in the future */
+struct HsaDbgWaveMsgAMDGen2 {
+ union {
+ struct ui32 {
+ uint32_t UserData:8; /* user data */
+ uint32_t ShaderArray:1; /* Shader array */
+ uint32_t Priv:1; /* Privileged */
+ uint32_t Reserved0:4; /* This field is reserved,
+ should be 0 */
+ uint32_t WaveId:4; /* wave id */
+ uint32_t SIMD:2; /* SIMD id */
+ uint32_t HSACU:4; /* Compute unit */
+ uint32_t ShaderEngine:2;/* Shader engine */
+ uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */
+ uint32_t Reserved1:4; /* This field is reserved,
+ should be 0 */
+ } ui32;
+ uint32_t Value;
+ };
+ uint32_t Reserved2;
+};
+
+union HsaDbgWaveMessageAMD {
+ struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2;
+ /* for future HsaDbgWaveMsgAMDGen3; */
+};
+
+struct HsaDbgWaveMessage {
+ void *MemoryVA; /* ptr to associated host-accessible data */
+ union HsaDbgWaveMessageAMD DbgWaveMsg;
+};
+
+/*
+ * TODO: These definitions should be MOVED to kfd_event, once it is implemented.
+ *
+ * HSA sync primitive, Event and HW Exception notification API definitions.
+ * The API functions allow the runtime to define a so-called sync-primitive,
+ * a SW object combining a user-mode provided "syncvar" and a scheduler event
+ * that can be signaled through a defined GPU interrupt. A syncvar is
+ * a process virtual memory location of a certain size that can be accessed
+ * by CPU and GPU shader code within the process to set and query the content
+ * within that memory. The definition of the content is determined by the HSA
+ * runtime and potentially GPU shader code interfacing with the HSA runtime.
+ * The syncvar values may be commonly written through a PM4 WRITE_DATA packet
+ * in the user mode instruction stream. The OS scheduler event is typically
+ * associated and signaled by an interrupt issued by the GPU, but other HSA
+ * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced
+ * by the KFD by this mechanism, too. */
+
+/* these are the new definitions for events */
+enum HSA_EVENTTYPE {
+ HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */
+ HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */
+ HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change
+ (start/stop) */
+ HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */
+ HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */
+ HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */
+ HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */
+ HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state
+ (EOP pm4) */
+ /* ... */
+ HSA_EVENTTYPE_MAXID,
+ HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
+};
+
+/* Sub-definitions for various event types: Syncvar */
+struct HsaSyncVar {
+ union SyncVar {
+ void *UserData; /* pointer to user mode data */
+ uint64_t UserDataPtrValue; /* 64bit compatibility of value */
+ } SyncVar;
+ uint64_t SyncVarSize;
+};
+
+/* Sub-definitions for various event types: NodeChange */
+
+enum HSA_EVENTTYPE_NODECHANGE_FLAGS {
+ HSA_EVENTTYPE_NODECHANGE_ADD = 0,
+ HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
+ HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF
+};
+
+struct HsaNodeChange {
+ /* HSA node added/removed on the platform */
+ enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;
+};
+
+/* Sub-definitions for various event types: DeviceStateChange */
+enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS {
+ /* device started (and available) */
+ HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0,
+ /* device stopped (i.e. unavailable) */
+ HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1,
+ HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF
+};
+
+enum HSA_DEVICE {
+ HSA_DEVICE_CPU = 0,
+ HSA_DEVICE_GPU = 1,
+ MAX_HSA_DEVICE = 2
+};
+
+struct HsaDeviceStateChange {
+ uint32_t NodeId; /* F-NUMA node that contains the device */
+ enum HSA_DEVICE Device; /* device type: GPU or CPU */
+ enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */
+};
+
+struct HsaEventData {
+ enum HSA_EVENTTYPE EventType; /* event type */
+ union EventData {
+ /*
+ * return data associated with HSA_EVENTTYPE_SIGNAL
+ * and other events
+ */
+ struct HsaSyncVar SyncVar;
+
+ /* data associated with HSA_EVENTTYPE_NODE_CHANGE */
+ struct HsaNodeChange NodeChangeState;
+
+ /* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */
+ struct HsaDeviceStateChange DeviceState;
+ } EventData;
+
+ /* the following data entries are internal to the KFD & thunk itself */
+
+ /* internal thunk store for Event data (OsEventHandle) */
+ uint64_t HWData1;
+ /* internal thunk store for Event data (HWAddress) */
+ uint64_t HWData2;
+ /* internal thunk store for Event data (HWData) */
+ uint32_t HWData3;
+};
+
+struct HsaEventDescriptor {
+ /* event type to allocate */
+ enum HSA_EVENTTYPE EventType;
+ /* H-NUMA node containing GPU device that is event source */
+ uint32_t NodeId;
+ /* pointer to user mode syncvar data, syncvar->UserDataPtrValue
+ * may be NULL
+ */
+ struct HsaSyncVar SyncVar;
+};
+
+struct HsaEvent {
+ uint32_t EventId;
+ struct HsaEventData EventData;
+};
+
+#pragma pack(pop)
+
+enum DBGDEV_TYPE {
+ DBGDEV_TYPE_ILLEGAL = 0,
+ DBGDEV_TYPE_NODIQ = 1,
+ DBGDEV_TYPE_DIQ = 2,
+ DBGDEV_TYPE_TEST = 3
+};
+
+struct dbg_address_watch_info {
+ struct kfd_process *process;
+ enum HSA_DBG_WATCH_MODE *watch_mode;
+ uint64_t *watch_address;
+ uint64_t *watch_mask;
+ struct HsaEvent *watch_event;
+ uint32_t num_watch_points;
+};
+
+struct dbg_wave_control_info {
+ struct kfd_process *process;
+ uint32_t trapId;
+ enum HSA_DBG_WAVEOP operand;
+ enum HSA_DBG_WAVEMODE mode;
+ struct HsaDbgWaveMessage dbgWave_msg;
+};
+
+struct kfd_dbgdev {
+
+ /* The device that owns this data. */
+ struct kfd_dev *dev;
+
+ /* kernel queue for DIQ */
+ struct kernel_queue *kq;
+
+ /* a pointer to the pqm of the calling process */
+ struct process_queue_manager *pqm;
+
+ /* type of debug device ( DIQ, non DIQ, etc. ) */
+ enum DBGDEV_TYPE type;
+
+ /* virtualized function pointers to device dbg */
+ int (*dbgdev_register)(struct kfd_dbgdev *dbgdev);
+ int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev);
+ int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev,
+ struct dbg_address_watch_info *adw_info);
+ int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev,
+ struct dbg_wave_control_info *wac_info);
+
+};
+
+struct kfd_dbgmgr {
+ unsigned int pasid;
+ struct kfd_dev *dev;
+ struct kfd_dbgdev *dbgdev;
+};
+
+/* prototypes for debug manager functions */
+struct mutex *kfd_get_dbgmgr_mutex(void);
+void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr);
+bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev);
+long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
+long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p);
+long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
+ struct dbg_wave_control_info *wac_info);
+long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
+ struct dbg_address_watch_info *adw_info);
+#endif /* KFD_DBGMGR_H_ */
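
The prototypes above are the whole surface the ioctl layer needs. A minimal sketch of the registration path (illustrative only; the real code lives in kfd_chardev.c, and mapping a failed create to -ENOMEM is an assumption here):

static long example_dbg_register(struct kfd_dev *dev, struct kfd_process *p)
{
	long status = 0;

	mutex_lock(kfd_get_dbgmgr_mutex());

	if (dev->dbgmgr == NULL) {
		/* first debugger attaching to this device */
		if (!kfd_dbgmgr_create(&dev->dbgmgr, dev)) {
			status = -ENOMEM;
			goto out;
		}
	}

	status = kfd_dbgmgr_register(dev->dbgmgr, p);
out:
	mutex_unlock(kfd_get_dbgmgr_mutex());
	return status;
}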
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 52cab0f53ebc..1d1e2e952a79 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -33,8 +33,11 @@
static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
.max_pasid_bits = 16,
+ /* max num of queues for KV. TODO: should be a dynamic value */
+ .max_no_of_hqd = 24,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
+ .num_of_watch_points = 4,
.mqd_size_aligned = MQD_SIZE_ALIGNED
};
@@ -294,6 +297,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
goto dqm_start_error;
}
+ kfd->dbgmgr = NULL;
+
kfd->init_complete = true;
dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
kfd->pdev->device);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 4e215bd4d41f..547b0a589693 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -45,7 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+ bool preempt_static_queues, bool lock);
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
@@ -775,7 +776,7 @@ static int stop_cpsch(struct device_queue_manager *dqm)
BUG_ON(!dqm);
- destroy_queues_cpsch(dqm, true);
+ destroy_queues_cpsch(dqm, true, true);
list_for_each_entry(node, &dqm->queues, list) {
pdd = qpd_to_pdd(node->qpd);
@@ -829,7 +830,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
pr_debug("kfd: In %s\n", __func__);
mutex_lock(&dqm->lock);
- destroy_queues_cpsch(dqm, false);
+ /* here we actually preempt the DIQ */
+ destroy_queues_cpsch(dqm, true, false);
list_del(&kq->list);
dqm->queue_count--;
qpd->is_debug = false;
@@ -913,7 +915,7 @@ out:
return retval;
}
-static int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
+int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
unsigned int fence_value,
unsigned long timeout)
{
@@ -935,13 +937,16 @@ static int destroy_sdma_queues(struct device_queue_manager *dqm,
unsigned int sdma_engine)
{
return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
- KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
+ KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
sdma_engine);
}
-static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
+static int destroy_queues_cpsch(struct device_queue_manager *dqm,
+ bool preempt_static_queues, bool lock)
{
int retval;
+ enum kfd_preempt_type_filter preempt_type;
+ struct kfd_process *p;
BUG_ON(!dqm);
@@ -960,8 +965,12 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
destroy_sdma_queues(dqm, 1);
}
+ preempt_type = preempt_static_queues ?
+ KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
+ KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;
+
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
- KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
+ preempt_type, 0, false, 0);
if (retval != 0)
goto out;
@@ -969,8 +978,13 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
KFD_FENCE_COMPLETED);
/* should be timed out */
- amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
+ retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
+ if (retval != 0) {
+ p = kfd_get_process(current);
+ p->reset_wavefronts = true;
+ goto out;
+ }
pm_release_ib(&dqm->packets);
dqm->active_runlist = false;
@@ -989,7 +1003,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
if (lock)
mutex_lock(&dqm->lock);
- retval = destroy_queues_cpsch(dqm, false);
+ retval = destroy_queues_cpsch(dqm, false, false);
if (retval != 0) {
pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
goto out;
@@ -1024,13 +1038,27 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
{
int retval;
struct mqd_manager *mqd;
+ bool preempt_all_queues;
BUG_ON(!dqm || !qpd || !q);
+ preempt_all_queues = false;
+
retval = 0;
/* remove queue from list to prevent rescheduling after preemption */
mutex_lock(&dqm->lock);
+
+ if (qpd->is_debug) {
+ /*
+ * error: we do not allow destroying a queue that belongs to a
+ * process which is currently being debugged
+ */
+ retval = -EBUSY;
+ goto failed_try_destroy_debugged_queue;
+
+ }
+
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd) {
@@ -1062,6 +1090,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
return 0;
failed:
+failed_try_destroy_debugged_queue:
+
mutex_unlock(&dqm->lock);
return retval;
}
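
amdkfd_fence_wait_timeout() loses its static qualifier above so the debug path can reuse it. Conceptually it is a polled wait on a fence word written back by the CP once the preemption finishes; a rough sketch of that shape (illustrative only, the parameter units are inferred from the QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS call site and the real helper also yields the CPU):

static int example_fence_poll(unsigned int *fence_addr,
			      unsigned int fence_value,
			      unsigned long timeout_ms)
{
	unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout_ms);

	while (READ_ONCE(*fence_addr) != fence_value) {
		if (time_after(jiffies, end_jiffies))
			return -ETIME;
		cpu_relax();
	}

	return 0;
}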
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 57278e2d72e0..ec4036a09f3e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -88,9 +88,11 @@ struct device_queue_manager_ops {
struct queue *q,
struct qcm_process_device *qpd,
int *allocate_vmid);
+
int (*destroy_queue)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q);
+
int (*update_queue)(struct device_queue_manager *dqm,
struct queue *q);
@@ -100,8 +102,10 @@ struct device_queue_manager_ops {
int (*register_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
+
int (*unregister_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
+
int (*initialize)(struct device_queue_manager *dqm);
int (*start)(struct device_queue_manager *dqm);
int (*stop)(struct device_queue_manager *dqm);
@@ -109,9 +113,11 @@ struct device_queue_manager_ops {
int (*create_kernel_queue)(struct device_queue_manager *dqm,
struct kernel_queue *kq,
struct qcm_process_device *qpd);
+
void (*destroy_kernel_queue)(struct device_queue_manager *dqm,
struct kernel_queue *kq,
struct qcm_process_device *qpd);
+
bool (*set_cache_memory_policy)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index e2533d875f43..99b6d28a11c3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -163,7 +163,7 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
num_queues = 0;
list_for_each_entry(cur, &qpd->queues_list, list)
num_queues++;
- packet->bitfields10.num_queues = num_queues;
+ packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues;
packet->sh_mem_config = qpd->sh_mem_config;
packet->sh_mem_bases = qpd->sh_mem_bases;
@@ -177,9 +177,10 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
}
static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
- struct queue *q)
+ struct queue *q, bool is_static)
{
struct pm4_map_queues *packet;
+ bool use_static = is_static;
BUG_ON(!pm || !buffer || !q);
@@ -209,6 +210,7 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
case KFD_QUEUE_TYPE_SDMA:
packet->bitfields2.engine_sel =
engine_sel__mes_map_queues__sdma0;
+ use_static = false; /* no static queues under SDMA */
break;
default:
BUG();
@@ -218,6 +220,9 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset =
q->properties.doorbell_off;
+ packet->mes_map_queues_ordinals[0].bitfields3.is_static =
+ (use_static == true) ? 1 : 0;
+
packet->mes_map_queues_ordinals[0].mqd_addr_lo =
lower_32_bits(q->gart_mqd_addr);
@@ -271,9 +276,11 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pm_release_ib(pm);
return -ENOMEM;
}
+
retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
if (retval != 0)
return retval;
+
proccesses_mapped++;
inc_wptr(&rl_wptr, sizeof(struct pm4_map_process),
alloc_size_bytes);
@@ -281,23 +288,36 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
list_for_each_entry(kq, &qpd->priv_queue_list, list) {
if (kq->queue->properties.is_active != true)
continue;
+
+ pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
+ kq->queue->queue, qpd->is_debug);
+
retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
- kq->queue);
+ kq->queue, qpd->is_debug);
if (retval != 0)
return retval;
- inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
- alloc_size_bytes);
+
+ inc_wptr(&rl_wptr,
+ sizeof(struct pm4_map_queues),
+ alloc_size_bytes);
}
list_for_each_entry(q, &qpd->queues_list, list) {
if (q->properties.is_active != true)
continue;
- retval = pm_create_map_queue(pm,
- &rl_buffer[rl_wptr], q);
+
+ pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
+ q->queue, qpd->is_debug);
+
+ retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
+ q, qpd->is_debug);
+
if (retval != 0)
return retval;
- inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues),
- alloc_size_bytes);
+
+ inc_wptr(&rl_wptr,
+ sizeof(struct pm4_map_queues),
+ alloc_size_bytes);
}
}
@@ -488,7 +508,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
packet = (struct pm4_unmap_queues *)buffer;
memset(buffer, 0, sizeof(struct pm4_unmap_queues));
-
+ pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n",
+ mode, reset, type);
packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES,
sizeof(struct pm4_unmap_queues));
switch (type) {
@@ -529,6 +550,11 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
packet->bitfields2.queue_sel =
queue_sel__mes_unmap_queues__perform_request_on_all_active_queues;
break;
+ case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES:
+ /* in this case, we do not preempt static queues */
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only;
+ break;
default:
BUG();
break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
index 071ad5724bd2..5b393f3e34a9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
@@ -237,7 +237,8 @@ struct pm4_map_queues {
struct {
union {
struct {
- uint32_t reserved5:2;
+ uint32_t is_static:1;
+ uint32_t reserved5:1;
uint32_t doorbell_offset:21;
uint32_t reserved6:3;
uint32_t queue:6;
@@ -328,7 +329,8 @@ enum unmap_queues_action_enum {
enum unmap_queues_queue_sel_enum {
queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
- queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2
+ queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2,
+ queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3
};
enum unmap_queues_engine_sel_enum {
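
The pm4_map_queues change above carves the new is_static flag out of the old two-bit reserved field, so the dword layout is unchanged: 1 (is_static) + 1 (reserved5) + 21 (doorbell_offset) + 3 (reserved6) + 6 (queue) still sums to 32 bits, and the packet size the firmware sees stays the same.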
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
new file mode 100644
index 000000000000..a0ff34878163
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
@@ -0,0 +1,290 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef KFD_PM4_HEADERS_DIQ_H_
+#define KFD_PM4_HEADERS_DIQ_H_
+
+/*--------------------_INDIRECT_BUFFER-------------------- */
+
+#ifndef _PM4__INDIRECT_BUFFER_DEFINED
+#define _PM4__INDIRECT_BUFFER_DEFINED
+enum _INDIRECT_BUFFER_cache_policy_enum {
+ cache_policy___indirect_buffer__lru = 0,
+ cache_policy___indirect_buffer__stream = 1,
+ cache_policy___indirect_buffer__bypass = 2
+};
+
+enum {
+ IT_INDIRECT_BUFFER_PASID = 0x5C
+};
+
+struct pm4__indirect_buffer_pasid {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int reserved1:2;
+ unsigned int ib_base_lo:30;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int ib_base_hi:16;
+ unsigned int reserved2:16;
+ } bitfields3;
+ unsigned int ordinal3;
+ };
+
+ union {
+ unsigned int control;
+ unsigned int ordinal4;
+ };
+
+ union {
+ struct {
+ unsigned int pasid:10;
+ unsigned int reserved4:22;
+ } bitfields5;
+ unsigned int ordinal5;
+ };
+
+};
+
+#endif
+
+/*--------------------_RELEASE_MEM-------------------- */
+
+#ifndef _PM4__RELEASE_MEM_DEFINED
+#define _PM4__RELEASE_MEM_DEFINED
+enum _RELEASE_MEM_event_index_enum {
+ event_index___release_mem__end_of_pipe = 5,
+ event_index___release_mem__shader_done = 6
+};
+
+enum _RELEASE_MEM_cache_policy_enum {
+ cache_policy___release_mem__lru = 0,
+ cache_policy___release_mem__stream = 1,
+ cache_policy___release_mem__bypass = 2
+};
+
+enum _RELEASE_MEM_dst_sel_enum {
+ dst_sel___release_mem__memory_controller = 0,
+ dst_sel___release_mem__tc_l2 = 1,
+ dst_sel___release_mem__queue_write_pointer_register = 2,
+ dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3
+};
+
+enum _RELEASE_MEM_int_sel_enum {
+ int_sel___release_mem__none = 0,
+ int_sel___release_mem__send_interrupt_only = 1,
+ int_sel___release_mem__send_interrupt_after_write_confirm = 2,
+ int_sel___release_mem__send_data_after_write_confirm = 3
+};
+
+enum _RELEASE_MEM_data_sel_enum {
+ data_sel___release_mem__none = 0,
+ data_sel___release_mem__send_32_bit_low = 1,
+ data_sel___release_mem__send_64_bit_data = 2,
+ data_sel___release_mem__send_gpu_clock_counter = 3,
+ data_sel___release_mem__send_cp_perfcounter_hi_lo = 4,
+ data_sel___release_mem__store_gds_data_to_memory = 5
+};
+
+struct pm4__release_mem {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /*header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int event_type:6;
+ unsigned int reserved1:2;
+ enum _RELEASE_MEM_event_index_enum event_index:4;
+ unsigned int tcl1_vol_action_ena:1;
+ unsigned int tc_vol_action_ena:1;
+ unsigned int reserved2:1;
+ unsigned int tc_wb_action_ena:1;
+ unsigned int tcl1_action_ena:1;
+ unsigned int tc_action_ena:1;
+ unsigned int reserved3:6;
+ unsigned int atc:1;
+ enum _RELEASE_MEM_cache_policy_enum cache_policy:2;
+ unsigned int reserved4:5;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int reserved5:16;
+ enum _RELEASE_MEM_dst_sel_enum dst_sel:2;
+ unsigned int reserved6:6;
+ enum _RELEASE_MEM_int_sel_enum int_sel:3;
+ unsigned int reserved7:2;
+ enum _RELEASE_MEM_data_sel_enum data_sel:3;
+ } bitfields3;
+ unsigned int ordinal3;
+ };
+
+ union {
+ struct {
+ unsigned int reserved8:2;
+ unsigned int address_lo_32b:30;
+ } bitfields4;
+ struct {
+ unsigned int reserved9:3;
+ unsigned int address_lo_64b:29;
+ } bitfields5;
+ unsigned int ordinal4;
+ };
+
+ unsigned int address_hi;
+
+ unsigned int data_lo;
+
+ unsigned int data_hi;
+
+};
+#endif
+
+
+/*--------------------_SET_CONFIG_REG-------------------- */
+
+#ifndef _PM4__SET_CONFIG_REG_DEFINED
+#define _PM4__SET_CONFIG_REG_DEFINED
+
+struct pm4__set_config_reg {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /*header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int reg_offset:16;
+ unsigned int reserved1:7;
+ unsigned int vmid_shift:5;
+ unsigned int insert_vmid:1;
+ unsigned int reserved2:3;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ unsigned int reg_data[1]; /*1..N of these fields */
+
+};
+#endif
+
+/*--------------------_WAIT_REG_MEM-------------------- */
+
+#ifndef _PM4__WAIT_REG_MEM_DEFINED
+#define _PM4__WAIT_REG_MEM_DEFINED
+enum _WAIT_REG_MEM_function_enum {
+ function___wait_reg_mem__always_pass = 0,
+ function___wait_reg_mem__less_than_ref_value = 1,
+ function___wait_reg_mem__less_than_equal_to_the_ref_value = 2,
+ function___wait_reg_mem__equal_to_the_reference_value = 3,
+ function___wait_reg_mem__not_equal_reference_value = 4,
+ function___wait_reg_mem__greater_than_or_equal_reference_value = 5,
+ function___wait_reg_mem__greater_than_reference_value = 6,
+ function___wait_reg_mem__reserved = 7
+};
+
+enum _WAIT_REG_MEM_mem_space_enum {
+ mem_space___wait_reg_mem__register_space = 0,
+ mem_space___wait_reg_mem__memory_space = 1
+};
+
+enum _WAIT_REG_MEM_operation_enum {
+ operation___wait_reg_mem__wait_reg_mem = 0,
+ operation___wait_reg_mem__wr_wait_wr_reg = 1
+};
+
+struct pm4__wait_reg_mem {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /*header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ enum _WAIT_REG_MEM_function_enum function:3;
+ unsigned int reserved1:1;
+ enum _WAIT_REG_MEM_mem_space_enum mem_space:2;
+ enum _WAIT_REG_MEM_operation_enum operation:2;
+ unsigned int reserved2:24;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int reserved3:2;
+ unsigned int memory_poll_addr_lo:30;
+ } bitfields3;
+ struct {
+ unsigned int register_poll_addr:16;
+ unsigned int reserved4:16;
+ } bitfields4;
+ struct {
+ unsigned int register_write_addr:16;
+ unsigned int reserved5:16;
+ } bitfields5;
+ unsigned int ordinal3;
+ };
+
+ union {
+ struct {
+ unsigned int poll_address_hi:16;
+ unsigned int reserved6:16;
+ } bitfields6;
+ struct {
+ unsigned int register_write_addr:16;
+ unsigned int reserved7:16;
+ } bitfields7;
+ unsigned int ordinal4;
+ };
+
+ unsigned int reference;
+
+ unsigned int mask;
+
+ union {
+ struct {
+ unsigned int poll_interval:16;
+ unsigned int reserved8:16;
+ } bitfields8;
+ unsigned int ordinal7;
+ };
+
+};
+#endif
+
+
+#endif /* KFD_PM4_HEADERS_DIQ_H_ */
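
A sketch of how the debugger might chain an indirect buffer to the DIQ with the pm4__indirect_buffer_pasid packet defined above (illustrative only: the >> 2 on the base address and the use of the control dword for the IB length are assumptions, and build_pm4_header() is the helper kfd_packet_manager.c uses for its own packets):

static void example_fill_ib_pasid(struct pm4__indirect_buffer_pasid *pkt,
				  uint64_t ib_gpu_addr,
				  unsigned int ib_size_dw,
				  unsigned int pasid)
{
	memset(pkt, 0, sizeof(*pkt));

	pkt->header.u32all = build_pm4_header(IT_INDIRECT_BUFFER_PASID,
					      sizeof(*pkt));

	/* the IB base is 4-byte aligned; its low two bits fall in reserved1 */
	pkt->bitfields2.ib_base_lo = lower_32_bits(ib_gpu_addr) >> 2;
	pkt->bitfields3.ib_base_hi = upper_32_bits(ib_gpu_addr) & 0xffff;

	pkt->control = ib_size_dw;	/* assumed: IB length in dwords */
	pkt->bitfields5.pasid = pasid;
}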
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index b6f838f56589..cb79046e5c80 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -128,6 +128,7 @@ struct kfd_device_info {
unsigned int asic_family;
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits;
+ unsigned int max_no_of_hqd;
size_t ih_ring_entry_size;
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
@@ -167,8 +168,8 @@ struct kfd_dev {
const struct kfd2kgd_calls *kfd2kgd;
struct mutex doorbell_mutex;
- unsigned long doorbell_available_index[DIV_ROUND_UP(
- KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
+ DECLARE_BITMAP(doorbell_available_index,
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
void *gtt_mem;
uint64_t gtt_start_gpu_addr;
@@ -195,6 +196,9 @@ struct kfd_dev {
* from the HW ring into a SW ring.
*/
bool interrupts_active;
+
+ /* Debug manager */
+ struct kfd_dbgmgr *dbgmgr;
};
/* KGD2KFD callbacks */
@@ -231,6 +235,7 @@ struct device *kfd_chardev(void);
enum kfd_preempt_type_filter {
KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE,
KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES,
+ KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES,
KFD_PREEMPT_TYPE_FILTER_BY_PASID
};
@@ -503,8 +508,6 @@ struct kfd_process {
/* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
struct kfd_queue **queues;
- unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)];
-
/*Is the user space process 32 bit?*/
bool is_32bit_user_mode;
@@ -516,6 +519,11 @@ struct kfd_process {
event_pages */
u32 next_nonsignal_event_id;
size_t signal_event_count;
+ /*
+ * This flag tells if we should reset all wavefronts on
+ * process termination
+ */
+ bool reset_wavefronts;
};
/**
@@ -650,6 +658,12 @@ int pqm_create_queue(struct process_queue_manager *pqm,
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
+struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
+ unsigned int qid);
+
+int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
+ unsigned int fence_value,
+ unsigned long timeout);
/* Packet Manager */
@@ -717,4 +731,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint64_t *event_page_offset, uint32_t *event_slot_index);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
+int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
+
#endif
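
The doorbell_available_index change above is purely a readability cleanup; the two spellings declare the same storage:

	/* include/linux/types.h */
	#define DECLARE_BITMAP(name, bits) \
		unsigned long name[BITS_TO_LONGS(bits)]

	/* BITS_TO_LONGS(bits) == DIV_ROUND_UP(bits, BITS_PER_LONG), so this is
	 * still DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)
	 * unsigned longs, only now directly usable with the bitmap helpers.
	 */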
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index dc910af2bb3c..56b904f5bdb1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -31,6 +31,7 @@
struct mm_struct;
#include "kfd_priv.h"
+#include "kfd_dbgmgr.h"
/*
* Initial size for the array of queues.
@@ -172,6 +173,9 @@ static void kfd_process_wq_release(struct work_struct *work)
pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
pdd->dev->id, p->pasid);
+ if (p->reset_wavefronts)
+ dbgdev_wave_reset_wavefronts(pdd->dev, p);
+
amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
list_del(&pdd->per_device_list);
@@ -301,6 +305,8 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (kfd_init_apertures(process) != 0)
goto err_init_apretures;
+ process->reset_wavefronts = false;
+
return process;
err_init_apretures:
@@ -399,7 +405,12 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
mutex_lock(&p->mutex);
+ if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
+ kfd_dbgmgr_destroy(dev->dbgmgr);
+
pqm_uninit(&p->pqm);
+ if (p->reset_wavefronts)
+ dbgdev_wave_reset_wavefronts(dev, p);
pdd = kfd_get_process_device_data(dev, p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 530b82c4e78b..7b69070f7ecc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -158,6 +158,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct queue *q;
struct process_queue_node *pqn;
struct kernel_queue *kq;
+ int num_queues = 0;
+ struct queue *cur;
BUG_ON(!pqm || !dev || !properties || !qid);
@@ -172,6 +174,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
return -1;
}
+ /*
+ * For a debugged process, verify that it stays within the static queues
+ * limit, currently set to half of the total available HQD slots.
+ * If we are just about to create the DIQ, the is_debug flag is not set
+ * yet, so we check the queue type as well.
+ */
+ if ((pdd->qpd.is_debug) ||
+ (type == KFD_QUEUE_TYPE_DIQ)) {
+ list_for_each_entry(cur, &pdd->qpd.queues_list, list)
+ num_queues++;
+ if (num_queues >= dev->device_info->max_no_of_hqd/2)
+ return (-ENOSPC);
+ }
+
retval = find_available_queue_slot(pqm, qid);
if (retval != 0)
return retval;
@@ -341,7 +357,7 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
return 0;
}
-static __attribute__((unused)) struct kernel_queue *pqm_get_kernel_queue(
+struct kernel_queue *pqm_get_kernel_queue(
struct process_queue_manager *pqm,
unsigned int qid)
{
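
The check added to pqm_create_queue() above makes the limit concrete: with Kaveri's .max_no_of_hqd = 24 set earlier in this series, a process that is being debugged (or is about to create the DIQ) may hold at most 24 / 2 = 12 queues on that device, and the next create attempt returns -ENOSPC before a queue slot is even allocated.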
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 4ea21ae88b07..9080daa116b6 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -163,6 +163,27 @@ struct kfd2kgd_calls {
int (*hqd_sdma_destroy)(struct kgd_dev *kgd, void *mqd,
unsigned int timeout);
+ int (*address_watch_disable)(struct kgd_dev *kgd);
+ int (*address_watch_execute)(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo);
+ int (*wave_control_execute)(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd);
+ uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset);
+ bool (*get_atc_vmid_pasid_mapping_valid)(
+ struct kgd_dev *kgd,
+ uint8_t vmid);
+ uint16_t (*get_atc_vmid_pasid_mapping_pasid)(
+ struct kgd_dev *kgd,
+ uint8_t vmid);
+ void (*write_vmid_invalidate_request)(struct kgd_dev *kgd,
+ uint8_t vmid);
+
uint16_t (*get_fw_version)(struct kgd_dev *kgd,
enum kgd_engine_type type);
};
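
The new kfd2kgd callbacks give amdkfd the register access the debugger needs without reaching into radeon directly. A rough sketch of how the dbgdev code might program the watch registers through them (illustrative only: it assumes kfd_dev exposes the kgd handle as dev->kgd and leaves the control-word encoding empty):

static int example_program_watch_points(struct kfd_dev *dev,
					struct dbg_address_watch_info *adw_info)
{
	unsigned int i;

	for (i = 0; i < adw_info->num_watch_points; i++) {
		uint64_t addr = adw_info->watch_address[i];
		uint32_t cntl = 0;	/* mode/mask encoding omitted on purpose */

		dev->kfd2kgd->address_watch_execute(dev->kgd, i, cntl,
						    upper_32_bits(addr),
						    lower_32_bits(addr));
	}

	return 0;
}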
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 1134526286c8..3427b115e2bb 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -825,7 +825,7 @@ static u64 drm_mm_dump_hole(struct seq_file *m, struct drm_mm_node *entry)
hole_start = drm_mm_hole_node_start(entry);
hole_end = drm_mm_hole_node_end(entry);
hole_size = hole_end - hole_start;
- seq_printf(m, "%#llx-%#llx: %llu: free\n", hole_start,
+ seq_printf(m, "%#018llx-%#018llx: %llu: free\n", hole_start,
hole_end, hole_size);
return hole_size;
}
@@ -846,7 +846,7 @@ int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm)
total_free += drm_mm_dump_hole(m, &mm->head_node);
drm_mm_for_each_node(entry, mm) {
- seq_printf(m, "%#016llx-%#016llx: %llu: used\n", entry->start,
+ seq_printf(m, "%#018llx-%#018llx: %llu: used\n", entry->start,
entry->start + entry->size, entry->size);
total_used += entry->size;
total_free += drm_mm_dump_hole(m, entry);
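
The drm_mm format change is about column alignment: with '#', the '0x' prefix counts toward the field width, so a full 64-bit value (16 hex digits plus the prefix) needs a width of 18. With %#016llx the value 1 prints as 0x00000000000001 (only 14 digits); with %#018llx it prints as 0x0000000000000001, so used and free ranges line up in the debugfs dump.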
diff --git a/drivers/gpu/drm/i2c/adv7511.c b/drivers/gpu/drm/i2c/adv7511.c
index b728523e194f..2aaa3c88999e 100644
--- a/drivers/gpu/drm/i2c/adv7511.c
+++ b/drivers/gpu/drm/i2c/adv7511.c
@@ -438,7 +438,7 @@ static int adv7511_irq_process(struct adv7511 *adv7511)
regmap_write(adv7511->regmap, ADV7511_REG_INT(0), irq0);
regmap_write(adv7511->regmap, ADV7511_REG_INT(1), irq1);
- if (irq0 & ADV7511_INT0_HDP)
+ if (irq0 & ADV7511_INT0_HDP && adv7511->encoder)
drm_helper_hpd_irq_event(adv7511->encoder->dev);
if (irq0 & ADV7511_INT0_EDID_READY || irq1 & ADV7511_INT1_DDC_ERROR) {
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index 011e1cac3e4c..fe1599d75f14 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -644,7 +644,8 @@ tda998x_write_avi(struct tda998x_priv *priv, struct drm_display_mode *mode)
len = hdmi_avi_infoframe_pack(&frame, buf, sizeof(buf));
if (len < 0) {
- dev_err(&priv->hdmi->dev, "hdmi_avi_infoframe_pack() failed: %d\n", len);
+ dev_err(&priv->hdmi->dev,
+ "hdmi_avi_infoframe_pack() failed: %zd\n", len);
return;
}
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index adbbddab42c6..88cc793c46d3 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -120,10 +120,13 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
static void
describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
{
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+ struct intel_engine_cs *ring;
struct i915_vma *vma;
int pin_count = 0;
+ int i;
- seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s",
+ seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x [ ",
&obj->base,
obj->active ? "*" : " ",
get_pin_flag(obj),
@@ -131,8 +134,11 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
get_global_flag(obj),
obj->base.size / 1024,
obj->base.read_domains,
- obj->base.write_domain,
- i915_gem_request_get_seqno(obj->last_read_req),
+ obj->base.write_domain);
+ for_each_ring(ring, dev_priv, i)
+ seq_printf(m, "%x ",
+ i915_gem_request_get_seqno(obj->last_read_req[i]));
+ seq_printf(m, "] %x %x%s%s%s",
i915_gem_request_get_seqno(obj->last_write_req),
i915_gem_request_get_seqno(obj->last_fenced_req),
i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
@@ -169,9 +175,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
*t = '\0';
seq_printf(m, " (%s mappable)", s);
}
- if (obj->last_read_req != NULL)
+ if (obj->last_write_req != NULL)
seq_printf(m, " (%s)",
- i915_gem_request_get_ring(obj->last_read_req)->name);
+ i915_gem_request_get_ring(obj->last_write_req)->name);
if (obj->frontbuffer_bits)
seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
}
@@ -665,7 +671,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring;
- struct drm_i915_gem_request *rq;
+ struct drm_i915_gem_request *req;
int ret, any, i;
ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -677,22 +683,22 @@ static int i915_gem_request_info(struct seq_file *m, void *data)
int count;
count = 0;
- list_for_each_entry(rq, &ring->request_list, list)
+ list_for_each_entry(req, &ring->request_list, list)
count++;
if (count == 0)
continue;
seq_printf(m, "%s requests: %d\n", ring->name, count);
- list_for_each_entry(rq, &ring->request_list, list) {
+ list_for_each_entry(req, &ring->request_list, list) {
struct task_struct *task;
rcu_read_lock();
task = NULL;
- if (rq->pid)
- task = pid_task(rq->pid, PIDTYPE_PID);
+ if (req->pid)
+ task = pid_task(req->pid, PIDTYPE_PID);
seq_printf(m, " %x @ %d: %s [%d]\n",
- rq->seqno,
- (int) (jiffies - rq->emitted_jiffies),
+ req->seqno,
+ (int) (jiffies - req->emitted_jiffies),
task ? task->comm : "<unknown>",
task ? task->pid : -1);
rcu_read_unlock();
@@ -2276,22 +2282,35 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
return 0;
}
+static int count_irq_waiters(struct drm_i915_private *i915)
+{
+ struct intel_engine_cs *ring;
+ int count = 0;
+ int i;
+
+ for_each_ring(ring, i915, i)
+ count += ring->irq_refcount;
+
+ return count;
+}
+
static int i915_rps_boost_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_file *file;
- int ret;
-
- ret = mutex_lock_interruptible(&dev->struct_mutex);
- if (ret)
- return ret;
-
- ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock);
- if (ret)
- goto unlock;
+ seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled);
+ seq_printf(m, "GPU busy? %d\n", dev_priv->mm.busy);
+ seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
+ seq_printf(m, "Frequency requested %d; min hard:%d, soft:%d; max soft:%d, hard:%d\n",
+ intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
+ intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
+ intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit),
+ intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit),
+ intel_gpu_freq(dev_priv, dev_priv->rps.max_freq));
+ spin_lock(&dev_priv->rps.client_lock);
list_for_each_entry_reverse(file, &dev->filelist, lhead) {
struct drm_i915_file_private *file_priv = file->driver_priv;
struct task_struct *task;
@@ -2301,17 +2320,20 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
seq_printf(m, "%s [%d]: %d boosts%s\n",
task ? task->comm : "<unknown>",
task ? task->pid : -1,
- file_priv->rps_boosts,
- list_empty(&file_priv->rps_boost) ? "" : ", active");
+ file_priv->rps.boosts,
+ list_empty(&file_priv->rps.link) ? "" : ", active");
rcu_read_unlock();
}
+ seq_printf(m, "Semaphore boosts: %d%s\n",
+ dev_priv->rps.semaphores.boosts,
+ list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active");
+ seq_printf(m, "MMIO flip boosts: %d%s\n",
+ dev_priv->rps.mmioflips.boosts,
+ list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active");
seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts);
+ spin_unlock(&dev_priv->rps.client_lock);
- mutex_unlock(&dev_priv->rps.hw_lock);
-unlock:
- mutex_unlock(&dev->struct_mutex);
-
- return ret;
+ return 0;
}
static int i915_llc(struct seq_file *m, void *data)
@@ -5154,6 +5176,9 @@ static int i915_dpcd_show(struct seq_file *m, void *data)
ssize_t err;
int i;
+ if (connector->status != connector_status_connected)
+ return -ENODEV;
+
for (i = 0; i < ARRAY_SIZE(i915_dpcd_debug); i++) {
const struct dpcd_block *b = &i915_dpcd_debug[i];
size_t size = b->end ? b->end - b->offset + 1 : (b->size ?: 1);
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a238889630d8..d2df321ba634 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -814,7 +814,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
spin_lock_init(&dev_priv->uncore.lock);
spin_lock_init(&dev_priv->mm.object_stat_lock);
spin_lock_init(&dev_priv->mmio_flip_lock);
- mutex_init(&dev_priv->dpio_lock);
+ mutex_init(&dev_priv->sb_lock);
mutex_init(&dev_priv->modeset_restore_lock);
mutex_init(&dev_priv->csr_lock);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 51149fb75e96..884b4f9b81c4 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -595,6 +595,7 @@ static int intel_suspend_complete(struct drm_i915_private *dev_priv);
static int vlv_resume_prepare(struct drm_i915_private *dev_priv,
bool rpm_resume);
static int skl_resume_prepare(struct drm_i915_private *dev_priv);
+static int bxt_resume_prepare(struct drm_i915_private *dev_priv);
static int i915_drm_suspend(struct drm_device *dev)
@@ -811,14 +812,17 @@ static int i915_drm_resume_early(struct drm_device *dev)
if (IS_VALLEYVIEW(dev_priv))
ret = vlv_resume_prepare(dev_priv, false);
if (ret)
- DRM_ERROR("Resume prepare failed: %d,Continuing resume\n", ret);
+ DRM_ERROR("Resume prepare failed: %d, continuing anyway\n",
+ ret);
intel_uncore_early_sanitize(dev, true);
- if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
- hsw_disable_pc8(dev_priv);
+ if (IS_BROXTON(dev))
+ ret = bxt_resume_prepare(dev_priv);
else if (IS_SKYLAKE(dev_priv))
ret = skl_resume_prepare(dev_priv);
+ else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+ hsw_disable_pc8(dev_priv);
intel_uncore_sanitize(dev);
intel_power_domains_init_hw(dev_priv);
@@ -989,7 +993,7 @@ static int i915_pm_suspend_late(struct device *dev)
struct drm_device *drm_dev = dev_to_i915(dev)->dev;
/*
- * We have a suspedn ordering issue with the snd-hda driver also
+ * We have a suspend ordering issue with the snd-hda driver also
* requiring our device to be powered up. Due to the lack of a
* parent/child relationship we currently solve this with a late
* suspend hook.
@@ -1043,6 +1047,8 @@ static int skl_suspend_complete(struct drm_i915_private *dev_priv)
*/
intel_csr_load_status_set(dev_priv, FW_UNINITIALIZED);
+ skl_uninit_cdclk(dev_priv);
+
return 0;
}
@@ -1089,6 +1095,7 @@ static int skl_resume_prepare(struct drm_i915_private *dev_priv)
{
struct drm_device *dev = dev_priv->dev;
+ skl_init_cdclk(dev_priv);
intel_csr_load_program(dev);
return 0;
@@ -1586,16 +1593,15 @@ static int intel_runtime_resume(struct device *device)
*/
static int intel_suspend_complete(struct drm_i915_private *dev_priv)
{
- struct drm_device *dev = dev_priv->dev;
int ret;
- if (IS_BROXTON(dev))
+ if (IS_BROXTON(dev_priv))
ret = bxt_suspend_complete(dev_priv);
- else if (IS_SKYLAKE(dev))
+ else if (IS_SKYLAKE(dev_priv))
ret = skl_suspend_complete(dev_priv);
- else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+ else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
ret = hsw_suspend_complete(dev_priv);
- else if (IS_VALLEYVIEW(dev))
+ else if (IS_VALLEYVIEW(dev_priv))
ret = vlv_suspend_complete(dev_priv);
else
ret = 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index acfa4fc93803..72f5a3f9dbf2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -56,7 +56,7 @@
#define DRIVER_NAME "i915"
#define DRIVER_DESC "Intel Graphics"
-#define DRIVER_DATE "20150508"
+#define DRIVER_DATE "20150522"
#undef WARN_ON
/* Many gcc seem to no see through this and fall over :( */
@@ -272,6 +272,30 @@ struct drm_i915_private;
struct i915_mm_struct;
struct i915_mmu_object;
+struct drm_i915_file_private {
+ struct drm_i915_private *dev_priv;
+ struct drm_file *file;
+
+ struct {
+ spinlock_t lock;
+ struct list_head request_list;
+/* 20ms is a fairly arbitrary limit (greater than the average frame time)
+ * chosen to prevent the CPU getting more than a frame ahead of the GPU
+ * (when using lax throttling for the frontbuffer). We also use it to
+ * offer free GPU waitboosts for severely congested workloads.
+ */
+#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
+ } mm;
+ struct idr context_idr;
+
+ struct intel_rps_client {
+ struct list_head link;
+ unsigned boosts;
+ } rps;
+
+ struct intel_engine_cs *bsd_ring;
+};
+
enum intel_dpll_id {
DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
/* real shared dpll ids must be >= 0 */
@@ -309,7 +333,7 @@ struct intel_dpll_hw_state {
uint32_t cfgcr1, cfgcr2;
/* bxt */
- uint32_t ebb0, pll0, pll1, pll2, pll3, pll6, pll8, pcsdw12;
+ uint32_t ebb0, pll0, pll1, pll2, pll3, pll6, pll8, pll10, pcsdw12;
};
struct intel_shared_dpll_config {
@@ -508,7 +532,7 @@ struct drm_i915_error_state {
struct drm_i915_error_buffer {
u32 size;
u32 name;
- u32 rseqno, wseqno;
+ u32 rseqno[I915_NUM_RINGS], wseqno;
u32 gtt_offset;
u32 read_domains;
u32 write_domain;
@@ -1065,17 +1089,24 @@ struct intel_gen6_power_mgmt {
int last_adj;
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
+ spinlock_t client_lock;
+ struct list_head clients;
+ bool client_boost;
+
bool enabled;
struct delayed_work delayed_resume_work;
- struct list_head clients;
unsigned boosts;
+ struct intel_rps_client semaphores, mmioflips;
+
/* manual wa residency calculations */
struct intel_rps_ei up_ei, down_ei;
/*
* Protects RPS/RC6 register access and PCU communication.
- * Must be taken after struct_mutex if nested.
+ * Must be taken after struct_mutex if nested. Note that
+ * this lock may be held for long periods of time when
+ * talking to hw - so only take it when talking to hw!
*/
struct mutex hw_lock;
};
@@ -1468,7 +1499,8 @@ static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1,
struct skl_ddb_allocation {
struct skl_ddb_entry pipe[I915_MAX_PIPES];
- struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES];
+ struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; /* packed/uv */
+ struct skl_ddb_entry y_plane[I915_MAX_PIPES][I915_MAX_PLANES]; /* y-plane */
struct skl_ddb_entry cursor[I915_MAX_PIPES];
};
@@ -1634,8 +1666,8 @@ struct drm_i915_private {
/* To control wakeup latency, e.g. for irq-driven dp aux transfers. */
struct pm_qos_request pm_qos;
- /* DPIO indirect register protection */
- struct mutex dpio_lock;
+ /* Sideband mailbox protection */
+ struct mutex sb_lock;
/** Cached value of IMR to avoid reads in updating the bitfield */
union {
@@ -1684,6 +1716,7 @@ struct drm_i915_private {
int num_fence_regs; /* 8 on pre-965, 16 otherwise */
unsigned int fsb_freq, mem_freq, is_ddr3;
+ unsigned int skl_boot_cdclk;
unsigned int cdclk_freq;
unsigned int hpll_freq;
@@ -1938,7 +1971,7 @@ struct drm_i915_gem_object {
struct drm_mm_node *stolen;
struct list_head global_list;
- struct list_head ring_list;
+ struct list_head ring_list[I915_NUM_RINGS];
/** Used in execbuf to temporarily hold a ref */
struct list_head obj_exec_link;
@@ -1949,7 +1982,7 @@ struct drm_i915_gem_object {
* rendering and so a non-zero seqno), and is not set if it is on
* inactive (ready to be unbound) list.
*/
- unsigned int active:1;
+ unsigned int active:I915_NUM_RINGS;
/**
* This is set if the object has been written to since last bound
@@ -2020,8 +2053,17 @@ struct drm_i915_gem_object {
void *dma_buf_vmapping;
int vmapping_count;
- /** Breadcrumb of last rendering to the buffer. */
- struct drm_i915_gem_request *last_read_req;
+ /** Breadcrumb of last rendering to the buffer.
+ * There can only be one writer, but we allow for multiple readers.
+ * If there is a writer that necessarily implies that all other
+ * read requests are complete - but we may only be lazily clearing
+ * the read requests. A read request is naturally the most recent
+ * request on a ring, so we may have two different write and read
+ * requests on one ring where the write request is older than the
+ * read request. This allows for the CPU to read from an active
+ * buffer by only waiting for the write to complete.
+ */
+ struct drm_i915_gem_request *last_read_req[I915_NUM_RINGS];
struct drm_i915_gem_request *last_write_req;
/** Breadcrumb of last fenced GPU access to the buffer. */
struct drm_i915_gem_request *last_fenced_req;
@@ -2160,10 +2202,12 @@ i915_gem_request_get_ring(struct drm_i915_gem_request *req)
return req ? req->ring : NULL;
}
-static inline void
+static inline struct drm_i915_gem_request *
i915_gem_request_reference(struct drm_i915_gem_request *req)
{
- kref_get(&req->ref);
+ if (req)
+ kref_get(&req->ref);
+ return req;
}
static inline void
@@ -2204,22 +2248,6 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
* a later patch when the call to i915_seqno_passed() is obsoleted...
*/
-struct drm_i915_file_private {
- struct drm_i915_private *dev_priv;
- struct drm_file *file;
-
- struct {
- spinlock_t lock;
- struct list_head request_list;
- } mm;
- struct idr context_idr;
-
- struct list_head rps_boost;
- struct intel_engine_cs *bsd_ring;
-
- unsigned rps_boosts;
-};
-
/*
* A command that requires special handling by the command parser.
*/
@@ -2375,6 +2403,7 @@ struct drm_i915_cmd_table {
#define SKL_REVID_C0 (0x2)
#define SKL_REVID_D0 (0x3)
#define SKL_REVID_E0 (0x4)
+#define SKL_REVID_F0 (0x5)
#define BXT_REVID_A0 (0x0)
#define BXT_REVID_B0 (0x3)
@@ -2445,6 +2474,9 @@ struct drm_i915_cmd_table {
#define HAS_IPS(dev) (IS_HSW_ULT(dev) || IS_BROADWELL(dev))
+#define HAS_DP_MST(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev) || \
+ INTEL_INFO(dev)->gen >= 9)
+
#define HAS_DDI(dev) (INTEL_INFO(dev)->has_ddi)
#define HAS_FPGA_DBG_UNCLAIMED(dev) (INTEL_INFO(dev)->has_fpga_dbg)
#define HAS_PSR(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev) || \
@@ -2820,7 +2852,6 @@ static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv)
void i915_gem_reset(struct drm_device *dev);
bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
-int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
int __must_check i915_gem_init(struct drm_device *dev);
int i915_gem_init_rings(struct drm_device *dev);
int __must_check i915_gem_init_hw(struct drm_device *dev);
@@ -2838,10 +2869,13 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
unsigned reset_counter,
bool interruptible,
s64 *timeout,
- struct drm_i915_file_private *file_priv);
+ struct intel_rps_client *rps);
int __must_check i915_wait_request(struct drm_i915_gem_request *req);
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
int __must_check
+i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
+ bool readonly);
+int __must_check
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
bool write);
int __must_check
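
The i915_gem.c changes that follow build on the structure changes above: obj->active becomes a bitmask with one bit per ring, set via intel_ring_flag() when the object joins a ring's active list and cleared per ring as its read request retires, so the GEM reference taken on first activation is dropped only once the whole mask is clear. A trivial helper shows the intended query pattern (illustrative, not part of the patch):

static inline bool example_obj_active_on(struct drm_i915_gem_object *obj,
					 struct intel_engine_cs *ring)
{
	/* intel_ring_flag(ring) == 1 << ring->id */
	return (obj->active & intel_ring_flag(ring)) != 0;
}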
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f128ed8d6f65..be35f0486202 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -38,14 +38,14 @@
#include <linux/pci.h>
#include <linux/dma-buf.h>
+#define RQ_BUG_ON(expr)
+
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
-static __must_check int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
- bool readonly);
static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj);
-
+i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
+static void
+i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
static void i915_gem_write_fence(struct drm_device *dev, int reg,
struct drm_i915_gem_object *obj);
static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
@@ -518,8 +518,6 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
ret = i915_gem_object_wait_rendering(obj, true);
if (ret)
return ret;
-
- i915_gem_object_retire(obj);
}
ret = i915_gem_object_get_pages(obj);
@@ -939,8 +937,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
ret = i915_gem_object_wait_rendering(obj, false);
if (ret)
return ret;
-
- i915_gem_object_retire(obj);
}
/* Same trick applies to invalidate partially written cachelines read
* before writing. */
@@ -1181,16 +1177,16 @@ static bool missed_irq(struct drm_i915_private *dev_priv,
return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
}
-static int __i915_spin_request(struct drm_i915_gem_request *rq)
+static int __i915_spin_request(struct drm_i915_gem_request *req)
{
unsigned long timeout;
- if (i915_gem_request_get_ring(rq)->irq_refcount)
+ if (i915_gem_request_get_ring(req)->irq_refcount)
return -EBUSY;
timeout = jiffies + 1;
while (!need_resched()) {
- if (i915_gem_request_completed(rq, true))
+ if (i915_gem_request_completed(req, true))
return 0;
if (time_after_eq(jiffies, timeout))
@@ -1198,7 +1194,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *rq)
cpu_relax_lowlatency();
}
- if (i915_gem_request_completed(rq, false))
+ if (i915_gem_request_completed(req, false))
return 0;
return -EAGAIN;
@@ -1225,7 +1221,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
unsigned reset_counter,
bool interruptible,
s64 *timeout,
- struct drm_i915_file_private *file_priv)
+ struct intel_rps_client *rps)
{
struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
struct drm_device *dev = ring->dev;
@@ -1239,14 +1235,17 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
+ if (list_empty(&req->list))
+ return 0;
+
if (i915_gem_request_completed(req, true))
return 0;
timeout_expire = timeout ?
jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0;
- if (INTEL_INFO(dev)->gen >= 6)
- gen6_rps_boost(dev_priv, file_priv);
+ if (INTEL_INFO(dev_priv)->gen >= 6)
+ gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
/* Record current time in case interrupted by signal, or wedged */
trace_i915_gem_request_wait_begin(req);
@@ -1338,6 +1337,63 @@ out:
return ret;
}
+static inline void
+i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
+{
+ struct drm_i915_file_private *file_priv = request->file_priv;
+
+ if (!file_priv)
+ return;
+
+ spin_lock(&file_priv->mm.lock);
+ list_del(&request->client_list);
+ request->file_priv = NULL;
+ spin_unlock(&file_priv->mm.lock);
+}
+
+static void i915_gem_request_retire(struct drm_i915_gem_request *request)
+{
+ trace_i915_gem_request_retire(request);
+
+ /* We know the GPU must have read the request to have
+ * sent us the seqno + interrupt, so use the position
+ * of tail of the request to update the last known position
+ * of the GPU head.
+ *
+ * Note this requires that we are always called in request
+ * completion order.
+ */
+ request->ringbuf->last_retired_head = request->postfix;
+
+ list_del_init(&request->list);
+ i915_gem_request_remove_from_client(request);
+
+ put_pid(request->pid);
+
+ i915_gem_request_unreference(request);
+}
+
+static void
+__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
+{
+ struct intel_engine_cs *engine = req->ring;
+ struct drm_i915_gem_request *tmp;
+
+ lockdep_assert_held(&engine->dev->struct_mutex);
+
+ if (list_empty(&req->list))
+ return;
+
+ do {
+ tmp = list_first_entry(&engine->request_list,
+ typeof(*tmp), list);
+
+ i915_gem_request_retire(tmp);
+ } while (tmp != req);
+
+ WARN_ON(i915_verify_lists(engine->dev));
+}
+
/**
* Waits for a request to be signaled, and cleans up the
* request and object lists appropriately for that event.
@@ -1348,7 +1404,6 @@ i915_wait_request(struct drm_i915_gem_request *req)
struct drm_device *dev;
struct drm_i915_private *dev_priv;
bool interruptible;
- unsigned reset_counter;
int ret;
BUG_ON(req == NULL);
@@ -1367,29 +1422,13 @@ i915_wait_request(struct drm_i915_gem_request *req)
if (ret)
return ret;
- reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
- i915_gem_request_reference(req);
- ret = __i915_wait_request(req, reset_counter,
+ ret = __i915_wait_request(req,
+ atomic_read(&dev_priv->gpu_error.reset_counter),
interruptible, NULL, NULL);
- i915_gem_request_unreference(req);
- return ret;
-}
-
-static int
-i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj)
-{
- if (!obj->active)
- return 0;
-
- /* Manually manage the write flush as we may have not yet
- * retired the buffer.
- *
- * Note that the last_write_req is always the earlier of
- * the two (read/write) requests, so if we haved successfully waited,
- * we know we have passed the last write.
- */
- i915_gem_request_assign(&obj->last_write_req, NULL);
+ if (ret)
+ return ret;
+ __i915_gem_request_retire__upto(req);
return 0;
}
@@ -1397,22 +1436,56 @@ i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj)
* Ensures that all rendering to the object has completed and the object is
* safe to unbind from the GTT or access from the CPU.
*/
-static __must_check int
+int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
bool readonly)
{
- struct drm_i915_gem_request *req;
- int ret;
+ int ret, i;
- req = readonly ? obj->last_write_req : obj->last_read_req;
- if (!req)
+ if (!obj->active)
return 0;
- ret = i915_wait_request(req);
- if (ret)
- return ret;
+ if (readonly) {
+ if (obj->last_write_req != NULL) {
+ ret = i915_wait_request(obj->last_write_req);
+ if (ret)
+ return ret;
- return i915_gem_object_wait_rendering__tail(obj);
+ i = obj->last_write_req->ring->id;
+ if (obj->last_read_req[i] == obj->last_write_req)
+ i915_gem_object_retire__read(obj, i);
+ else
+ i915_gem_object_retire__write(obj);
+ }
+ } else {
+ for (i = 0; i < I915_NUM_RINGS; i++) {
+ if (obj->last_read_req[i] == NULL)
+ continue;
+
+ ret = i915_wait_request(obj->last_read_req[i]);
+ if (ret)
+ return ret;
+
+ i915_gem_object_retire__read(obj, i);
+ }
+ RQ_BUG_ON(obj->active);
+ }
+
+ return 0;
+}
+
+static void
+i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
+ struct drm_i915_gem_request *req)
+{
+ int ring = req->ring->id;
+
+ if (obj->last_read_req[ring] == req)
+ i915_gem_object_retire__read(obj, ring);
+ else if (obj->last_write_req == req)
+ i915_gem_object_retire__write(obj);
+
+ __i915_gem_request_retire__upto(req);
}
/* A nonblocking variant of the above wait. This is a highly dangerous routine
@@ -1420,40 +1493,75 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
*/
static __must_check int
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
- struct drm_i915_file_private *file_priv,
+ struct intel_rps_client *rps,
bool readonly)
{
- struct drm_i915_gem_request *req;
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_i915_gem_request *requests[I915_NUM_RINGS];
unsigned reset_counter;
- int ret;
+ int ret, i, n = 0;
BUG_ON(!mutex_is_locked(&dev->struct_mutex));
BUG_ON(!dev_priv->mm.interruptible);
- req = readonly ? obj->last_write_req : obj->last_read_req;
- if (!req)
+ if (!obj->active)
return 0;
ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
if (ret)
return ret;
- ret = i915_gem_check_olr(req);
- if (ret)
- return ret;
-
reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
- i915_gem_request_reference(req);
+
+ if (readonly) {
+ struct drm_i915_gem_request *req;
+
+ req = obj->last_write_req;
+ if (req == NULL)
+ return 0;
+
+ ret = i915_gem_check_olr(req);
+ if (ret)
+ goto err;
+
+ requests[n++] = i915_gem_request_reference(req);
+ } else {
+ for (i = 0; i < I915_NUM_RINGS; i++) {
+ struct drm_i915_gem_request *req;
+
+ req = obj->last_read_req[i];
+ if (req == NULL)
+ continue;
+
+ ret = i915_gem_check_olr(req);
+ if (ret)
+ goto err;
+
+ requests[n++] = i915_gem_request_reference(req);
+ }
+ }
+
mutex_unlock(&dev->struct_mutex);
- ret = __i915_wait_request(req, reset_counter, true, NULL, file_priv);
+ for (i = 0; ret == 0 && i < n; i++)
+ ret = __i915_wait_request(requests[i], reset_counter, true,
+ NULL, rps);
mutex_lock(&dev->struct_mutex);
- i915_gem_request_unreference(req);
- if (ret)
- return ret;
- return i915_gem_object_wait_rendering__tail(obj);
+err:
+ for (i = 0; i < n; i++) {
+ if (ret == 0)
+ i915_gem_object_retire_request(obj, requests[i]);
+ i915_gem_request_unreference(requests[i]);
+ }
+
+ return ret;
+}
+
+static struct intel_rps_client *to_rps_client(struct drm_file *file)
+{
+ struct drm_i915_file_private *fpriv = file->driver_priv;
+ return &fpriv->rps;
}
/**
@@ -1498,7 +1606,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
* to catch cases where we are gazumped.
*/
ret = i915_gem_object_wait_rendering__nonblocking(obj,
- file->driver_priv,
+ to_rps_client(file),
!write_domain);
if (ret)
goto unref;
@@ -1919,7 +2027,6 @@ i915_gem_mmap_gtt(struct drm_file *file,
uint32_t handle,
uint64_t *offset)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj;
int ret;
@@ -2235,78 +2342,58 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
return 0;
}
-static void
-i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
- struct intel_engine_cs *ring)
+void i915_vma_move_to_active(struct i915_vma *vma,
+ struct intel_engine_cs *ring)
{
- struct drm_i915_gem_request *req;
- struct intel_engine_cs *old_ring;
-
- BUG_ON(ring == NULL);
-
- req = intel_ring_get_request(ring);
- old_ring = i915_gem_request_get_ring(obj->last_read_req);
-
- if (old_ring != ring && obj->last_write_req) {
- /* Keep the request relative to the current ring */
- i915_gem_request_assign(&obj->last_write_req, req);
- }
+ struct drm_i915_gem_object *obj = vma->obj;
/* Add a reference if we're newly entering the active list. */
- if (!obj->active) {
+ if (obj->active == 0)
drm_gem_object_reference(&obj->base);
- obj->active = 1;
- }
+ obj->active |= intel_ring_flag(ring);
- list_move_tail(&obj->ring_list, &ring->active_list);
+ list_move_tail(&obj->ring_list[ring->id], &ring->active_list);
+ i915_gem_request_assign(&obj->last_read_req[ring->id],
+ intel_ring_get_request(ring));
- i915_gem_request_assign(&obj->last_read_req, req);
+ list_move_tail(&vma->mm_list, &vma->vm->active_list);
}
-void i915_vma_move_to_active(struct i915_vma *vma,
- struct intel_engine_cs *ring)
+static void
+i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
{
- list_move_tail(&vma->mm_list, &vma->vm->active_list);
- return i915_gem_object_move_to_active(vma->obj, ring);
+ RQ_BUG_ON(obj->last_write_req == NULL);
+ RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));
+
+ i915_gem_request_assign(&obj->last_write_req, NULL);
+ intel_fb_obj_flush(obj, true);
}
static void
-i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
+i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
{
struct i915_vma *vma;
- BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
- BUG_ON(!obj->active);
+ RQ_BUG_ON(obj->last_read_req[ring] == NULL);
+ RQ_BUG_ON(!(obj->active & (1 << ring)));
+
+ list_del_init(&obj->ring_list[ring]);
+ i915_gem_request_assign(&obj->last_read_req[ring], NULL);
+
+ if (obj->last_write_req && obj->last_write_req->ring->id == ring)
+ i915_gem_object_retire__write(obj);
+
+ obj->active &= ~(1 << ring);
+ if (obj->active)
+ return;
list_for_each_entry(vma, &obj->vma_list, vma_link) {
if (!list_empty(&vma->mm_list))
list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
}
- intel_fb_obj_flush(obj, true);
-
- list_del_init(&obj->ring_list);
-
- i915_gem_request_assign(&obj->last_read_req, NULL);
- i915_gem_request_assign(&obj->last_write_req, NULL);
- obj->base.write_domain = 0;
-
i915_gem_request_assign(&obj->last_fenced_req, NULL);
-
- obj->active = 0;
drm_gem_object_unreference(&obj->base);
-
- WARN_ON(i915_verify_lists(dev));
-}
-
-static void
-i915_gem_object_retire(struct drm_i915_gem_object *obj)
-{
- if (obj->last_read_req == NULL)
- return;
-
- if (i915_gem_request_completed(obj->last_read_req, true))
- i915_gem_object_move_to_inactive(obj);
}
static int
@@ -2483,20 +2570,6 @@ int __i915_add_request(struct intel_engine_cs *ring,
return 0;
}
-static inline void
-i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
-{
- struct drm_i915_file_private *file_priv = request->file_priv;
-
- if (!file_priv)
- return;
-
- spin_lock(&file_priv->mm.lock);
- list_del(&request->client_list);
- request->file_priv = NULL;
- spin_unlock(&file_priv->mm.lock);
-}
-
static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
const struct intel_context *ctx)
{
@@ -2542,16 +2615,6 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
}
}
-static void i915_gem_free_request(struct drm_i915_gem_request *request)
-{
- list_del(&request->list);
- i915_gem_request_remove_from_client(request);
-
- put_pid(request->pid);
-
- i915_gem_request_unreference(request);
-}
-
void i915_gem_request_free(struct kref *req_ref)
{
struct drm_i915_gem_request *req = container_of(req_ref,
@@ -2576,38 +2639,38 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
struct intel_context *ctx)
{
struct drm_i915_private *dev_priv = to_i915(ring->dev);
- struct drm_i915_gem_request *rq;
+ struct drm_i915_gem_request *req;
int ret;
if (ring->outstanding_lazy_request)
return 0;
- rq = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
- if (rq == NULL)
+ req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
+ if (req == NULL)
return -ENOMEM;
- kref_init(&rq->ref);
- rq->i915 = dev_priv;
+ kref_init(&req->ref);
+ req->i915 = dev_priv;
- ret = i915_gem_get_seqno(ring->dev, &rq->seqno);
- if (ret) {
- kfree(rq);
- return ret;
- }
+ ret = i915_gem_get_seqno(ring->dev, &req->seqno);
+ if (ret)
+ goto err;
- rq->ring = ring;
+ req->ring = ring;
if (i915.enable_execlists)
- ret = intel_logical_ring_alloc_request_extras(rq, ctx);
+ ret = intel_logical_ring_alloc_request_extras(req, ctx);
else
- ret = intel_ring_alloc_request_extras(rq);
- if (ret) {
- kfree(rq);
- return ret;
- }
+ ret = intel_ring_alloc_request_extras(req);
+ if (ret)
+ goto err;
- ring->outstanding_lazy_request = rq;
+ ring->outstanding_lazy_request = req;
return 0;
+
+err:
+ kmem_cache_free(dev_priv->requests, req);
+ return ret;
}
struct drm_i915_gem_request *
@@ -2652,9 +2715,9 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
obj = list_first_entry(&ring->active_list,
struct drm_i915_gem_object,
- ring_list);
+ ring_list[ring->id]);
- i915_gem_object_move_to_inactive(obj);
+ i915_gem_object_retire__read(obj, ring->id);
}
/*
@@ -2690,7 +2753,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
struct drm_i915_gem_request,
list);
- i915_gem_free_request(request);
+ i915_gem_request_retire(request);
}
/* This may not have been flushed before the reset, so clean it now */
@@ -2738,6 +2801,8 @@ void i915_gem_reset(struct drm_device *dev)
i915_gem_context_reset(dev);
i915_gem_restore_fences(dev);
+
+ WARN_ON(i915_verify_lists(dev));
}
/**
@@ -2746,11 +2811,11 @@ void i915_gem_reset(struct drm_device *dev)
void
i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
{
- if (list_empty(&ring->request_list))
- return;
-
WARN_ON(i915_verify_lists(ring->dev));
+ if (list_empty(&ring->active_list))
+ return;
+
/* Retire requests first as we use it above for the early return.
* If we retire requests last, we may use a later seqno and so clear
* the requests lists without clearing the active list, leading to
@@ -2766,16 +2831,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
if (!i915_gem_request_completed(request, true))
break;
- trace_i915_gem_request_retire(request);
-
- /* We know the GPU must have read the request to have
- * sent us the seqno + interrupt, so use the position
- * of tail of the request to update the last known position
- * of the GPU head.
- */
- request->ringbuf->last_retired_head = request->postfix;
-
- i915_gem_free_request(request);
+ i915_gem_request_retire(request);
}
/* Move any buffers on the active list that are no longer referenced
@@ -2787,12 +2843,12 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
obj = list_first_entry(&ring->active_list,
struct drm_i915_gem_object,
- ring_list);
+ ring_list[ring->id]);
- if (!i915_gem_request_completed(obj->last_read_req, true))
+ if (!list_empty(&obj->last_read_req[ring->id]->list))
break;
- i915_gem_object_move_to_inactive(obj);
+ i915_gem_object_retire__read(obj, ring->id);
}
if (unlikely(ring->trace_irq_req &&
@@ -2887,17 +2943,30 @@ i915_gem_idle_work_handler(struct work_struct *work)
static int
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
{
- struct intel_engine_cs *ring;
- int ret;
+ int ret, i;
+
+ if (!obj->active)
+ return 0;
+
+ for (i = 0; i < I915_NUM_RINGS; i++) {
+ struct drm_i915_gem_request *req;
+
+ req = obj->last_read_req[i];
+ if (req == NULL)
+ continue;
- if (obj->active) {
- ring = i915_gem_request_get_ring(obj->last_read_req);
+ if (list_empty(&req->list))
+ goto retire;
- ret = i915_gem_check_olr(obj->last_read_req);
+ ret = i915_gem_check_olr(req);
if (ret)
return ret;
- i915_gem_retire_requests_ring(ring);
+ if (i915_gem_request_completed(req, true)) {
+ __i915_gem_request_retire__upto(req);
+retire:
+ i915_gem_object_retire__read(obj, i);
+ }
}
return 0;
@@ -2931,9 +3000,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_gem_wait *args = data;
struct drm_i915_gem_object *obj;
- struct drm_i915_gem_request *req;
+ struct drm_i915_gem_request *req[I915_NUM_RINGS];
unsigned reset_counter;
- int ret = 0;
+ int i, n = 0;
+ int ret;
if (args->flags != 0)
return -EINVAL;
@@ -2953,11 +3023,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (ret)
goto out;
- if (!obj->active || !obj->last_read_req)
+ if (!obj->active)
goto out;
- req = obj->last_read_req;
-
/* Do this after OLR check to make sure we make forward progress polling
* on this IOCTL with a timeout == 0 (like busy ioctl)
*/
@@ -2968,13 +3036,23 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
drm_gem_object_unreference(&obj->base);
reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
- i915_gem_request_reference(req);
+
+ for (i = 0; i < I915_NUM_RINGS; i++) {
+ if (obj->last_read_req[i] == NULL)
+ continue;
+
+ req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
+ }
+
mutex_unlock(&dev->struct_mutex);
- ret = __i915_wait_request(req, reset_counter, true,
- args->timeout_ns > 0 ? &args->timeout_ns : NULL,
- file->driver_priv);
- i915_gem_request_unreference__unlocked(req);
+ for (i = 0; i < n; i++) {
+ if (ret == 0)
+ ret = __i915_wait_request(req[i], reset_counter, true,
+ args->timeout_ns > 0 ? &args->timeout_ns : NULL,
+ file->driver_priv);
+ i915_gem_request_unreference__unlocked(req[i]);
+ }
return ret;
out:
@@ -2983,6 +3061,59 @@ out:
return ret;
}
+static int
+__i915_gem_object_sync(struct drm_i915_gem_object *obj,
+ struct intel_engine_cs *to,
+ struct drm_i915_gem_request *req)
+{
+ struct intel_engine_cs *from;
+ int ret;
+
+ from = i915_gem_request_get_ring(req);
+ if (to == from)
+ return 0;
+
+ if (i915_gem_request_completed(req, true))
+ return 0;
+
+ ret = i915_gem_check_olr(req);
+ if (ret)
+ return ret;
+
+ if (!i915_semaphore_is_enabled(obj->base.dev)) {
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ ret = __i915_wait_request(req,
+ atomic_read(&i915->gpu_error.reset_counter),
+ i915->mm.interruptible,
+ NULL,
+ &i915->rps.semaphores);
+ if (ret)
+ return ret;
+
+ i915_gem_object_retire_request(obj, req);
+ } else {
+ int idx = intel_ring_sync_index(from, to);
+ u32 seqno = i915_gem_request_get_seqno(req);
+
+ if (seqno <= from->semaphore.sync_seqno[idx])
+ return 0;
+
+ trace_i915_gem_ring_sync_to(from, to, req);
+ ret = to->semaphore.sync_to(to, from, seqno);
+ if (ret)
+ return ret;
+
+ /* We use last_read_req because sync_to()
+ * might have just caused seqno wrap under
+ * the radar.
+ */
+ from->semaphore.sync_seqno[idx] =
+ i915_gem_request_get_seqno(obj->last_read_req[from->id]);
+ }
+
+ return 0;
+}
+
/**
* i915_gem_object_sync - sync an object to a ring.
*
@@ -2991,7 +3122,17 @@ out:
*
* This code is meant to abstract object synchronization with the GPU.
* Calling with NULL implies synchronizing the object with the CPU
- * rather than a particular GPU ring.
+ * rather than a particular GPU ring. Conceptually we serialise writes
+ * between engines inside the GPU. We only allow one engine to write
+ * into a buffer at any time, but multiple readers. To ensure each has
+ * a coherent view of memory, we must:
+ *
+ * - If there is an outstanding write request to the object, the new
+ * request must wait for it to complete (either CPU or in hw, requests
+ * on the same ring will be naturally ordered).
+ *
+ * - If we are a write request (pending_write_domain is set), the new
+ * request must wait for outstanding read requests to complete.
*
* Returns 0 if successful, else propagates up the lower layer error.
*/
@@ -2999,41 +3140,32 @@ int
i915_gem_object_sync(struct drm_i915_gem_object *obj,
struct intel_engine_cs *to)
{
- struct intel_engine_cs *from;
- u32 seqno;
- int ret, idx;
-
- from = i915_gem_request_get_ring(obj->last_read_req);
-
- if (from == NULL || to == from)
- return 0;
+ const bool readonly = obj->base.pending_write_domain == 0;
+ struct drm_i915_gem_request *req[I915_NUM_RINGS];
+ int ret, i, n;
- if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev))
- return i915_gem_object_wait_rendering(obj, false);
-
- idx = intel_ring_sync_index(from, to);
-
- seqno = i915_gem_request_get_seqno(obj->last_read_req);
- /* Optimization: Avoid semaphore sync when we are sure we already
- * waited for an object with higher seqno */
- if (seqno <= from->semaphore.sync_seqno[idx])
+ if (!obj->active)
return 0;
- ret = i915_gem_check_olr(obj->last_read_req);
- if (ret)
- return ret;
+ if (to == NULL)
+ return i915_gem_object_wait_rendering(obj, readonly);
- trace_i915_gem_ring_sync_to(from, to, obj->last_read_req);
- ret = to->semaphore.sync_to(to, from, seqno);
- if (!ret)
- /* We use last_read_req because sync_to()
- * might have just caused seqno wrap under
- * the radar.
- */
- from->semaphore.sync_seqno[idx] =
- i915_gem_request_get_seqno(obj->last_read_req);
+ n = 0;
+ if (readonly) {
+ if (obj->last_write_req)
+ req[n++] = obj->last_write_req;
+ } else {
+ for (i = 0; i < I915_NUM_RINGS; i++)
+ if (obj->last_read_req[i])
+ req[n++] = obj->last_read_req[i];
+ }
+ for (i = 0; i < n; i++) {
+ ret = __i915_gem_object_sync(obj, to, req[i]);
+ if (ret)
+ return ret;
+ }
- return ret;
+ return 0;
}
static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
@@ -3079,7 +3211,7 @@ int i915_vma_unbind(struct i915_vma *vma)
BUG_ON(obj->pages == NULL);
- ret = i915_gem_object_finish_gpu(obj);
+ ret = i915_gem_object_wait_rendering(obj, false);
if (ret)
return ret;
/* Continue on if we fail due to EIO, the GPU is hung so we
@@ -3119,10 +3251,6 @@ int i915_vma_unbind(struct i915_vma *vma)
/* Since the unbound list is global, only move to that list if
* no more VMAs exist. */
if (list_empty(&obj->vma_list)) {
- /* Throw away the active reference before
- * moving to the unbound list. */
- i915_gem_object_retire(obj);
-
i915_gem_gtt_finish_object(obj);
list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
}
@@ -3155,6 +3283,7 @@ int i915_gpu_idle(struct drm_device *dev)
return ret;
}
+ WARN_ON(i915_verify_lists(dev));
return 0;
}
@@ -3777,8 +3906,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
if (ret)
return ret;
- i915_gem_object_retire(obj);
-
/* Flush and acquire obj->pages so that we are coherent through
* direct access in memory with previous cached writes through
* shmemfs and that our cache domain tracking remains valid.
@@ -3854,7 +3981,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
}
if (i915_gem_obj_bound_any(obj)) {
- ret = i915_gem_object_finish_gpu(obj);
+ ret = i915_gem_object_wait_rendering(obj, false);
if (ret)
return ret;
@@ -3976,11 +4103,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
u32 old_read_domains, old_write_domain;
int ret;
- if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) {
- ret = i915_gem_object_sync(obj, pipelined);
- if (ret)
- return ret;
- }
+ ret = i915_gem_object_sync(obj, pipelined);
+ if (ret)
+ return ret;
/* Mark the pin_display early so that we account for the
* display coherency whilst setting up the cache domains.
@@ -4045,23 +4170,6 @@ i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
obj->pin_display--;
}
-int
-i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
-{
- int ret;
-
- if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
- return 0;
-
- ret = i915_gem_object_wait_rendering(obj, false);
- if (ret)
- return ret;
-
- /* Ensure that we invalidate the GPU's caches and TLBs. */
- obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
- return 0;
-}
-
/**
* Moves a single object to the CPU read, and possibly write domain.
*
@@ -4081,7 +4189,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
if (ret)
return ret;
- i915_gem_object_retire(obj);
i915_gem_object_flush_gtt_write_domain(obj);
old_write_domain = obj->base.write_domain;
@@ -4132,7 +4239,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct drm_i915_file_private *file_priv = file->driver_priv;
- unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
+ unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
struct drm_i915_gem_request *request, *target = NULL;
unsigned reset_counter;
int ret;
@@ -4370,15 +4477,15 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
* necessary flushes here.
*/
ret = i915_gem_object_flush_active(obj);
+ if (ret)
+ goto unref;
- args->busy = obj->active;
- if (obj->last_read_req) {
- struct intel_engine_cs *ring;
- BUILD_BUG_ON(I915_NUM_RINGS > 16);
- ring = i915_gem_request_get_ring(obj->last_read_req);
- args->busy |= intel_ring_flag(ring) << 16;
- }
+ BUILD_BUG_ON(I915_NUM_RINGS > 16);
+ args->busy = obj->active << 16;
+ if (obj->last_write_req)
+ args->busy |= obj->last_write_req->ring->id;
+unref:
drm_gem_object_unreference(&obj->base);
unlock:
mutex_unlock(&dev->struct_mutex);
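For reference, a minimal userspace-side sketch of how the reworked busy flags above could be decoded (the helper names and stdint types are assumptions, not part of the uAPI; note the low bits read as 0 both when there is no writer and when the last writer is engine 0):

#include <stdbool.h>
#include <stdint.h>

/* High 16 bits: per-engine "still reading" mask, one bit per engine id. */
static inline bool busy_reading_on_engine(uint32_t busy, int engine)
{
	return busy & (1u << (16 + engine));
}

/* Low 16 bits: id of the engine holding the last write, if any. */
static inline int busy_write_engine(uint32_t busy)
{
	return busy & 0xffff;
}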
@@ -4452,8 +4559,11 @@ unlock:
void i915_gem_object_init(struct drm_i915_gem_object *obj,
const struct drm_i915_gem_object_ops *ops)
{
+ int i;
+
INIT_LIST_HEAD(&obj->global_list);
- INIT_LIST_HEAD(&obj->ring_list);
+ for (i = 0; i < I915_NUM_RINGS; i++)
+ INIT_LIST_HEAD(&obj->ring_list[i]);
INIT_LIST_HEAD(&obj->obj_exec_link);
INIT_LIST_HEAD(&obj->vma_list);
INIT_LIST_HEAD(&obj->batch_pool_link);
@@ -5112,10 +5222,10 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file)
}
spin_unlock(&file_priv->mm.lock);
- if (!list_empty(&file_priv->rps_boost)) {
- mutex_lock(&to_i915(dev)->rps.hw_lock);
- list_del(&file_priv->rps_boost);
- mutex_unlock(&to_i915(dev)->rps.hw_lock);
+ if (!list_empty(&file_priv->rps.link)) {
+ spin_lock(&to_i915(dev)->rps.client_lock);
+ list_del(&file_priv->rps.link);
+ spin_unlock(&to_i915(dev)->rps.client_lock);
}
}
@@ -5133,7 +5243,7 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file)
file->driver_priv = file_priv;
file_priv->dev_priv = dev->dev_private;
file_priv->file = file;
- INIT_LIST_HEAD(&file_priv->rps_boost);
+ INIT_LIST_HEAD(&file_priv->rps.link);
spin_lock_init(&file_priv->mm.lock);
INIT_LIST_HEAD(&file_priv->mm.request_list);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 5a47eb5e3c5d..8867818b1401 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -753,8 +753,6 @@ static int do_switch(struct intel_engine_cs *ring,
* swapped, but there is no way to do that yet.
*/
from->legacy_hw_ctx.rcs_state->dirty = 1;
- BUG_ON(i915_gem_request_get_ring(
- from->legacy_hw_ctx.rcs_state->last_read_req) != ring);
/* obj is kept alive until the next request by its active ref */
i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c
index f462d1b51d97..17299d04189f 100644
--- a/drivers/gpu/drm/i915/i915_gem_debug.c
+++ b/drivers/gpu/drm/i915/i915_gem_debug.c
@@ -34,82 +34,34 @@ int
i915_verify_lists(struct drm_device *dev)
{
static int warned;
- struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_object *obj;
+ struct intel_engine_cs *ring;
int err = 0;
+ int i;
if (warned)
return 0;
- list_for_each_entry(obj, &dev_priv->render_ring.active_list, list) {
- if (obj->base.dev != dev ||
- !atomic_read(&obj->base.refcount.refcount)) {
- DRM_ERROR("freed render active %p\n", obj);
- err++;
- break;
- } else if (!obj->active ||
- (obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) {
- DRM_ERROR("invalid render active %p (a %d r %x)\n",
- obj,
- obj->active,
- obj->base.read_domains);
- err++;
- } else if (obj->base.write_domain && list_empty(&obj->gpu_write_list)) {
- DRM_ERROR("invalid render active %p (w %x, gwl %d)\n",
- obj,
- obj->base.write_domain,
- !list_empty(&obj->gpu_write_list));
- err++;
- }
- }
-
- list_for_each_entry(obj, &dev_priv->mm.flushing_list, list) {
- if (obj->base.dev != dev ||
- !atomic_read(&obj->base.refcount.refcount)) {
- DRM_ERROR("freed flushing %p\n", obj);
- err++;
- break;
- } else if (!obj->active ||
- (obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0 ||
- list_empty(&obj->gpu_write_list)) {
- DRM_ERROR("invalid flushing %p (a %d w %x gwl %d)\n",
- obj,
- obj->active,
- obj->base.write_domain,
- !list_empty(&obj->gpu_write_list));
- err++;
- }
- }
-
- list_for_each_entry(obj, &dev_priv->mm.gpu_write_list, gpu_write_list) {
- if (obj->base.dev != dev ||
- !atomic_read(&obj->base.refcount.refcount)) {
- DRM_ERROR("freed gpu write %p\n", obj);
- err++;
- break;
- } else if (!obj->active ||
- (obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) {
- DRM_ERROR("invalid gpu write %p (a %d w %x)\n",
- obj,
- obj->active,
- obj->base.write_domain);
- err++;
- }
- }
-
- list_for_each_entry(obj, &i915_gtt_vm->inactive_list, list) {
- if (obj->base.dev != dev ||
- !atomic_read(&obj->base.refcount.refcount)) {
- DRM_ERROR("freed inactive %p\n", obj);
- err++;
- break;
- } else if (obj->pin_count || obj->active ||
- (obj->base.write_domain & I915_GEM_GPU_DOMAINS)) {
- DRM_ERROR("invalid inactive %p (p %d a %d w %x)\n",
- obj,
- obj->pin_count, obj->active,
- obj->base.write_domain);
- err++;
+ for_each_ring(ring, dev_priv, i) {
+ list_for_each_entry(obj, &ring->active_list, ring_list[ring->id]) {
+ if (obj->base.dev != dev ||
+ !atomic_read(&obj->base.refcount.refcount)) {
+ DRM_ERROR("%s: freed active obj %p\n",
+ ring->name, obj);
+ err++;
+ break;
+ } else if (!obj->active ||
+ obj->last_read_req[ring->id] == NULL) {
+ DRM_ERROR("%s: invalid active obj %p\n",
+ ring->name, obj);
+ err++;
+ } else if (obj->base.write_domain) {
+ DRM_ERROR("%s: invalid write obj %p (w %x)\n",
+ ring->name,
+ obj, obj->base.write_domain);
+ err++;
+ }
}
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 560c79a8a43d..bd0e4bda2c64 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -889,6 +889,7 @@ static int
i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
struct list_head *vmas)
{
+ const unsigned other_rings = ~intel_ring_flag(ring);
struct i915_vma *vma;
uint32_t flush_domains = 0;
bool flush_chipset = false;
@@ -896,9 +897,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_object *obj = vma->obj;
- ret = i915_gem_object_sync(obj, ring);
- if (ret)
- return ret;
+
+ if (obj->active & other_rings) {
+ ret = i915_gem_object_sync(obj, ring);
+ if (ret)
+ return ret;
+ }
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
flush_chipset |= i915_gem_clflush_object(obj, false);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e3bcc3ba7e40..619dad1b2386 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -757,7 +757,7 @@ static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt,
WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
/* FIXME: upper bound must not overflow 32 bits */
- WARN_ON((start + length) >= (1ULL << 32));
+ WARN_ON((start + length) > (1ULL << 32));
gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
if (pd)
@@ -952,6 +952,13 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->base.start = 0;
ppgtt->base.total = 1ULL << 32;
+ if (IS_ENABLED(CONFIG_X86_32))
+ /* While we have a proliferation of size_t variables
+ * we cannot represent the full ppgtt size on 32bit,
+ * so limit it to the same size as the GGTT (currently
+ * 2GiB).
+ */
+ ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total;
ppgtt->base.cleanup = gen8_ppgtt_cleanup;
ppgtt->base.allocate_va_range = gen8_alloc_va_range;
ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 4039ede158be..1f4e5a32a16e 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -219,11 +219,14 @@ i915_mmu_notifier_add(struct drm_device *dev,
struct i915_mmu_object *mo)
{
struct interval_tree_node *it;
- int ret;
+ int ret = 0;
- ret = i915_mutex_lock_interruptible(dev);
- if (ret)
- return ret;
+ /* By this point we have already done a lot of expensive setup that
+ * we do not want to repeat just because the caller (e.g. X) has a
+ * signal pending (and partly because of that expensive setup, X
+ * using an interrupt timer is likely to get stuck in an EINTR loop).
+ */
+ mutex_lock(&dev->struct_mutex);
/* Make sure we drop the final active reference (and thereby
* remove the objects from the interval tree) before we do
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a3e330d2a1d8..6f4256918f76 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -192,15 +192,20 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
struct drm_i915_error_buffer *err,
int count)
{
+ int i;
+
err_printf(m, " %s [%d]:\n", name, count);
while (count--) {
- err_printf(m, " %08x %8u %02x %02x %x %x",
+ err_printf(m, " %08x %8u %02x %02x [ ",
err->gtt_offset,
err->size,
err->read_domains,
- err->write_domain,
- err->rseqno, err->wseqno);
+ err->write_domain);
+ for (i = 0; i < I915_NUM_RINGS; i++)
+ err_printf(m, "%02x ", err->rseqno[i]);
+
+ err_printf(m, "] %02x", err->wseqno);
err_puts(m, pin_flag(err->pinned));
err_puts(m, tiling_flag(err->tiling));
err_puts(m, dirty_flag(err->dirty));
@@ -681,10 +686,12 @@ static void capture_bo(struct drm_i915_error_buffer *err,
struct i915_vma *vma)
{
struct drm_i915_gem_object *obj = vma->obj;
+ int i;
err->size = obj->base.size;
err->name = obj->base.name;
- err->rseqno = i915_gem_request_get_seqno(obj->last_read_req);
+ for (i = 0; i < I915_NUM_RINGS; i++)
+ err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read_req[i]);
err->wseqno = i915_gem_request_get_seqno(obj->last_write_req);
err->gtt_offset = vma->node.start;
err->read_domains = obj->base.read_domains;
@@ -697,8 +704,8 @@ static void capture_bo(struct drm_i915_error_buffer *err,
err->dirty = obj->dirty;
err->purgeable = obj->madv != I915_MADV_WILLNEED;
err->userptr = obj->userptr.mm != NULL;
- err->ring = obj->last_read_req ?
- i915_gem_request_get_ring(obj->last_read_req)->id : -1;
+ err->ring = obj->last_write_req ?
+ i915_gem_request_get_ring(obj->last_write_req)->id : -1;
err->cache_level = obj->cache_level;
}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9da955e4f355..e6bb72dca3ff 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -79,7 +79,7 @@ static const u32 hpd_status_g4x[HPD_NUM_PINS] = {
[HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS
};
-static const u32 hpd_status_i915[HPD_NUM_PINS] = { /* i915 and valleyview are the same */
+static const u32 hpd_status_i915[HPD_NUM_PINS] = {
[HPD_CRT] = CRT_HOTPLUG_INT_STATUS,
[HPD_SDVO_B] = SDVOB_HOTPLUG_INT_STATUS_I915,
[HPD_SDVO_C] = SDVOC_HOTPLUG_INT_STATUS_I915,
@@ -1070,12 +1070,25 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
return events;
}
+static bool any_waiters(struct drm_i915_private *dev_priv)
+{
+ struct intel_engine_cs *ring;
+ int i;
+
+ for_each_ring(ring, dev_priv, i)
+ if (ring->irq_refcount)
+ return true;
+
+ return false;
+}
+
static void gen6_pm_rps_work(struct work_struct *work)
{
struct drm_i915_private *dev_priv =
container_of(work, struct drm_i915_private, rps.work);
+ bool client_boost;
+ int new_delay, adj, min, max;
u32 pm_iir;
- int new_delay, adj;
spin_lock_irq(&dev_priv->irq_lock);
/* Speed up work cancelation during disabling rps interrupts. */
@@ -1087,12 +1100,14 @@ static void gen6_pm_rps_work(struct work_struct *work)
dev_priv->rps.pm_iir = 0;
/* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
+ client_boost = dev_priv->rps.client_boost;
+ dev_priv->rps.client_boost = false;
spin_unlock_irq(&dev_priv->irq_lock);
/* Make sure we didn't queue anything we're not going to process. */
WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
- if ((pm_iir & dev_priv->pm_rps_events) == 0)
+ if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
return;
mutex_lock(&dev_priv->rps.hw_lock);
@@ -1101,7 +1116,13 @@ static void gen6_pm_rps_work(struct work_struct *work)
adj = dev_priv->rps.last_adj;
new_delay = dev_priv->rps.cur_freq;
- if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
+ min = dev_priv->rps.min_freq_softlimit;
+ max = dev_priv->rps.max_freq_softlimit;
+
+ if (client_boost) {
+ new_delay = dev_priv->rps.max_freq_softlimit;
+ adj = 0;
+ } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
if (adj > 0)
adj *= 2;
else /* CHV needs even encode values */
@@ -1114,6 +1135,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
new_delay = dev_priv->rps.efficient_freq;
adj = 0;
}
+ } else if (any_waiters(dev_priv)) {
+ adj = 0;
} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq)
new_delay = dev_priv->rps.efficient_freq;
@@ -1135,9 +1158,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
* interrupt
*/
new_delay += adj;
- new_delay = clamp_t(int, new_delay,
- dev_priv->rps.min_freq_softlimit,
- dev_priv->rps.max_freq_softlimit);
+ new_delay = clamp_t(int, new_delay, min, max);
intel_set_rps(dev_priv->dev, new_delay);
@@ -1386,7 +1407,7 @@ static int i915_port_to_hotplug_shift(enum port port)
}
}
-static inline enum port get_port_from_pin(enum hpd_pin pin)
+static enum port get_port_from_pin(enum hpd_pin pin)
{
switch (pin) {
case HPD_PORT_B:
@@ -1400,10 +1421,10 @@ static inline enum port get_port_from_pin(enum hpd_pin pin)
}
}
-static inline void intel_hpd_irq_handler(struct drm_device *dev,
- u32 hotplug_trigger,
- u32 dig_hotplug_reg,
- const u32 hpd[HPD_NUM_PINS])
+static void intel_hpd_irq_handler(struct drm_device *dev,
+ u32 hotplug_trigger,
+ u32 dig_hotplug_reg,
+ const u32 hpd[HPD_NUM_PINS])
{
struct drm_i915_private *dev_priv = dev->dev_private;
int i;
@@ -1743,7 +1764,7 @@ static void i9xx_hpd_irq_handler(struct drm_device *dev)
*/
POSTING_READ(PORT_HOTPLUG_STAT);
- if (IS_G4X(dev)) {
+ if (IS_G4X(dev) || IS_VALLEYVIEW(dev)) {
u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X;
intel_hpd_irq_handler(dev, hotplug_trigger, 0, hpd_status_g4x);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 58627a319416..6d3fead3a358 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -595,10 +595,6 @@ enum punit_power_well {
PUNIT_POWER_WELL_DPIO_RX0 = 10,
PUNIT_POWER_WELL_DPIO_RX1 = 11,
PUNIT_POWER_WELL_DPIO_CMN_D = 12,
- /* FIXME: guesswork below */
- PUNIT_POWER_WELL_DPIO_TX_D_LANES_01 = 13,
- PUNIT_POWER_WELL_DPIO_TX_D_LANES_23 = 14,
- PUNIT_POWER_WELL_DPIO_RX2 = 15,
PUNIT_POWER_WELL_NUM,
};
@@ -1204,6 +1200,12 @@ enum skl_disp_power_wells {
#define PORT_PLL_GAIN_CTL(x) ((x) << 16)
/* PORT_PLL_8_A */
#define PORT_PLL_TARGET_CNT_MASK 0x3FF
+/* PORT_PLL_9_A */
+#define PORT_PLL_LOCK_THRESHOLD_MASK 0xe
+/* PORT_PLL_10_A */
+#define PORT_PLL_DCO_AMP_OVR_EN_H (1<<27)
+#define PORT_PLL_DCO_AMP_MASK 0x3c00
+#define PORT_PLL_DCO_AMP(x) ((x)<<10)
#define _PORT_PLL_BASE(port) _PORT3(port, _PORT_PLL_0_A, \
_PORT_PLL_0_B, \
_PORT_PLL_0_C)
@@ -1455,6 +1457,8 @@ enum skl_disp_power_wells {
#define RING_HWS_PGA(base) ((base)+0x80)
#define RING_HWS_PGA_GEN6(base) ((base)+0x2080)
+#define HSW_GTT_CACHE_EN 0x4024
+#define GTT_CACHE_EN_ALL 0xF0007FFF
#define GEN7_WR_WATERMARK 0x4028
#define GEN7_GFX_PRIO_CTRL 0x402C
#define ARB_MODE 0x4030
@@ -2137,6 +2141,10 @@ enum skl_disp_power_wells {
#define DPIO_PHY_STATUS (VLV_DISPLAY_BASE + 0x6240)
#define DPLL_PORTD_READY_MASK (0xf)
#define DISPLAY_PHY_CONTROL (VLV_DISPLAY_BASE + 0x60100)
+#define PHY_LDO_DELAY_0NS 0x0
+#define PHY_LDO_DELAY_200NS 0x1
+#define PHY_LDO_DELAY_600NS 0x2
+#define PHY_LDO_SEQ_DELAY(delay, phy) ((delay) << (2*(phy)+23))
#define PHY_CH_SU_PSR 0x1
#define PHY_CH_DEEP_PSR 0x7
#define PHY_CH_POWER_MODE(mode, phy, ch) ((mode) << (6*(phy)+3*(ch)+2))
@@ -5167,6 +5175,8 @@ enum skl_disp_power_wells {
#define _PLANE_KEYMAX_2_A 0x702a0
#define _PLANE_BUF_CFG_1_A 0x7027c
#define _PLANE_BUF_CFG_2_A 0x7037c
+#define _PLANE_NV12_BUF_CFG_1_A 0x70278
+#define _PLANE_NV12_BUF_CFG_2_A 0x70378
#define _PLANE_CTL_1_B 0x71180
#define _PLANE_CTL_2_B 0x71280
@@ -5253,6 +5263,15 @@ enum skl_disp_power_wells {
#define PLANE_BUF_CFG(pipe, plane) \
_PLANE(plane, _PLANE_BUF_CFG_1(pipe), _PLANE_BUF_CFG_2(pipe))
+#define _PLANE_NV12_BUF_CFG_1_B 0x71278
+#define _PLANE_NV12_BUF_CFG_2_B 0x71378
+#define _PLANE_NV12_BUF_CFG_1(pipe) \
+ _PIPE(pipe, _PLANE_NV12_BUF_CFG_1_A, _PLANE_NV12_BUF_CFG_1_B)
+#define _PLANE_NV12_BUF_CFG_2(pipe) \
+ _PIPE(pipe, _PLANE_NV12_BUF_CFG_2_A, _PLANE_NV12_BUF_CFG_2_B)
+#define PLANE_NV12_BUF_CFG(pipe, plane) \
+ _PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe))
+
/* SKL new cursor registers */
#define _CUR_BUF_CFG_A 0x7017c
#define _CUR_BUF_CFG_B 0x7117c
@@ -5774,6 +5793,7 @@ enum skl_disp_power_wells {
/* GEN8 chicken */
#define HDC_CHICKEN0 0x7300
+#define HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE (1<<15)
#define HDC_FENCE_DEST_SLM_DISABLE (1<<14)
#define HDC_DONOT_FETCH_MEM_WHEN_MASKED (1<<11)
#define HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT (1<<5)
@@ -6422,6 +6442,7 @@ enum skl_disp_power_wells {
#define TRANS_DP_PORT_SEL_D (2<<29)
#define TRANS_DP_PORT_SEL_NONE (3<<29)
#define TRANS_DP_PORT_SEL_MASK (3<<29)
+#define TRANS_DP_PIPE_TO_PORT(val) ((((val) & TRANS_DP_PORT_SEL_MASK) >> 29) + PORT_B)
#define TRANS_DP_AUDIO_ONLY (1<<26)
#define TRANS_DP_ENH_FRAMING (1<<18)
#define TRANS_DP_8BPC (0<<9)
@@ -6681,6 +6702,9 @@ enum skl_disp_power_wells {
#define GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT 8
#define GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT 16
#define GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT 24
+#define SKL_PCODE_CDCLK_CONTROL 0x7
+#define SKL_CDCLK_PREPARE_FOR_CHANGE 0x3
+#define SKL_CDCLK_READY_FOR_CHANGE 0x1
#define GEN6_PCODE_WRITE_MIN_FREQ_TABLE 0x8
#define GEN6_PCODE_READ_MIN_FREQ_TABLE 0x9
#define GEN6_READ_OC_PARAMS 0xc
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index cee596d0a6a2..198fc3c3291b 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -36,10 +36,11 @@
static int panel_type;
-static void *
-find_section(struct bdb_header *bdb, int section_id)
+static const void *
+find_section(const void *_bdb, int section_id)
{
- u8 *base = (u8 *)bdb;
+ const struct bdb_header *bdb = _bdb;
+ const u8 *base = _bdb;
int index = 0;
u16 total, current_size;
u8 current_id;
@@ -53,7 +54,7 @@ find_section(struct bdb_header *bdb, int section_id)
current_id = *(base + index);
index++;
- current_size = *((u16 *)(base + index));
+ current_size = *((const u16 *)(base + index));
index += 2;
if (index + current_size > total)
@@ -69,7 +70,7 @@ find_section(struct bdb_header *bdb, int section_id)
}
static u16
-get_blocksize(void *p)
+get_blocksize(const void *p)
{
u16 *block_ptr, block_size;
@@ -204,7 +205,7 @@ get_lvds_fp_timing(const struct bdb_header *bdb,
/* Try to find integrated panel data */
static void
parse_lfp_panel_data(struct drm_i915_private *dev_priv,
- struct bdb_header *bdb)
+ const struct bdb_header *bdb)
{
const struct bdb_lvds_options *lvds_options;
const struct bdb_lvds_lfp_data *lvds_lfp_data;
@@ -310,7 +311,8 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv,
}
static void
-parse_lfp_backlight(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+parse_lfp_backlight(struct drm_i915_private *dev_priv,
+ const struct bdb_header *bdb)
{
const struct bdb_lfp_backlight_data *backlight_data;
const struct bdb_lfp_backlight_data_entry *entry;
@@ -348,9 +350,9 @@ parse_lfp_backlight(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
/* Try to find sdvo panel data */
static void
parse_sdvo_panel_data(struct drm_i915_private *dev_priv,
- struct bdb_header *bdb)
+ const struct bdb_header *bdb)
{
- struct lvds_dvo_timing *dvo_timing;
+ const struct lvds_dvo_timing *dvo_timing;
struct drm_display_mode *panel_fixed_mode;
int index;
@@ -361,7 +363,7 @@ parse_sdvo_panel_data(struct drm_i915_private *dev_priv,
}
if (index == -1) {
- struct bdb_sdvo_lvds_options *sdvo_lvds_options;
+ const struct bdb_sdvo_lvds_options *sdvo_lvds_options;
sdvo_lvds_options = find_section(bdb, BDB_SDVO_LVDS_OPTIONS);
if (!sdvo_lvds_options)
@@ -402,10 +404,10 @@ static int intel_bios_ssc_frequency(struct drm_device *dev,
static void
parse_general_features(struct drm_i915_private *dev_priv,
- struct bdb_header *bdb)
+ const struct bdb_header *bdb)
{
struct drm_device *dev = dev_priv->dev;
- struct bdb_general_features *general;
+ const struct bdb_general_features *general;
general = find_section(bdb, BDB_GENERAL_FEATURES);
if (general) {
@@ -428,9 +430,9 @@ parse_general_features(struct drm_i915_private *dev_priv,
static void
parse_general_definitions(struct drm_i915_private *dev_priv,
- struct bdb_header *bdb)
+ const struct bdb_header *bdb)
{
- struct bdb_general_definitions *general;
+ const struct bdb_general_definitions *general;
general = find_section(bdb, BDB_GENERAL_DEFINITIONS);
if (general) {
@@ -447,19 +449,19 @@ parse_general_definitions(struct drm_i915_private *dev_priv,
}
}
-static union child_device_config *
-child_device_ptr(struct bdb_general_definitions *p_defs, int i)
+static const union child_device_config *
+child_device_ptr(const struct bdb_general_definitions *p_defs, int i)
{
- return (void *) &p_defs->devices[i * p_defs->child_dev_size];
+ return (const void *) &p_defs->devices[i * p_defs->child_dev_size];
}
static void
parse_sdvo_device_mapping(struct drm_i915_private *dev_priv,
- struct bdb_header *bdb)
+ const struct bdb_header *bdb)
{
struct sdvo_device_mapping *p_mapping;
- struct bdb_general_definitions *p_defs;
- union child_device_config *p_child;
+ const struct bdb_general_definitions *p_defs;
+ const union child_device_config *p_child;
int i, child_device_num, count;
u16 block_size;
@@ -545,9 +547,9 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv,
static void
parse_driver_features(struct drm_i915_private *dev_priv,
- struct bdb_header *bdb)
+ const struct bdb_header *bdb)
{
- struct bdb_driver_features *driver;
+ const struct bdb_driver_features *driver;
driver = find_section(bdb, BDB_DRIVER_FEATURES);
if (!driver)
@@ -571,11 +573,11 @@ parse_driver_features(struct drm_i915_private *dev_priv,
}
static void
-parse_edp(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+parse_edp(struct drm_i915_private *dev_priv, const struct bdb_header *bdb)
{
- struct bdb_edp *edp;
- struct edp_power_seq *edp_pps;
- struct edp_link_params *edp_link_params;
+ const struct bdb_edp *edp;
+ const struct edp_power_seq *edp_pps;
+ const struct edp_link_params *edp_link_params;
edp = find_section(bdb, BDB_EDP);
if (!edp) {
@@ -683,10 +685,10 @@ parse_edp(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
}
static void
-parse_psr(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+parse_psr(struct drm_i915_private *dev_priv, const struct bdb_header *bdb)
{
- struct bdb_psr *psr;
- struct psr_table *psr_table;
+ const struct bdb_psr *psr;
+ const struct psr_table *psr_table;
psr = find_section(bdb, BDB_PSR);
if (!psr) {
@@ -794,13 +796,14 @@ static u8 *goto_next_sequence(u8 *data, int *size)
}
static void
-parse_mipi(struct drm_i915_private *dev_priv, struct bdb_header *bdb)
+parse_mipi(struct drm_i915_private *dev_priv, const struct bdb_header *bdb)
{
- struct bdb_mipi_config *start;
- struct bdb_mipi_sequence *sequence;
- struct mipi_config *config;
- struct mipi_pps_data *pps;
- u8 *data, *seq_data;
+ const struct bdb_mipi_config *start;
+ const struct bdb_mipi_sequence *sequence;
+ const struct mipi_config *config;
+ const struct mipi_pps_data *pps;
+ u8 *data;
+ const u8 *seq_data;
int i, panel_id, seq_size;
u16 block_size;
@@ -944,7 +947,7 @@ err:
}
static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
- struct bdb_header *bdb)
+ const struct bdb_header *bdb)
{
union child_device_config *it, *child = NULL;
struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port];
@@ -1046,7 +1049,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
}
static void parse_ddi_ports(struct drm_i915_private *dev_priv,
- struct bdb_header *bdb)
+ const struct bdb_header *bdb)
{
struct drm_device *dev = dev_priv->dev;
enum port port;
@@ -1066,10 +1069,11 @@ static void parse_ddi_ports(struct drm_i915_private *dev_priv,
static void
parse_device_mapping(struct drm_i915_private *dev_priv,
- struct bdb_header *bdb)
+ const struct bdb_header *bdb)
{
- struct bdb_general_definitions *p_defs;
- union child_device_config *p_child, *child_dev_ptr;
+ const struct bdb_general_definitions *p_defs;
+ const union child_device_config *p_child;
+ union child_device_config *child_dev_ptr;
int i, child_device_num, count;
u16 block_size;
@@ -1126,8 +1130,7 @@ parse_device_mapping(struct drm_i915_private *dev_priv,
child_dev_ptr = dev_priv->vbt.child_dev + count;
count++;
- memcpy((void *)child_dev_ptr, (void *)p_child,
- sizeof(*p_child));
+ memcpy(child_dev_ptr, p_child, sizeof(*p_child));
}
return;
}
@@ -1196,19 +1199,22 @@ static const struct dmi_system_id intel_no_opregion_vbt[] = {
{ }
};
-static struct bdb_header *validate_vbt(char *base, size_t size,
- struct vbt_header *vbt,
- const char *source)
+static const struct bdb_header *validate_vbt(const void __iomem *_base,
+ size_t size,
+ const void __iomem *_vbt,
+ const char *source)
{
- size_t offset;
- struct bdb_header *bdb;
-
- if (vbt == NULL) {
- DRM_DEBUG_DRIVER("VBT signature missing\n");
- return NULL;
- }
+ /*
+ * This is the one place where we explicitly discard the address space
+ * (__iomem) of the BIOS/VBT. (And this will cause a sparse complaint.)
+ * From now on everything is based on 'base', and treated as regular
+ * memory.
+ */
+ const void *base = (const void *) _base;
+ size_t offset = _vbt - _base;
+ const struct vbt_header *vbt = base + offset;
+ const struct bdb_header *bdb;
- offset = (char *)vbt - base;
if (offset + sizeof(struct vbt_header) > size) {
DRM_DEBUG_DRIVER("VBT header incomplete\n");
return NULL;
@@ -1225,7 +1231,7 @@ static struct bdb_header *validate_vbt(char *base, size_t size,
return NULL;
}
- bdb = (struct bdb_header *)(base + offset);
+ bdb = base + offset;
if (offset + bdb->bdb_size > size) {
DRM_DEBUG_DRIVER("BDB incomplete\n");
return NULL;
@@ -1236,6 +1242,22 @@ static struct bdb_header *validate_vbt(char *base, size_t size,
return bdb;
}
+static const struct bdb_header *find_vbt(void __iomem *bios, size_t size)
+{
+ const struct bdb_header *bdb = NULL;
+ size_t i;
+
+ /* Scour memory looking for the VBT signature. */
+ for (i = 0; i + 4 < size; i++) {
+ if (ioread32(bios + i) == *((const u32 *) "$VBT")) {
+ bdb = validate_vbt(bios, size, bios + i, "PCI ROM");
+ break;
+ }
+ }
+
+ return bdb;
+}
+
/**
* intel_parse_bios - find VBT and initialize settings from the BIOS
* @dev: DRM device
@@ -1250,7 +1272,7 @@ intel_parse_bios(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
struct pci_dev *pdev = dev->pdev;
- struct bdb_header *bdb = NULL;
+ const struct bdb_header *bdb = NULL;
u8 __iomem *bios = NULL;
if (HAS_PCH_NOP(dev))
@@ -1260,27 +1282,17 @@ intel_parse_bios(struct drm_device *dev)
/* XXX Should this validation be moved to intel_opregion.c? */
if (!dmi_check_system(intel_no_opregion_vbt) && dev_priv->opregion.vbt)
- bdb = validate_vbt((char *)dev_priv->opregion.header, OPREGION_SIZE,
- (struct vbt_header *)dev_priv->opregion.vbt,
- "OpRegion");
+ bdb = validate_vbt(dev_priv->opregion.header, OPREGION_SIZE,
+ dev_priv->opregion.vbt, "OpRegion");
if (bdb == NULL) {
- size_t i, size;
+ size_t size;
bios = pci_map_rom(pdev, &size);
if (!bios)
return -1;
- /* Scour memory looking for the VBT signature */
- for (i = 0; i + 4 < size; i++) {
- if (memcmp(bios + i, "$VBT", 4) == 0) {
- bdb = validate_vbt(bios, size,
- (struct vbt_header *)(bios + i),
- "PCI ROM");
- break;
- }
- }
-
+ bdb = find_vbt(bios, size);
if (!bdb) {
pci_unmap_rom(pdev, bios);
return -1;
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 93bb5159d093..521af2c069cb 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -207,6 +207,14 @@ static void intel_disable_crt(struct intel_encoder *encoder)
intel_crt_set_dpms(encoder, DRM_MODE_DPMS_OFF);
}
+static void pch_disable_crt(struct intel_encoder *encoder)
+{
+}
+
+static void pch_post_disable_crt(struct intel_encoder *encoder)
+{
+ intel_disable_crt(encoder);
+}
static void hsw_crt_post_disable(struct intel_encoder *encoder)
{
@@ -888,7 +896,12 @@ void intel_crt_init(struct drm_device *dev)
crt->adpa_reg = ADPA;
crt->base.compute_config = intel_crt_compute_config;
- crt->base.disable = intel_disable_crt;
+ if (HAS_PCH_SPLIT(dev) && !HAS_DDI(dev)) {
+ crt->base.disable = pch_disable_crt;
+ crt->base.post_disable = pch_post_disable_crt;
+ } else {
+ crt->base.disable = intel_disable_crt;
+ }
crt->base.enable = intel_enable_crt;
if (I915_HAS_HOTPLUG(dev))
crt->base.hpd_pin = HPD_CRT;
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 9311cddb86e6..5cb8cc18994a 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -25,6 +25,22 @@
#include "i915_drv.h"
#include "i915_reg.h"
+/**
+ * DOC: csr support for dmc
+ *
+ * Display Context Save and Restore (CSR) firmware support is added from gen9
+ * onwards to drive the newly added DMC (Display microcontroller) in the
+ * display engine, which saves and restores the display engine state when it
+ * enters a low-power state and comes back to normal.
+ *
+ * The firmware loading status will be one of the following states:
+ * FW_UNINITIALIZED, FW_LOADED or FW_FAILED.
+ *
+ * Once the firmware is written into the registers the status moves from
+ * FW_UNINITIALIZED to FW_LOADED; on any erroneous condition the status is
+ * moved to FW_FAILED.
+ */
+
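A minimal sketch of the call flow the comment above describes, assuming some resume-path hook; only the intel_csr_* calls and the csr_state values come from this series, the wrapper function is illustrative:

static void example_resume_display(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* The firmware is read from disk once at init; skip the reload
	 * if it never loaded or failed to load.
	 */
	if (intel_csr_load_status_get(dev_priv) != FW_LOADED)
		return;

	/* Copy the cached firmware back into the DMC registers. */
	intel_csr_load_program(dev);
}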
#define I915_CSR_SKL "i915/skl_dmc_ver4.bin"
MODULE_FIRMWARE(I915_CSR_SKL);
@@ -183,6 +199,14 @@ static char intel_get_substepping(struct drm_device *dev)
return -ENODATA;
}
+/**
+ * intel_csr_load_status_get() - get the firmware loading status.
+ * @dev_priv: i915 device.
+ *
+ * This function returns the current firmware loading status.
+ *
+ * Return: Firmware loading status.
+ */
enum csr_state intel_csr_load_status_get(struct drm_i915_private *dev_priv)
{
enum csr_state state;
@@ -194,6 +218,13 @@ enum csr_state intel_csr_load_status_get(struct drm_i915_private *dev_priv)
return state;
}
+/**
+ * intel_csr_load_status_set() - set the firmware loading status.
+ * @dev_priv: i915 device.
+ * @state: enumeration of firmware loading status.
+ *
+ * Set the firmware loading status.
+ */
void intel_csr_load_status_set(struct drm_i915_private *dev_priv,
enum csr_state state)
{
@@ -202,6 +233,14 @@ void intel_csr_load_status_set(struct drm_i915_private *dev_priv,
mutex_unlock(&dev_priv->csr_lock);
}
+/**
+ * intel_csr_load_program() - write the firmware from memory to registers.
+ * @dev: drm device.
+ *
+ * CSR firmware is read from a .bin file and kept in internal memory once.
+ * Every time the display comes back from a low-power state this function is
+ * called to copy the firmware from internal memory to the registers.
+ */
void intel_csr_load_program(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -359,6 +398,13 @@ out:
release_firmware(fw);
}
+/**
+ * intel_csr_ucode_init() - initialize the firmware loading.
+ * @dev: drm device.
+ *
+ * This function is called when the display driver loads, to read the
+ * firmware from a .bin file and copy it into internal memory.
+ */
void intel_csr_ucode_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -393,6 +439,13 @@ void intel_csr_ucode_init(struct drm_device *dev)
}
}
+/**
+ * intel_csr_ucode_fini() - unload the CSR firmware.
+ * @dev: drm device.
+ *
+ * Firmware unloading includes freeing the internal memory and resetting the
+ * firmware loading status.
+ */
void intel_csr_ucode_fini(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 807e15d41a1b..cacb07b7a8f1 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -1087,6 +1087,9 @@ hsw_ddi_pll_select(struct intel_crtc *intel_crtc,
WRPLL_DIVIDER_REFERENCE(r2) | WRPLL_DIVIDER_FEEDBACK(n2) |
WRPLL_DIVIDER_POST(p);
+ memset(&crtc_state->dpll_hw_state, 0,
+ sizeof(crtc_state->dpll_hw_state));
+
crtc_state->dpll_hw_state.wrpll = val;
pll = intel_get_shared_dpll(intel_crtc, crtc_state);
@@ -1309,6 +1312,9 @@ skl_ddi_pll_select(struct intel_crtc *intel_crtc,
} else /* eDP */
return true;
+ memset(&crtc_state->dpll_hw_state, 0,
+ sizeof(crtc_state->dpll_hw_state));
+
crtc_state->dpll_hw_state.ctrl1 = ctrl1;
crtc_state->dpll_hw_state.cfgcr1 = cfgcr1;
crtc_state->dpll_hw_state.cfgcr2 = cfgcr2;
@@ -1334,22 +1340,17 @@ struct bxt_clk_div {
uint32_t m2_frac;
bool m2_frac_en;
uint32_t n;
- uint32_t prop_coef;
- uint32_t int_coef;
- uint32_t gain_ctl;
- uint32_t targ_cnt;
- uint32_t lanestagger;
};
/* pre-calculated values for DP linkrates */
static struct bxt_clk_div bxt_dp_clk_val[7] = {
- /* 162 */ {4, 2, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd},
- /* 270 */ {4, 1, 27, 0, 0, 1, 3, 8, 1, 9, 0xd},
- /* 540 */ {2, 1, 27, 0, 0, 1, 3, 8, 1, 9, 0x18},
- /* 216 */ {3, 2, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd},
- /* 243 */ {4, 1, 24, 1258291, 1, 1, 5, 11, 2, 9, 0xd},
- /* 324 */ {4, 1, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd},
- /* 432 */ {3, 1, 32, 1677722, 1, 1, 5, 11, 2, 9, 0x18}
+ /* 162 */ {4, 2, 32, 1677722, 1, 1},
+ /* 270 */ {4, 1, 27, 0, 0, 1},
+ /* 540 */ {2, 1, 27, 0, 0, 1},
+ /* 216 */ {3, 2, 32, 1677722, 1, 1},
+ /* 243 */ {4, 1, 24, 1258291, 1, 1},
+ /* 324 */ {4, 1, 32, 1677722, 1, 1},
+ /* 432 */ {3, 1, 32, 1677722, 1, 1}
};
static bool
@@ -1360,6 +1361,9 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
{
struct intel_shared_dpll *pll;
struct bxt_clk_div clk_div = {0};
+ int vco = 0;
+ uint32_t prop_coef, int_coef, gain_ctl, targ_cnt;
+ uint32_t dcoampovr_en_h, dco_amp, lanestagger;
if (intel_encoder->type == INTEL_OUTPUT_HDMI) {
intel_clock_t best_clock;
@@ -1383,21 +1387,7 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
clk_div.m2_frac = best_clock.m2 & ((1 << 22) - 1);
clk_div.m2_frac_en = clk_div.m2_frac != 0;
- /* FIXME: set coef, gain, targcnt based on freq band */
- clk_div.prop_coef = 5;
- clk_div.int_coef = 11;
- clk_div.gain_ctl = 2;
- clk_div.targ_cnt = 9;
- if (clock > 270000)
- clk_div.lanestagger = 0x18;
- else if (clock > 135000)
- clk_div.lanestagger = 0x0d;
- else if (clock > 67000)
- clk_div.lanestagger = 0x07;
- else if (clock > 33000)
- clk_div.lanestagger = 0x04;
- else
- clk_div.lanestagger = 0x02;
+ vco = best_clock.vco;
} else if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT ||
intel_encoder->type == INTEL_OUTPUT_EDP) {
struct drm_encoder *encoder = &intel_encoder->base;
@@ -1417,8 +1407,48 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
clk_div = bxt_dp_clk_val[0];
DRM_ERROR("Unknown link rate\n");
}
+ vco = clock * 10 / 2 * clk_div.p1 * clk_div.p2;
+ }
+
+ dco_amp = 15;
+ dcoampovr_en_h = 0;
+ if (vco >= 6200000 && vco <= 6480000) {
+ prop_coef = 4;
+ int_coef = 9;
+ gain_ctl = 3;
+ targ_cnt = 8;
+ } else if ((vco > 5400000 && vco < 6200000) ||
+ (vco >= 4800000 && vco < 5400000)) {
+ prop_coef = 5;
+ int_coef = 11;
+ gain_ctl = 3;
+ targ_cnt = 9;
+ if (vco >= 4800000 && vco < 5400000)
+ dcoampovr_en_h = 1;
+ } else if (vco == 5400000) {
+ prop_coef = 3;
+ int_coef = 8;
+ gain_ctl = 1;
+ targ_cnt = 9;
+ } else {
+ DRM_ERROR("Invalid VCO\n");
+ return false;
}
+ memset(&crtc_state->dpll_hw_state, 0,
+ sizeof(crtc_state->dpll_hw_state));
+
+ if (clock > 270000)
+ lanestagger = 0x18;
+ else if (clock > 135000)
+ lanestagger = 0x0d;
+ else if (clock > 67000)
+ lanestagger = 0x07;
+ else if (clock > 33000)
+ lanestagger = 0x04;
+ else
+ lanestagger = 0x02;
+
crtc_state->dpll_hw_state.ebb0 =
PORT_PLL_P1(clk_div.p1) | PORT_PLL_P2(clk_div.p2);
crtc_state->dpll_hw_state.pll0 = clk_div.m2_int;
@@ -1430,14 +1460,19 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
PORT_PLL_M2_FRAC_ENABLE;
crtc_state->dpll_hw_state.pll6 =
- clk_div.prop_coef | PORT_PLL_INT_COEFF(clk_div.int_coef);
+ prop_coef | PORT_PLL_INT_COEFF(int_coef);
crtc_state->dpll_hw_state.pll6 |=
- PORT_PLL_GAIN_CTL(clk_div.gain_ctl);
+ PORT_PLL_GAIN_CTL(gain_ctl);
+
+ crtc_state->dpll_hw_state.pll8 = targ_cnt;
- crtc_state->dpll_hw_state.pll8 = clk_div.targ_cnt;
+ if (dcoampovr_en_h)
+ crtc_state->dpll_hw_state.pll10 = PORT_PLL_DCO_AMP_OVR_EN_H;
+
+ crtc_state->dpll_hw_state.pll10 |= PORT_PLL_DCO_AMP(dco_amp);
crtc_state->dpll_hw_state.pcsdw12 =
- LANESTAGGER_STRAP_OVRD | clk_div.lanestagger;
+ LANESTAGGER_STRAP_OVRD | lanestagger;
pll = intel_get_shared_dpll(intel_crtc, crtc_state);
if (pll == NULL) {
@@ -2367,10 +2402,16 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv,
temp |= pll->config.hw_state.pll8;
I915_WRITE(BXT_PORT_PLL(port, 8), temp);
- /*
- * FIXME: program PORT_PLL_9/i_lockthresh according to the latest
- * specification update.
- */
+ temp = I915_READ(BXT_PORT_PLL(port, 9));
+ temp &= ~PORT_PLL_LOCK_THRESHOLD_MASK;
+ temp |= (5 << 1);
+ I915_WRITE(BXT_PORT_PLL(port, 9), temp);
+
+ temp = I915_READ(BXT_PORT_PLL(port, 10));
+ temp &= ~PORT_PLL_DCO_AMP_OVR_EN_H;
+ temp &= ~PORT_PLL_DCO_AMP_MASK;
+ temp |= pll->config.hw_state.pll10;
+ I915_WRITE(BXT_PORT_PLL(port, 10), temp);
/* Recalibrate with new settings */
temp = I915_READ(BXT_PORT_PLL_EBB_4(port));
@@ -2434,6 +2475,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
hw_state->pll3 = I915_READ(BXT_PORT_PLL(port, 3));
hw_state->pll6 = I915_READ(BXT_PORT_PLL(port, 6));
hw_state->pll8 = I915_READ(BXT_PORT_PLL(port, 8));
+ hw_state->pll10 = I915_READ(BXT_PORT_PLL(port, 10));
/*
* While we write to the group register to program all lanes at once we
* can read only lane registers. We configure all lanes the same way, so
@@ -2468,6 +2510,7 @@ void intel_ddi_pll_init(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t val = I915_READ(LCPLL_CTL);
+ int cdclk_freq;
if (IS_SKYLAKE(dev))
skl_shared_dplls_init(dev_priv);
@@ -2476,12 +2519,15 @@ void intel_ddi_pll_init(struct drm_device *dev)
else
hsw_shared_dplls_init(dev_priv);
- DRM_DEBUG_KMS("CDCLK running at %dKHz\n",
- dev_priv->display.get_display_clock_speed(dev));
+ cdclk_freq = dev_priv->display.get_display_clock_speed(dev);
+ DRM_DEBUG_KMS("CDCLK running at %dKHz\n", cdclk_freq);
if (IS_SKYLAKE(dev)) {
+ dev_priv->skl_boot_cdclk = cdclk_freq;
if (!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE))
DRM_ERROR("LCPLL1 is disabled\n");
+ else
+ intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS);
} else if (IS_BROXTON(dev)) {
broxton_init_cdclk(dev);
broxton_ddi_phy_init(dev);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 240092afc392..4e3f302d86f7 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -45,29 +45,33 @@
#include <drm/drm_rect.h>
#include <linux/dma_remapping.h>
-/* Primary plane formats supported by all gen */
-#define COMMON_PRIMARY_FORMATS \
- DRM_FORMAT_C8, \
- DRM_FORMAT_RGB565, \
- DRM_FORMAT_XRGB8888, \
- DRM_FORMAT_ARGB8888
-
/* Primary plane formats for gen <= 3 */
-static const uint32_t intel_primary_formats_gen2[] = {
- COMMON_PRIMARY_FORMATS,
+static const uint32_t i8xx_primary_formats[] = {
+ DRM_FORMAT_C8,
+ DRM_FORMAT_RGB565,
DRM_FORMAT_XRGB1555,
- DRM_FORMAT_ARGB1555,
+ DRM_FORMAT_XRGB8888,
};
/* Primary plane formats for gen >= 4 */
-static const uint32_t intel_primary_formats_gen4[] = {
- COMMON_PRIMARY_FORMATS, \
+static const uint32_t i965_primary_formats[] = {
+ DRM_FORMAT_C8,
+ DRM_FORMAT_RGB565,
+ DRM_FORMAT_XRGB8888,
DRM_FORMAT_XBGR8888,
+ DRM_FORMAT_XRGB2101010,
+ DRM_FORMAT_XBGR2101010,
+};
+
+static const uint32_t skl_primary_formats[] = {
+ DRM_FORMAT_C8,
+ DRM_FORMAT_RGB565,
+ DRM_FORMAT_XRGB8888,
+ DRM_FORMAT_XBGR8888,
+ DRM_FORMAT_ARGB8888,
DRM_FORMAT_ABGR8888,
DRM_FORMAT_XRGB2101010,
- DRM_FORMAT_ARGB2101010,
DRM_FORMAT_XBGR2101010,
- DRM_FORMAT_ABGR2101010,
};
/* Cursor formats */
@@ -1136,9 +1140,9 @@ static void assert_dsi_pll(struct drm_i915_private *dev_priv, bool state)
u32 val;
bool cur_state;
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
val = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
cur_state = val & DSI_PLL_VCO_EN;
I915_STATE_WARN(cur_state != state,
@@ -1657,13 +1661,15 @@ static void chv_enable_pll(struct intel_crtc *crtc,
BUG_ON(!IS_CHERRYVIEW(dev_priv->dev));
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
/* Enable back the 10bit clock to display controller */
tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
tmp |= DPIO_DCLKP_EN;
vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), tmp);
+ mutex_unlock(&dev_priv->sb_lock);
+
/*
* Need to wait > 100ns between dclkp clock enable bit and PLL enable.
*/
@@ -1679,8 +1685,6 @@ static void chv_enable_pll(struct intel_crtc *crtc,
/* not sure when this should be written */
I915_WRITE(DPLL_MD(pipe), pipe_config->dpll_hw_state.dpll_md);
POSTING_READ(DPLL_MD(pipe));
-
- mutex_unlock(&dev_priv->dpio_lock);
}
static int intel_num_dvo_pipes(struct drm_device *dev)
@@ -1822,7 +1826,7 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
I915_WRITE(DPLL(pipe), val);
POSTING_READ(DPLL(pipe));
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
/* Disable 10bit clock to display controller */
val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port));
@@ -1840,7 +1844,7 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val);
}
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
@@ -2206,20 +2210,6 @@ static void intel_disable_pipe(struct intel_crtc *crtc)
intel_wait_for_pipe_off(crtc);
}
-/*
- * Plane regs are double buffered, going from enabled->disabled needs a
- * trigger in order to latch. The display address reg provides this.
- */
-void intel_flush_primary_plane(struct drm_i915_private *dev_priv,
- enum plane plane)
-{
- struct drm_device *dev = dev_priv->dev;
- u32 reg = INTEL_INFO(dev)->gen >= 4 ? DSPSURF(plane) : DSPADDR(plane);
-
- I915_WRITE(reg, I915_READ(reg));
- POSTING_READ(reg);
-}
-
/**
* intel_enable_primary_hw_plane - enable the primary plane on a given pipe
* @plane: plane to be enabled
@@ -2702,26 +2692,21 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc,
dspcntr |= DISPPLANE_8BPP;
break;
case DRM_FORMAT_XRGB1555:
- case DRM_FORMAT_ARGB1555:
dspcntr |= DISPPLANE_BGRX555;
break;
case DRM_FORMAT_RGB565:
dspcntr |= DISPPLANE_BGRX565;
break;
case DRM_FORMAT_XRGB8888:
- case DRM_FORMAT_ARGB8888:
dspcntr |= DISPPLANE_BGRX888;
break;
case DRM_FORMAT_XBGR8888:
- case DRM_FORMAT_ABGR8888:
dspcntr |= DISPPLANE_RGBX888;
break;
case DRM_FORMAT_XRGB2101010:
- case DRM_FORMAT_ARGB2101010:
dspcntr |= DISPPLANE_BGRX101010;
break;
case DRM_FORMAT_XBGR2101010:
- case DRM_FORMAT_ABGR2101010:
dspcntr |= DISPPLANE_RGBX101010;
break;
default:
@@ -2817,19 +2802,15 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc,
dspcntr |= DISPPLANE_BGRX565;
break;
case DRM_FORMAT_XRGB8888:
- case DRM_FORMAT_ARGB8888:
dspcntr |= DISPPLANE_BGRX888;
break;
case DRM_FORMAT_XBGR8888:
- case DRM_FORMAT_ABGR8888:
dspcntr |= DISPPLANE_RGBX888;
break;
case DRM_FORMAT_XRGB2101010:
- case DRM_FORMAT_ARGB2101010:
dspcntr |= DISPPLANE_BGRX101010;
break;
case DRM_FORMAT_XBGR2101010:
- case DRM_FORMAT_ABGR2101010:
dspcntr |= DISPPLANE_RGBX101010;
break;
default:
@@ -2953,95 +2934,83 @@ void skl_detach_scalers(struct intel_crtc *intel_crtc)
u32 skl_plane_ctl_format(uint32_t pixel_format)
{
- u32 plane_ctl_format = 0;
switch (pixel_format) {
+ case DRM_FORMAT_C8:
+ return PLANE_CTL_FORMAT_INDEXED;
case DRM_FORMAT_RGB565:
- plane_ctl_format = PLANE_CTL_FORMAT_RGB_565;
- break;
+ return PLANE_CTL_FORMAT_RGB_565;
case DRM_FORMAT_XBGR8888:
- plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX;
- break;
+ return PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX;
case DRM_FORMAT_XRGB8888:
- plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888;
- break;
+ return PLANE_CTL_FORMAT_XRGB_8888;
/*
* XXX: For ARGB/ABGR formats we default to expecting scanout buffers
* to be already pre-multiplied. We need to add a knob (or a different
* DRM_FORMAT) for user-space to configure that.
*/
case DRM_FORMAT_ABGR8888:
- plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX |
+ return PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX |
PLANE_CTL_ALPHA_SW_PREMULTIPLY;
- break;
case DRM_FORMAT_ARGB8888:
- plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888 |
+ return PLANE_CTL_FORMAT_XRGB_8888 |
PLANE_CTL_ALPHA_SW_PREMULTIPLY;
- break;
case DRM_FORMAT_XRGB2101010:
- plane_ctl_format = PLANE_CTL_FORMAT_XRGB_2101010;
- break;
+ return PLANE_CTL_FORMAT_XRGB_2101010;
case DRM_FORMAT_XBGR2101010:
- plane_ctl_format = PLANE_CTL_ORDER_RGBX | PLANE_CTL_FORMAT_XRGB_2101010;
- break;
+ return PLANE_CTL_ORDER_RGBX | PLANE_CTL_FORMAT_XRGB_2101010;
case DRM_FORMAT_YUYV:
- plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YUYV;
- break;
+ return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YUYV;
case DRM_FORMAT_YVYU:
- plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YVYU;
- break;
+ return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YVYU;
case DRM_FORMAT_UYVY:
- plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_UYVY;
- break;
+ return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_UYVY;
case DRM_FORMAT_VYUY:
- plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_VYUY;
- break;
+ return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_VYUY;
default:
- BUG();
+ MISSING_CASE(pixel_format);
}
- return plane_ctl_format;
+
+ return 0;
}
u32 skl_plane_ctl_tiling(uint64_t fb_modifier)
{
- u32 plane_ctl_tiling = 0;
switch (fb_modifier) {
case DRM_FORMAT_MOD_NONE:
break;
case I915_FORMAT_MOD_X_TILED:
- plane_ctl_tiling = PLANE_CTL_TILED_X;
- break;
+ return PLANE_CTL_TILED_X;
case I915_FORMAT_MOD_Y_TILED:
- plane_ctl_tiling = PLANE_CTL_TILED_Y;
- break;
+ return PLANE_CTL_TILED_Y;
case I915_FORMAT_MOD_Yf_TILED:
- plane_ctl_tiling = PLANE_CTL_TILED_YF;
- break;
+ return PLANE_CTL_TILED_YF;
default:
MISSING_CASE(fb_modifier);
}
- return plane_ctl_tiling;
+
+ return 0;
}
u32 skl_plane_ctl_rotation(unsigned int rotation)
{
- u32 plane_ctl_rotation = 0;
switch (rotation) {
case BIT(DRM_ROTATE_0):
break;
+ /*
+ * DRM_ROTATE_ is counterclockwise, to stay compatible with Xrandr,
+ * while i915 HW rotation is clockwise; that's why the values are swapped.
+ */
case BIT(DRM_ROTATE_90):
- plane_ctl_rotation = PLANE_CTL_ROTATE_90;
- break;
+ return PLANE_CTL_ROTATE_270;
case BIT(DRM_ROTATE_180):
- plane_ctl_rotation = PLANE_CTL_ROTATE_180;
- break;
+ return PLANE_CTL_ROTATE_180;
case BIT(DRM_ROTATE_270):
- plane_ctl_rotation = PLANE_CTL_ROTATE_270;
- break;
+ return PLANE_CTL_ROTATE_90;
default:
MISSING_CASE(rotation);
}
- return plane_ctl_rotation;
+ return 0;
}
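Since DRM rotation follows the Xrandr counterclockwise convention while the hardware rotates clockwise, the 90 and 270 degree cases swap when translated, as the comment above notes. A small illustrative mapping table (plain C, not part of the patch):

/* Illustrative only: DRM (counterclockwise) -> HW (clockwise) rotation.
 * A 90 degree counterclockwise rotation is a 270 degree clockwise one. */
static const struct {
	unsigned int drm_deg;	/* requested via DRM_ROTATE_* */
	unsigned int hw_deg;	/* programmed via PLANE_CTL_ROTATE_* */
} skl_rotation_map[] = {
	{   0,   0 },
	{  90, 270 },
	{ 180, 180 },
	{ 270,  90 },
};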
static void skylake_update_primary_plane(struct drm_crtc *crtc,
@@ -3115,7 +3084,7 @@ static void skylake_update_primary_plane(struct drm_crtc *crtc,
if (intel_rotation_90_or_270(rotation)) {
/* stride = Surface height in tiles */
- tile_height = intel_tile_height(dev, fb->bits_per_pixel,
+ tile_height = intel_tile_height(dev, fb->pixel_format,
fb->modifier[0]);
stride = DIV_ROUND_UP(fb->height, tile_height);
x_offset = stride * tile_height - y - src_h;
@@ -3295,27 +3264,30 @@ void intel_finish_reset(struct drm_device *dev)
drm_modeset_unlock_all(dev);
}
-static int
+static void
intel_finish_fb(struct drm_framebuffer *old_fb)
{
struct drm_i915_gem_object *obj = intel_fb_obj(old_fb);
- struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
bool was_interruptible = dev_priv->mm.interruptible;
int ret;
/* Big Hammer, we also need to ensure that any pending
* MI_WAIT_FOR_EVENT inside a user batch buffer on the
* current scanout is retired before unpinning the old
- * framebuffer.
+ * framebuffer. Note that we rely on userspace rendering
+ * into the buffer attached to the pipe it is waiting
+ * on. If not, userspace generates a GPU hang with IPEHR
+ * pointing to the MI_WAIT_FOR_EVENT.
*
* This should only fail upon a hung GPU, in which case we
* can safely continue.
*/
dev_priv->mm.interruptible = false;
- ret = i915_gem_object_finish_gpu(obj);
+ ret = i915_gem_object_wait_rendering(obj, true);
dev_priv->mm.interruptible = was_interruptible;
- return ret;
+ WARN_ON(ret);
}
static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
@@ -3967,7 +3939,7 @@ static void lpt_program_iclkip(struct drm_crtc *crtc)
u32 divsel, phaseinc, auxdiv, phasedir = 0;
u32 temp;
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
/* It is necessary to ungate the pixclk gate prior to programming
* the divisors, and gate it back when it is done.
@@ -4044,7 +4016,7 @@ static void lpt_program_iclkip(struct drm_crtc *crtc)
I915_WRITE(PIXCLK_GATE, PIXCLK_GATE_UNGATE);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
static void ironlake_pch_transcoder_set_timings(struct intel_crtc *crtc,
@@ -4182,8 +4154,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc)
temp &= ~(TRANS_DP_PORT_SEL_MASK |
TRANS_DP_SYNC_MASK |
TRANS_DP_BPC_MASK);
- temp |= (TRANS_DP_OUTPUT_ENABLE |
- TRANS_DP_ENH_FRAMING);
+ temp |= TRANS_DP_OUTPUT_ENABLE;
temp |= bpc << 9; /* same format but at 11:9 */
if (crtc->mode.flags & DRM_MODE_FLAG_PHSYNC)
@@ -4517,9 +4488,10 @@ skl_update_scaler_users(
}
/* check colorkey */
- if (intel_plane && intel_plane->ckey.flags != I915_SET_COLORKEY_NONE) {
- DRM_DEBUG_KMS("PLANE:%d scaling with color key not allowed",
- intel_plane->base.base.id);
+ if (WARN_ON(intel_plane &&
+ intel_plane->ckey.flags != I915_SET_COLORKEY_NONE)) {
+ DRM_DEBUG_KMS("PLANE:%d scaling %ux%u->%ux%u not allowed with colorkey",
+ intel_plane->base.base.id, src_w, src_h, dst_w, dst_h);
return -EINVAL;
}
@@ -4532,9 +4504,7 @@ skl_update_scaler_users(
case DRM_FORMAT_ABGR8888:
case DRM_FORMAT_ARGB8888:
case DRM_FORMAT_XRGB2101010:
- case DRM_FORMAT_ARGB2101010:
case DRM_FORMAT_XBGR2101010:
- case DRM_FORMAT_ABGR2101010:
case DRM_FORMAT_YUYV:
case DRM_FORMAT_YVYU:
case DRM_FORMAT_UYVY:
@@ -4858,11 +4828,22 @@ intel_pre_disable_primary(struct drm_crtc *crtc)
static void intel_crtc_enable_planes(struct drm_crtc *crtc)
{
+ struct drm_device *dev = crtc->dev;
+ struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ int pipe = intel_crtc->pipe;
+
intel_enable_primary_hw_plane(crtc->primary, crtc);
intel_enable_sprite_planes(crtc);
intel_crtc_update_cursor(crtc, true);
intel_post_enable_primary(crtc);
+
+ /*
+ * FIXME: Once we grow proper nuclear flip support out of this we need
+ * to compute the mask of flip planes precisely. For the time being
+ * consider this a flip to a NULL plane.
+ */
+ intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe));
}
static void intel_crtc_disable_planes(struct drm_crtc *crtc)
@@ -5128,13 +5109,14 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc)
ironlake_pfit_disable(intel_crtc);
+ if (intel_crtc->config->has_pch_encoder)
+ ironlake_fdi_disable(crtc);
+
for_each_encoder_on_crtc(dev, crtc, encoder)
if (encoder->post_disable)
encoder->post_disable(encoder);
if (intel_crtc->config->has_pch_encoder) {
- ironlake_fdi_disable(crtc);
-
ironlake_disable_pch_transcoder(dev_priv, pipe);
if (HAS_PCH_CPT(dev)) {
@@ -5543,16 +5525,224 @@ void broxton_uninit_cdclk(struct drm_device *dev)
intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
}
+static const struct skl_cdclk_entry {
+ unsigned int freq;
+ unsigned int vco;
+} skl_cdclk_frequencies[] = {
+ { .freq = 308570, .vco = 8640 },
+ { .freq = 337500, .vco = 8100 },
+ { .freq = 432000, .vco = 8640 },
+ { .freq = 450000, .vco = 8100 },
+ { .freq = 540000, .vco = 8100 },
+ { .freq = 617140, .vco = 8640 },
+ { .freq = 675000, .vco = 8100 },
+};
+
+static unsigned int skl_cdclk_decimal(unsigned int freq)
+{
+ return (freq - 1000) / 500;
+}
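The decimal field is just the CDCLK frequency expressed in 0.5 MHz steps with a 1 MHz offset. A few worked values, derived purely from the formula above:

/* Worked examples of skl_cdclk_decimal() (from the formula, not the spec):
 *   337500 kHz -> (337500 - 1000) / 500 =  673
 *   450000 kHz -> (450000 - 1000) / 500 =  898
 *   675000 kHz -> (675000 - 1000) / 500 = 1348
 */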
+
+static unsigned int skl_cdclk_get_vco(unsigned int freq)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(skl_cdclk_frequencies); i++) {
+ const struct skl_cdclk_entry *e = &skl_cdclk_frequencies[i];
+
+ if (e->freq == freq)
+ return e->vco;
+ }
+
+ return 8100;
+}
+
+static void
+skl_dpll0_enable(struct drm_i915_private *dev_priv, unsigned int required_vco)
+{
+ unsigned int min_freq;
+ u32 val;
+
+ /* select the minimum CDCLK before enabling DPLL 0 */
+ val = I915_READ(CDCLK_CTL);
+ val &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK);
+ val |= CDCLK_FREQ_337_308;
+
+ if (required_vco == 8640)
+ min_freq = 308570;
+ else
+ min_freq = 337500;
+
+ val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_freq);
+
+ I915_WRITE(CDCLK_CTL, val);
+ POSTING_READ(CDCLK_CTL);
+
+ /*
+ * We always enable DPLL0 with the lowest link rate possible, but still
+ * taking into account the VCO required to operate the eDP panel at the
+ * desired frequency. The usual DP link rates operate with a VCO of
+ * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640.
+ * The modeset code is responsible for the selection of the exact link
+ * rate later on, with the constraint of choosing a frequency that
+ * works with required_vco.
+ */
+ val = I915_READ(DPLL_CTRL1);
+
+ val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) |
+ DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0));
+ val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0);
+ if (required_vco == 8640)
+ val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080,
+ SKL_DPLL0);
+ else
+ val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810,
+ SKL_DPLL0);
+
+ I915_WRITE(DPLL_CTRL1, val);
+ POSTING_READ(DPLL_CTRL1);
+
+ I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE);
+
+ if (wait_for(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK, 5))
+ DRM_ERROR("DPLL0 not locked\n");
+}
+
+static bool skl_cdclk_pcu_ready(struct drm_i915_private *dev_priv)
+{
+ int ret;
+ u32 val;
+
+ /* inform PCU we want to change CDCLK */
+ val = SKL_CDCLK_PREPARE_FOR_CHANGE;
+ mutex_lock(&dev_priv->rps.hw_lock);
+ ret = sandybridge_pcode_read(dev_priv, SKL_PCODE_CDCLK_CONTROL, &val);
+ mutex_unlock(&dev_priv->rps.hw_lock);
+
+ return ret == 0 && (val & SKL_CDCLK_READY_FOR_CHANGE);
+}
+
+static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv)
+{
+ unsigned int i;
+
+ for (i = 0; i < 15; i++) {
+ if (skl_cdclk_pcu_ready(dev_priv))
+ return true;
+ udelay(10);
+ }
+
+ return false;
+}
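The handshake above retries the mailbox read up to 15 times with a 10 µs delay, so the PCU gets roughly 150 µs to report readiness before the change is abandoned. A generic sketch of the same bounded-poll idiom (standalone C, names hypothetical):

/* Illustrative only: bounded polling with a fixed retry budget. */
static bool poll_until_ready(bool (*ready)(void *ctx), void *ctx,
			     unsigned int tries, unsigned int delay_us)
{
	while (tries--) {
		if (ready(ctx))
			return true;
		udelay(delay_us);	/* kernel delay; usleep() in userspace */
	}
	return false;
}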
+
+static void skl_set_cdclk(struct drm_i915_private *dev_priv, unsigned int freq)
+{
+ u32 freq_select, pcu_ack;
+
+ DRM_DEBUG_DRIVER("Changing CDCLK to %dKHz\n", freq);
+
+ if (!skl_cdclk_wait_for_pcu_ready(dev_priv)) {
+ DRM_ERROR("failed to inform PCU about cdclk change\n");
+ return;
+ }
+
+ /* set CDCLK_CTL */
+ switch (freq) {
+ case 450000:
+ case 432000:
+ freq_select = CDCLK_FREQ_450_432;
+ pcu_ack = 1;
+ break;
+ case 540000:
+ freq_select = CDCLK_FREQ_540;
+ pcu_ack = 2;
+ break;
+ case 308570:
+ case 337500:
+ default:
+ freq_select = CDCLK_FREQ_337_308;
+ pcu_ack = 0;
+ break;
+ case 617140:
+ case 675000:
+ freq_select = CDCLK_FREQ_675_617;
+ pcu_ack = 3;
+ break;
+ }
+
+ I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(freq));
+ POSTING_READ(CDCLK_CTL);
+
+ /* inform PCU of the change */
+ mutex_lock(&dev_priv->rps.hw_lock);
+ sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack);
+ mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
+void skl_uninit_cdclk(struct drm_i915_private *dev_priv)
+{
+ /* disable DBUF power */
+ I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST);
+ POSTING_READ(DBUF_CTL);
+
+ udelay(10);
+
+ if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE)
+ DRM_ERROR("DBuf power disable timeout\n");
+
+ /* disable DPLL0 */
+ I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE);
+ if (wait_for(!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK), 1))
+ DRM_ERROR("Couldn't disable DPLL0\n");
+
+ intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+}
+
+void skl_init_cdclk(struct drm_i915_private *dev_priv)
+{
+ u32 val;
+ unsigned int required_vco;
+
+ /* enable PCH reset handshake */
+ val = I915_READ(HSW_NDE_RSTWRN_OPT);
+ I915_WRITE(HSW_NDE_RSTWRN_OPT, val | RESET_PCH_HANDSHAKE_ENABLE);
+
+ /* enable PG1 and Misc I/O */
+ intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS);
+
+ /* DPLL0 already enabled!? */
+ if (I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE) {
+ DRM_DEBUG_DRIVER("DPLL0 already running\n");
+ return;
+ }
+
+ /* enable DPLL0 */
+ required_vco = skl_cdclk_get_vco(dev_priv->skl_boot_cdclk);
+ skl_dpll0_enable(dev_priv, required_vco);
+
+ /* set CDCLK to the frequency the BIOS chose */
+ skl_set_cdclk(dev_priv, dev_priv->skl_boot_cdclk);
+
+ /* enable DBUF power */
+ I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST);
+ POSTING_READ(DBUF_CTL);
+
+ udelay(10);
+
+ if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE))
+ DRM_ERROR("DBuf power enable timeout\n");
+}
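skl_init_cdclk() and skl_uninit_cdclk() are meant to be paired around display power transitions; the actual call sites are outside this excerpt, so the sketch below only illustrates the intended pairing:

/* Illustrative only: expected pairing of the two routines above. */
static void example_display_core_resume(struct drm_i915_private *dev_priv)
{
	skl_init_cdclk(dev_priv);	/* handshake, PG1, DPLL0, CDCLK, DBUF */
}

static void example_display_core_suspend(struct drm_i915_private *dev_priv)
{
	skl_uninit_cdclk(dev_priv);	/* DBUF off, DPLL0 off, drop PLLS power */
}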
+
/* returns HPLL frequency in kHz */
static int valleyview_get_vco(struct drm_i915_private *dev_priv)
{
int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 };
/* Obtain SKU information */
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
hpll_freq = vlv_cck_read(dev_priv, CCK_FUSE_REG) &
CCK_FUSE_HPLL_FREQ_MASK;
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
return vco_freq[hpll_freq] * 1000;
}
@@ -5601,12 +5791,13 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk)
}
mutex_unlock(&dev_priv->rps.hw_lock);
+ mutex_lock(&dev_priv->sb_lock);
+
if (cdclk == 400000) {
u32 divider;
divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1;
- mutex_lock(&dev_priv->dpio_lock);
/* adjust cdclk divider */
val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL);
val &= ~DISPLAY_FREQUENCY_VALUES;
@@ -5617,10 +5808,8 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk)
DISPLAY_FREQUENCY_STATUS) == (divider << DISPLAY_FREQUENCY_STATUS_SHIFT),
50))
DRM_ERROR("timed out waiting for CDclk change\n");
- mutex_unlock(&dev_priv->dpio_lock);
}
- mutex_lock(&dev_priv->dpio_lock);
/* adjust self-refresh exit latency value */
val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC);
val &= ~0x7f;
@@ -5634,7 +5823,8 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk)
else
val |= 3000 / 250; /* 3.0 usec */
vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val);
- mutex_unlock(&dev_priv->dpio_lock);
+
+ mutex_unlock(&dev_priv->sb_lock);
vlv_update_cdclk(dev);
}
@@ -6562,9 +6752,9 @@ static int valleyview_get_display_clock_speed(struct drm_device *dev)
if (dev_priv->hpll_freq == 0)
dev_priv->hpll_freq = valleyview_get_vco(dev_priv);
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
divider = val & DISPLAY_FREQUENCY_VALUES;
@@ -6906,7 +7096,7 @@ static void vlv_prepare_pll(struct intel_crtc *crtc,
u32 bestn, bestm1, bestm2, bestp1, bestp2;
u32 coreclk, reg_val;
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
bestn = pipe_config->dpll.n;
bestm1 = pipe_config->dpll.m1;
@@ -6984,7 +7174,7 @@ static void vlv_prepare_pll(struct intel_crtc *crtc,
vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW7(pipe), coreclk);
vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW11(pipe), 0x87871000);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
static void chv_update_pll(struct intel_crtc *crtc,
@@ -7029,7 +7219,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc,
I915_WRITE(dpll_reg,
pipe_config->dpll_hw_state.dpll & ~DPLL_VCO_ENABLE);
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
/* p1 and p2 divider */
vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW13(port),
@@ -7102,7 +7292,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc,
vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)) |
DPIO_AFC_RECAL);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
/**
@@ -7477,6 +7667,9 @@ static int i9xx_crtc_compute_clock(struct intel_crtc *crtc,
struct drm_connector_state *connector_state;
int i;
+ memset(&crtc_state->dpll_hw_state, 0,
+ sizeof(crtc_state->dpll_hw_state));
+
for_each_connector_in_state(state, connector, connector_state, i) {
if (connector_state->crtc != &crtc->base)
continue;
@@ -7600,9 +7793,9 @@ static void vlv_crtc_clock_get(struct intel_crtc *crtc,
if (!(pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE))
return;
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
mdiv = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW3(pipe));
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7;
clock.m2 = mdiv & DPIO_M2DIV_MASK;
@@ -7696,12 +7889,12 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc,
u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2;
int refclk = 100000;
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port));
pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port));
pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port));
pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port));
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0;
clock.m2 = ((pll_dw0 & 0xff) << 22) | (pll_dw2 & 0x3fffff);
@@ -8067,7 +8260,7 @@ static void lpt_enable_clkout_dp(struct drm_device *dev, bool with_spread,
with_fdi, "LP PCH doesn't have FDI\n"))
with_fdi = false;
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
tmp &= ~SBI_SSCCTL_DISABLE;
@@ -8093,7 +8286,7 @@ static void lpt_enable_clkout_dp(struct drm_device *dev, bool with_spread,
tmp |= SBI_GEN0_CFG_BUFFENABLE_DISABLE;
intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
/* Sequence to disable CLKOUT_DP */
@@ -8102,7 +8295,7 @@ static void lpt_disable_clkout_dp(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t reg, tmp;
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
reg = (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) ?
SBI_GEN0 : SBI_DBUFF0;
@@ -8121,7 +8314,7 @@ static void lpt_disable_clkout_dp(struct drm_device *dev)
intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
}
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
static void lpt_init_pch_refclk(struct drm_device *dev)
@@ -8518,6 +8711,9 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc,
bool is_lvds = false;
struct intel_shared_dpll *pll;
+ memset(&crtc_state->dpll_hw_state, 0,
+ sizeof(crtc_state->dpll_hw_state));
+
is_lvds = intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS);
WARN(!(HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)),
@@ -9353,6 +9549,12 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
}
pfit_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
+
+ if (INTEL_INFO(dev)->gen >= 9) {
+ pipe_config->scaler_state.scaler_id = -1;
+ pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX);
+ }
+
if (intel_display_power_is_enabled(dev_priv, pfit_domain)) {
if (INTEL_INFO(dev)->gen == 9)
skylake_get_pfit_config(crtc, pipe_config);
@@ -9360,10 +9562,6 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
ironlake_get_pfit_config(crtc, pipe_config);
else
MISSING_CASE(INTEL_INFO(dev)->gen);
-
- } else {
- pipe_config->scaler_state.scaler_id = -1;
- pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX);
}
if (IS_HASWELL(dev))
@@ -9868,7 +10066,7 @@ retry:
goto fail;
}
- crtc_state->base.enable = true;
+ crtc_state->base.active = crtc_state->base.enable = true;
if (!mode)
mode = &load_detect_mode;
@@ -9965,7 +10163,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector,
connector_state->best_encoder = NULL;
connector_state->crtc = NULL;
- crtc_state->base.enable = false;
+ crtc_state->base.enable = crtc_state->base.active = false;
ret = intel_modeset_setup_plane_state(state, crtc, NULL, NULL,
0, 0);
@@ -10690,7 +10888,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
else if (i915.enable_execlists)
return true;
else
- return ring != i915_gem_request_get_ring(obj->last_read_req);
+ return ring != i915_gem_request_get_ring(obj->last_write_req);
}
static void skl_do_mmio_flip(struct intel_crtc *intel_crtc)
@@ -10790,22 +10988,19 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc)
static void intel_mmio_flip_work_func(struct work_struct *work)
{
- struct intel_crtc *crtc =
- container_of(work, struct intel_crtc, mmio_flip.work);
- struct intel_mmio_flip *mmio_flip;
+ struct intel_mmio_flip *mmio_flip =
+ container_of(work, struct intel_mmio_flip, work);
- mmio_flip = &crtc->mmio_flip;
if (mmio_flip->req)
WARN_ON(__i915_wait_request(mmio_flip->req,
- crtc->reset_counter,
- false, NULL, NULL) != 0);
+ mmio_flip->crtc->reset_counter,
+ false, NULL,
+ &mmio_flip->i915->rps.mmioflips));
- intel_do_mmio_flip(crtc);
- if (mmio_flip->req) {
- mutex_lock(&crtc->base.dev->struct_mutex);
- i915_gem_request_assign(&mmio_flip->req, NULL);
- mutex_unlock(&crtc->base.dev->struct_mutex);
- }
+ intel_do_mmio_flip(mmio_flip->crtc);
+
+ i915_gem_request_unreference__unlocked(mmio_flip->req);
+ kfree(mmio_flip);
}
static int intel_queue_mmio_flip(struct drm_device *dev,
@@ -10815,12 +11010,18 @@ static int intel_queue_mmio_flip(struct drm_device *dev,
struct intel_engine_cs *ring,
uint32_t flags)
{
- struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+ struct intel_mmio_flip *mmio_flip;
+
+ mmio_flip = kmalloc(sizeof(*mmio_flip), GFP_KERNEL);
+ if (mmio_flip == NULL)
+ return -ENOMEM;
- i915_gem_request_assign(&intel_crtc->mmio_flip.req,
- obj->last_write_req);
+ mmio_flip->i915 = to_i915(dev);
+ mmio_flip->req = i915_gem_request_reference(obj->last_write_req);
+ mmio_flip->crtc = to_intel_crtc(crtc);
- schedule_work(&intel_crtc->mmio_flip.work);
+ INIT_WORK(&mmio_flip->work, intel_mmio_flip_work_func);
+ schedule_work(&mmio_flip->work);
return 0;
}
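With the embedded per-crtc mmio_flip gone, every MMIO flip now allocates its own work item that holds a request reference and frees itself once the flip is done. A generic sketch of that self-freeing work pattern (the struct and helpers here are hypothetical; only the shape matches the code above):

/* Illustrative only: a one-shot work item that owns a reference and
 * releases everything itself when it completes. */
struct async_op {
	struct work_struct work;
	struct my_ref *ref;		/* hypothetical; taken at queue time */
};

static void async_op_func(struct work_struct *work)
{
	struct async_op *op = container_of(work, struct async_op, work);

	wait_and_do_operation(op->ref);	/* hypothetical; may sleep */
	my_ref_put(op->ref);		/* drop the reference taken at queue time */
	kfree(op);			/* the work item frees itself */
}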
@@ -11005,7 +11206,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
ring = &dev_priv->ring[BCS];
} else if (INTEL_INFO(dev)->gen >= 7) {
- ring = i915_gem_request_get_ring(obj->last_read_req);
+ ring = i915_gem_request_get_ring(obj->last_write_req);
if (ring == NULL || ring->id != RCS)
ring = &dev_priv->ring[BCS];
} else {
@@ -11021,7 +11222,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
*/
ret = intel_pin_and_fence_fb_obj(crtc->primary, fb,
crtc->primary->state,
- mmio_flip ? i915_gem_request_get_ring(obj->last_read_req) : ring);
+ mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring);
if (ret)
goto cleanup_pending;
@@ -11037,6 +11238,12 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
i915_gem_request_assign(&work->flip_queued_req,
obj->last_write_req);
} else {
+ if (obj->last_write_req) {
+ ret = i915_gem_check_olr(obj->last_write_req);
+ if (ret)
+ goto cleanup_unpin;
+ }
+
ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring,
page_flip_flags);
if (ret)
@@ -11303,9 +11510,10 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc,
DRM_DEBUG_KMS("port clock: %d\n", pipe_config->port_clock);
DRM_DEBUG_KMS("pipe src size: %dx%d\n",
pipe_config->pipe_src_w, pipe_config->pipe_src_h);
- DRM_DEBUG_KMS("num_scalers: %d\n", crtc->num_scalers);
- DRM_DEBUG_KMS("scaler_users: 0x%x\n", pipe_config->scaler_state.scaler_users);
- DRM_DEBUG_KMS("scaler id: %d\n", pipe_config->scaler_state.scaler_id);
+ DRM_DEBUG_KMS("num_scalers: %d, scaler_users: 0x%x, scaler_id: %d\n",
+ crtc->num_scalers,
+ pipe_config->scaler_state.scaler_users,
+ pipe_config->scaler_state.scaler_id);
DRM_DEBUG_KMS("gmch pfit: control: 0x%08x, ratios: 0x%08x, lvds border: 0x%08x\n",
pipe_config->gmch_pfit.control,
pipe_config->gmch_pfit.pgm_ratios,
@@ -11317,6 +11525,39 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc,
DRM_DEBUG_KMS("ips: %i\n", pipe_config->ips_enabled);
DRM_DEBUG_KMS("double wide: %i\n", pipe_config->double_wide);
+ if (IS_BROXTON(dev)) {
+ DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: ebb0: 0x%x, "
+ "pll0: 0x%x, pll1: 0x%x, pll2: 0x%x, pll3: 0x%x, "
+ "pll6: 0x%x, pll8: 0x%x, pcsdw12: 0x%x\n",
+ pipe_config->ddi_pll_sel,
+ pipe_config->dpll_hw_state.ebb0,
+ pipe_config->dpll_hw_state.pll0,
+ pipe_config->dpll_hw_state.pll1,
+ pipe_config->dpll_hw_state.pll2,
+ pipe_config->dpll_hw_state.pll3,
+ pipe_config->dpll_hw_state.pll6,
+ pipe_config->dpll_hw_state.pll8,
+ pipe_config->dpll_hw_state.pcsdw12);
+ } else if (IS_SKYLAKE(dev)) {
+ DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: "
+ "ctrl1: 0x%x, cfgcr1: 0x%x, cfgcr2: 0x%x\n",
+ pipe_config->ddi_pll_sel,
+ pipe_config->dpll_hw_state.ctrl1,
+ pipe_config->dpll_hw_state.cfgcr1,
+ pipe_config->dpll_hw_state.cfgcr2);
+ } else if (HAS_DDI(dev)) {
+ DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: wrpll: 0x%x\n",
+ pipe_config->ddi_pll_sel,
+ pipe_config->dpll_hw_state.wrpll);
+ } else {
+ DRM_DEBUG_KMS("dpll_hw_state: dpll: 0x%x, dpll_md: 0x%x, "
+ "fp0: 0x%x, fp1: 0x%x\n",
+ pipe_config->dpll_hw_state.dpll,
+ pipe_config->dpll_hw_state.dpll_md,
+ pipe_config->dpll_hw_state.fp0,
+ pipe_config->dpll_hw_state.fp1);
+ }
+
DRM_DEBUG_KMS("planes on this crtc\n");
list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
intel_plane = to_intel_plane(plane);
@@ -11454,12 +11695,18 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
struct intel_crtc_scaler_state scaler_state;
struct intel_dpll_hw_state dpll_hw_state;
enum intel_dpll_id shared_dpll;
+ uint32_t ddi_pll_sel;
+
+ /* FIXME: before the switch to atomic started, a new pipe_config was
+ * kzalloc'd. Code that depends on any field being zero should be
+ * fixed, so that the crtc_state can be safely duplicated. For now,
+ * only fields that are known to not cause problems are preserved. */
- /* Clear only the intel specific part of the crtc state excluding scalers */
tmp_state = crtc_state->base;
scaler_state = crtc_state->scaler_state;
shared_dpll = crtc_state->shared_dpll;
dpll_hw_state = crtc_state->dpll_hw_state;
+ ddi_pll_sel = crtc_state->ddi_pll_sel;
memset(crtc_state, 0, sizeof *crtc_state);
@@ -11467,6 +11714,7 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state)
crtc_state->scaler_state = scaler_state;
crtc_state->shared_dpll = shared_dpll;
crtc_state->dpll_hw_state = dpll_hw_state;
+ crtc_state->ddi_pll_sel = ddi_pll_sel;
}
static int
@@ -12264,8 +12512,6 @@ static int __intel_set_mode_setup_plls(struct drm_atomic_state *state)
if (needs_modeset(crtc_state)) {
clear_pipes |= 1 << intel_crtc->pipe;
intel_crtc_state->shared_dpll = DPLL_ID_PRIVATE;
- memset(&intel_crtc_state->dpll_hw_state, 0,
- sizeof(intel_crtc_state->dpll_hw_state));
}
}
@@ -12342,7 +12588,6 @@ static int __intel_set_mode(struct drm_crtc *modeset_crtc,
continue;
if (!crtc_state->enable) {
- crtc_state->active = false;
intel_crtc_disable(crtc);
} else if (crtc->state->enable) {
intel_crtc_disable_planes(crtc);
@@ -12492,7 +12737,8 @@ void intel_crtc_restore_mode(struct drm_crtc *crtc)
continue;
}
- crtc_state->base.enable = intel_crtc->new_enabled;
+ crtc_state->base.active = crtc_state->base.enable =
+ intel_crtc->new_enabled;
if (&intel_crtc->base == crtc)
drm_mode_copy(&crtc_state->base.mode, &crtc->mode);
@@ -12617,11 +12863,16 @@ intel_modeset_stage_output_state(struct drm_device *dev,
}
for_each_crtc_in_state(state, crtc, crtc_state, i) {
+ bool has_connectors;
+
ret = drm_atomic_add_affected_connectors(state, crtc);
if (ret)
return ret;
- crtc_state->enable = drm_atomic_connectors_for_crtc(state, crtc);
+ has_connectors = !!drm_atomic_connectors_for_crtc(state, crtc);
+ if (has_connectors != crtc_state->enable)
+ crtc_state->enable =
+ crtc_state->active = has_connectors;
}
ret = intel_modeset_setup_plane_state(state, set->crtc, set->mode,
@@ -13008,8 +13259,11 @@ intel_check_primary_plane(struct drm_plane *plane,
intel_atomic_get_crtc_state(state->base.state, intel_crtc) : NULL;
if (INTEL_INFO(dev)->gen >= 9) {
- min_scale = 1;
- max_scale = skl_max_scale(intel_crtc, crtc_state);
+ /* use scaler when colorkey is not required */
+ if (to_intel_plane(plane)->ckey.flags == I915_SET_COLORKEY_NONE) {
+ min_scale = 1;
+ max_scale = skl_max_scale(intel_crtc, crtc_state);
+ }
can_position = true;
}
@@ -13251,8 +13505,8 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev,
primary->max_downscale = 1;
if (INTEL_INFO(dev)->gen >= 9) {
primary->can_scale = true;
+ state->scaler_id = -1;
}
- state->scaler_id = -1;
primary->pipe = pipe;
primary->plane = pipe;
primary->check_plane = intel_check_primary_plane;
@@ -13262,12 +13516,15 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev,
if (HAS_FBC(dev) && INTEL_INFO(dev)->gen < 4)
primary->plane = !pipe;
- if (INTEL_INFO(dev)->gen <= 3) {
- intel_primary_formats = intel_primary_formats_gen2;
- num_formats = ARRAY_SIZE(intel_primary_formats_gen2);
+ if (INTEL_INFO(dev)->gen >= 9) {
+ intel_primary_formats = skl_primary_formats;
+ num_formats = ARRAY_SIZE(skl_primary_formats);
+ } else if (INTEL_INFO(dev)->gen >= 4) {
+ intel_primary_formats = i965_primary_formats;
+ num_formats = ARRAY_SIZE(i965_primary_formats);
} else {
- intel_primary_formats = intel_primary_formats_gen4;
- num_formats = ARRAY_SIZE(intel_primary_formats_gen4);
+ intel_primary_formats = i8xx_primary_formats;
+ num_formats = ARRAY_SIZE(i8xx_primary_formats);
}
drm_universal_plane_init(dev, &primary->base, 0,
@@ -13434,7 +13691,6 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev,
cursor->max_downscale = 1;
cursor->pipe = pipe;
cursor->plane = pipe;
- state->scaler_id = -1;
cursor->check_plane = intel_check_cursor_plane;
cursor->commit_plane = intel_commit_cursor_plane;
cursor->disable_plane = intel_disable_cursor_plane;
@@ -13457,6 +13713,9 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev,
state->base.rotation);
}
+ if (INTEL_INFO(dev)->gen >= 9)
+ state->scaler_id = -1;
+
drm_plane_helper_add(&cursor->base, &intel_plane_helper_funcs);
return &cursor->base;
@@ -13550,8 +13809,6 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = &intel_crtc->base;
dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = &intel_crtc->base;
- INIT_WORK(&intel_crtc->mmio_flip.work, intel_mmio_flip_work_func);
-
drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
WARN_ON(drm_crtc_index(&intel_crtc->base) != intel_crtc->pipe);
@@ -13948,25 +14205,35 @@ static int intel_framebuffer_init(struct drm_device *dev,
case DRM_FORMAT_ARGB8888:
break;
case DRM_FORMAT_XRGB1555:
- case DRM_FORMAT_ARGB1555:
if (INTEL_INFO(dev)->gen > 3) {
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
return -EINVAL;
}
break;
- case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_ABGR8888:
+ if (!IS_VALLEYVIEW(dev) && INTEL_INFO(dev)->gen < 9) {
+ DRM_DEBUG("unsupported pixel format: %s\n",
+ drm_get_format_name(mode_cmd->pixel_format));
+ return -EINVAL;
+ }
+ break;
+ case DRM_FORMAT_XBGR8888:
case DRM_FORMAT_XRGB2101010:
- case DRM_FORMAT_ARGB2101010:
case DRM_FORMAT_XBGR2101010:
- case DRM_FORMAT_ABGR2101010:
if (INTEL_INFO(dev)->gen < 4) {
DRM_DEBUG("unsupported pixel format: %s\n",
drm_get_format_name(mode_cmd->pixel_format));
return -EINVAL;
}
break;
+ case DRM_FORMAT_ABGR2101010:
+ if (!IS_VALLEYVIEW(dev)) {
+ DRM_DEBUG("unsupported pixel format: %s\n",
+ drm_get_format_name(mode_cmd->pixel_format));
+ return -EINVAL;
+ }
+ break;
case DRM_FORMAT_YUYV:
case DRM_FORMAT_UYVY:
case DRM_FORMAT_YVYU:
@@ -14595,6 +14862,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
WARN_ON(crtc->active);
crtc->base.state->enable = false;
+ crtc->base.state->active = false;
crtc->base.enabled = false;
}
@@ -14623,6 +14891,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
crtc->active ? "enabled" : "disabled");
crtc->base.state->enable = crtc->active;
+ crtc->base.state->active = crtc->active;
crtc->base.enabled = crtc->active;
/* Because we only establish the connector -> encoder ->
@@ -14761,6 +15030,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
crtc->config);
crtc->base.state->enable = crtc->active;
+ crtc->base.state->active = crtc->active;
crtc->base.enabled = crtc->active;
plane_state = to_intel_plane_state(primary->state);
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index eca82cff40b9..280c282da9bd 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1097,6 +1097,9 @@ skl_edp_set_pll_config(struct intel_crtc_state *pipe_config, int link_clock)
{
u32 ctrl1;
+ memset(&pipe_config->dpll_hw_state, 0,
+ sizeof(pipe_config->dpll_hw_state));
+
pipe_config->ddi_pll_sel = SKL_DPLL0;
pipe_config->dpll_hw_state.cfgcr1 = 0;
pipe_config->dpll_hw_state.cfgcr2 = 0;
@@ -1266,7 +1269,7 @@ static void snprintf_int_array(char *str, size_t len,
str[0] = '\0';
for (i = 0; i < nelem; i++) {
- int r = snprintf(str, len, "%d,", array[i]);
+ int r = snprintf(str, len, "%s%d", i ? ", " : "", array[i]);
if (r >= len)
return;
str += r;
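The reworked format string emits the separator only between elements, so {1, 2, 3} renders as "1, 2, 3" instead of the old "1,2,3," with its trailing comma. A standalone sketch of the idiom (plain C, name illustrative):

/* Illustrative only: separator-before-element join, as in the new format. */
static void join_ints(char *str, size_t len, const int *array, int nelem)
{
	int i;

	str[0] = '\0';
	for (i = 0; i < nelem; i++) {
		int r = snprintf(str, len, "%s%d", i ? ", " : "", array[i]);

		if (r < 0 || (size_t)r >= len)
			return;
		str += r;
		len -= r;
	}
}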
@@ -1567,7 +1570,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder)
/* Split out the IBX/CPU vs CPT settings */
- if (port == PORT_A && IS_GEN7(dev) && !IS_VALLEYVIEW(dev)) {
+ if (IS_GEN7(dev) && port == PORT_A) {
if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
intel_dp->DP |= DP_SYNC_HS_HIGH;
if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
@@ -1578,7 +1581,18 @@ static void intel_dp_prepare(struct intel_encoder *encoder)
intel_dp->DP |= DP_ENHANCED_FRAMING;
intel_dp->DP |= crtc->pipe << 29;
- } else if (!HAS_PCH_CPT(dev) || port == PORT_A) {
+ } else if (HAS_PCH_CPT(dev) && port != PORT_A) {
+ u32 trans_dp;
+
+ intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT;
+
+ trans_dp = I915_READ(TRANS_DP_CTL(crtc->pipe));
+ if (drm_dp_enhanced_frame_cap(intel_dp->dpcd))
+ trans_dp |= TRANS_DP_ENH_FRAMING;
+ else
+ trans_dp &= ~TRANS_DP_ENH_FRAMING;
+ I915_WRITE(TRANS_DP_CTL(crtc->pipe), trans_dp);
+ } else {
if (!HAS_PCH_SPLIT(dev) && !IS_VALLEYVIEW(dev))
intel_dp->DP |= intel_dp->color_range;
@@ -1591,14 +1605,10 @@ static void intel_dp_prepare(struct intel_encoder *encoder)
if (drm_dp_enhanced_frame_cap(intel_dp->dpcd))
intel_dp->DP |= DP_ENHANCED_FRAMING;
- if (!IS_CHERRYVIEW(dev)) {
- if (crtc->pipe == 1)
- intel_dp->DP |= DP_PIPEB_SELECT;
- } else {
+ if (IS_CHERRYVIEW(dev))
intel_dp->DP |= DP_PIPE_SELECT_CHV(crtc->pipe);
- }
- } else {
- intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT;
+ else if (crtc->pipe == PIPE_B)
+ intel_dp->DP |= DP_PIPEB_SELECT;
}
}
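After the rework, intel_dp_prepare() distinguishes three mutually exclusive register layouts: gen7 CPU eDP on port A, CPT PCH ports, and everything else. A hedged sketch of that classification (the enum is hypothetical; the conditions mirror the code above):

/* Illustrative only: the three DP register layouts distinguished above. */
enum dp_reg_layout {
	DP_LAYOUT_GEN7_CPU_EDP,	/* IS_GEN7(dev) && port == PORT_A */
	DP_LAYOUT_CPT,		/* HAS_PCH_CPT(dev) && port != PORT_A */
	DP_LAYOUT_LEGACY,	/* everything else */
};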
@@ -2182,41 +2192,25 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder,
if (!(tmp & DP_PORT_EN))
return false;
- if (port == PORT_A && IS_GEN7(dev) && !IS_VALLEYVIEW(dev)) {
+ if (IS_GEN7(dev) && port == PORT_A) {
*pipe = PORT_TO_PIPE_CPT(tmp);
- } else if (IS_CHERRYVIEW(dev)) {
- *pipe = DP_PORT_TO_PIPE_CHV(tmp);
- } else if (!HAS_PCH_CPT(dev) || port == PORT_A) {
- *pipe = PORT_TO_PIPE(tmp);
- } else {
- u32 trans_sel;
- u32 trans_dp;
- int i;
-
- switch (intel_dp->output_reg) {
- case PCH_DP_B:
- trans_sel = TRANS_DP_PORT_SEL_B;
- break;
- case PCH_DP_C:
- trans_sel = TRANS_DP_PORT_SEL_C;
- break;
- case PCH_DP_D:
- trans_sel = TRANS_DP_PORT_SEL_D;
- break;
- default:
- return true;
- }
+ } else if (HAS_PCH_CPT(dev) && port != PORT_A) {
+ enum pipe p;
- for_each_pipe(dev_priv, i) {
- trans_dp = I915_READ(TRANS_DP_CTL(i));
- if ((trans_dp & TRANS_DP_PORT_SEL_MASK) == trans_sel) {
- *pipe = i;
+ for_each_pipe(dev_priv, p) {
+ u32 trans_dp = I915_READ(TRANS_DP_CTL(p));
+ if (TRANS_DP_PIPE_TO_PORT(trans_dp) == port) {
+ *pipe = p;
return true;
}
}
DRM_DEBUG_KMS("No pipe for dp port 0x%x found\n",
intel_dp->output_reg);
+ } else if (IS_CHERRYVIEW(dev)) {
+ *pipe = DP_PORT_TO_PIPE_CHV(tmp);
+ } else {
+ *pipe = PORT_TO_PIPE(tmp);
}
return true;
@@ -2237,24 +2231,24 @@ static void intel_dp_get_config(struct intel_encoder *encoder,
pipe_config->has_audio = tmp & DP_AUDIO_OUTPUT_ENABLE && port != PORT_A;
- if ((port == PORT_A) || !HAS_PCH_CPT(dev)) {
- if (tmp & DP_SYNC_HS_HIGH)
+ if (HAS_PCH_CPT(dev) && port != PORT_A) {
+ tmp = I915_READ(TRANS_DP_CTL(crtc->pipe));
+ if (tmp & TRANS_DP_HSYNC_ACTIVE_HIGH)
flags |= DRM_MODE_FLAG_PHSYNC;
else
flags |= DRM_MODE_FLAG_NHSYNC;
- if (tmp & DP_SYNC_VS_HIGH)
+ if (tmp & TRANS_DP_VSYNC_ACTIVE_HIGH)
flags |= DRM_MODE_FLAG_PVSYNC;
else
flags |= DRM_MODE_FLAG_NVSYNC;
} else {
- tmp = I915_READ(TRANS_DP_CTL(crtc->pipe));
- if (tmp & TRANS_DP_HSYNC_ACTIVE_HIGH)
+ if (tmp & DP_SYNC_HS_HIGH)
flags |= DRM_MODE_FLAG_PHSYNC;
else
flags |= DRM_MODE_FLAG_NHSYNC;
- if (tmp & TRANS_DP_VSYNC_ACTIVE_HIGH)
+ if (tmp & DP_SYNC_VS_HIGH)
flags |= DRM_MODE_FLAG_PVSYNC;
else
flags |= DRM_MODE_FLAG_NVSYNC;
@@ -2361,7 +2355,7 @@ static void chv_post_disable_dp(struct intel_encoder *encoder)
intel_dp_link_down(intel_dp);
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
/* Propagate soft reset to data lane reset */
val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch));
@@ -2380,7 +2374,7 @@ static void chv_post_disable_dp(struct intel_encoder *encoder)
val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
static void
@@ -2419,7 +2413,8 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp,
}
I915_WRITE(DP_TP_CTL(port), temp);
- } else if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || port != PORT_A)) {
+ } else if ((IS_GEN7(dev) && port == PORT_A) ||
+ (HAS_PCH_CPT(dev) && port != PORT_A)) {
*DP &= ~DP_LINK_TRAIN_MASK_CPT;
switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) {
@@ -2676,7 +2671,7 @@ static void vlv_pre_enable_dp(struct intel_encoder *encoder)
int pipe = intel_crtc->pipe;
u32 val;
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port));
val = 0;
@@ -2689,7 +2684,7 @@ static void vlv_pre_enable_dp(struct intel_encoder *encoder)
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018);
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
intel_enable_dp(encoder);
}
@@ -2707,7 +2702,7 @@ static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder)
intel_dp_prepare(encoder);
/* Program Tx lane resets to default */
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port),
DPIO_PCS_TX_LANE2_RESET |
DPIO_PCS_TX_LANE1_RESET);
@@ -2721,7 +2716,7 @@ static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder)
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW12(port), 0x00750f00);
vlv_dpio_write(dev_priv, pipe, VLV_TX_DW11(port), 0x00001500);
vlv_dpio_write(dev_priv, pipe, VLV_TX_DW14(port), 0x40400000);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
static void chv_pre_enable_dp(struct intel_encoder *encoder)
@@ -2737,7 +2732,7 @@ static void chv_pre_enable_dp(struct intel_encoder *encoder)
int data, i, stagger;
u32 val;
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
/* allow hardware to manage TX FIFO reset source */
val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch));
@@ -2807,7 +2802,7 @@ static void chv_pre_enable_dp(struct intel_encoder *encoder)
DPIO_TX1_STAGGER_MULT(7) |
DPIO_TX2_STAGGER_MULT(5));
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
intel_enable_dp(encoder);
}
@@ -2825,7 +2820,7 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder)
intel_dp_prepare(encoder);
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
/* program left/right clock distribution */
if (pipe != PIPE_B) {
@@ -2875,7 +2870,7 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder)
val |= CHV_CMN_USEDCLKCHANNEL;
vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
/*
@@ -3100,7 +3095,7 @@ static uint32_t vlv_signal_levels(struct intel_dp *intel_dp)
return 0;
}
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x00000000);
vlv_dpio_write(dev_priv, pipe, VLV_TX_DW4(port), demph_reg_value);
vlv_dpio_write(dev_priv, pipe, VLV_TX_DW2(port),
@@ -3109,7 +3104,7 @@ static uint32_t vlv_signal_levels(struct intel_dp *intel_dp)
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW11(port), 0x00030000);
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), preemph_reg_value);
vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x80000000);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
return 0;
}
@@ -3196,7 +3191,7 @@ static uint32_t chv_signal_levels(struct intel_dp *intel_dp)
return 0;
}
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
/* Clear calc init */
val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch));
@@ -3283,7 +3278,7 @@ static uint32_t chv_signal_levels(struct intel_dp *intel_dp)
val |= DPIO_LRC_BYPASS;
vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, val);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
return 0;
}
@@ -3848,6 +3843,7 @@ static void
intel_dp_link_down(struct intel_dp *intel_dp)
{
struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+ struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc);
enum port port = intel_dig_port->port;
struct drm_device *dev = intel_dig_port->base.base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3861,36 +3857,41 @@ intel_dp_link_down(struct intel_dp *intel_dp)
DRM_DEBUG_KMS("\n");
- if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || port != PORT_A)) {
+ if ((IS_GEN7(dev) && port == PORT_A) ||
+ (HAS_PCH_CPT(dev) && port != PORT_A)) {
DP &= ~DP_LINK_TRAIN_MASK_CPT;
- I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE_CPT);
+ DP |= DP_LINK_TRAIN_PAT_IDLE_CPT;
} else {
if (IS_CHERRYVIEW(dev))
DP &= ~DP_LINK_TRAIN_MASK_CHV;
else
DP &= ~DP_LINK_TRAIN_MASK;
- I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE);
+ DP |= DP_LINK_TRAIN_PAT_IDLE;
}
+ I915_WRITE(intel_dp->output_reg, DP);
POSTING_READ(intel_dp->output_reg);
- if (HAS_PCH_IBX(dev) &&
- I915_READ(intel_dp->output_reg) & DP_PIPEB_SELECT) {
- /* Hardware workaround: leaving our transcoder select
- * set to transcoder B while it's off will prevent the
- * corresponding HDMI output on transcoder A.
- *
- * Combine this with another hardware workaround:
- * transcoder select bit can only be cleared while the
- * port is enabled.
- */
- DP &= ~DP_PIPEB_SELECT;
+ DP &= ~(DP_PORT_EN | DP_AUDIO_OUTPUT_ENABLE);
+ I915_WRITE(intel_dp->output_reg, DP);
+ POSTING_READ(intel_dp->output_reg);
+
+ /*
+ * HW workaround for IBX: we need to move the port
+ * to transcoder A after disabling it to allow the
+ * matching HDMI port to be enabled on transcoder A.
+ */
+ if (HAS_PCH_IBX(dev) && crtc->pipe == PIPE_B && port != PORT_A) {
+ /* always enable with pattern 1 (as per spec) */
+ DP &= ~(DP_PIPEB_SELECT | DP_LINK_TRAIN_MASK);
+ DP |= DP_PORT_EN | DP_LINK_TRAIN_PAT_1;
+ I915_WRITE(intel_dp->output_reg, DP);
+ POSTING_READ(intel_dp->output_reg);
+
+ DP &= ~DP_PORT_EN;
I915_WRITE(intel_dp->output_reg, DP);
POSTING_READ(intel_dp->output_reg);
}
- DP &= ~DP_AUDIO_OUTPUT_ENABLE;
- I915_WRITE(intel_dp->output_reg, DP & ~DP_PORT_EN);
- POSTING_READ(intel_dp->output_reg);
msleep(intel_dp->panel_power_down_delay);
}
@@ -4040,46 +4041,70 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
u8 buf;
int test_crc_count;
int attempts = 6;
+ int ret = 0;
- if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0)
- return -EIO;
+ hsw_disable_ips(intel_crtc);
+
+ if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) {
+ ret = -EIO;
+ goto out;
+ }
- if (!(buf & DP_TEST_CRC_SUPPORTED))
- return -ENOTTY;
+ if (!(buf & DP_TEST_CRC_SUPPORTED)) {
+ ret = -ENOTTY;
+ goto out;
+ }
- if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0)
- return -EIO;
+ if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) {
+ ret = -EIO;
+ goto out;
+ }
if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK,
- buf | DP_TEST_SINK_START) < 0)
- return -EIO;
+ buf | DP_TEST_SINK_START) < 0) {
+ ret = -EIO;
+ goto out;
+ }
+
+ if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) {
+ ret = -EIO;
+ goto out;
+ }
- if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0)
- return -EIO;
test_crc_count = buf & DP_TEST_COUNT_MASK;
do {
if (drm_dp_dpcd_readb(&intel_dp->aux,
- DP_TEST_SINK_MISC, &buf) < 0)
- return -EIO;
+ DP_TEST_SINK_MISC, &buf) < 0) {
+ ret = -EIO;
+ goto out;
+ }
intel_wait_for_vblank(dev, intel_crtc->pipe);
} while (--attempts && (buf & DP_TEST_COUNT_MASK) == test_crc_count);
if (attempts == 0) {
DRM_DEBUG_KMS("Panel is unable to calculate CRC after 6 vblanks\n");
- return -ETIMEDOUT;
+ ret = -ETIMEDOUT;
+ goto out;
}
- if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0)
- return -EIO;
+ if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) {
+ ret = -EIO;
+ goto out;
+ }
- if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0)
- return -EIO;
+ if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) {
+ ret = -EIO;
+ goto out;
+ }
if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK,
- buf & ~DP_TEST_SINK_START) < 0)
- return -EIO;
-
- return 0;
+ buf & ~DP_TEST_SINK_START) < 0) {
+ ret = -EIO;
+ goto out;
+ }
+out:
+ hsw_enable_ips(intel_crtc);
+ return ret;
}
static bool
@@ -4142,7 +4167,7 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp)
if (!drm_dp_dpcd_write(&intel_dp->aux,
DP_TEST_EDID_CHECKSUM,
&intel_connector->detect_edid->checksum,
- 1));
+ 1))
DRM_DEBUG_KMS("Failed to write EDID checksum\n");
test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE;
@@ -5814,12 +5839,10 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
intel_dp_aux_init(intel_dp, intel_connector);
/* init MST on ports that can support it */
- if (IS_HASWELL(dev) || IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9) {
- if (port == PORT_B || port == PORT_C || port == PORT_D) {
- intel_dp_mst_encoder_init(intel_dig_port,
- intel_connector->base.base.id);
- }
- }
+ if (HAS_DP_MST(dev) &&
+ (port == PORT_B || port == PORT_C || port == PORT_D))
+ intel_dp_mst_encoder_init(intel_dig_port,
+ intel_connector->base.base.id);
if (!intel_edp_init_connector(intel_dp, intel_connector)) {
drm_dp_aux_unregister(&intel_dp->aux);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index ea3368e83626..2afb31a46275 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -459,8 +459,10 @@ struct intel_pipe_wm {
};
struct intel_mmio_flip {
- struct drm_i915_gem_request *req;
struct work_struct work;
+ struct drm_i915_private *i915;
+ struct drm_i915_gem_request *req;
+ struct intel_crtc *crtc;
};
struct skl_pipe_wm {
@@ -544,7 +546,6 @@ struct intel_crtc {
} wm;
int scanline_offset;
- struct intel_mmio_flip mmio_flip;
struct intel_crtc_atomic_commit atomic;
@@ -555,7 +556,15 @@ struct intel_crtc {
struct intel_plane_wm_parameters {
uint32_t horiz_pixels;
uint32_t vert_pixels;
+ /*
+ * For packed pixel formats:
+ * bytes_per_pixel - holds bytes per pixel
+ * For planar pixel formats:
+ * bytes_per_pixel - holds bytes per pixel for uv-plane
+ * y_bytes_per_pixel - holds bytes per pixel for y-plane
+ */
uint8_t bytes_per_pixel;
+ uint8_t y_bytes_per_pixel;
bool enabled;
bool scaled;
u64 tiling;
@@ -1059,9 +1068,6 @@ intel_rotation_90_or_270(unsigned int rotation)
return rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270));
}
-unsigned int
-intel_tile_height(struct drm_device *dev, uint32_t bits_per_pixel,
- uint64_t fb_modifier);
void intel_create_rotation_property(struct drm_device *dev,
struct intel_plane *plane);
@@ -1112,6 +1118,8 @@ void broxton_ddi_phy_init(struct drm_device *dev);
void broxton_ddi_phy_uninit(struct drm_device *dev);
void bxt_enable_dc9(struct drm_i915_private *dev_priv);
void bxt_disable_dc9(struct drm_i915_private *dev_priv);
+void skl_init_cdclk(struct drm_i915_private *dev_priv);
+void skl_uninit_cdclk(struct drm_i915_private *dev_priv);
void intel_dp_get_m_n(struct intel_crtc *crtc,
struct intel_crtc_state *pipe_config);
void intel_dp_set_m_n(struct intel_crtc *crtc, enum link_m_n_set m_n);
@@ -1359,9 +1367,10 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv);
void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
void gen6_rps_idle(struct drm_i915_private *dev_priv);
void gen6_rps_boost(struct drm_i915_private *dev_priv,
- struct drm_i915_file_private *file_priv);
+ struct intel_rps_client *rps,
+ unsigned long submitted);
void intel_queue_rps_boost_for_request(struct drm_device *dev,
- struct drm_i915_gem_request *rq);
+ struct drm_i915_gem_request *req);
void ilk_wm_get_hw_state(struct drm_device *dev);
void skl_wm_get_hw_state(struct drm_device *dev);
void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
@@ -1374,8 +1383,6 @@ bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob);
/* intel_sprite.c */
int intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane);
-void intel_flush_primary_plane(struct drm_i915_private *dev_priv,
- enum plane plane);
int intel_plane_restore(struct drm_plane *plane);
int intel_sprite_set_colorkey(struct drm_device *dev, void *data,
struct drm_file *file_priv);
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 51966426addf..b5a5558ecd63 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -239,7 +239,7 @@ static int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd, bool hs,
static void band_gap_reset(struct drm_i915_private *dev_priv)
{
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
vlv_flisdsi_write(dev_priv, 0x08, 0x0001);
vlv_flisdsi_write(dev_priv, 0x0F, 0x0005);
@@ -248,7 +248,7 @@ static void band_gap_reset(struct drm_i915_private *dev_priv)
vlv_flisdsi_write(dev_priv, 0x0F, 0x0000);
vlv_flisdsi_write(dev_priv, 0x08, 0x0000);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
static inline bool is_vid_mode(struct intel_dsi *intel_dsi)
@@ -346,11 +346,11 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder)
DRM_DEBUG_KMS("\n");
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
/* program rcomp for compliance, reduce from 50 ohms to 45 ohms
* needed everytime after power gate */
vlv_flisdsi_write(dev_priv, 0x04, 0x0004);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
/* bandgap reset is needed after everytime we do power gate */
band_gap_reset(dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
index d2cd8d5b27a1..a5e99ac305da 100644
--- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
+++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
@@ -212,7 +212,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
function = gtable[gpio].function_reg;
pad = gtable[gpio].pad_reg;
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
if (!gtable[gpio].init) {
/* program the function */
/* FIXME: remove constant below */
@@ -224,7 +224,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data)
/* pull up/down */
vlv_gpio_nc_write(dev_priv, pad, val);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
return data;
}
diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c
index 3622d0bafdf8..d20cf37b6901 100644
--- a/drivers/gpu/drm/i915/intel_dsi_pll.c
+++ b/drivers/gpu/drm/i915/intel_dsi_pll.c
@@ -162,59 +162,41 @@ static u32 dsi_clk_from_pclk(u32 pclk, int pixel_format, int lane_count)
#endif
-static int dsi_calc_mnp(u32 dsi_clk, struct dsi_mnp *dsi_mnp)
+static int dsi_calc_mnp(int target_dsi_clk, struct dsi_mnp *dsi_mnp)
{
- u32 m, n, p;
- u32 ref_clk;
- u32 error;
- u32 tmp_error;
- int target_dsi_clk;
- int calc_dsi_clk;
- u32 calc_m;
- u32 calc_p;
+ unsigned int calc_m = 0, calc_p = 0;
+ unsigned int m, n = 1, p;
+ int ref_clk = 25000;
+ int delta = target_dsi_clk;
u32 m_seed;
- /* dsi_clk is expected in KHZ */
- if (dsi_clk < 300000 || dsi_clk > 1150000) {
+ /* target_dsi_clk is expected in kHz */
+ if (target_dsi_clk < 300000 || target_dsi_clk > 1150000) {
DRM_ERROR("DSI CLK Out of Range\n");
return -ECHRNG;
}
- ref_clk = 25000;
- target_dsi_clk = dsi_clk;
- error = 0xFFFFFFFF;
- tmp_error = 0xFFFFFFFF;
- calc_m = 0;
- calc_p = 0;
-
- for (m = 62; m <= 92; m++) {
- for (p = 2; p <= 6; p++) {
- /* Find the optimal m and p divisors
- with minimal error +/- the required clock */
- calc_dsi_clk = (m * ref_clk) / p;
- if (calc_dsi_clk == target_dsi_clk) {
- calc_m = m;
- calc_p = p;
- error = 0;
- break;
- } else
- tmp_error = abs(target_dsi_clk - calc_dsi_clk);
-
- if (tmp_error < error) {
- error = tmp_error;
+ for (m = 62; m <= 92 && delta; m++) {
+ for (p = 2; p <= 6 && delta; p++) {
+ /*
+ * Find the optimal m and p divisors with minimal delta
+ * +/- the required clock
+ */
+ int calc_dsi_clk = (m * ref_clk) / (p * n);
+ int d = abs(target_dsi_clk - calc_dsi_clk);
+ if (d < delta) {
+ delta = d;
calc_m = m;
calc_p = p;
}
}
-
- if (error == 0)
- break;
}
+ /* register has log2(N1), this works fine for powers of two */
+ n = ffs(n) - 1;
m_seed = lfsr_converts[calc_m - 62];
- n = 1;
dsi_mnp->dsi_pll_ctrl = 1 << (DSI_PLL_P1_POST_DIV_SHIFT + calc_p - 2);
- dsi_mnp->dsi_pll_div = (n - 1) << DSI_PLL_N1_DIV_SHIFT |
+ dsi_mnp->dsi_pll_div = n << DSI_PLL_N1_DIV_SHIFT |
m_seed << DSI_PLL_M1_DIV_SHIFT;
return 0;
@@ -262,7 +244,7 @@ void vlv_enable_dsi_pll(struct intel_encoder *encoder)
DRM_DEBUG_KMS("\n");
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
vlv_configure_dsi_pll(encoder);
@@ -276,11 +258,11 @@ void vlv_enable_dsi_pll(struct intel_encoder *encoder)
if (wait_for(vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL) &
DSI_PLL_LOCK, 20)) {
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
DRM_ERROR("DSI PLL lock failed\n");
return;
}
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
DRM_DEBUG_KMS("DSI PLL locked\n");
}
@@ -292,14 +274,14 @@ void vlv_disable_dsi_pll(struct intel_encoder *encoder)
DRM_DEBUG_KMS("\n");
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
tmp = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
tmp &= ~DSI_PLL_VCO_EN;
tmp |= DSI_PLL_LDO_GATE;
vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_CONTROL, tmp);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
static void assert_bpp_mismatch(int pixel_format, int pipe_bpp)
@@ -331,21 +313,25 @@ u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp)
struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
u32 dsi_clock, pclk;
u32 pll_ctl, pll_div;
- u32 m = 0, p = 0;
+ u32 m = 0, p = 0, n;
int refclk = 25000;
int i;
DRM_DEBUG_KMS("\n");
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
pll_ctl = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL);
pll_div = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_DIVIDER);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
/* mask out other bits and extract the P1 divisor */
pll_ctl &= DSI_PLL_P1_POST_DIV_MASK;
pll_ctl = pll_ctl >> (DSI_PLL_P1_POST_DIV_SHIFT - 2);
+ /* N1 divisor */
+ n = (pll_div & DSI_PLL_N1_DIV_MASK) >> DSI_PLL_N1_DIV_SHIFT;
+ n = 1 << n; /* register has log2(N1) */
+
/* mask out the other bits and extract the M1 divisor */
pll_div &= DSI_PLL_M1_DIV_MASK;
pll_div = pll_div >> DSI_PLL_M1_DIV_SHIFT;
@@ -373,7 +359,7 @@ u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp)
m = i + 62;
- dsi_clock = (m * refclk) / p;
+ dsi_clock = (m * refclk) / (p * n);
/* pixel_format and pipe_bpp should agree */
assert_bpp_mismatch(intel_dsi->pixel_format, pipe_bpp);
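Note: the reworked dsi_calc_mnp() above is a brute-force search: with N fixed at 1 it walks M over 62..92 and P over 2..6, keeping the pair that minimises |target - M*refclk/(P*N)| against the 25000 kHz reference. The standalone sketch below mirrors that search; the loop bounds, range check and kHz units are taken from the hunk, while the main() harness and the 513000 kHz sample clock are illustrative only.

#include <stdio.h>
#include <stdlib.h>

/* Standalone mirror of the dsi_calc_mnp() search above: N is fixed at 1,
 * M runs 62..92, P runs 2..6, all clocks in kHz, 25000 kHz reference. */
static int calc_mnp(int target_dsi_clk, unsigned int *best_m, unsigned int *best_p)
{
	const int ref_clk = 25000, n = 1;
	int delta = target_dsi_clk;
	unsigned int m, p;

	if (target_dsi_clk < 300000 || target_dsi_clk > 1150000)
		return -1;

	for (m = 62; m <= 92 && delta; m++) {
		for (p = 2; p <= 6 && delta; p++) {
			int d = abs(target_dsi_clk - (int)(m * ref_clk / (p * n)));

			if (d < delta) {
				delta = d;
				*best_m = m;
				*best_p = p;
			}
		}
	}
	return 0;
}

int main(void)
{
	unsigned int m = 0, p = 0;

	/* e.g. a 513000 kHz DSI clock: best divisors are M=82, P=4 (512500 kHz) */
	if (calc_mnp(513000, &m, &p) == 0)
		printf("m=%u p=%u -> %d kHz\n", m, p, (int)(m * 25000 / p));
	return 0;
}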
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 4e7e7da2e03b..6372cfc7d053 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -96,6 +96,32 @@ static int intel_fbdev_blank(int blank, struct fb_info *info)
return ret;
}
+static int intel_fbdev_pan_display(struct fb_var_screeninfo *var,
+ struct fb_info *info)
+{
+ struct drm_fb_helper *fb_helper = info->par;
+ struct intel_fbdev *ifbdev =
+ container_of(fb_helper, struct intel_fbdev, helper);
+
+ int ret;
+ ret = drm_fb_helper_pan_display(var, info);
+
+ if (ret == 0) {
+ /*
+ * FIXME: fbdev presumes that all callbacks also work from
+ * atomic contexts and relies on that for emergency oops
+ * printing. KMS totally doesn't do that and the locking here is
+ * by far not the only place this goes wrong. Ignore this for
+ * now until we solve this for real.
+ */
+ mutex_lock(&fb_helper->dev->struct_mutex);
+ intel_fb_obj_invalidate(ifbdev->fb->obj, NULL, ORIGIN_GTT);
+ mutex_unlock(&fb_helper->dev->struct_mutex);
+ }
+
+ return ret;
+}
+
static struct fb_ops intelfb_ops = {
.owner = THIS_MODULE,
.fb_check_var = drm_fb_helper_check_var,
@@ -103,7 +129,7 @@ static struct fb_ops intelfb_ops = {
.fb_fillrect = cfb_fillrect,
.fb_copyarea = cfb_copyarea,
.fb_imageblit = cfb_imageblit,
- .fb_pan_display = drm_fb_helper_pan_display,
+ .fb_pan_display = intel_fbdev_pan_display,
.fb_blank = intel_fbdev_blank,
.fb_setcmap = drm_fb_helper_setcmap,
.fb_debug_enter = drm_fb_helper_debug_enter,
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index d04e6dc97fe5..e97731aab6dc 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -873,59 +873,59 @@ static void intel_disable_hdmi(struct intel_encoder *encoder)
struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
u32 temp;
- u32 enable_bits = SDVO_ENABLE | SDVO_AUDIO_ENABLE;
-
- if (crtc->config->has_audio)
- intel_audio_codec_disable(encoder);
temp = I915_READ(intel_hdmi->hdmi_reg);
- /* HW workaround for IBX, we need to move the port to transcoder A
- * before disabling it. */
- if (HAS_PCH_IBX(dev)) {
- struct drm_crtc *crtc = encoder->base.crtc;
- int pipe = crtc ? to_intel_crtc(crtc)->pipe : -1;
-
- if (temp & SDVO_PIPE_B_SELECT) {
- temp &= ~SDVO_PIPE_B_SELECT;
- I915_WRITE(intel_hdmi->hdmi_reg, temp);
- POSTING_READ(intel_hdmi->hdmi_reg);
-
- /* Again we need to write this twice. */
- I915_WRITE(intel_hdmi->hdmi_reg, temp);
- POSTING_READ(intel_hdmi->hdmi_reg);
-
- /* Transcoder selection bits only update
- * effectively on vblank. */
- if (crtc)
- intel_wait_for_vblank(dev, pipe);
- else
- msleep(50);
- }
- }
-
- /* HW workaround, need to toggle enable bit off and on for 12bpc, but
- * we do this anyway which shows more stable in testing.
- */
- if (HAS_PCH_SPLIT(dev)) {
- I915_WRITE(intel_hdmi->hdmi_reg, temp & ~SDVO_ENABLE);
- POSTING_READ(intel_hdmi->hdmi_reg);
- }
-
- temp &= ~enable_bits;
-
+ temp &= ~(SDVO_ENABLE | SDVO_AUDIO_ENABLE);
I915_WRITE(intel_hdmi->hdmi_reg, temp);
POSTING_READ(intel_hdmi->hdmi_reg);
- /* HW workaround, need to write this twice for issue that may result
- * in first write getting masked.
+ /*
+ * HW workaround for IBX, we need to move the port
+ * to transcoder A after disabling it to allow the
+ * matching DP port to be enabled on transcoder A.
*/
- if (HAS_PCH_SPLIT(dev)) {
+ if (HAS_PCH_IBX(dev) && crtc->pipe == PIPE_B) {
+ temp &= ~SDVO_PIPE_B_SELECT;
+ temp |= SDVO_ENABLE;
+ /*
+ * HW workaround, need to write this twice for issue
+ * that may result in first write getting masked.
+ */
+ I915_WRITE(intel_hdmi->hdmi_reg, temp);
+ POSTING_READ(intel_hdmi->hdmi_reg);
+ I915_WRITE(intel_hdmi->hdmi_reg, temp);
+ POSTING_READ(intel_hdmi->hdmi_reg);
+
+ temp &= ~SDVO_ENABLE;
I915_WRITE(intel_hdmi->hdmi_reg, temp);
POSTING_READ(intel_hdmi->hdmi_reg);
}
}
+static void g4x_disable_hdmi(struct intel_encoder *encoder)
+{
+ struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+
+ if (crtc->config->has_audio)
+ intel_audio_codec_disable(encoder);
+
+ intel_disable_hdmi(encoder);
+}
+
+static void pch_disable_hdmi(struct intel_encoder *encoder)
+{
+ struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+
+ if (crtc->config->has_audio)
+ intel_audio_codec_disable(encoder);
+}
+
+static void pch_post_disable_hdmi(struct intel_encoder *encoder)
+{
+ intel_disable_hdmi(encoder);
+}
+
static int hdmi_portclock_limit(struct intel_hdmi *hdmi, bool respect_dvi_limit)
{
struct drm_device *dev = intel_hdmi_to_dev(hdmi);
@@ -1036,7 +1036,8 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
*/
if (pipe_config->pipe_bpp > 8*3 && pipe_config->has_hdmi_sink &&
clock_12bpc <= portclock_limit &&
- hdmi_12bpc_possible(pipe_config)) {
+ hdmi_12bpc_possible(pipe_config) &&
+ 0 /* FIXME 12bpc support totally broken */) {
DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n");
desired_bpp = 12*3;
@@ -1293,7 +1294,7 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder)
u32 val;
/* Enable clock channels for this port */
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port));
val = 0;
if (pipe)
@@ -1316,7 +1317,7 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder)
/* Program lane clock */
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018);
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
intel_hdmi->set_infoframes(&encoder->base,
intel_crtc->config->has_hdmi_sink,
@@ -1340,7 +1341,7 @@ static void vlv_hdmi_pre_pll_enable(struct intel_encoder *encoder)
intel_hdmi_prepare(encoder);
/* Program Tx lane resets to default */
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port),
DPIO_PCS_TX_LANE2_RESET |
DPIO_PCS_TX_LANE1_RESET);
@@ -1357,7 +1358,7 @@ static void vlv_hdmi_pre_pll_enable(struct intel_encoder *encoder)
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), 0x00002000);
vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), DPIO_TX_OCALINIT_EN);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder)
@@ -1373,7 +1374,7 @@ static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder)
intel_hdmi_prepare(encoder);
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
/* program left/right clock distribution */
if (pipe != PIPE_B) {
@@ -1423,7 +1424,7 @@ static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder)
val |= CHV_CMN_USEDCLKCHANNEL;
vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
static void vlv_hdmi_post_disable(struct intel_encoder *encoder)
@@ -1436,10 +1437,10 @@ static void vlv_hdmi_post_disable(struct intel_encoder *encoder)
int pipe = intel_crtc->pipe;
/* Reset lanes to avoid HDMI flicker (VLV w/a) */
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), 0x00000000);
vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(port), 0x00e00060);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
static void chv_hdmi_post_disable(struct intel_encoder *encoder)
@@ -1453,7 +1454,7 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder)
enum pipe pipe = intel_crtc->pipe;
u32 val;
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
/* Propagate soft reset to data lane reset */
val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch));
@@ -1472,7 +1473,7 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder)
val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET);
vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
static void chv_hdmi_pre_enable(struct intel_encoder *encoder)
@@ -1490,7 +1491,7 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder)
int data, i, stagger;
u32 val;
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
/* allow hardware to manage TX FIFO reset source */
val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch));
@@ -1633,7 +1634,7 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder)
val |= DPIO_LRC_BYPASS;
vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, val);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
intel_hdmi->set_infoframes(&encoder->base,
intel_crtc->config->has_hdmi_sink,
@@ -1806,7 +1807,12 @@ void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port)
DRM_MODE_ENCODER_TMDS);
intel_encoder->compute_config = intel_hdmi_compute_config;
- intel_encoder->disable = intel_disable_hdmi;
+ if (HAS_PCH_SPLIT(dev)) {
+ intel_encoder->disable = pch_disable_hdmi;
+ intel_encoder->post_disable = pch_post_disable_hdmi;
+ } else {
+ intel_encoder->disable = g4x_disable_hdmi;
+ }
intel_encoder->get_hw_state = intel_hdmi_get_hw_state;
intel_encoder->get_config = intel_hdmi_get_config;
if (IS_CHERRYVIEW(dev)) {
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 3daa7e322326..92072f56e418 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -49,6 +49,19 @@ static const struct gmbus_pin gmbus_pins[] = {
[GMBUS_PIN_DPD] = { "dpd", GPIOF },
};
+static const struct gmbus_pin gmbus_pins_bdw[] = {
+ [GMBUS_PIN_VGADDC] = { "vga", GPIOA },
+ [GMBUS_PIN_DPC] = { "dpc", GPIOD },
+ [GMBUS_PIN_DPB] = { "dpb", GPIOE },
+ [GMBUS_PIN_DPD] = { "dpd", GPIOF },
+};
+
+static const struct gmbus_pin gmbus_pins_skl[] = {
+ [GMBUS_PIN_DPC] = { "dpc", GPIOD },
+ [GMBUS_PIN_DPB] = { "dpb", GPIOE },
+ [GMBUS_PIN_DPD] = { "dpd", GPIOF },
+};
+
static const struct gmbus_pin gmbus_pins_bxt[] = {
[GMBUS_PIN_1_BXT] = { "dpb", PCH_GPIOB },
[GMBUS_PIN_2_BXT] = { "dpc", PCH_GPIOC },
@@ -61,6 +74,10 @@ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv,
{
if (IS_BROXTON(dev_priv))
return &gmbus_pins_bxt[pin];
+ else if (IS_SKYLAKE(dev_priv))
+ return &gmbus_pins_skl[pin];
+ else if (IS_BROADWELL(dev_priv))
+ return &gmbus_pins_bdw[pin];
else
return &gmbus_pins[pin];
}
@@ -72,6 +89,10 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv,
if (IS_BROXTON(dev_priv))
size = ARRAY_SIZE(gmbus_pins_bxt);
+ else if (IS_SKYLAKE(dev_priv))
+ size = ARRAY_SIZE(gmbus_pins_skl);
+ else if (IS_BROADWELL(dev_priv))
+ size = ARRAY_SIZE(gmbus_pins_bdw);
else
size = ARRAY_SIZE(gmbus_pins);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0fa9209ff556..9f5485ddcbe6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -394,6 +394,12 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
assert_spin_locked(&ring->execlist_lock);
+ /*
+ * If irqs are not active generate a warning as batches that finish
+ * without the irqs may get lost and a GPU Hang may occur.
+ */
+ WARN_ON(!intel_irqs_enabled(ring->dev->dev_private));
+
if (list_empty(&ring->execlist_queue))
return;
@@ -421,7 +427,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
* WaIdleLiteRestore: make sure we never cause a lite
* restore with HEAD==TAIL
*/
- if (req0 && req0->elsp_submitted) {
+ if (req0->elsp_submitted) {
/*
* Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
* as we resubmit the request. See gen8_emit_request()
@@ -622,6 +628,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
struct list_head *vmas)
{
struct intel_engine_cs *ring = ringbuf->ring;
+ const unsigned other_rings = ~intel_ring_flag(ring);
struct i915_vma *vma;
uint32_t flush_domains = 0;
bool flush_chipset = false;
@@ -630,9 +637,11 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_object *obj = vma->obj;
- ret = i915_gem_object_sync(obj, ring);
- if (ret)
- return ret;
+ if (obj->active & other_rings) {
+ ret = i915_gem_object_sync(obj, ring);
+ if (ret)
+ return ret;
+ }
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
flush_chipset |= i915_gem_clflush_object(obj, false);
@@ -673,7 +682,8 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
{
struct intel_engine_cs *ring = ringbuf->ring;
struct drm_i915_gem_request *request;
- int ret, new_space;
+ unsigned space;
+ int ret;
if (intel_ring_space(ringbuf) >= bytes)
return 0;
@@ -684,14 +694,13 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
* from multiple ringbuffers. Here, we must ignore any that
* aren't from the ringbuffer we're considering.
*/
- struct intel_context *ctx = request->ctx;
- if (ctx->engine[ring->id].ringbuf != ringbuf)
+ if (request->ringbuf != ringbuf)
continue;
/* Would completion of this request free enough space? */
- new_space = __intel_ring_space(request->postfix, ringbuf->tail,
- ringbuf->size);
- if (new_space >= bytes)
+ space = __intel_ring_space(request->postfix, ringbuf->tail,
+ ringbuf->size);
+ if (space >= bytes)
break;
}
@@ -702,11 +711,8 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
if (ret)
return ret;
- i915_gem_retire_requests_ring(ring);
-
- WARN_ON(intel_ring_space(ringbuf) < new_space);
-
- return intel_ring_space(ringbuf) >= bytes ? 0 : -ENOSPC;
+ ringbuf->space = space;
+ return 0;
}
/*
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 5fd2d5ac02e2..25c8ec697da1 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -228,7 +228,6 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
ret = i915_wait_request(overlay->last_flip_req);
if (ret)
return ret;
- i915_gem_retire_requests(dev);
i915_gem_request_assign(&overlay->last_flip_req, NULL);
return 0;
@@ -376,7 +375,6 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
ret = i915_wait_request(overlay->last_flip_req);
if (ret)
return ret;
- i915_gem_retire_requests(overlay->dev);
if (overlay->flip_tail)
overlay->flip_tail(overlay);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 7006f94b94c1..c5914564939c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -1946,7 +1946,7 @@ static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
int ilk_wm_max_level(const struct drm_device *dev)
{
/* how many WM levels are we expecting */
- if (IS_GEN9(dev))
+ if (INTEL_INFO(dev)->gen >= 9)
return 7;
else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
return 4;
@@ -2639,8 +2639,18 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
}
static unsigned int
-skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p)
+skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y)
{
+
+ /* for planar format */
+ if (p->y_bytes_per_pixel) {
+ if (y) /* y-plane data rate */
+ return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel;
+ else /* uv-plane data rate */
+ return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel;
+ }
+
+ /* for packed formats */
return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel;
}
@@ -2663,7 +2673,10 @@ skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc,
if (!p->enabled)
continue;
- total_data_rate += skl_plane_relative_data_rate(p);
+ total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */
+ if (p->y_bytes_per_pixel) {
+ total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */
+ }
}
return total_data_rate;
@@ -2682,6 +2695,7 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
struct skl_ddb_entry *alloc = &ddb->pipe[pipe];
uint16_t alloc_size, start, cursor_blocks;
uint16_t minimum[I915_MAX_PLANES];
+ uint16_t y_minimum[I915_MAX_PLANES];
unsigned int total_data_rate;
int plane;
@@ -2710,6 +2724,8 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
minimum[plane] = 8;
alloc_size -= minimum[plane];
+ y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0;
+ alloc_size -= y_minimum[plane];
}
/*
@@ -2723,16 +2739,17 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
start = alloc->start;
for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) {
const struct intel_plane_wm_parameters *p;
- unsigned int data_rate;
- uint16_t plane_blocks;
+ unsigned int data_rate, y_data_rate;
+ uint16_t plane_blocks, y_plane_blocks = 0;
p = &params->plane[plane];
if (!p->enabled)
continue;
- data_rate = skl_plane_relative_data_rate(p);
+ data_rate = skl_plane_relative_data_rate(p, 0);
/*
+ * allocation for (packed formats) or (uv-plane part of planar format):
* promote the expression to 64 bits to avoid overflowing, the
* result is < available as data_rate / total_data_rate < 1
*/
@@ -2744,6 +2761,22 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc,
ddb->plane[pipe][plane].end = start + plane_blocks;
start += plane_blocks;
+
+ /*
+ * allocation for y_plane part of planar format:
+ */
+ if (p->y_bytes_per_pixel) {
+ y_data_rate = skl_plane_relative_data_rate(p, 1);
+ y_plane_blocks = y_minimum[plane];
+ y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate,
+ total_data_rate);
+
+ ddb->y_plane[pipe][plane].start = start;
+ ddb->y_plane[pipe][plane].end = start + y_plane_blocks;
+
+ start += y_plane_blocks;
+ }
+
}
}
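Note: the DDB hunks above rate each plane by its data consumption and then hand out blocks in proportion to that rate; for NV12 the Y plane is rated at full resolution with y_bytes_per_pixel while the UV plane is rated at quarter resolution with bytes_per_pixel, and each gets its own allocation. The sketch below only illustrates that arithmetic, with made-up plane sizes and a hypothetical 896 blocks left after the per-plane minimums; it is not the driver routine itself.

#include <stdint.h>
#include <stdio.h>

/* Mirrors skl_plane_relative_data_rate(): y selects the Y-plane rate for
 * planar (NV12) surfaces, otherwise the packed/UV-plane rate is returned. */
static unsigned int data_rate(unsigned int w, unsigned int h,
			      unsigned int bpp, unsigned int y_bpp, int y)
{
	if (y_bpp)
		return y ? w * h * y_bpp : (w / 2) * (h / 2) * bpp;
	return w * h * bpp;
}

int main(void)
{
	/* hypothetical: a 1920x1080 NV12 plane sharing the pipe DDB with a
	 * 1920x1080 XRGB8888 plane, 896 blocks left after the minimums */
	uint64_t nv12_uv = data_rate(1920, 1080, 2, 1, 0);
	uint64_t nv12_y  = data_rate(1920, 1080, 2, 1, 1);
	uint64_t xrgb    = data_rate(1920, 1080, 4, 0, 0);
	uint64_t total   = nv12_uv + nv12_y + xrgb;
	unsigned int alloc_size = 896;

	/* same 64-bit promotion as the hunk: blocks = alloc * rate / total */
	printf("uv=%llu y=%llu xrgb=%llu blocks\n",
	       (unsigned long long)(alloc_size * nv12_uv / total),
	       (unsigned long long)(alloc_size * nv12_y / total),
	       (unsigned long long)(alloc_size * xrgb / total));
	return 0;
}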
@@ -2856,13 +2889,18 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config);
fb = crtc->primary->state->fb;
+ /* For planar: Bpp is for uv plane, y_Bpp is for y plane */
if (fb) {
p->plane[0].enabled = true;
- p->plane[0].bytes_per_pixel = fb->bits_per_pixel / 8;
+ p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
+ drm_format_plane_cpp(fb->pixel_format, 1) : fb->bits_per_pixel / 8;
+ p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ?
+ drm_format_plane_cpp(fb->pixel_format, 0) : 0;
p->plane[0].tiling = fb->modifier[0];
} else {
p->plane[0].enabled = false;
p->plane[0].bytes_per_pixel = 0;
+ p->plane[0].y_bytes_per_pixel = 0;
p->plane[0].tiling = DRM_FORMAT_MOD_NONE;
}
p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w;
@@ -2870,6 +2908,7 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc,
p->plane[0].rotation = crtc->primary->state->rotation;
fb = crtc->cursor->state->fb;
+ p->cursor.y_bytes_per_pixel = 0;
if (fb) {
p->cursor.enabled = true;
p->cursor.bytes_per_pixel = fb->bits_per_pixel / 8;
@@ -2905,22 +2944,25 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
uint32_t plane_bytes_per_line, plane_blocks_per_line;
uint32_t res_blocks, res_lines;
uint32_t selected_result;
+ uint8_t bytes_per_pixel;
if (latency == 0 || !p->active || !p_params->enabled)
return false;
+ bytes_per_pixel = p_params->y_bytes_per_pixel ?
+ p_params->y_bytes_per_pixel :
+ p_params->bytes_per_pixel;
method1 = skl_wm_method1(p->pixel_rate,
- p_params->bytes_per_pixel,
+ bytes_per_pixel,
latency);
method2 = skl_wm_method2(p->pixel_rate,
p->pipe_htotal,
p_params->horiz_pixels,
- p_params->bytes_per_pixel,
+ bytes_per_pixel,
p_params->tiling,
latency);
- plane_bytes_per_line = p_params->horiz_pixels *
- p_params->bytes_per_pixel;
+ plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel;
plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
if (p_params->tiling == I915_FORMAT_MOD_Y_TILED ||
@@ -3137,10 +3179,14 @@ static void skl_write_wm_values(struct drm_i915_private *dev_priv,
new->plane_trans[pipe][i]);
I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]);
- for (i = 0; i < intel_num_planes(crtc); i++)
+ for (i = 0; i < intel_num_planes(crtc); i++) {
skl_ddb_entry_write(dev_priv,
PLANE_BUF_CFG(pipe, i),
&new->ddb.plane[pipe][i]);
+ skl_ddb_entry_write(dev_priv,
+ PLANE_NV12_BUF_CFG(pipe, i),
+ &new->ddb.y_plane[pipe][i]);
+ }
skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe),
&new->ddb.cursor[pipe]);
@@ -3298,6 +3344,7 @@ static bool skl_update_pipe_wm(struct drm_crtc *crtc,
return false;
intel_crtc->wm.skl_active = *pipe_wm;
+
return true;
}
@@ -3391,8 +3438,16 @@ skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc,
intel_plane->wm.scaled = scaled;
intel_plane->wm.horiz_pixels = sprite_width;
intel_plane->wm.vert_pixels = sprite_height;
- intel_plane->wm.bytes_per_pixel = pixel_size;
intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE;
+
+ /* For planar: Bpp is for UV plane, y_Bpp is for Y plane */
+ intel_plane->wm.bytes_per_pixel =
+ (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
+ drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size;
+ intel_plane->wm.y_bytes_per_pixel =
+ (fb && fb->pixel_format == DRM_FORMAT_NV12) ?
+ drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0;
+
/*
* Framebuffer can be NULL on plane disable, but it does not
* matter for watermarks if we assume no tiling in that case.
@@ -4042,51 +4097,25 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val)
trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
}
-/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down
+/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down
*
* * If Gfx is Idle, then
- * 1. Mask Turbo interrupts
- * 2. Bring up Gfx clock
- * 3. Change the freq to Rpn and wait till P-Unit updates freq
- * 4. Clear the Force GFX CLK ON bit so that Gfx can down
- * 5. Unmask Turbo interrupts
+ * 1. Forcewake Media well.
+ * 2. Request idle freq.
+ * 3. Release Forcewake of Media well.
*/
static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
{
- struct drm_device *dev = dev_priv->dev;
u32 val = dev_priv->rps.idle_freq;
- /* CHV and latest VLV don't need to force the gfx clock */
- if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) {
- valleyview_set_rps(dev_priv->dev, val);
- return;
- }
-
- /*
- * When we are idle. Drop to min voltage state.
- */
-
if (dev_priv->rps.cur_freq <= val)
return;
- /* Mask turbo interrupt so that they will not come in between */
- I915_WRITE(GEN6_PMINTRMSK,
- gen6_sanitize_rps_pm_mask(dev_priv, ~0));
-
- vlv_force_gfx_clock(dev_priv, true);
-
- dev_priv->rps.cur_freq = val;
-
- vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
-
- if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS))
- & GENFREQSTATUS) == 0, 100))
- DRM_ERROR("timed out waiting for Punit\n");
-
- gen6_set_rps_thresholds(dev_priv, val);
- vlv_force_gfx_clock(dev_priv, false);
-
- I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+ /* Wake up the media well, as that takes a lot less
+ * power than the Render well. */
+ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
+ valleyview_set_rps(dev_priv->dev, val);
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
}
void gen6_rps_busy(struct drm_i915_private *dev_priv)
@@ -4114,33 +4143,48 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
dev_priv->rps.last_adj = 0;
I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
}
+ mutex_unlock(&dev_priv->rps.hw_lock);
+ spin_lock(&dev_priv->rps.client_lock);
while (!list_empty(&dev_priv->rps.clients))
list_del_init(dev_priv->rps.clients.next);
- mutex_unlock(&dev_priv->rps.hw_lock);
+ spin_unlock(&dev_priv->rps.client_lock);
}
void gen6_rps_boost(struct drm_i915_private *dev_priv,
- struct drm_i915_file_private *file_priv)
+ struct intel_rps_client *rps,
+ unsigned long submitted)
{
- u32 val;
+ /* This is intentionally racy! We peek at the state here, then
+ * validate inside the RPS worker.
+ */
+ if (!(dev_priv->mm.busy &&
+ dev_priv->rps.enabled &&
+ dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit))
+ return;
- mutex_lock(&dev_priv->rps.hw_lock);
- val = dev_priv->rps.max_freq_softlimit;
- if (dev_priv->rps.enabled &&
- dev_priv->mm.busy &&
- dev_priv->rps.cur_freq < val &&
- (file_priv == NULL || list_empty(&file_priv->rps_boost))) {
- intel_set_rps(dev_priv->dev, val);
- dev_priv->rps.last_adj = 0;
+ /* Force a RPS boost (and don't count it against the client) if
+ * the GPU is severely congested.
+ */
+ if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES))
+ rps = NULL;
+
+ spin_lock(&dev_priv->rps.client_lock);
+ if (rps == NULL || list_empty(&rps->link)) {
+ spin_lock_irq(&dev_priv->irq_lock);
+ if (dev_priv->rps.interrupts_enabled) {
+ dev_priv->rps.client_boost = true;
+ queue_work(dev_priv->wq, &dev_priv->rps.work);
+ }
+ spin_unlock_irq(&dev_priv->irq_lock);
- if (file_priv != NULL) {
- list_add(&file_priv->rps_boost, &dev_priv->rps.clients);
- file_priv->rps_boosts++;
+ if (rps != NULL) {
+ list_add(&rps->link, &dev_priv->rps.clients);
+ rps->boosts++;
} else
dev_priv->rps.boosts++;
}
- mutex_unlock(&dev_priv->rps.hw_lock);
+ spin_unlock(&dev_priv->rps.client_lock);
}
void intel_set_rps(struct drm_device *dev, u8 val)
@@ -4714,24 +4758,6 @@ static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv)
return rp1;
}
-static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv)
-{
- struct drm_device *dev = dev_priv->dev;
- u32 val, rpn;
-
- if (dev->pdev->revision >= 0x20) {
- val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE);
- rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) &
- FB_GFX_FREQ_FUSE_MASK);
- } else { /* For pre-production hardware */
- val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG);
- rpn = ((val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) &
- PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK);
- }
-
- return rpn;
-}
-
static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv)
{
u32 val, rp1;
@@ -4938,9 +4964,9 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
mutex_lock(&dev_priv->rps.hw_lock);
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
val = vlv_cck_read(dev_priv, CCK_FUSE_REG);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
switch ((val >> 2) & 0x7) {
case 0:
@@ -4983,7 +5009,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev)
intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq),
dev_priv->rps.rp1_freq);
- dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv);
+ /* PUnit validated range is only [RPe, RP0] */
+ dev_priv->rps.min_freq = dev_priv->rps.efficient_freq;
DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n",
intel_gpu_freq(dev_priv, dev_priv->rps.min_freq),
dev_priv->rps.min_freq);
@@ -5859,13 +5886,15 @@ static void ibx_init_clock_gating(struct drm_device *dev)
static void g4x_disable_trickle_feed(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
- int pipe;
+ enum pipe pipe;
for_each_pipe(dev_priv, pipe) {
I915_WRITE(DSPCNTR(pipe),
I915_READ(DSPCNTR(pipe)) |
DISPPLANE_TRICKLE_FEED_DISABLE);
- intel_flush_primary_plane(dev_priv, pipe);
+
+ I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe)));
+ POSTING_READ(DSPSURF(pipe));
}
}
@@ -6155,10 +6184,9 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
enum pipe pipe;
+ uint32_t misccpctl;
- I915_WRITE(WM3_LP_ILK, 0);
- I915_WRITE(WM2_LP_ILK, 0);
- I915_WRITE(WM1_LP_ILK, 0);
+ ilk_init_lp_watermarks(dev);
/* WaSwitchSolVfFArbitrationPriority:bdw */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
@@ -6187,6 +6215,22 @@ static void broadwell_init_clock_gating(struct drm_device *dev)
I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+ /*
+ * WaProgramL3SqcReg1Default:bdw
+ * WaTempDisableDOPClkGating:bdw
+ */
+ misccpctl = I915_READ(GEN7_MISCCPCTL);
+ I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE);
+ I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
+ I915_WRITE(GEN7_MISCCPCTL, misccpctl);
+
+ /*
+ * WaGttCachingOffByDefault:bdw
+ * GTT cache may not work with big pages, so if those
+ * are ever enabled GTT cache may need to be disabled.
+ */
+ I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
+
lpt_init_clock_gating(dev);
}
@@ -6462,6 +6506,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev)
/* WaDisableSDEUnitClockGating:chv */
I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) |
GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+
+ /*
+ * GTT cache may not work with big pages, so if those
+ * are ever enabled GTT cache may need to be disabled.
+ */
+ I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL);
}
static void g4x_init_clock_gating(struct drm_device *dev)
@@ -6830,34 +6880,39 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
struct request_boost {
struct work_struct work;
- struct drm_i915_gem_request *rq;
+ struct drm_i915_gem_request *req;
};
static void __intel_rps_boost_work(struct work_struct *work)
{
struct request_boost *boost = container_of(work, struct request_boost, work);
+ struct drm_i915_gem_request *req = boost->req;
- if (!i915_gem_request_completed(boost->rq, true))
- gen6_rps_boost(to_i915(boost->rq->ring->dev), NULL);
+ if (!i915_gem_request_completed(req, true))
+ gen6_rps_boost(to_i915(req->ring->dev), NULL,
+ req->emitted_jiffies);
- i915_gem_request_unreference__unlocked(boost->rq);
+ i915_gem_request_unreference__unlocked(req);
kfree(boost);
}
void intel_queue_rps_boost_for_request(struct drm_device *dev,
- struct drm_i915_gem_request *rq)
+ struct drm_i915_gem_request *req)
{
struct request_boost *boost;
- if (rq == NULL || INTEL_INFO(dev)->gen < 6)
+ if (req == NULL || INTEL_INFO(dev)->gen < 6)
+ return;
+
+ if (i915_gem_request_completed(req, true))
return;
boost = kmalloc(sizeof(*boost), GFP_ATOMIC);
if (boost == NULL)
return;
- i915_gem_request_reference(rq);
- boost->rq = rq;
+ i915_gem_request_reference(req);
+ boost->req = req;
INIT_WORK(&boost->work, __intel_rps_boost_work);
queue_work(to_i915(dev)->wq, &boost->work);
@@ -6868,10 +6923,13 @@ void intel_pm_setup(struct drm_device *dev)
struct drm_i915_private *dev_priv = dev->dev_private;
mutex_init(&dev_priv->rps.hw_lock);
+ spin_lock_init(&dev_priv->rps.client_lock);
INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
intel_gen6_powersave_work);
INIT_LIST_HEAD(&dev_priv->rps.clients);
+ INIT_LIST_HEAD(&dev_priv->rps.semaphores.link);
+ INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
dev_priv->pm.suspended = false;
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 9b96ed7de9bb..d934f857394d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -853,9 +853,6 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
GEN6_WIZ_HASHING_MASK,
GEN6_WIZ_HASHING_16x4);
- /* WaProgramL3SqcReg1Default:bdw */
- WA_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT);
-
return 0;
}
@@ -918,6 +915,7 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
+ uint32_t tmp;
/* WaDisablePartialInstShootdown:skl,bxt */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
@@ -961,15 +959,19 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)
WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
GEN9_CCS_TLB_PREFETCH_ENABLE);
- /*
- * FIXME: don't apply the following on BXT for stepping C. On BXT A0
- * the flag reads back as 0.
- */
- /* WaDisableMaskBasedCammingInRCC:sklC,bxtA */
- if (INTEL_REVID(dev) == SKL_REVID_C0 || IS_BROXTON(dev))
+ /* WaDisableMaskBasedCammingInRCC:skl,bxt */
+ if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
+ (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0))
WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
PIXEL_MASK_CAMMING_DISABLE);
+ /* WaForceContextSaveRestoreNonCoherent:skl,bxt */
+ tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
+ if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) ||
+ (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0))
+ tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
+ WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
+
return 0;
}
@@ -1060,10 +1062,6 @@ static int bxt_init_workarounds(struct intel_engine_cs *ring)
GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}
- /* WaForceContextSaveRestoreNonCoherent:bxt */
- WA_SET_BIT_MASKED(HDC_CHICKEN0,
- HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
-
return 0;
}
@@ -2102,15 +2100,16 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
{
struct intel_ringbuffer *ringbuf = ring->buffer;
struct drm_i915_gem_request *request;
- int ret, new_space;
+ unsigned space;
+ int ret;
if (intel_ring_space(ringbuf) >= n)
return 0;
list_for_each_entry(request, &ring->request_list, list) {
- new_space = __intel_ring_space(request->postfix, ringbuf->tail,
- ringbuf->size);
- if (new_space >= n)
+ space = __intel_ring_space(request->postfix, ringbuf->tail,
+ ringbuf->size);
+ if (space >= n)
break;
}
@@ -2121,10 +2120,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
if (ret)
return ret;
- i915_gem_retire_requests_ring(ring);
-
- WARN_ON(intel_ring_space(ringbuf) < new_space);
-
+ ringbuf->space = space;
return 0;
}
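Note: both wait-for-space rewrites (here and in intel_lrc.c above) now record the space that the chosen request will free and assign it straight to ringbuf->space, rather than retiring requests and re-deriving it behind a WARN. That space comes from treating the ring as a circular buffer; a generic sketch of the computation follows. It is illustrative only, and the driver's __intel_ring_space() additionally keeps a small reserve so the ring is never written completely full.

/* Free space on a ring of 'size' bytes once the hardware has consumed up
 * to 'head', with software writes queued up to 'tail'.  Illustrative only;
 * the driver's helper also subtracts a small reserve. */
static int ring_space_example(int head, int tail, int size)
{
	int space = head - tail;

	if (space <= 0)
		space += size;
	return space;
}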
@@ -2168,10 +2164,14 @@ int intel_ring_idle(struct intel_engine_cs *ring)
return 0;
req = list_entry(ring->request_list.prev,
- struct drm_i915_gem_request,
- list);
-
- return i915_wait_request(req);
+ struct drm_i915_gem_request,
+ list);
+
+ /* Make sure we do not trigger any retires */
+ return __i915_wait_request(req,
+ atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
+ to_i915(ring->dev)->mm.interruptible,
+ NULL, NULL);
}
int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 317b9b43d1c1..1a45385f4d66 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -771,7 +771,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv,
vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, ctrl);
if (wait_for(COND, 100))
- DRM_ERROR("timout setting power well state %08x (%08x)\n",
+ DRM_ERROR("timeout setting power well state %08x (%08x)\n",
state,
vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL));
@@ -1029,7 +1029,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, ctrl);
if (wait_for(COND, 100))
- DRM_ERROR("timout setting power well state %08x (%08x)\n",
+ DRM_ERROR("timeout setting power well state %08x (%08x)\n",
state,
vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ));
@@ -1233,18 +1233,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
BIT(POWER_DOMAIN_AUX_C) | \
BIT(POWER_DOMAIN_INIT))
-#define CHV_PIPE_A_POWER_DOMAINS ( \
- BIT(POWER_DOMAIN_PIPE_A) | \
- BIT(POWER_DOMAIN_INIT))
-
-#define CHV_PIPE_B_POWER_DOMAINS ( \
- BIT(POWER_DOMAIN_PIPE_B) | \
- BIT(POWER_DOMAIN_INIT))
-
-#define CHV_PIPE_C_POWER_DOMAINS ( \
- BIT(POWER_DOMAIN_PIPE_C) | \
- BIT(POWER_DOMAIN_INIT))
-
#define CHV_DPIO_CMN_BC_POWER_DOMAINS ( \
BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \
BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \
@@ -1260,17 +1248,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
BIT(POWER_DOMAIN_AUX_D) | \
BIT(POWER_DOMAIN_INIT))
-#define CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS ( \
- BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) | \
- BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \
- BIT(POWER_DOMAIN_AUX_D) | \
- BIT(POWER_DOMAIN_INIT))
-
-#define CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS ( \
- BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \
- BIT(POWER_DOMAIN_AUX_D) | \
- BIT(POWER_DOMAIN_INIT))
-
static const struct i915_power_well_ops i9xx_always_on_power_well_ops = {
.sync_hw = i9xx_always_on_power_well_noop,
.enable = i9xx_always_on_power_well_noop,
@@ -1428,40 +1405,17 @@ static struct i915_power_well chv_power_wells[] = {
.domains = VLV_ALWAYS_ON_POWER_DOMAINS,
.ops = &i9xx_always_on_power_well_ops,
},
-#if 0
{
.name = "display",
- .domains = VLV_DISPLAY_POWER_DOMAINS,
- .data = PUNIT_POWER_WELL_DISP2D,
- .ops = &vlv_display_power_well_ops,
- },
-#endif
- {
- .name = "pipe-a",
/*
- * FIXME: pipe A power well seems to be the new disp2d well.
- * At least all registers seem to be housed there. Figure
- * out if this a a temporary situation in pre-production
- * hardware or a permanent state of affairs.
+ * Pipe A power well is the new disp2d well. Pipe B and C
+ * power wells don't actually exist. Pipe A power well is
+ * required for any pipe to work.
*/
- .domains = CHV_PIPE_A_POWER_DOMAINS | VLV_DISPLAY_POWER_DOMAINS,
+ .domains = VLV_DISPLAY_POWER_DOMAINS,
.data = PIPE_A,
.ops = &chv_pipe_power_well_ops,
},
-#if 0
- {
- .name = "pipe-b",
- .domains = CHV_PIPE_B_POWER_DOMAINS,
- .data = PIPE_B,
- .ops = &chv_pipe_power_well_ops,
- },
- {
- .name = "pipe-c",
- .domains = CHV_PIPE_C_POWER_DOMAINS,
- .data = PIPE_C,
- .ops = &chv_pipe_power_well_ops,
- },
-#endif
{
.name = "dpio-common-bc",
.domains = CHV_DPIO_CMN_BC_POWER_DOMAINS,
@@ -1474,50 +1428,6 @@ static struct i915_power_well chv_power_wells[] = {
.data = PUNIT_POWER_WELL_DPIO_CMN_D,
.ops = &chv_dpio_cmn_power_well_ops,
},
-#if 0
- {
- .name = "dpio-tx-b-01",
- .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
- VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS,
- .ops = &vlv_dpio_power_well_ops,
- .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01,
- },
- {
- .name = "dpio-tx-b-23",
- .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
- VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS,
- .ops = &vlv_dpio_power_well_ops,
- .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23,
- },
- {
- .name = "dpio-tx-c-01",
- .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
- VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
- .ops = &vlv_dpio_power_well_ops,
- .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01,
- },
- {
- .name = "dpio-tx-c-23",
- .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
- VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
- .ops = &vlv_dpio_power_well_ops,
- .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23,
- },
- {
- .name = "dpio-tx-d-01",
- .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS |
- CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS,
- .ops = &vlv_dpio_power_well_ops,
- .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_01,
- },
- {
- .name = "dpio-tx-d-23",
- .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS |
- CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS,
- .ops = &vlv_dpio_power_well_ops,
- .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_23,
- },
-#endif
};
static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv,
@@ -1724,6 +1634,8 @@ static void chv_phy_control_init(struct drm_i915_private *dev_priv)
* value.
*/
dev_priv->chv_phy_control =
+ PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY0) |
+ PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY1) |
PHY_CH_POWER_MODE(PHY_CH_SU_PSR, DPIO_PHY0, DPIO_CH0) |
PHY_CH_POWER_MODE(PHY_CH_SU_PSR, DPIO_PHY0, DPIO_CH1) |
PHY_CH_POWER_MODE(PHY_CH_SU_PSR, DPIO_PHY1, DPIO_CH0);
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 0a0625761f42..d24ef75596a1 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -243,6 +243,14 @@ static void intel_sdvo_write_sdvox(struct intel_sdvo *intel_sdvo, u32 val)
if (intel_sdvo->sdvo_reg == PCH_SDVOB) {
I915_WRITE(intel_sdvo->sdvo_reg, val);
POSTING_READ(intel_sdvo->sdvo_reg);
+ /*
+ * HW workaround, need to write this twice for issue
+ * that may result in first write getting masked.
+ */
+ if (HAS_PCH_IBX(dev)) {
+ I915_WRITE(intel_sdvo->sdvo_reg, val);
+ POSTING_READ(intel_sdvo->sdvo_reg);
+ }
return;
}
@@ -1429,6 +1437,7 @@ static void intel_disable_sdvo(struct intel_encoder *encoder)
{
struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
+ struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
u32 temp;
intel_sdvo_set_active_outputs(intel_sdvo, 0);
@@ -1437,35 +1446,34 @@ static void intel_disable_sdvo(struct intel_encoder *encoder)
DRM_MODE_DPMS_OFF);
temp = I915_READ(intel_sdvo->sdvo_reg);
- if ((temp & SDVO_ENABLE) != 0) {
- /* HW workaround for IBX, we need to move the port to
- * transcoder A before disabling it. */
- if (HAS_PCH_IBX(encoder->base.dev)) {
- struct drm_crtc *crtc = encoder->base.crtc;
- int pipe = crtc ? to_intel_crtc(crtc)->pipe : -1;
-
- if (temp & SDVO_PIPE_B_SELECT) {
- temp &= ~SDVO_PIPE_B_SELECT;
- I915_WRITE(intel_sdvo->sdvo_reg, temp);
- POSTING_READ(intel_sdvo->sdvo_reg);
-
- /* Again we need to write this twice. */
- I915_WRITE(intel_sdvo->sdvo_reg, temp);
- POSTING_READ(intel_sdvo->sdvo_reg);
-
- /* Transcoder selection bits only update
- * effectively on vblank. */
- if (crtc)
- intel_wait_for_vblank(encoder->base.dev, pipe);
- else
- msleep(50);
- }
- }
- intel_sdvo_write_sdvox(intel_sdvo, temp & ~SDVO_ENABLE);
+ temp &= ~SDVO_ENABLE;
+ intel_sdvo_write_sdvox(intel_sdvo, temp);
+
+ /*
+ * HW workaround for IBX, we need to move the port
+ * to transcoder A after disabling it to allow the
+ * matching DP port to be enabled on transcoder A.
+ */
+ if (HAS_PCH_IBX(dev_priv) && crtc->pipe == PIPE_B) {
+ temp &= ~SDVO_PIPE_B_SELECT;
+ temp |= SDVO_ENABLE;
+ intel_sdvo_write_sdvox(intel_sdvo, temp);
+
+ temp &= ~SDVO_ENABLE;
+ intel_sdvo_write_sdvox(intel_sdvo, temp);
}
}
+static void pch_disable_sdvo(struct intel_encoder *encoder)
+{
+}
+
+static void pch_post_disable_sdvo(struct intel_encoder *encoder)
+{
+ intel_disable_sdvo(encoder);
+}
+
static void intel_enable_sdvo(struct intel_encoder *encoder)
{
struct drm_device *dev = encoder->base.dev;
@@ -1478,14 +1486,9 @@ static void intel_enable_sdvo(struct intel_encoder *encoder)
bool success;
temp = I915_READ(intel_sdvo->sdvo_reg);
- if ((temp & SDVO_ENABLE) == 0) {
- /* HW workaround for IBX, we need to move the port
- * to transcoder A before disabling it, so restore it here. */
- if (HAS_PCH_IBX(dev))
- temp |= SDVO_PIPE_SEL(intel_crtc->pipe);
+ temp |= SDVO_ENABLE;
+ intel_sdvo_write_sdvox(intel_sdvo, temp);
- intel_sdvo_write_sdvox(intel_sdvo, temp | SDVO_ENABLE);
- }
for (i = 0; i < 2; i++)
intel_wait_for_vblank(dev, intel_crtc->pipe);
@@ -2988,7 +2991,12 @@ bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob)
}
intel_encoder->compute_config = intel_sdvo_compute_config;
- intel_encoder->disable = intel_disable_sdvo;
+ if (HAS_PCH_SPLIT(dev)) {
+ intel_encoder->disable = pch_disable_sdvo;
+ intel_encoder->post_disable = pch_post_disable_sdvo;
+ } else {
+ intel_encoder->disable = intel_disable_sdvo;
+ }
intel_encoder->pre_enable = intel_sdvo_pre_enable;
intel_encoder->enable = intel_enable_sdvo;
intel_encoder->get_hw_state = intel_sdvo_get_hw_state;
diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c
index 693ce8281970..8831fc579ade 100644
--- a/drivers/gpu/drm/i915/intel_sideband.c
+++ b/drivers/gpu/drm/i915/intel_sideband.c
@@ -49,7 +49,7 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn,
(port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) |
(bar << IOSF_BAR_SHIFT);
- WARN_ON(!mutex_is_locked(&dev_priv->dpio_lock));
+ WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
if (wait_for((I915_READ(VLV_IOSF_DOORBELL_REQ) & IOSF_SB_BUSY) == 0, 5)) {
DRM_DEBUG_DRIVER("IOSF sideband idle wait (%s) timed out\n",
@@ -81,10 +81,10 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr)
WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
SB_CRRDDA_NP, addr, &val);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
return val;
}
@@ -93,10 +93,10 @@ void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val)
{
WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT,
SB_CRWRDA_NP, addr, &val);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
}
u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg)
@@ -121,10 +121,10 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr)
WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
- mutex_lock(&dev_priv->dpio_lock);
+ mutex_lock(&dev_priv->sb_lock);
vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC,
SB_CRRDDA_NP, addr, &val);
- mutex_unlock(&dev_priv->dpio_lock);
+ mutex_unlock(&dev_priv->sb_lock);
return val;
}
@@ -213,7 +213,7 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg,
enum intel_sbi_destination destination)
{
u32 value = 0;
- WARN_ON(!mutex_is_locked(&dev_priv->dpio_lock));
+ WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0,
100)) {
@@ -243,7 +243,7 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value,
{
u32 tmp;
- WARN_ON(!mutex_is_locked(&dev_priv->dpio_lock));
+ WARN_ON(!mutex_is_locked(&dev_priv->sb_lock));
if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0,
100)) {
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index f215e223aa4a..8193a35388d7 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -229,8 +229,8 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_crtc *crtc,
if (intel_rotation_90_or_270(rotation)) {
/* stride: Surface height in tiles */
- tile_height = intel_tile_height(dev, fb->bits_per_pixel,
- fb->modifier[0]);
+ tile_height = intel_tile_height(dev, fb->pixel_format,
+ fb->modifier[0]);
stride = DIV_ROUND_UP(fb->height, tile_height);
plane_size = (src_w << 16) | src_h;
x_offset = stride * tile_height - y - (src_h + 1);
@@ -282,7 +282,6 @@ skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc, bool force)
I915_WRITE(PLANE_CTL(pipe, plane), 0);
- /* Activate double buffered register update */
I915_WRITE(PLANE_SURF(pipe, plane), 0);
POSTING_READ(PLANE_SURF(pipe, plane));
@@ -339,7 +338,6 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
struct drm_device *dev = dplane->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_plane *intel_plane = to_intel_plane(dplane);
- struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
int pipe = intel_plane->pipe;
int plane = intel_plane->plane;
@@ -453,8 +451,7 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
I915_WRITE(SPCNTR(pipe, plane), sprctl);
I915_WRITE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) +
sprsurf_offset);
-
- intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+ POSTING_READ(SPSURF(pipe, plane));
}
static void
@@ -463,21 +460,17 @@ vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc, bool force)
struct drm_device *dev = dplane->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_plane *intel_plane = to_intel_plane(dplane);
- struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
int pipe = intel_plane->pipe;
int plane = intel_plane->plane;
I915_WRITE(SPCNTR(pipe, plane), 0);
- /* Activate double buffered register update */
I915_WRITE(SPSURF(pipe, plane), 0);
-
- intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+ POSTING_READ(SPSURF(pipe, plane));
intel_update_sprite_watermarks(dplane, crtc, 0, 0, 0, false, false);
}
-
static void
ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
struct drm_framebuffer *fb,
@@ -489,7 +482,6 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
struct drm_device *dev = plane->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_plane *intel_plane = to_intel_plane(plane);
- struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
enum pipe pipe = intel_plane->pipe;
u32 sprctl, sprscale = 0;
@@ -599,8 +591,7 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
I915_WRITE(SPRCTL(pipe), sprctl);
I915_WRITE(SPRSURF(pipe),
i915_gem_obj_ggtt_offset(obj) + sprsurf_offset);
-
- intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+ POSTING_READ(SPRSURF(pipe));
}
static void
@@ -609,17 +600,15 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc, bool force)
struct drm_device *dev = plane->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_plane *intel_plane = to_intel_plane(plane);
- struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
int pipe = intel_plane->pipe;
I915_WRITE(SPRCTL(pipe), I915_READ(SPRCTL(pipe)) & ~SPRITE_ENABLE);
/* Can't leave the scaler enabled... */
if (intel_plane->can_scale)
I915_WRITE(SPRSCALE(pipe), 0);
- /* Activate double buffered register update */
- I915_WRITE(SPRSURF(pipe), 0);
- intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+ I915_WRITE(SPRSURF(pipe), 0);
+ POSTING_READ(SPRSURF(pipe));
}
static void
@@ -633,7 +622,6 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
struct drm_device *dev = plane->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_plane *intel_plane = to_intel_plane(plane);
- struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
struct drm_i915_gem_object *obj = intel_fb_obj(fb);
int pipe = intel_plane->pipe;
unsigned long dvssurf_offset, linear_offset;
@@ -730,8 +718,7 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
I915_WRITE(DVSCNTR(pipe), dvscntr);
I915_WRITE(DVSSURF(pipe),
i915_gem_obj_ggtt_offset(obj) + dvssurf_offset);
-
- intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+ POSTING_READ(DVSSURF(pipe));
}
static void
@@ -740,17 +727,14 @@ ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc, bool force)
struct drm_device *dev = plane->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_plane *intel_plane = to_intel_plane(plane);
- struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
int pipe = intel_plane->pipe;
I915_WRITE(DVSCNTR(pipe), 0);
/* Disable the scaler */
I915_WRITE(DVSSCALE(pipe), 0);
- /* Flush double buffered register updates */
I915_WRITE(DVSSURF(pipe), 0);
-
- intel_flush_primary_plane(dev_priv, intel_crtc->plane);
+ POSTING_READ(DVSSURF(pipe));
}
static int
@@ -770,6 +754,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
const struct drm_rect *clip = &state->clip;
int hscale, vscale;
int max_scale, min_scale;
+ bool can_scale;
int pixel_size;
int ret;
@@ -794,18 +779,29 @@ intel_check_sprite_plane(struct drm_plane *plane,
return -EINVAL;
}
+ /* setup can_scale, min_scale, max_scale */
+ if (INTEL_INFO(dev)->gen >= 9) {
+ /* use scaler when colorkey is not required */
+ if (intel_plane->ckey.flags == I915_SET_COLORKEY_NONE) {
+ can_scale = 1;
+ min_scale = 1;
+ max_scale = skl_max_scale(intel_crtc, crtc_state);
+ } else {
+ can_scale = 0;
+ min_scale = DRM_PLANE_HELPER_NO_SCALING;
+ max_scale = DRM_PLANE_HELPER_NO_SCALING;
+ }
+ } else {
+ can_scale = intel_plane->can_scale;
+ max_scale = intel_plane->max_downscale << 16;
+ min_scale = intel_plane->can_scale ? 1 : (1 << 16);
+ }
+
/*
* FIXME the following code does a bunch of fuzzy adjustments to the
* coordinates and sizes. We probably need some way to decide whether
* more strict checking should be done instead.
*/
- max_scale = intel_plane->max_downscale << 16;
- min_scale = intel_plane->can_scale ? 1 : (1 << 16);
-
- if (INTEL_INFO(dev)->gen >= 9) {
- min_scale = 1;
- max_scale = skl_max_scale(intel_crtc, crtc_state);
- }
drm_rect_rotate(src, fb->width << 16, fb->height << 16,
state->base.rotation);
@@ -876,7 +872,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
* Must keep src and dst the
* same if we can't scale.
*/
- if (!intel_plane->can_scale)
+ if (!can_scale)
crtc_w &= ~1;
if (crtc_w == 0)
@@ -888,7 +884,7 @@ intel_check_sprite_plane(struct drm_plane *plane,
if (state->visible && (src_w != crtc_w || src_h != crtc_h)) {
unsigned int width_bytes;
- WARN_ON(!intel_plane->can_scale);
+ WARN_ON(!can_scale);
/* FIXME interlacing min height is 6 */
@@ -1052,7 +1048,7 @@ int intel_plane_restore(struct drm_plane *plane)
plane->state->src_w, plane->state->src_h);
}
-static uint32_t ilk_plane_formats[] = {
+static const uint32_t ilk_plane_formats[] = {
DRM_FORMAT_XRGB8888,
DRM_FORMAT_YUYV,
DRM_FORMAT_YVYU,
@@ -1060,7 +1056,7 @@ static uint32_t ilk_plane_formats[] = {
DRM_FORMAT_VYUY,
};
-static uint32_t snb_plane_formats[] = {
+static const uint32_t snb_plane_formats[] = {
DRM_FORMAT_XBGR8888,
DRM_FORMAT_XRGB8888,
DRM_FORMAT_YUYV,
@@ -1069,7 +1065,7 @@ static uint32_t snb_plane_formats[] = {
DRM_FORMAT_VYUY,
};
-static uint32_t vlv_plane_formats[] = {
+static const uint32_t vlv_plane_formats[] = {
DRM_FORMAT_RGB565,
DRM_FORMAT_ABGR8888,
DRM_FORMAT_ARGB8888,
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 3e3290c203c6..ed173d30f1c0 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -253,7 +253,7 @@ void radeon_dp_aux_init(struct radeon_connector *radeon_connector)
#define DP_VOLTAGE_MAX DP_TRAIN_VOLTAGE_SWING_LEVEL_3
#define DP_PRE_EMPHASIS_MAX DP_TRAIN_PRE_EMPH_LEVEL_3
-static void dp_get_adjust_train(u8 link_status[DP_LINK_STATUS_SIZE],
+static void dp_get_adjust_train(const u8 link_status[DP_LINK_STATUS_SIZE],
int lane_count,
u8 train_set[4])
{
@@ -311,7 +311,7 @@ static int dp_get_max_dp_pix_clock(int link_rate,
/***** radeon specific DP functions *****/
int radeon_dp_get_max_link_rate(struct drm_connector *connector,
- u8 dpcd[DP_DPCD_SIZE])
+ const u8 dpcd[DP_DPCD_SIZE])
{
int max_link_rate;
@@ -328,7 +328,7 @@ int radeon_dp_get_max_link_rate(struct drm_connector *connector,
* if the max lane# < low rate lane# then use max lane# instead.
*/
static int radeon_dp_get_dp_lane_number(struct drm_connector *connector,
- u8 dpcd[DP_DPCD_SIZE],
+ const u8 dpcd[DP_DPCD_SIZE],
int pix_clock)
{
int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector));
@@ -347,7 +347,7 @@ static int radeon_dp_get_dp_lane_number(struct drm_connector *connector,
}
static int radeon_dp_get_dp_link_clock(struct drm_connector *connector,
- u8 dpcd[DP_DPCD_SIZE],
+ const u8 dpcd[DP_DPCD_SIZE],
int pix_clock)
{
int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector));
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index a0c35bbc8546..d3a22f212948 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -174,6 +174,31 @@ int cik_get_allowed_info_register(struct radeon_device *rdev,
}
}
+/*
+ * Indirect registers accessor
+ */
+u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&rdev->didt_idx_lock, flags);
+ WREG32(CIK_DIDT_IND_INDEX, (reg));
+ r = RREG32(CIK_DIDT_IND_DATA);
+ spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
+ return r;
+}
+
+void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->didt_idx_lock, flags);
+ WREG32(CIK_DIDT_IND_INDEX, (reg));
+ WREG32(CIK_DIDT_IND_DATA, (v));
+ spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
+}
+
/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
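
The new cik_didt_rreg()/cik_didt_wreg() helpers above use the classic index/data access pattern: write the target offset into an index register, then touch a single data register, with a spinlock keeping the two-step sequence atomic against concurrent accessors. As a rough stand-alone model of that pattern (illustrative user-space code, not part of the patch; a pthread mutex stands in for spin_lock_irqsave()):

#include <stdint.h>
#include <stdio.h>
#include <pthread.h>

/* flat register file plus an index register, modelling CIK_DIDT_IND_INDEX/DATA */
static uint32_t regs[256];
static uint32_t index_reg;
static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;

static uint32_t didt_rreg(uint32_t reg)
{
	uint32_t v;

	pthread_mutex_lock(&idx_lock);	/* spin_lock_irqsave() in the kernel */
	index_reg = reg;		/* WREG32(CIK_DIDT_IND_INDEX, reg) */
	v = regs[index_reg];		/* RREG32(CIK_DIDT_IND_DATA) */
	pthread_mutex_unlock(&idx_lock);
	return v;
}

static void didt_wreg(uint32_t reg, uint32_t v)
{
	pthread_mutex_lock(&idx_lock);
	index_reg = reg;
	regs[index_reg] = v;
	pthread_mutex_unlock(&idx_lock);
}

int main(void)
{
	didt_wreg(0x10, 0xdeadbeef);
	printf("0x%08x\n", didt_rreg(0x10));	/* prints 0xdeadbeef */
	return 0;
}

Without the lock, a reader could be preempted between the index write and the data read and observe a value selected by another thread's index write; that race is exactly what didt_idx_lock prevents.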
diff --git a/drivers/gpu/drm/radeon/cik_reg.h b/drivers/gpu/drm/radeon/cik_reg.h
index 0ec5d53eb6fb..4e883fdc59d8 100644
--- a/drivers/gpu/drm/radeon/cik_reg.h
+++ b/drivers/gpu/drm/radeon/cik_reg.h
@@ -149,10 +149,30 @@
#define KFD_CIK_SDMA_QUEUE_OFFSET 0x200
+#define SQ_IND_INDEX 0x8DE0
+#define SQ_CMD 0x8DEC
+#define SQ_IND_DATA 0x8DE4
+
+/*
+ * The TCP_WATCHx_xxxx offsets below are dword offsets, hence the
+ * multiplication by 4 to convert them to byte offsets.
+ */
+#define TCP_WATCH0_ADDR_H (0x32A0*4)
+#define TCP_WATCH1_ADDR_H (0x32A3*4)
+#define TCP_WATCH2_ADDR_H (0x32A6*4)
+#define TCP_WATCH3_ADDR_H (0x32A9*4)
+#define TCP_WATCH0_ADDR_L (0x32A1*4)
+#define TCP_WATCH1_ADDR_L (0x32A4*4)
+#define TCP_WATCH2_ADDR_L (0x32A7*4)
+#define TCP_WATCH3_ADDR_L (0x32AA*4)
+#define TCP_WATCH0_CNTL (0x32A2*4)
+#define TCP_WATCH1_CNTL (0x32A5*4)
+#define TCP_WATCH2_CNTL (0x32A8*4)
+#define TCP_WATCH3_CNTL (0x32AB*4)
+
#define CPC_INT_CNTL 0xC2D0
#define CP_HQD_IQ_RPTR 0xC970u
-#define AQL_ENABLE (1U << 0)
#define SDMA0_RLC0_RB_CNTL 0xD400u
#define SDMA_RB_VMID(x) (x << 24)
#define SDMA0_RLC0_RB_BASE 0xD404u
@@ -186,4 +206,38 @@
#define SDMA0_CNTL 0xD010
#define SDMA1_CNTL 0xD810
+enum {
+ MAX_TRAPID = 8, /* 3 bits in the bitfield. */
+ MAX_WATCH_ADDRESSES = 4
+};
+
+enum {
+ ADDRESS_WATCH_REG_ADDR_HI = 0,
+ ADDRESS_WATCH_REG_ADDR_LO,
+ ADDRESS_WATCH_REG_CNTL,
+ ADDRESS_WATCH_REG_MAX
+};
+
+enum { /* not defined in the CI/KV reg file */
+ ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
+ ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
+ ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
+ /* extend the mask to 26 bits in order to match the low address field */
+ ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
+ ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
+};
+
+union TCP_WATCH_CNTL_BITS {
+ struct {
+ uint32_t mask:24;
+ uint32_t vmid:4;
+ uint32_t atc:1;
+ uint32_t mode:2;
+ uint32_t valid:1;
+ } bitfields, bits;
+ uint32_t u32All;
+ signed int i32All;
+ float f32All;
+};
+
#endif
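
The TCP_WATCH_CNTL_BITS union added above lets the address-watch code build the 32-bit CNTL word field by field (mask, vmid, atc, mode, valid) and then emit it as a single u32, which is how the radeon_kfd.c hunks later in this diff program TCP_WATCHx_CNTL. A minimal stand-alone sketch of that packing, assuming the same bitfield layout (bitfield ordering is ultimately compiler-dependent):

#include <stdint.h>
#include <stdio.h>

union TCP_WATCH_CNTL_BITS {
	struct {
		uint32_t mask:24;
		uint32_t vmid:4;
		uint32_t atc:1;
		uint32_t mode:2;
		uint32_t valid:1;
	} bitfields;
	uint32_t u32All;
};

int main(void)
{
	union TCP_WATCH_CNTL_BITS cntl = { .u32All = 0 };

	cntl.bitfields.mask  = 0x00FFFFFF;	/* ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK */
	cntl.bitfields.atc   = 1;
	cntl.bitfields.valid = 0;		/* keep the watch point disabled for now */

	/* the kernel then does write_register(kgd, ..._CNTL, cntl.u32All) */
	printf("CNTL word: 0x%08x\n", cntl.u32All);
	return 0;
}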
diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h
index b33ba3b0808b..391ff9d5d706 100644
--- a/drivers/gpu/drm/radeon/cikd.h
+++ b/drivers/gpu/drm/radeon/cikd.h
@@ -2148,9 +2148,12 @@
#define VCE_CMD_IB_AUTO 0x00000005
#define VCE_CMD_SEMAPHORE 0x00000006
-#define ATC_VMID0_PASID_MAPPING 0x339Cu
-#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u
-#define ATC_VMID_PASID_MAPPING_VALID (1U << 31)
+#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u
+#define ATC_VMID0_PASID_MAPPING 0x339Cu
+#define ATC_VMID_PASID_MAPPING_PASID_MASK (0xFFFF)
+#define ATC_VMID_PASID_MAPPING_PASID_SHIFT 0
+#define ATC_VMID_PASID_MAPPING_VALID_MASK (0x1 << 31)
+#define ATC_VMID_PASID_MAPPING_VALID_SHIFT 31
#define ATC_VM_APERTURE0_CNTL 0x3310u
#define ATS_ACCESS_MODE_NEVER 0
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 05e6d6ef5963..5397bed26b86 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -35,6 +35,75 @@
#include "evergreen_blit_shaders.h"
#include "radeon_ucode.h"
+/*
+ * Indirect registers accessor
+ */
+u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&rdev->cg_idx_lock, flags);
+ WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
+ r = RREG32(EVERGREEN_CG_IND_DATA);
+ spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
+ return r;
+}
+
+void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->cg_idx_lock, flags);
+ WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
+ WREG32(EVERGREEN_CG_IND_DATA, (v));
+ spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
+}
+
+u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&rdev->pif_idx_lock, flags);
+ WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
+ r = RREG32(EVERGREEN_PIF_PHY0_DATA);
+ spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
+ return r;
+}
+
+void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->pif_idx_lock, flags);
+ WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
+ WREG32(EVERGREEN_PIF_PHY0_DATA, (v));
+ spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
+}
+
+u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&rdev->pif_idx_lock, flags);
+ WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
+ r = RREG32(EVERGREEN_PIF_PHY1_DATA);
+ spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
+ return r;
+}
+
+void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->pif_idx_lock, flags);
+ WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
+ WREG32(EVERGREEN_PIF_PHY1_DATA, (v));
+ spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
+}
+
static const u32 crtc_offsets[6] =
{
EVERGREEN_CRTC0_REGISTER_OFFSET,
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index aba2f428c0a8..75977d7e177e 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -36,6 +36,31 @@
#include "radeon_ucode.h"
#include "clearstate_cayman.h"
+/*
+ * Indirect registers accessor
+ */
+u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&rdev->smc_idx_lock, flags);
+ WREG32(TN_SMC_IND_INDEX_0, (reg));
+ r = RREG32(TN_SMC_IND_DATA_0);
+ spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
+ return r;
+}
+
+void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->smc_idx_lock, flags);
+ WREG32(TN_SMC_IND_INDEX_0, (reg));
+ WREG32(TN_SMC_IND_DATA_0, (v));
+ spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
+}
+
static const u32 tn_rlc_save_restore_register_list[] =
{
0x98fc,
@@ -2041,6 +2066,25 @@ static int cayman_startup(struct radeon_device *rdev)
if (r)
rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+ if (rdev->family == CHIP_ARUBA) {
+ r = radeon_vce_resume(rdev);
+ if (!r)
+ r = vce_v1_0_resume(rdev);
+
+ if (!r)
+ r = radeon_fence_driver_start_ring(rdev,
+ TN_RING_TYPE_VCE1_INDEX);
+ if (!r)
+ r = radeon_fence_driver_start_ring(rdev,
+ TN_RING_TYPE_VCE2_INDEX);
+
+ if (r) {
+ dev_err(rdev->dev, "VCE init error (%d).\n", r);
+ rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
+ rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
+ }
+ }
+
r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
if (r) {
dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
@@ -2118,6 +2162,19 @@ static int cayman_startup(struct radeon_device *rdev)
DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
}
+ ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+ if (ring->ring_size)
+ r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
+
+ ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+ if (ring->ring_size)
+ r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0);
+
+ if (!r)
+ r = vce_v1_0_init(rdev);
+ else if (r != -ENOENT)
+ DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
+
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -2273,6 +2330,19 @@ int cayman_init(struct radeon_device *rdev)
r600_ring_init(rdev, ring, 4096);
}
+ if (rdev->family == CHIP_ARUBA) {
+ r = radeon_vce_init(rdev);
+ if (!r) {
+ ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+ ring->ring_obj = NULL;
+ r600_ring_init(rdev, ring, 4096);
+
+ ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+ ring->ring_obj = NULL;
+ r600_ring_init(rdev, ring, 4096);
+ }
+ }
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
@@ -2326,6 +2396,7 @@ void cayman_fini(struct radeon_device *rdev)
radeon_irq_kms_fini(rdev);
uvd_v1_0_fini(rdev);
radeon_uvd_fini(rdev);
+ radeon_vce_fini(rdev);
cayman_pcie_gart_fini(rdev);
r600_vram_scratch_fini(rdev);
radeon_gem_fini(rdev);
@@ -2554,3 +2625,34 @@ void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
radeon_ring_write(ring, 0x0);
}
+
+int tn_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
+{
+ struct atom_clock_dividers dividers;
+ int r, i;
+
+ r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+ ecclk, false, &dividers);
+ if (r)
+ return r;
+
+ for (i = 0; i < 100; i++) {
+ if (RREG32(CG_ECLK_STATUS) & ECLK_STATUS)
+ break;
+ mdelay(10);
+ }
+ if (i == 100)
+ return -ETIMEDOUT;
+
+ WREG32_P(CG_ECLK_CNTL, dividers.post_div, ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK));
+
+ for (i = 0; i < 100; i++) {
+ if (RREG32(CG_ECLK_STATUS) & ECLK_STATUS)
+ break;
+ mdelay(10);
+ }
+ if (i == 100)
+ return -ETIMEDOUT;
+
+ return 0;
+}
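
tn_set_vce_clocks() above relies on the common bounded-poll idiom: spin on a status bit for a fixed number of iterations with a small delay, and treat falling out of the loop as a timeout. Stripped of the hardware specifics, the idiom looks like the sketch below (read_status() is a hypothetical stand-in for RREG32(CG_ECLK_STATUS) & ECLK_STATUS):

#include <stdio.h>
#include <errno.h>
#include <unistd.h>

static int poll_count;

/* pretend the clock settles on the third poll */
static int read_status(void)
{
	return ++poll_count >= 3;
}

static int wait_for_eclk(void)
{
	int i;

	for (i = 0; i < 100; i++) {
		if (read_status())
			break;
		usleep(10000);		/* mdelay(10) in the kernel version */
	}
	if (i == 100)
		return -ETIMEDOUT;
	return 0;
}

int main(void)
{
	printf("wait_for_eclk() = %d\n", wait_for_eclk());	/* 0 on success */
	return 0;
}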
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index 3b290838918c..47eb49b77d32 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -46,6 +46,13 @@
#define DMIF_ADDR_CONFIG 0xBD4
+/* fusion vce clocks */
+#define CG_ECLK_CNTL 0x620
+# define ECLK_DIVIDER_MASK 0x7f
+# define ECLK_DIR_CNTL_EN (1 << 8)
+#define CG_ECLK_STATUS 0x624
+# define ECLK_STATUS (1 << 0)
+
/* DCE6 only */
#define DMIF_ADDR_CALC 0xC00
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 04f2514f7564..238b13f045c1 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -4090,6 +4090,28 @@ int r100_init(struct radeon_device *rdev)
return 0;
}
+uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg)
+{
+ unsigned long flags;
+ uint32_t ret;
+
+ spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
+ writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+ ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+ spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
+ return ret;
+}
+
+void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
+ writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+ writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+ spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
+}
+
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
{
if (reg < rdev->rio_mem_size)
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 08d68f3e13e9..718b12b03b57 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -50,6 +50,31 @@
*/
/*
+ * Indirect registers accessor
+ */
+uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+ unsigned long flags;
+ uint32_t r;
+
+ spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
+ WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
+ r = RREG32(RADEON_PCIE_DATA);
+ spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
+ return r;
+}
+
+void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
+ WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
+ WREG32(RADEON_PCIE_DATA, (v));
+ spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
+}
+
+/*
* rv370,rv380 PCIE GART
*/
static int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 25b4ac967742..d0ff93256bb0 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -108,6 +108,53 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev);
extern int evergreen_rlc_resume(struct radeon_device *rdev);
extern void rv770_set_clk_bypass_mode(struct radeon_device *rdev);
+/*
+ * Indirect registers accessor
+ */
+u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
+ WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
+ r = RREG32(R600_RCU_DATA);
+ spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
+ return r;
+}
+
+void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
+ WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
+ WREG32(R600_RCU_DATA, (v));
+ spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
+}
+
+u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg)
+{
+ unsigned long flags;
+ u32 r;
+
+ spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
+ WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
+ r = RREG32(R600_UVD_CTX_DATA);
+ spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
+ return r;
+}
+
+void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
+ WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
+ WREG32(R600_UVD_CTX_DATA, (v));
+ spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
+}
+
/**
* r600_get_allowed_info_register - fetch the register for the info ioctl
*
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 46eb0fa75a61..4d2d0579fd49 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -467,7 +467,6 @@ struct radeon_bo_va {
/* protected by bo being reserved */
struct list_head bo_list;
uint32_t flags;
- uint64_t addr;
struct radeon_fence *last_pt_update;
unsigned ref_count;
@@ -719,7 +718,7 @@ struct radeon_doorbell {
resource_size_t size;
u32 __iomem *ptr;
u32 num_doorbells; /* Number of doorbells actually reserved for radeon. */
- unsigned long used[DIV_ROUND_UP(RADEON_MAX_DOORBELLS, BITS_PER_LONG)];
+ DECLARE_BITMAP(used, RADEON_MAX_DOORBELLS);
};
int radeon_doorbell_get(struct radeon_device *rdev, u32 *page);
@@ -941,6 +940,9 @@ struct radeon_vm {
/* BOs freed, but not yet updated in the PT */
struct list_head freed;
+ /* BOs cleared in the PT */
+ struct list_head cleared;
+
/* contains the page directory */
struct radeon_bo *page_directory;
unsigned max_pde_used;
@@ -1709,8 +1711,6 @@ int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
* VCE
*/
#define RADEON_MAX_VCE_HANDLES 16
-#define RADEON_VCE_STACK_SIZE (1024*1024)
-#define RADEON_VCE_HEAP_SIZE (4*1024*1024)
struct radeon_vce {
struct radeon_bo *vcpu_bo;
@@ -1721,6 +1721,7 @@ struct radeon_vce {
struct drm_file *filp[RADEON_MAX_VCE_HANDLES];
unsigned img_size[RADEON_MAX_VCE_HANDLES];
struct delayed_work idle_work;
+ uint32_t keyselect;
};
int radeon_vce_init(struct radeon_device *rdev);
@@ -2435,6 +2436,7 @@ struct radeon_device {
atomic64_t vram_usage;
atomic64_t gtt_usage;
atomic64_t num_bytes_moved;
+ atomic_t gpu_reset_counter;
/* ACPI interface */
struct radeon_atif atif;
struct radeon_atcs atcs;
@@ -2472,38 +2474,24 @@ int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
#define RADEON_MIN_MMIO_SIZE 0x10000
+uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg);
+void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v);
static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg,
bool always_indirect)
{
/* The mmio size is 64kb at minimum. Allows the if to be optimized out. */
if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
return readl(((void __iomem *)rdev->rmmio) + reg);
- else {
- unsigned long flags;
- uint32_t ret;
-
- spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
- writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
- ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
- spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
-
- return ret;
- }
+ else
+ return r100_mm_rreg_slow(rdev, reg);
}
-
static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v,
bool always_indirect)
{
if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect)
writel(v, ((void __iomem *)rdev->rmmio) + reg);
- else {
- unsigned long flags;
-
- spin_lock_irqsave(&rdev->mmio_idx_lock, flags);
- writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
- writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
- spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags);
- }
+ else
+ r100_mm_wreg_slow(rdev, reg, v);
}
u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
@@ -2579,6 +2567,13 @@ static inline struct radeon_fence *to_radeon_fence(struct fence *f)
tmp_ |= ((val) & ~(mask)); \
WREG32_PLL(reg, tmp_); \
} while (0)
+#define WREG32_SMC_P(reg, val, mask) \
+ do { \
+ uint32_t tmp_ = RREG32_SMC(reg); \
+ tmp_ &= (mask); \
+ tmp_ |= ((val) & ~(mask)); \
+ WREG32_SMC(reg, tmp_); \
+ } while (0)
#define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg), false))
#define RREG32_IO(reg) r100_io_rreg(rdev, (reg))
#define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v))
@@ -2587,184 +2582,29 @@ static inline struct radeon_fence *to_radeon_fence(struct fence *f)
#define WDOORBELL32(index, v) cik_mm_wdoorbell(rdev, (index), (v))
/*
- * Indirect registers accessor
+ * Indirect registers accessors.
+ * They used to be inlined, but that increased code size by ~65 kbytes.
+ * Since each accessor performs a pair of MMIO ops
+ * within a spin_lock_irqsave/spin_unlock_irqrestore region,
+ * the cost of call+ret is almost negligible: MMIO and locking
+ * cost several dozen cycles each at best, while call+ret is ~5 cycles.
*/
-static inline uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg)
-{
- unsigned long flags;
- uint32_t r;
-
- spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
- WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
- r = RREG32(RADEON_PCIE_DATA);
- spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
- return r;
-}
-
-static inline void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rdev->pcie_idx_lock, flags);
- WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask));
- WREG32(RADEON_PCIE_DATA, (v));
- spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags);
-}
-
-static inline u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg)
-{
- unsigned long flags;
- u32 r;
-
- spin_lock_irqsave(&rdev->smc_idx_lock, flags);
- WREG32(TN_SMC_IND_INDEX_0, (reg));
- r = RREG32(TN_SMC_IND_DATA_0);
- spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
- return r;
-}
-
-static inline void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rdev->smc_idx_lock, flags);
- WREG32(TN_SMC_IND_INDEX_0, (reg));
- WREG32(TN_SMC_IND_DATA_0, (v));
- spin_unlock_irqrestore(&rdev->smc_idx_lock, flags);
-}
-
-static inline u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg)
-{
- unsigned long flags;
- u32 r;
-
- spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
- WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
- r = RREG32(R600_RCU_DATA);
- spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
- return r;
-}
-
-static inline void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rdev->rcu_idx_lock, flags);
- WREG32(R600_RCU_INDEX, ((reg) & 0x1fff));
- WREG32(R600_RCU_DATA, (v));
- spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags);
-}
-
-static inline u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg)
-{
- unsigned long flags;
- u32 r;
-
- spin_lock_irqsave(&rdev->cg_idx_lock, flags);
- WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
- r = RREG32(EVERGREEN_CG_IND_DATA);
- spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
- return r;
-}
-
-static inline void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rdev->cg_idx_lock, flags);
- WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff));
- WREG32(EVERGREEN_CG_IND_DATA, (v));
- spin_unlock_irqrestore(&rdev->cg_idx_lock, flags);
-}
-
-static inline u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg)
-{
- unsigned long flags;
- u32 r;
-
- spin_lock_irqsave(&rdev->pif_idx_lock, flags);
- WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
- r = RREG32(EVERGREEN_PIF_PHY0_DATA);
- spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
- return r;
-}
-
-static inline void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rdev->pif_idx_lock, flags);
- WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff));
- WREG32(EVERGREEN_PIF_PHY0_DATA, (v));
- spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
-}
-
-static inline u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg)
-{
- unsigned long flags;
- u32 r;
-
- spin_lock_irqsave(&rdev->pif_idx_lock, flags);
- WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
- r = RREG32(EVERGREEN_PIF_PHY1_DATA);
- spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
- return r;
-}
-
-static inline void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rdev->pif_idx_lock, flags);
- WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff));
- WREG32(EVERGREEN_PIF_PHY1_DATA, (v));
- spin_unlock_irqrestore(&rdev->pif_idx_lock, flags);
-}
-
-static inline u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg)
-{
- unsigned long flags;
- u32 r;
-
- spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
- WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
- r = RREG32(R600_UVD_CTX_DATA);
- spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
- return r;
-}
-
-static inline void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rdev->uvd_idx_lock, flags);
- WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff));
- WREG32(R600_UVD_CTX_DATA, (v));
- spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags);
-}
-
-
-static inline u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
-{
- unsigned long flags;
- u32 r;
-
- spin_lock_irqsave(&rdev->didt_idx_lock, flags);
- WREG32(CIK_DIDT_IND_INDEX, (reg));
- r = RREG32(CIK_DIDT_IND_DATA);
- spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
- return r;
-}
-
-static inline void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&rdev->didt_idx_lock, flags);
- WREG32(CIK_DIDT_IND_INDEX, (reg));
- WREG32(CIK_DIDT_IND_DATA, (v));
- spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
-}
+uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg);
+void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg);
+void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg);
+void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg);
+void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg);
+void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg);
+void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg);
+void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v);
+u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg);
+void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v);
void r100_pll_errata_after_index(struct radeon_device *rdev);
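
Note the mask convention of the new WREG32_SMC_P() macro above: bits set in `mask` are preserved from the current register contents, while bits cleared in `mask` are taken from `val`. Callers therefore pass the complement of the field they want to touch, as the si.c hunks below do (e.g. WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK)). A self-contained model of the behaviour, with a plain variable standing in for the SMC register:

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

static uint32_t smc_reg = 0x12345678;		/* stand-in for the real SMC register */

#define RREG32_SMC(reg)		(smc_reg)
#define WREG32_SMC(reg, v)	(smc_reg = (v))

#define WREG32_SMC_P(reg, val, mask)			\
	do {						\
		uint32_t tmp_ = RREG32_SMC(reg);	\
		tmp_ &= (mask);				\
		tmp_ |= ((val) & ~(mask));		\
		WREG32_SMC(reg, tmp_);			\
	} while (0)

#define FIELD_MASK 0x000000ff

int main(void)
{
	/* update only the low byte; every other bit is preserved */
	WREG32_SMC_P(0 /* register id unused in this model */, 0xaa, ~FIELD_MASK);
	assert(smc_reg == 0x123456aa);
	printf("0x%08x\n", smc_reg);
	return 0;
}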
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 8dbf5083c4ff..f2421bc3e901 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1761,6 +1761,19 @@ static struct radeon_asic cayman_asic = {
},
};
+static struct radeon_asic_ring trinity_vce_ring = {
+ .ib_execute = &radeon_vce_ib_execute,
+ .emit_fence = &radeon_vce_fence_emit,
+ .emit_semaphore = &radeon_vce_semaphore_emit,
+ .cs_parse = &radeon_vce_cs_parse,
+ .ring_test = &radeon_vce_ring_test,
+ .ib_test = &radeon_vce_ib_test,
+ .is_lockup = &radeon_ring_test_lockup,
+ .get_rptr = &vce_v1_0_get_rptr,
+ .get_wptr = &vce_v1_0_get_wptr,
+ .set_wptr = &vce_v1_0_set_wptr,
+};
+
static struct radeon_asic trinity_asic = {
.init = &cayman_init,
.fini = &cayman_fini,
@@ -1794,6 +1807,8 @@ static struct radeon_asic trinity_asic = {
[R600_RING_TYPE_DMA_INDEX] = &cayman_dma_ring,
[CAYMAN_RING_TYPE_DMA1_INDEX] = &cayman_dma_ring,
[R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
+ [TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring,
+ [TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring,
},
.irq = {
.set = &evergreen_irq_set,
@@ -1838,6 +1853,7 @@ static struct radeon_asic trinity_asic = {
.set_pcie_lanes = NULL,
.set_clock_gating = NULL,
.set_uvd_clocks = &sumo_set_uvd_clocks,
+ .set_vce_clocks = &tn_set_vce_clocks,
.get_temperature = &tn_get_temp,
},
.dpm = {
@@ -1929,6 +1945,8 @@ static struct radeon_asic si_asic = {
[R600_RING_TYPE_DMA_INDEX] = &si_dma_ring,
[CAYMAN_RING_TYPE_DMA1_INDEX] = &si_dma_ring,
[R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
+ [TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring,
+ [TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring,
},
.irq = {
.set = &si_irq_set,
@@ -1973,6 +1991,7 @@ static struct radeon_asic si_asic = {
.set_pcie_lanes = &r600_set_pcie_lanes,
.set_clock_gating = NULL,
.set_uvd_clocks = &si_set_uvd_clocks,
+ .set_vce_clocks = &si_set_vce_clocks,
.get_temperature = &si_get_temp,
},
.dpm = {
@@ -2436,6 +2455,8 @@ int radeon_asic_init(struct radeon_device *rdev)
/* set num crtcs */
rdev->num_crtc = 4;
rdev->has_uvd = true;
+ rdev->cg_flags =
+ RADEON_CG_SUPPORT_VCE_MGCG;
break;
case CHIP_TAHITI:
case CHIP_PITCAIRN:
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index a3ca8cd305c5..e0aa33262eac 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -694,6 +694,7 @@ int trinity_dpm_force_performance_level(struct radeon_device *rdev,
void trinity_dpm_enable_bapm(struct radeon_device *rdev, bool enable);
u32 trinity_dpm_get_current_sclk(struct radeon_device *rdev);
u32 trinity_dpm_get_current_mclk(struct radeon_device *rdev);
+int tn_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk);
/* DCE6 - SI */
void dce6_bandwidth_update(struct radeon_device *rdev);
@@ -745,6 +746,7 @@ void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
u32 si_get_xclk(struct radeon_device *rdev);
uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev);
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
+int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk);
int si_get_temp(struct radeon_device *rdev);
int si_get_allowed_info_register(struct radeon_device *rdev,
u32 reg, u32 *val);
@@ -970,10 +972,14 @@ uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev,
struct radeon_ring *ring);
void vce_v1_0_set_wptr(struct radeon_device *rdev,
struct radeon_ring *ring);
+int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data);
+unsigned vce_v1_0_bo_size(struct radeon_device *rdev);
+int vce_v1_0_resume(struct radeon_device *rdev);
int vce_v1_0_init(struct radeon_device *rdev);
int vce_v1_0_start(struct radeon_device *rdev);
/* vce v2.0 */
+unsigned vce_v2_0_bo_size(struct radeon_device *rdev);
int vce_v2_0_resume(struct radeon_device *rdev);
#endif
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index dcb779647c57..abbc154b1bff 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -242,6 +242,13 @@ static struct radeon_audio_funcs dce6_dp_funcs = {
.dpms = evergreen_dp_enable,
};
+static void radeon_audio_enable(struct radeon_device *rdev,
+ struct r600_audio_pin *pin, u8 enable_mask)
+{
+ if (rdev->audio.funcs->enable)
+ rdev->audio.funcs->enable(rdev, pin, enable_mask);
+}
+
static void radeon_audio_interface_init(struct radeon_device *rdev)
{
if (ASIC_IS_DCE6(rdev)) {
@@ -307,7 +314,7 @@ int radeon_audio_init(struct radeon_device *rdev)
/* disable audio. it will be set up later */
for (i = 0; i < rdev->audio.num_pins; i++)
- radeon_audio_enable(rdev, &rdev->audio.pin[i], false);
+ radeon_audio_enable(rdev, &rdev->audio.pin[i], 0);
return 0;
}
@@ -443,13 +450,6 @@ static void radeon_audio_select_pin(struct drm_encoder *encoder)
radeon_encoder->audio->select_pin(encoder);
}
-void radeon_audio_enable(struct radeon_device *rdev,
- struct r600_audio_pin *pin, u8 enable_mask)
-{
- if (rdev->audio.funcs->enable)
- rdev->audio.funcs->enable(rdev, pin, enable_mask);
-}
-
void radeon_audio_detect(struct drm_connector *connector,
enum drm_connector_status status)
{
@@ -505,7 +505,7 @@ void radeon_audio_fini(struct radeon_device *rdev)
return;
for (i = 0; i < rdev->audio.num_pins; i++)
- radeon_audio_enable(rdev, &rdev->audio.pin[i], false);
+ radeon_audio_enable(rdev, &rdev->audio.pin[i], 0);
rdev->audio.enabled = false;
}
diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h
index c92d059ab204..8438304f7139 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.h
+++ b/drivers/gpu/drm/radeon/radeon_audio.h
@@ -74,8 +74,6 @@ u32 radeon_audio_endpoint_rreg(struct radeon_device *rdev,
void radeon_audio_endpoint_wreg(struct radeon_device *rdev,
u32 offset, u32 reg, u32 v);
struct r600_audio_pin *radeon_audio_get_pin(struct drm_encoder *encoder);
-void radeon_audio_enable(struct radeon_device *rdev,
- struct r600_audio_pin *pin, u8 enable_mask);
void radeon_audio_fini(struct radeon_device *rdev);
void radeon_audio_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index b7ca4c514621..13e207e0dff0 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1725,6 +1725,8 @@ int radeon_gpu_reset(struct radeon_device *rdev)
return 0;
}
+ atomic_inc(&rdev->gpu_reset_counter);
+
radeon_save_bios_scratch_regs(rdev);
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 7d620d4b3f31..5751446677d3 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -90,9 +90,10 @@
* CS to GPU on >= r600
* 2.41.0 - evergreen/cayman: Add SET_BASE/DRAW_INDIRECT command parsing support
* 2.42.0 - Add VCE/VUI (Video Usability Information) support
+ * 2.43.0 - RADEON_INFO_GPU_RESET_COUNTER
*/
#define KMS_DRIVER_MAJOR 2
-#define KMS_DRIVER_MINOR 42
+#define KMS_DRIVER_MINOR 43
#define KMS_DRIVER_PATCHLEVEL 0
int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index 7162c935371c..1162bfa464f3 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -79,10 +79,12 @@ static void radeon_hotplug_work_func(struct work_struct *work)
struct drm_mode_config *mode_config = &dev->mode_config;
struct drm_connector *connector;
+ mutex_lock(&mode_config->mutex);
if (mode_config->num_connector) {
list_for_each_entry(connector, &mode_config->connector_list, head)
radeon_connector_hotplug(connector);
}
+ mutex_unlock(&mode_config->mutex);
/* Just fire off a uevent and let userspace tell us what to do */
drm_helper_hpd_irq_event(dev);
}
@@ -143,7 +145,13 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev)
*/
int radeon_driver_irq_postinstall_kms(struct drm_device *dev)
{
- dev->max_vblank_count = 0x001fffff;
+ struct radeon_device *rdev = dev->dev_private;
+
+ if (ASIC_IS_AVIVO(rdev))
+ dev->max_vblank_count = 0x00ffffff;
+ else
+ dev->max_vblank_count = 0x001fffff;
+
return 0;
}
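
The reason max_vblank_count matters: it tells the DRM core at which value the hardware frame counter wraps (24 bits on AVIVO and newer parts, 21 bits on the older ones handled here), and the core computes frame deltas modulo that width. A much-simplified model of such a wrap-safe delta (the real DRM core logic handles more cases):

#include <stdint.h>
#include <stdio.h>

/* delta between two hardware frame-counter samples, given the wrap mask */
static uint32_t vblank_delta(uint32_t cur, uint32_t last, uint32_t max_vblank_count)
{
	return (cur - last) & max_vblank_count;
}

int main(void)
{
	/* counter wrapped from 0x00fffffe to 0x00000001: 3 frames elapsed */
	printf("%u\n", vblank_delta(0x00000001, 0x00fffffe, 0x00ffffff));
	return 0;
}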
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 813a416db538..e476c331f3fa 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -34,6 +34,13 @@
#define CIK_PIPE_PER_MEC (4)
+static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = {
+ TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL,
+ TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL,
+ TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL,
+ TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL
+};
+
struct kgd_mem {
struct radeon_bo *bo;
uint64_t gpu_addr;
@@ -79,6 +86,23 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
unsigned int timeout);
+static int kgd_address_watch_disable(struct kgd_dev *kgd);
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo);
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd);
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset);
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static const struct kfd2kgd_calls kfd2kgd = {
.init_gtt_mem_allocation = alloc_gtt_mem,
@@ -96,6 +120,13 @@ static const struct kfd2kgd_calls kfd2kgd = {
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
+ .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
+ .write_vmid_invalidate_request = write_vmid_invalidate_request,
.get_fw_version = get_fw_version
};
@@ -372,8 +403,8 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
* the SW cleared it.
* So the protocol is to always wait & clear.
*/
- uint32_t pasid_mapping = (pasid == 0) ? 0 :
- (uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID;
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID_PASID_MAPPING_VALID_MASK;
write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t),
pasid_mapping);
@@ -665,6 +696,122 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
return 0;
}
+static int kgd_address_watch_disable(struct kgd_dev *kgd)
+{
+ union TCP_WATCH_CNTL_BITS cntl;
+ unsigned int i;
+
+ cntl.u32All = 0;
+
+ cntl.bitfields.valid = 0;
+ cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK;
+ cntl.bitfields.atc = 1;
+
+ /* Turn off all watch points until their registers are fully programmed */
+ for (i = 0; i < MAX_WATCH_ADDRESSES; i++)
+ write_register(kgd,
+ watchRegs[i * ADDRESS_WATCH_REG_MAX +
+ ADDRESS_WATCH_REG_CNTL],
+ cntl.u32All);
+
+ return 0;
+}
+
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo)
+{
+ union TCP_WATCH_CNTL_BITS cntl;
+
+ cntl.u32All = cntl_val;
+
+ /* Turn off this watch point until all its registers are programmed */
+ cntl.bitfields.valid = 0;
+ write_register(kgd,
+ watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+ ADDRESS_WATCH_REG_CNTL],
+ cntl.u32All);
+
+ write_register(kgd,
+ watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+ ADDRESS_WATCH_REG_ADDR_HI],
+ addr_hi);
+
+ write_register(kgd,
+ watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+ ADDRESS_WATCH_REG_ADDR_LO],
+ addr_lo);
+
+ /* Enable the watch point */
+ cntl.bitfields.valid = 1;
+
+ write_register(kgd,
+ watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX +
+ ADDRESS_WATCH_REG_CNTL],
+ cntl.u32All);
+
+ return 0;
+}
+
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd)
+{
+ struct radeon_device *rdev = get_radeon_device(kgd);
+ uint32_t data;
+
+ mutex_lock(&rdev->grbm_idx_mutex);
+
+ write_register(kgd, GRBM_GFX_INDEX, gfx_index_val);
+ write_register(kgd, SQ_CMD, sq_cmd);
+
+ /* Restore the GRBM_GFX_INDEX register */
+
+ data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES |
+ SE_BROADCAST_WRITES;
+
+ write_register(kgd, GRBM_GFX_INDEX, data);
+
+ mutex_unlock(&rdev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset)
+{
+ return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
+}
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid)
+{
+ uint32_t reg;
+ struct radeon_device *rdev = (struct radeon_device *) kgd;
+
+ reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
+ return reg & ATC_VMID_PASID_MAPPING_VALID_MASK;
+}
+
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct radeon_device *rdev = (struct radeon_device *) kgd;
+
+ reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4);
+ return reg & ATC_VMID_PASID_MAPPING_PASID_MASK;
+}
+
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
+{
+ struct radeon_device *rdev = (struct radeon_device *) kgd;
+
+ WREG32(VM_INVALIDATE_REQUEST, 1 << vmid);
+}
+
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
{
struct radeon_device *rdev = (struct radeon_device *) kgd;
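
kgd_address_watch_get_offset() above treats watchRegs[] as a 4x3 table flattened into one array (one row per watch point, one column per ADDRESS_WATCH_REG_* entry), so the lookup is simply watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset. A small stand-alone illustration of that indexing using the same TCP_WATCH offsets:

#include <stdint.h>
#include <stdio.h>

enum { ADDR_HI = 0, ADDR_LO, CNTL, REG_MAX };	/* mirrors ADDRESS_WATCH_REG_* */
#define MAX_WATCH_ADDRESSES 4

/* dword offsets * 4, same values as the TCP_WATCHx_* defines in cik_reg.h */
static const uint32_t watch_regs[MAX_WATCH_ADDRESSES * REG_MAX] = {
	0x32A0*4, 0x32A1*4, 0x32A2*4,	/* watch point 0: ADDR_H, ADDR_L, CNTL */
	0x32A3*4, 0x32A4*4, 0x32A5*4,
	0x32A6*4, 0x32A7*4, 0x32A8*4,
	0x32A9*4, 0x32AA*4, 0x32AB*4,
};

static uint32_t watch_get_offset(unsigned int watch_point_id, unsigned int reg)
{
	return watch_regs[watch_point_id * REG_MAX + reg];
}

int main(void)
{
	/* watch point 2, CNTL column -> 0x32A8*4 */
	printf("0x%x\n", watch_get_offset(2, CNTL));
	return 0;
}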
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 7b2a7335cc5d..9632e886ddc3 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -576,6 +576,9 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
if (radeon_get_allowed_info_register(rdev, *value, value))
return -EINVAL;
break;
+ case RADEON_INFO_GPU_RESET_COUNTER:
+ *value = atomic_read(&rdev->gpu_reset_counter);
+ break;
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->request);
return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index fa91a17b81b6..6de5459316b5 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -754,7 +754,7 @@ extern bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector);
extern int radeon_dp_get_panel_mode(struct drm_encoder *encoder,
struct drm_connector *connector);
int radeon_dp_get_max_link_rate(struct drm_connector *connector,
- u8 *dpcd);
+ const u8 *dpcd);
extern void radeon_dp_set_rx_power_state(struct drm_connector *connector,
u8 power_state);
extern void radeon_dp_aux_init(struct radeon_connector *radeon_connector);
diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c
index 0de5711ac508..574f62bbd215 100644
--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@@ -38,8 +38,10 @@
#define VCE_IDLE_TIMEOUT_MS 1000
/* Firmware Names */
+#define FIRMWARE_TAHITI "radeon/TAHITI_vce.bin"
#define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin"
+MODULE_FIRMWARE(FIRMWARE_TAHITI);
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
static void radeon_vce_idle_work_handler(struct work_struct *work);
@@ -63,6 +65,14 @@ int radeon_vce_init(struct radeon_device *rdev)
INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler);
switch (rdev->family) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_ARUBA:
+ fw_name = FIRMWARE_TAHITI;
+ break;
+
case CHIP_BONAIRE:
case CHIP_KAVERI:
case CHIP_KABINI:
@@ -118,13 +128,17 @@ int radeon_vce_init(struct radeon_device *rdev)
rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8);
/* we can only work with this fw version for now */
- if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8)))
+ if ((rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) &&
+ (rdev->vce.fw_version != ((50 << 24) | (0 << 16) | (1 << 8))) &&
+ (rdev->vce.fw_version != ((50 << 24) | (1 << 16) | (2 << 8))))
return -EINVAL;
/* allocate firmware, stack and heap BO */
- size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) +
- RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE;
+ if (rdev->family < CHIP_BONAIRE)
+ size = vce_v1_0_bo_size(rdev);
+ else
+ size = vce_v2_0_bo_size(rdev);
r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL,
&rdev->vce.vcpu_bo);
@@ -225,13 +239,17 @@ int radeon_vce_resume(struct radeon_device *rdev)
return r;
}
- memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
+ memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo));
+ if (rdev->family < CHIP_BONAIRE)
+ r = vce_v1_0_load_fw(rdev, cpu_addr);
+ else
+ memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
radeon_bo_kunmap(rdev->vce.vcpu_bo);
radeon_bo_unreserve(rdev->vce.vcpu_bo);
- return 0;
+ return r;
}
/**
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index de42fc4a22b8..9739ded91b7a 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -331,7 +331,6 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
bo_va->it.start = 0;
bo_va->it.last = 0;
bo_va->flags = 0;
- bo_va->addr = 0;
bo_va->ref_count = 1;
INIT_LIST_HEAD(&bo_va->bo_list);
INIT_LIST_HEAD(&bo_va->vm_status);
@@ -491,9 +490,11 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
}
if (bo_va->it.start || bo_va->it.last) {
- if (bo_va->addr) {
+ spin_lock(&vm->status_lock);
+ if (list_empty(&bo_va->vm_status)) {
/* add a clone of the bo_va to clear the old address */
struct radeon_bo_va *tmp;
+ spin_unlock(&vm->status_lock);
tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
if (!tmp) {
mutex_unlock(&vm->mutex);
@@ -502,14 +503,11 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
tmp->it.start = bo_va->it.start;
tmp->it.last = bo_va->it.last;
tmp->vm = vm;
- tmp->addr = bo_va->addr;
tmp->bo = radeon_bo_ref(bo_va->bo);
spin_lock(&vm->status_lock);
list_add(&tmp->vm_status, &vm->freed);
- spin_unlock(&vm->status_lock);
-
- bo_va->addr = 0;
}
+ spin_unlock(&vm->status_lock);
interval_tree_remove(&bo_va->it, &vm->va);
bo_va->it.start = 0;
@@ -520,10 +518,12 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
bo_va->it.start = soffset;
bo_va->it.last = eoffset - 1;
interval_tree_insert(&bo_va->it, &vm->va);
+ spin_lock(&vm->status_lock);
+ list_add(&bo_va->vm_status, &vm->cleared);
+ spin_unlock(&vm->status_lock);
}
bo_va->flags = flags;
- bo_va->addr = 0;
soffset >>= radeon_vm_block_size;
eoffset >>= radeon_vm_block_size;
@@ -921,7 +921,16 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
}
spin_lock(&vm->status_lock);
- list_del_init(&bo_va->vm_status);
+ if (mem) {
+ if (list_empty(&bo_va->vm_status)) {
+ spin_unlock(&vm->status_lock);
+ return 0;
+ }
+ list_del_init(&bo_va->vm_status);
+ } else {
+ list_del(&bo_va->vm_status);
+ list_add(&bo_va->vm_status, &vm->cleared);
+ }
spin_unlock(&vm->status_lock);
bo_va->flags &= ~RADEON_VM_PAGE_VALID;
@@ -947,10 +956,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
addr = 0;
}
- if (addr == bo_va->addr)
- return 0;
- bo_va->addr = addr;
-
trace_radeon_vm_bo_update(bo_va);
nptes = bo_va->it.last - bo_va->it.start + 1;
@@ -1038,7 +1043,7 @@ int radeon_vm_clear_freed(struct radeon_device *rdev,
struct radeon_vm *vm)
{
struct radeon_bo_va *bo_va;
- int r;
+ int r = 0;
spin_lock(&vm->status_lock);
while (!list_empty(&vm->freed)) {
@@ -1049,14 +1054,15 @@ int radeon_vm_clear_freed(struct radeon_device *rdev,
r = radeon_vm_bo_update(rdev, bo_va, NULL);
radeon_bo_unref(&bo_va->bo);
radeon_fence_unref(&bo_va->last_pt_update);
+ spin_lock(&vm->status_lock);
+ list_del(&bo_va->vm_status);
kfree(bo_va);
if (r)
- return r;
+ break;
- spin_lock(&vm->status_lock);
}
spin_unlock(&vm->status_lock);
- return 0;
+ return r;
}
@@ -1114,14 +1120,14 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
mutex_lock(&vm->mutex);
if (bo_va->it.start || bo_va->it.last)
interval_tree_remove(&bo_va->it, &vm->va);
- spin_lock(&vm->status_lock);
- list_del(&bo_va->vm_status);
- if (bo_va->addr) {
+ spin_lock(&vm->status_lock);
+ if (list_empty(&bo_va->vm_status)) {
bo_va->bo = radeon_bo_ref(bo_va->bo);
list_add(&bo_va->vm_status, &vm->freed);
} else {
radeon_fence_unref(&bo_va->last_pt_update);
+ list_del(&bo_va->vm_status);
kfree(bo_va);
}
spin_unlock(&vm->status_lock);
@@ -1144,12 +1150,10 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev,
struct radeon_bo_va *bo_va;
list_for_each_entry(bo_va, &bo->va, bo_list) {
- if (bo_va->addr) {
- spin_lock(&bo_va->vm->status_lock);
- list_del(&bo_va->vm_status);
+ spin_lock(&bo_va->vm->status_lock);
+ if (list_empty(&bo_va->vm_status))
list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
- spin_unlock(&bo_va->vm->status_lock);
- }
+ spin_unlock(&bo_va->vm->status_lock);
}
}
@@ -1179,6 +1183,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
spin_lock_init(&vm->status_lock);
INIT_LIST_HEAD(&vm->invalidated);
INIT_LIST_HEAD(&vm->freed);
+ INIT_LIST_HEAD(&vm->cleared);
pd_size = radeon_vm_directory_size(rdev);
pd_entries = radeon_vm_num_pdes(rdev);
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 5326f753e107..34c3739c87cf 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -6907,6 +6907,22 @@ static int si_startup(struct radeon_device *rdev)
rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
}
+ r = radeon_vce_resume(rdev);
+ if (!r) {
+ r = vce_v1_0_resume(rdev);
+ if (!r)
+ r = radeon_fence_driver_start_ring(rdev,
+ TN_RING_TYPE_VCE1_INDEX);
+ if (!r)
+ r = radeon_fence_driver_start_ring(rdev,
+ TN_RING_TYPE_VCE2_INDEX);
+ }
+ if (r) {
+ dev_err(rdev->dev, "VCE init error (%d).\n", r);
+ rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
+ rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
+ }
+
/* Enable IRQ */
if (!rdev->irq.installed) {
r = radeon_irq_kms_init(rdev);
@@ -6975,6 +6991,23 @@ static int si_startup(struct radeon_device *rdev)
}
}
+ r = -ENOENT;
+
+ ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+ if (ring->ring_size)
+ r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
+ VCE_CMD_NO_OP);
+
+ ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+ if (ring->ring_size)
+ r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
+ VCE_CMD_NO_OP);
+
+ if (!r)
+ r = vce_v1_0_init(rdev);
+ else if (r != -ENOENT)
+ DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
+
r = radeon_ib_pool_init(rdev);
if (r) {
dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -7033,6 +7066,7 @@ int si_suspend(struct radeon_device *rdev)
if (rdev->has_uvd) {
uvd_v1_0_fini(rdev);
radeon_uvd_suspend(rdev);
+ radeon_vce_suspend(rdev);
}
si_fini_pg(rdev);
si_fini_cg(rdev);
@@ -7140,6 +7174,17 @@ int si_init(struct radeon_device *rdev)
}
}
+ r = radeon_vce_init(rdev);
+ if (!r) {
+ ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+ ring->ring_obj = NULL;
+ r600_ring_init(rdev, ring, 4096);
+
+ ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+ ring->ring_obj = NULL;
+ r600_ring_init(rdev, ring, 4096);
+ }
+
rdev->ih.ring_obj = NULL;
r600_ih_ring_init(rdev, 64 * 1024);
@@ -7191,6 +7236,7 @@ void si_fini(struct radeon_device *rdev)
if (rdev->has_uvd) {
uvd_v1_0_fini(rdev);
radeon_uvd_fini(rdev);
+ radeon_vce_fini(rdev);
}
si_pcie_gart_fini(rdev);
r600_vram_scratch_fini(rdev);
@@ -7675,3 +7721,124 @@ static void si_program_aspm(struct radeon_device *rdev)
}
}
}
+
+int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
+{
+ unsigned i;
+
+ /* make sure VCEPLL_CTLREQ is deasserted */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+
+ mdelay(10);
+
+ /* assert UPLL_CTLREQ */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
+
+ /* wait for CTLACK and CTLACK2 to get asserted */
+ for (i = 0; i < 100; ++i) {
+ uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
+ if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
+ break;
+ mdelay(10);
+ }
+
+ /* deassert UPLL_CTLREQ */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
+
+ if (i == 100) {
+ DRM_ERROR("Timeout setting UVD clocks!\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
+{
+ unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
+ int r;
+
+ /* bypass evclk and ecclk with bclk */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
+ EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
+ ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
+
+ /* put PLL in bypass mode */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
+ ~VCEPLL_BYPASS_EN_MASK);
+
+ if (!evclk || !ecclk) {
+ /* keep the Bypass mode, put PLL to sleep */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
+ ~VCEPLL_SLEEP_MASK);
+ return 0;
+ }
+
+ r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
+ 16384, 0x03FFFFFF, 0, 128, 5,
+ &fb_div, &evclk_div, &ecclk_div);
+ if (r)
+ return r;
+
+ /* set RESET_ANTI_MUX to 0 */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
+
+ /* set VCO_MODE to 1 */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
+ ~VCEPLL_VCO_MODE_MASK);
+
+ /* toggle VCEPLL_SLEEP to 1 then back to 0 */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
+ ~VCEPLL_SLEEP_MASK);
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
+
+ /* deassert VCEPLL_RESET */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
+
+ mdelay(1);
+
+ r = si_vce_send_vcepll_ctlreq(rdev);
+ if (r)
+ return r;
+
+ /* assert VCEPLL_RESET again */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
+
+ /* disable spread spectrum. */
+ WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
+
+ /* set feedback divider */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
+
+ /* set ref divider to 0 */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
+
+ /* set PDIV_A and PDIV_B */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
+ VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
+ ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
+
+ /* give the PLL some time to settle */
+ mdelay(15);
+
+ /* deassert PLL_RESET */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
+
+ mdelay(15);
+
+ /* switch from bypass mode to normal mode */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
+
+ r = si_vce_send_vcepll_ctlreq(rdev);
+ if (r)
+ return r;
+
+ /* switch EVCLK and ECCLK selection */
+ WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
+ EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
+ ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
+
+ mdelay(100);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index ff8b83f5e929..1dbdf3230dae 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -1740,6 +1740,7 @@ struct ni_power_info *ni_get_pi(struct radeon_device *rdev);
struct ni_ps *ni_get_ps(struct radeon_ps *rps);
extern int si_mc_load_microcode(struct radeon_device *rdev);
+extern void vce_v1_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static int si_populate_voltage_value(struct radeon_device *rdev,
const struct atom_voltage_table *table,
@@ -2928,6 +2929,56 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = {
{ 0, 0, 0, 0 },
};
+static u16 si_get_lower_of_leakage_and_vce_voltage(struct radeon_device *rdev,
+ u16 vce_voltage)
+{
+ u16 highest_leakage = 0;
+ struct si_power_info *si_pi = si_get_pi(rdev);
+ int i;
+
+ for (i = 0; i < si_pi->leakage_voltage.count; i++){
+ if (highest_leakage < si_pi->leakage_voltage.entries[i].voltage)
+ highest_leakage = si_pi->leakage_voltage.entries[i].voltage;
+ }
+
+ if (si_pi->leakage_voltage.count && (highest_leakage < vce_voltage))
+ return highest_leakage;
+
+ return vce_voltage;
+}
+
+static int si_get_vce_clock_voltage(struct radeon_device *rdev,
+ u32 evclk, u32 ecclk, u16 *voltage)
+{
+ u32 i;
+ int ret = -EINVAL;
+ struct radeon_vce_clock_voltage_dependency_table *table =
+ &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table;
+
+ if (((evclk == 0) && (ecclk == 0)) ||
+ (table && (table->count == 0))) {
+ *voltage = 0;
+ return 0;
+ }
+
+ for (i = 0; i < table->count; i++) {
+ if ((evclk <= table->entries[i].evclk) &&
+ (ecclk <= table->entries[i].ecclk)) {
+ *voltage = table->entries[i].v;
+ ret = 0;
+ break;
+ }
+ }
+
+ /* if no match return the highest voltage */
+ if (ret)
+ *voltage = table->entries[table->count - 1].v;
+
+ *voltage = si_get_lower_of_leakage_and_vce_voltage(rdev, *voltage);
+
+ return ret;
+}
+
static void si_apply_state_adjust_rules(struct radeon_device *rdev,
struct radeon_ps *rps)
{
@@ -2936,7 +2987,7 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
bool disable_mclk_switching = false;
bool disable_sclk_switching = false;
u32 mclk, sclk;
- u16 vddc, vddci;
+ u16 vddc, vddci, min_vce_voltage = 0;
u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc;
u32 max_sclk = 0, max_mclk = 0;
int i;
@@ -2955,6 +3006,16 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
++p;
}
+ if (rps->vce_active) {
+ rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk;
+ rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk;
+ si_get_vce_clock_voltage(rdev, rps->evclk, rps->ecclk,
+ &min_vce_voltage);
+ } else {
+ rps->evclk = 0;
+ rps->ecclk = 0;
+ }
+
if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
ni_dpm_vblank_too_short(rdev))
disable_mclk_switching = true;
@@ -3035,6 +3096,13 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
vddc = ps->performance_levels[0].vddc;
}
+ if (rps->vce_active) {
+ if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk)
+ sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk;
+ if (mclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk)
+ mclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk;
+ }
+
/* adjusted low state */
ps->performance_levels[0].sclk = sclk;
ps->performance_levels[0].mclk = mclk;
@@ -3084,6 +3152,8 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
&ps->performance_levels[i]);
for (i = 0; i < ps->performance_level_count; i++) {
+ if (ps->performance_levels[i].vddc < min_vce_voltage)
+ ps->performance_levels[i].vddc = min_vce_voltage;
btc_apply_voltage_dependency_rules(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
ps->performance_levels[i].sclk,
max_limits->vddc, &ps->performance_levels[i].vddc);
@@ -3110,7 +3180,6 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
if (ps->performance_levels[i].vddc > rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.vddc)
ps->dc_compatible = false;
}
-
}
#if 0
@@ -5859,6 +5928,21 @@ static void si_set_pcie_lane_width_in_smc(struct radeon_device *rdev,
}
}
+static void si_set_vce_clock(struct radeon_device *rdev,
+ struct radeon_ps *new_rps,
+ struct radeon_ps *old_rps)
+{
+ if ((old_rps->evclk != new_rps->evclk) ||
+ (old_rps->ecclk != new_rps->ecclk)) {
+ /* turn the clocks on when encoding, off otherwise */
+ if (new_rps->evclk || new_rps->ecclk)
+ vce_v1_0_enable_mgcg(rdev, false);
+ else
+ vce_v1_0_enable_mgcg(rdev, true);
+ radeon_set_vce_clocks(rdev, new_rps->evclk, new_rps->ecclk);
+ }
+}
+
void si_dpm_setup_asic(struct radeon_device *rdev)
{
int r;
@@ -6547,6 +6631,7 @@ int si_dpm_set_power_state(struct radeon_device *rdev)
return ret;
}
ni_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps);
+ si_set_vce_clock(rdev, new_ps, old_ps);
if (eg_pi->pcie_performance_request)
si_notify_link_speed_change_after_state_change(rdev, new_ps, old_ps);
ret = si_set_power_state_conditionally_enable_ulv(rdev, new_ps);
@@ -6793,6 +6878,21 @@ static int si_parse_power_table(struct radeon_device *rdev)
power_state_offset += 2 + power_state->v2.ucNumDPMLevels;
}
rdev->pm.dpm.num_ps = state_array->ucNumEntries;
+
+ /* fill in the vce power states */
+ for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) {
+ u32 sclk, mclk;
+ clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx;
+ clock_info = (union pplib_clock_info *)
+ &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize];
+ sclk = le16_to_cpu(clock_info->si.usEngineClockLow);
+ sclk |= clock_info->si.ucEngineClockHigh << 16;
+ mclk = le16_to_cpu(clock_info->si.usMemoryClockLow);
+ mclk |= clock_info->si.ucMemoryClockHigh << 16;
+ rdev->pm.dpm.vce_states[i].sclk = sclk;
+ rdev->pm.dpm.vce_states[i].mclk = mclk;
+ }
+
return 0;
}
@@ -6837,10 +6937,11 @@ int si_dpm_init(struct radeon_device *rdev)
if (ret)
return ret;
- ret = si_parse_power_table(rdev);
+ ret = r600_parse_extended_power_table(rdev);
if (ret)
return ret;
- ret = r600_parse_extended_power_table(rdev);
+
+ ret = si_parse_power_table(rdev);
if (ret)
return ret;
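
The si_get_vce_clock_voltage() hunk above picks the first entry of the VCE clock/voltage dependency table whose evclk and ecclk both cover the requested clocks, falls back to the table's last (highest) entry when nothing matches, and finally clamps the result against the highest known leakage voltage. A minimal standalone sketch of that lookup, with a made-up table and simplified types rather than the radeon structures:

#include <stdint.h>
#include <stdio.h>

struct vce_dep_entry { uint32_t evclk, ecclk; uint16_t v; };

/* Entries are assumed to be sorted from lowest to highest clocks, which is
 * what the first-match loop in the diff implies. */
static uint16_t pick_vce_voltage(const struct vce_dep_entry *tbl, int count,
                                 uint32_t evclk, uint32_t ecclk)
{
    int i;

    if ((evclk == 0 && ecclk == 0) || count == 0)
        return 0; /* VCE idle: no voltage requirement */

    for (i = 0; i < count; i++) {
        if (evclk <= tbl[i].evclk && ecclk <= tbl[i].ecclk)
            return tbl[i].v; /* first entry that covers the request */
    }

    return tbl[count - 1].v; /* no match: return the highest voltage */
}

int main(void)
{
    const struct vce_dep_entry tbl[] = {
        { 40000, 40000,  900 },
        { 60000, 60000, 1000 },
        { 80000, 80000, 1100 },
    };

    printf("%d\n", pick_vce_voltage(tbl, 3, 50000, 50000)); /* 1000 */
    printf("%d\n", pick_vce_voltage(tbl, 3, 90000, 90000)); /* 1100, fallback */
    return 0;
}
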
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 3afac3013983..4c4a7218a3bd 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -1879,6 +1879,7 @@
#define VCE_VCPU_CACHE_SIZE1 0x20030
#define VCE_VCPU_CACHE_OFFSET2 0x20034
#define VCE_VCPU_CACHE_SIZE2 0x20038
+#define VCE_VCPU_SCRATCH7 0x200dc
#define VCE_SOFT_RESET 0x20120
#define VCE_ECPU_SOFT_RESET (1 << 0)
#define VCE_FME_SOFT_RESET (1 << 2)
@@ -1893,6 +1894,7 @@
#define VCE_RB_RPTR 0x2018c
#define VCE_RB_WPTR 0x20190
#define VCE_CLOCK_GATING_A 0x202f8
+# define CGC_DYN_CLOCK_MODE (1 << 16)
#define VCE_CLOCK_GATING_B 0x202fc
#define VCE_UENC_CLOCK_GATING 0x205bc
#define VCE_UENC_REG_CLOCK_GATING 0x205c0
@@ -1917,4 +1919,31 @@
#define VCE_CMD_IB_AUTO 0x00000005
#define VCE_CMD_SEMAPHORE 0x00000006
+/* discrete vce clocks */
+#define CG_VCEPLL_FUNC_CNTL 0xc0030600
+# define VCEPLL_RESET_MASK 0x00000001
+# define VCEPLL_SLEEP_MASK 0x00000002
+# define VCEPLL_BYPASS_EN_MASK 0x00000004
+# define VCEPLL_CTLREQ_MASK 0x00000008
+# define VCEPLL_VCO_MODE_MASK 0x00000600
+# define VCEPLL_REF_DIV_MASK 0x003F0000
+# define VCEPLL_CTLACK_MASK 0x40000000
+# define VCEPLL_CTLACK2_MASK 0x80000000
+#define CG_VCEPLL_FUNC_CNTL_2 0xc0030601
+# define VCEPLL_PDIV_A(x) ((x) << 0)
+# define VCEPLL_PDIV_A_MASK 0x0000007F
+# define VCEPLL_PDIV_B(x) ((x) << 8)
+# define VCEPLL_PDIV_B_MASK 0x00007F00
+# define EVCLK_SRC_SEL(x) ((x) << 20)
+# define EVCLK_SRC_SEL_MASK 0x01F00000
+# define ECCLK_SRC_SEL(x) ((x) << 25)
+# define ECCLK_SRC_SEL_MASK 0x3E000000
+#define CG_VCEPLL_FUNC_CNTL_3 0xc0030602
+# define VCEPLL_FB_DIV(x) ((x) << 0)
+# define VCEPLL_FB_DIV_MASK 0x01FFFFFF
+#define CG_VCEPLL_FUNC_CNTL_4 0xc0030603
+#define CG_VCEPLL_FUNC_CNTL_5 0xc0030604
+#define CG_VCEPLL_SPREAD_SPECTRUM 0xc0030606
+# define VCEPLL_SSEN_MASK 0x00000001
+
#endif
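
The new CG_VCEPLL_* definitions pair a field setter such as VCEPLL_PDIV_B(x) with a matching VCEPLL_PDIV_B_MASK, the usual pattern for read-modify-write updates of a register field: clear the masked bits, then OR in the shifted value. A small sketch of that pattern on a plain 32-bit value (the register access itself is left out):

#include <stdint.h>
#include <stdio.h>

#define VCEPLL_PDIV_A(x)   ((x) << 0)
#define VCEPLL_PDIV_A_MASK 0x0000007F
#define VCEPLL_PDIV_B(x)   ((x) << 8)
#define VCEPLL_PDIV_B_MASK 0x00007F00

/* Replace one bit-field in a register image: clear the field, then OR in
 * the value produced by the setter macro. */
static uint32_t set_field(uint32_t reg, uint32_t mask, uint32_t val)
{
    return (reg & ~mask) | (val & mask);
}

int main(void)
{
    uint32_t cntl2 = 0;

    cntl2 = set_field(cntl2, VCEPLL_PDIV_A_MASK, VCEPLL_PDIV_A(4));
    cntl2 = set_field(cntl2, VCEPLL_PDIV_B_MASK, VCEPLL_PDIV_B(2));
    printf("0x%08x\n", (unsigned)cntl2); /* 0x00000204 */
    return 0;
}
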
diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c
index a5b02c575d77..d34bfcdab9be 100644
--- a/drivers/gpu/drm/radeon/trinity_dpm.c
+++ b/drivers/gpu/drm/radeon/trinity_dpm.c
@@ -336,6 +336,7 @@ static const u32 trinity_override_mgpg_sequences[] =
0x00000204, 0x00000000,
};
+extern void vce_v1_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void trinity_program_clk_gating_hw_sequence(struct radeon_device *rdev,
const u32 *seq, u32 count);
static void trinity_override_dynamic_mg_powergating(struct radeon_device *rdev);
@@ -985,6 +986,21 @@ static void trinity_set_uvd_clock_after_set_eng_clock(struct radeon_device *rdev
trinity_setup_uvd_clocks(rdev, new_rps, old_rps);
}
+static void trinity_set_vce_clock(struct radeon_device *rdev,
+ struct radeon_ps *new_rps,
+ struct radeon_ps *old_rps)
+{
+ if ((old_rps->evclk != new_rps->evclk) ||
+ (old_rps->ecclk != new_rps->ecclk)) {
+ /* turn the clocks on when encoding, off otherwise */
+ if (new_rps->evclk || new_rps->ecclk)
+ vce_v1_0_enable_mgcg(rdev, false);
+ else
+ vce_v1_0_enable_mgcg(rdev, true);
+ radeon_set_vce_clocks(rdev, new_rps->evclk, new_rps->ecclk);
+ }
+}
+
static void trinity_program_ttt(struct radeon_device *rdev)
{
struct trinity_power_info *pi = trinity_get_pi(rdev);
@@ -1246,6 +1262,7 @@ int trinity_dpm_set_power_state(struct radeon_device *rdev)
trinity_force_level_0(rdev);
trinity_unforce_levels(rdev);
trinity_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps);
+ trinity_set_vce_clock(rdev, new_ps, old_ps);
}
trinity_release_mutex(rdev);
@@ -1483,7 +1500,35 @@ static void trinity_adjust_uvd_state(struct radeon_device *rdev,
}
}
+static int trinity_get_vce_clock_voltage(struct radeon_device *rdev,
+ u32 evclk, u32 ecclk, u16 *voltage)
+{
+ u32 i;
+ int ret = -EINVAL;
+ struct radeon_vce_clock_voltage_dependency_table *table =
+ &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table;
+
+ if (((evclk == 0) && (ecclk == 0)) ||
+ (table && (table->count == 0))) {
+ *voltage = 0;
+ return 0;
+ }
+
+ for (i = 0; i < table->count; i++) {
+ if ((evclk <= table->entries[i].evclk) &&
+ (ecclk <= table->entries[i].ecclk)) {
+ *voltage = table->entries[i].v;
+ ret = 0;
+ break;
+ }
+ }
+
+ /* if no match return the highest voltage */
+ if (ret)
+ *voltage = table->entries[table->count - 1].v;
+ return ret;
+}
static void trinity_apply_state_adjust_rules(struct radeon_device *rdev,
struct radeon_ps *new_rps,
@@ -1496,6 +1541,7 @@ static void trinity_apply_state_adjust_rules(struct radeon_device *rdev,
u32 min_sclk = pi->sys_info.min_sclk; /* XXX check against disp reqs */
u32 sclk_in_sr = pi->sys_info.min_sclk; /* ??? */
u32 i;
+ u16 min_vce_voltage;
bool force_high;
u32 num_active_displays = rdev->pm.dpm.new_active_crtc_count;
@@ -1504,6 +1550,14 @@ static void trinity_apply_state_adjust_rules(struct radeon_device *rdev,
trinity_adjust_uvd_state(rdev, new_rps);
+ if (new_rps->vce_active) {
+ new_rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk;
+ new_rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk;
+ } else {
+ new_rps->evclk = 0;
+ new_rps->ecclk = 0;
+ }
+
for (i = 0; i < ps->num_levels; i++) {
if (ps->levels[i].vddc_index < min_voltage)
ps->levels[i].vddc_index = min_voltage;
@@ -1512,6 +1566,17 @@ static void trinity_apply_state_adjust_rules(struct radeon_device *rdev,
ps->levels[i].sclk =
trinity_get_valid_engine_clock(rdev, min_sclk);
+ /* patch in vce limits */
+ if (new_rps->vce_active) {
+ /* sclk */
+ if (ps->levels[i].sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk)
+ ps->levels[i].sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk;
+ /* vddc */
+ trinity_get_vce_clock_voltage(rdev, new_rps->evclk, new_rps->ecclk, &min_vce_voltage);
+ if (ps->levels[i].vddc_index < min_vce_voltage)
+ ps->levels[i].vddc_index = min_vce_voltage;
+ }
+
ps->levels[i].ds_divider_index =
sumo_get_sleep_divider_id_from_clock(rdev, ps->levels[i].sclk, sclk_in_sr);
@@ -1733,6 +1798,19 @@ static int trinity_parse_power_table(struct radeon_device *rdev)
power_state_offset += 2 + power_state->v2.ucNumDPMLevels;
}
rdev->pm.dpm.num_ps = state_array->ucNumEntries;
+
+ /* fill in the vce power states */
+ for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) {
+ u32 sclk;
+ clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx;
+ clock_info = (union pplib_clock_info *)
+ &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize];
+ sclk = le16_to_cpu(clock_info->sumo.usEngineClockLow);
+ sclk |= clock_info->sumo.ucEngineClockHigh << 16;
+ rdev->pm.dpm.vce_states[i].sclk = sclk;
+ rdev->pm.dpm.vce_states[i].mclk = 0;
+ }
+
return 0;
}
@@ -1914,6 +1992,10 @@ int trinity_dpm_init(struct radeon_device *rdev)
if (ret)
return ret;
+ ret = r600_parse_extended_power_table(rdev);
+ if (ret)
+ return ret;
+
ret = trinity_parse_power_table(rdev);
if (ret)
return ret;
@@ -2000,6 +2082,7 @@ void trinity_dpm_fini(struct radeon_device *rdev)
}
kfree(rdev->pm.dpm.ps);
kfree(rdev->pm.dpm.priv);
+ r600_free_extended_power_table(rdev);
}
u32 trinity_dpm_get_sclk(struct radeon_device *rdev, bool low)
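
The "patch in vce limits" hunk in trinity_apply_state_adjust_rules() raises each performance level so that neither its sclk nor its vddc index falls below what the active VCE state requires. A minimal clamp over a hypothetical level array (field names mirror the diff, types are simplified):

#include <stdint.h>
#include <stdio.h>

struct level { uint32_t sclk; uint16_t vddc_index; };

/* Raise every level at least to the requirements of the active VCE state. */
static void patch_in_vce_limits(struct level *levels, int count,
                                uint32_t vce_sclk, uint16_t min_vce_voltage)
{
    int i;

    for (i = 0; i < count; i++) {
        if (levels[i].sclk < vce_sclk)
            levels[i].sclk = vce_sclk;
        if (levels[i].vddc_index < min_vce_voltage)
            levels[i].vddc_index = min_vce_voltage;
    }
}

int main(void)
{
    struct level lv[2] = { { 20000, 2 }, { 60000, 5 } };

    patch_in_vce_limits(lv, 2, 40000, 4);
    printf("%u/%u %u/%u\n", (unsigned)lv[0].sclk, (unsigned)lv[0].vddc_index,
           (unsigned)lv[1].sclk, (unsigned)lv[1].vddc_index); /* 40000/4 60000/5 */
    return 0;
}
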
diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c
index b44d9c842f7b..07a0d378e122 100644
--- a/drivers/gpu/drm/radeon/vce_v1_0.c
+++ b/drivers/gpu/drm/radeon/vce_v1_0.c
@@ -31,6 +31,23 @@
#include "radeon_asic.h"
#include "sid.h"
+#define VCE_V1_0_FW_SIZE (256 * 1024)
+#define VCE_V1_0_STACK_SIZE (64 * 1024)
+#define VCE_V1_0_DATA_SIZE (7808 * (RADEON_MAX_VCE_HANDLES + 1))
+
+struct vce_v1_0_fw_signature
+{
+ int32_t off;
+ uint32_t len;
+ int32_t num;
+ struct {
+ uint32_t chip_id;
+ uint32_t keyselect;
+ uint32_t nonce[4];
+ uint32_t sigval[4];
+ } val[8];
+};
+
/**
* vce_v1_0_get_rptr - get read pointer
*
@@ -82,6 +99,186 @@ void vce_v1_0_set_wptr(struct radeon_device *rdev,
WREG32(VCE_RB_WPTR2, ring->wptr);
}
+void vce_v1_0_enable_mgcg(struct radeon_device *rdev, bool enable)
+{
+ u32 tmp;
+
+ if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_VCE_MGCG)) {
+ tmp = RREG32(VCE_CLOCK_GATING_A);
+ tmp |= CGC_DYN_CLOCK_MODE;
+ WREG32(VCE_CLOCK_GATING_A, tmp);
+
+ tmp = RREG32(VCE_UENC_CLOCK_GATING);
+ tmp &= ~0x1ff000;
+ tmp |= 0xff800000;
+ WREG32(VCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(VCE_UENC_REG_CLOCK_GATING);
+ tmp &= ~0x3ff;
+ WREG32(VCE_UENC_REG_CLOCK_GATING, tmp);
+ } else {
+ tmp = RREG32(VCE_CLOCK_GATING_A);
+ tmp &= ~CGC_DYN_CLOCK_MODE;
+ WREG32(VCE_CLOCK_GATING_A, tmp);
+
+ tmp = RREG32(VCE_UENC_CLOCK_GATING);
+ tmp |= 0x1ff000;
+ tmp &= ~0xff800000;
+ WREG32(VCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(VCE_UENC_REG_CLOCK_GATING);
+ tmp |= 0x3ff;
+ WREG32(VCE_UENC_REG_CLOCK_GATING, tmp);
+ }
+}
+
+static void vce_v1_0_init_cg(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ tmp = RREG32(VCE_CLOCK_GATING_A);
+ tmp |= CGC_DYN_CLOCK_MODE;
+ WREG32(VCE_CLOCK_GATING_A, tmp);
+
+ tmp = RREG32(VCE_CLOCK_GATING_B);
+ tmp |= 0x1e;
+ tmp &= ~0xe100e1;
+ WREG32(VCE_CLOCK_GATING_B, tmp);
+
+ tmp = RREG32(VCE_UENC_CLOCK_GATING);
+ tmp &= ~0xff9ff000;
+ WREG32(VCE_UENC_CLOCK_GATING, tmp);
+
+ tmp = RREG32(VCE_UENC_REG_CLOCK_GATING);
+ tmp &= ~0x3ff;
+ WREG32(VCE_UENC_REG_CLOCK_GATING, tmp);
+}
+
+int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
+{
+ struct vce_v1_0_fw_signature *sign = (void*)rdev->vce_fw->data;
+ uint32_t chip_id;
+ int i;
+
+ switch (rdev->family) {
+ case CHIP_TAHITI:
+ chip_id = 0x01000014;
+ break;
+ case CHIP_VERDE:
+ chip_id = 0x01000015;
+ break;
+ case CHIP_PITCAIRN:
+ case CHIP_OLAND:
+ chip_id = 0x01000016;
+ break;
+ case CHIP_ARUBA:
+ chip_id = 0x01000017;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 0; i < sign->num; ++i) {
+ if (sign->val[i].chip_id == chip_id)
+ break;
+ }
+
+ if (i == sign->num)
+ return -EINVAL;
+
+ data += (256 - 64) / 4;
+ data[0] = sign->val[i].nonce[0];
+ data[1] = sign->val[i].nonce[1];
+ data[2] = sign->val[i].nonce[2];
+ data[3] = sign->val[i].nonce[3];
+ data[4] = sign->len + 64;
+
+ memset(&data[5], 0, 44);
+ memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
+
+ data += data[4] / 4;
+ data[0] = sign->val[i].sigval[0];
+ data[1] = sign->val[i].sigval[1];
+ data[2] = sign->val[i].sigval[2];
+ data[3] = sign->val[i].sigval[3];
+
+ rdev->vce.keyselect = sign->val[i].keyselect;
+
+ return 0;
+}
+
+unsigned vce_v1_0_bo_size(struct radeon_device *rdev)
+{
+ WARN_ON(VCE_V1_0_FW_SIZE < rdev->vce_fw->size);
+ return VCE_V1_0_FW_SIZE + VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE;
+}
+
+int vce_v1_0_resume(struct radeon_device *rdev)
+{
+ uint64_t addr = rdev->vce.gpu_addr;
+ uint32_t size;
+ int i;
+
+ WREG32_P(VCE_CLOCK_GATING_A, 0, ~(1 << 16));
+ WREG32_P(VCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
+ WREG32_P(VCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
+ WREG32(VCE_CLOCK_GATING_B, 0);
+
+ WREG32_P(VCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4);
+
+ WREG32(VCE_LMI_CTRL, 0x00398000);
+ WREG32_P(VCE_LMI_CACHE_CTRL, 0x0, ~0x1);
+ WREG32(VCE_LMI_SWAP_CNTL, 0);
+ WREG32(VCE_LMI_SWAP_CNTL1, 0);
+ WREG32(VCE_LMI_VM_CTRL, 0);
+
+ WREG32(VCE_VCPU_SCRATCH7, RADEON_MAX_VCE_HANDLES);
+
+ addr += 256;
+ size = VCE_V1_0_FW_SIZE;
+ WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff);
+ WREG32(VCE_VCPU_CACHE_SIZE0, size);
+
+ addr += size;
+ size = VCE_V1_0_STACK_SIZE;
+ WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff);
+ WREG32(VCE_VCPU_CACHE_SIZE1, size);
+
+ addr += size;
+ size = VCE_V1_0_DATA_SIZE;
+ WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff);
+ WREG32(VCE_VCPU_CACHE_SIZE2, size);
+
+ WREG32_P(VCE_LMI_CTRL2, 0x0, ~0x100);
+
+ WREG32(VCE_LMI_FW_START_KEYSEL, rdev->vce.keyselect);
+
+ for (i = 0; i < 10; ++i) {
+ mdelay(10);
+ if (RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_DONE)
+ break;
+ }
+
+ if (i == 10)
+ return -ETIMEDOUT;
+
+ if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_PASS))
+ return -EINVAL;
+
+ for (i = 0; i < 10; ++i) {
+ mdelay(10);
+ if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_BUSY))
+ break;
+ }
+
+ if (i == 10)
+ return -ETIMEDOUT;
+
+ vce_v1_0_init_cg(rdev);
+
+ return 0;
+}
+
/**
* vce_v1_0_start - start VCE block
*
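
vce_v1_0_load_fw() above selects the firmware signature entry whose chip_id matches the ASIC before copying the nonce, length and signature words around the firmware body. The standalone sketch below reproduces just the selection step over the same entry layout; the two chip IDs are taken from the diff, everything else is hypothetical:

#include <stdint.h>
#include <stdio.h>

struct fw_signature_entry {
    uint32_t chip_id;
    uint32_t keyselect;
    uint32_t nonce[4];
    uint32_t sigval[4];
};

/* Return the index of the signature entry for this chip, or -1 if the
 * firmware was not signed for it. */
static int find_signature(const struct fw_signature_entry *val, int num,
                          uint32_t chip_id)
{
    int i;

    for (i = 0; i < num; i++) {
        if (val[i].chip_id == chip_id)
            return i;
    }
    return -1;
}

int main(void)
{
    const struct fw_signature_entry val[2] = {
        { .chip_id = 0x01000014 }, /* Tahiti, per the diff */
        { .chip_id = 0x01000017 }, /* Aruba, per the diff */
    };

    printf("%d\n", find_signature(val, 2, 0x01000017)); /* 1 */
    printf("%d\n", find_signature(val, 2, 0x01000020)); /* -1: unsupported */
    return 0;
}
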
diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c
index fbbe78fbd087..cdeaab7c7b1e 100644
--- a/drivers/gpu/drm/radeon/vce_v2_0.c
+++ b/drivers/gpu/drm/radeon/vce_v2_0.c
@@ -31,6 +31,10 @@
#include "radeon_asic.h"
#include "cikd.h"
+#define VCE_V2_0_FW_SIZE (256 * 1024)
+#define VCE_V2_0_STACK_SIZE (64 * 1024)
+#define VCE_V2_0_DATA_SIZE (23552 * RADEON_MAX_VCE_HANDLES)
+
static void vce_v2_0_set_sw_cg(struct radeon_device *rdev, bool gated)
{
u32 tmp;
@@ -140,6 +144,12 @@ static void vce_v2_0_init_cg(struct radeon_device *rdev)
WREG32(VCE_CLOCK_GATING_B, tmp);
}
+unsigned vce_v2_0_bo_size(struct radeon_device *rdev)
+{
+ WARN_ON(rdev->vce_fw->size > VCE_V2_0_FW_SIZE);
+ return VCE_V2_0_FW_SIZE + VCE_V2_0_STACK_SIZE + VCE_V2_0_DATA_SIZE;
+}
+
int vce_v2_0_resume(struct radeon_device *rdev)
{
uint64_t addr = rdev->vce.gpu_addr;
@@ -159,17 +169,17 @@ int vce_v2_0_resume(struct radeon_device *rdev)
WREG32(VCE_LMI_VCPU_CACHE_40BIT_BAR, addr >> 8);
addr &= 0xff;
- size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size);
+ size = VCE_V2_0_FW_SIZE;
WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff);
WREG32(VCE_VCPU_CACHE_SIZE0, size);
addr += size;
- size = RADEON_VCE_STACK_SIZE;
+ size = VCE_V2_0_STACK_SIZE;
WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff);
WREG32(VCE_VCPU_CACHE_SIZE1, size);
addr += size;
- size = RADEON_VCE_HEAP_SIZE;
+ size = VCE_V2_0_DATA_SIZE;
WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff);
WREG32(VCE_VCPU_CACHE_SIZE2, size);
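
Both resume paths now program the three VCPU cache windows from fixed sizes instead of the runtime firmware size: window 0 covers the firmware, window 1 the stack and window 2 the data area, each starting where the previous one ends. A sketch of that running-offset computation, using the v2.0 sizes and assuming RADEON_MAX_VCE_HANDLES is 16 and a buffer starting at offset 0:

#include <stdint.h>
#include <stdio.h>

#define FW_SIZE    (256u * 1024)
#define STACK_SIZE (64u * 1024)
#define DATA_SIZE  (23552u * 16)   /* VCE_V2_0_DATA_SIZE, assuming 16 handles */

int main(void)
{
    uint64_t addr = 0; /* start of the VCE buffer object, assumed 0 here */

    printf("cache0 (fw):    offset 0x%llx size 0x%x\n",
           (unsigned long long)addr, FW_SIZE);
    addr += FW_SIZE;
    printf("cache1 (stack): offset 0x%llx size 0x%x\n",
           (unsigned long long)addr, STACK_SIZE);
    addr += STACK_SIZE;
    printf("cache2 (data):  offset 0x%llx size 0x%x\n",
           (unsigned long long)addr, DATA_SIZE);
    return 0;
}
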
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
index 7d0b8ef9bea2..e6a32c4e4040 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
@@ -195,26 +195,27 @@ void rcar_du_crtc_route_output(struct drm_crtc *crtc,
static unsigned int plane_zpos(struct rcar_du_plane *plane)
{
- return to_rcar_du_plane_state(plane->plane.state)->zpos;
+ return to_rcar_plane_state(plane->plane.state)->zpos;
}
static const struct rcar_du_format_info *
plane_format(struct rcar_du_plane *plane)
{
- return to_rcar_du_plane_state(plane->plane.state)->format;
+ return to_rcar_plane_state(plane->plane.state)->format;
}
static void rcar_du_crtc_update_planes(struct rcar_du_crtc *rcrtc)
{
struct rcar_du_plane *planes[RCAR_DU_NUM_HW_PLANES];
unsigned int num_planes = 0;
+ unsigned int dptsr_planes;
+ unsigned int hwplanes = 0;
unsigned int prio = 0;
unsigned int i;
- u32 dptsr = 0;
u32 dspr = 0;
- for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes.planes); ++i) {
- struct rcar_du_plane *plane = &rcrtc->group->planes.planes[i];
+ for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes); ++i) {
+ struct rcar_du_plane *plane = &rcrtc->group->planes[i];
unsigned int j;
if (plane->plane.state->crtc != &rcrtc->crtc)
@@ -234,41 +235,45 @@ static void rcar_du_crtc_update_planes(struct rcar_du_crtc *rcrtc)
for (i = 0; i < num_planes; ++i) {
struct rcar_du_plane *plane = planes[i];
struct drm_plane_state *state = plane->plane.state;
- unsigned int index = to_rcar_du_plane_state(state)->hwindex;
+ unsigned int index = to_rcar_plane_state(state)->hwindex;
prio -= 4;
dspr |= (index + 1) << prio;
- dptsr |= DPTSR_PnDK(index) | DPTSR_PnTS(index);
+ hwplanes |= 1 << index;
if (plane_format(plane)->planes == 2) {
index = (index + 1) % 8;
prio -= 4;
dspr |= (index + 1) << prio;
- dptsr |= DPTSR_PnDK(index) | DPTSR_PnTS(index);
+ hwplanes |= 1 << index;
}
}
- /* Select display timing and dot clock generator 2 for planes associated
- * with superposition controller 2.
+ /* Update the planes to display timing and dot clock generator
+ * associations.
+ *
+ * Updating the DPTSR register requires restarting the CRTC group,
+ * resulting in visible flicker. To mitigate the issue only update the
+ * association if needed by enabled planes. Planes being disabled will
+ * keep their current association.
*/
- if (rcrtc->index % 2) {
- /* The DPTSR register is updated when the display controller is
- * stopped. We thus need to restart the DU. Once again, sorry
- * for the flicker. One way to mitigate the issue would be to
- * pre-associate planes with CRTCs (either with a fixed 4/4
- * split, or through a module parameter). Flicker would then
- * occur only if we need to break the pre-association.
- */
- mutex_lock(&rcrtc->group->lock);
- if (rcar_du_group_read(rcrtc->group, DPTSR) != dptsr) {
- rcar_du_group_write(rcrtc->group, DPTSR, dptsr);
- if (rcrtc->group->used_crtcs)
- rcar_du_group_restart(rcrtc->group);
- }
- mutex_unlock(&rcrtc->group->lock);
+ mutex_lock(&rcrtc->group->lock);
+
+ dptsr_planes = rcrtc->index % 2 ? rcrtc->group->dptsr_planes | hwplanes
+ : rcrtc->group->dptsr_planes & ~hwplanes;
+
+ if (dptsr_planes != rcrtc->group->dptsr_planes) {
+ rcar_du_group_write(rcrtc->group, DPTSR,
+ (dptsr_planes << 16) | dptsr_planes);
+ rcrtc->group->dptsr_planes = dptsr_planes;
+
+ if (rcrtc->group->used_crtcs)
+ rcar_du_group_restart(rcrtc->group);
}
+ mutex_unlock(&rcrtc->group->lock);
+
rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR,
dspr);
}
@@ -427,8 +432,8 @@ void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc)
rcar_du_crtc_start(rcrtc);
/* Commit the planes state. */
- for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes.planes); ++i) {
- struct rcar_du_plane *plane = &rcrtc->group->planes.planes[i];
+ for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes); ++i) {
+ struct rcar_du_plane *plane = &rcrtc->group->planes[i];
if (plane->plane.state->crtc != &rcrtc->crtc)
continue;
@@ -592,7 +597,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
rcrtc->enabled = false;
ret = drm_crtc_init_with_planes(rcdu->ddev, crtc,
- &rgrp->planes.planes[index % 2].plane,
+ &rgrp->planes[index % 2].plane,
NULL, &crtc_funcs);
if (ret < 0)
return ret;
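
The rewritten rcar_du_crtc_update_planes() keeps a per-group bitmask of the hardware planes routed to display timing and dot clock generator 1 and rewrites DPTSR, and therefore restarts the group, only when the planes enabled on this CRTC actually need a different association. The sketch below mirrors that bitmask update for a hypothetical group structure:

#include <stdbool.h>
#include <stdio.h>

struct group { unsigned int dptsr_planes; };

/* Planes used by an odd-indexed CRTC must be routed to timing/dot clock
 * generator 1 (bit set), planes used by an even-indexed CRTC to generator 0
 * (bit cleared). Returns true when DPTSR has to be rewritten. */
static bool update_dptsr(struct group *grp, unsigned int crtc_index,
                         unsigned int hwplanes)
{
    unsigned int new_mask = (crtc_index % 2)
                          ? grp->dptsr_planes | hwplanes
                          : grp->dptsr_planes & ~hwplanes;

    if (new_mask == grp->dptsr_planes)
        return false; /* no change: skip the group restart */

    grp->dptsr_planes = new_mask;
    return true;
}

int main(void)
{
    struct group grp = { .dptsr_planes = 0xf0 }; /* default split from the diff */
    bool changed;

    changed = update_dptsr(&grp, 1, 0x30); /* planes 4-5 used by CRTC 1 */
    printf("%d 0x%02x\n", changed, grp.dptsr_planes); /* 0 0xf0 */

    changed = update_dptsr(&grp, 0, 0x10); /* plane 4 moves to CRTC 0 */
    printf("%d 0x%02x\n", changed, grp.dptsr_planes); /* 1 0xe0 */
    return 0;
}
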
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
index 5d9aa9b33769..4b95d9d08c49 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
@@ -22,6 +22,20 @@
struct rcar_du_group;
+/**
+ * struct rcar_du_crtc - the CRTC, representing a DU superposition processor
+ * @crtc: base DRM CRTC
+ * @clock: the CRTC functional clock
+ * @extclock: external pixel dot clock (optional)
+ * @mmio_offset: offset of the CRTC registers in the DU MMIO block
+ * @index: CRTC software and hardware index
+ * @started: whether the CRTC has been started and is running
+ * @event: event to post when the pending page flip completes
+ * @flip_wait: wait queue used to signal page flip completion
+ * @outputs: bitmask of the outputs (enum rcar_du_output) driven by this CRTC
+ * @enabled: whether the CRTC is enabled, used to control system resume
+ * @group: CRTC group this CRTC belongs to
+ */
struct rcar_du_crtc {
struct drm_crtc crtc;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h
index c7c538dd2e68..9f34fc86436a 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h
@@ -83,6 +83,12 @@ struct rcar_du_device {
struct rcar_du_group groups[RCAR_DU_MAX_GROUPS];
+ struct {
+ struct drm_property *alpha;
+ struct drm_property *colorkey;
+ struct drm_property *zpos;
+ } props;
+
unsigned int dpad0_source;
struct rcar_du_lvdsenc *lvds[RCAR_DU_MAX_LVDS];
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.c b/drivers/gpu/drm/rcar-du/rcar_du_group.c
index 1bdc0ee0c248..7fd39a7d91c8 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_group.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.c
@@ -85,6 +85,12 @@ static void rcar_du_group_setup(struct rcar_du_group *rgrp)
* superposition 0 to DU0 pins. DU1 pins will be configured dynamically.
*/
rcar_du_group_write(rgrp, DORCR, DORCR_PG1D_DS1 | DORCR_DPRS);
+
+ /* Apply planes to CRTCs association. */
+ mutex_lock(&rgrp->lock);
+ rcar_du_group_write(rgrp, DPTSR, (rgrp->dptsr_planes << 16) |
+ rgrp->dptsr_planes);
+ mutex_unlock(&rgrp->lock);
}
/*
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.h b/drivers/gpu/drm/rcar-du/rcar_du_group.h
index ed36433fbe84..7b414b31c3be 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_group.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.h
@@ -25,9 +25,11 @@ struct rcar_du_device;
* @dev: the DU device
* @mmio_offset: registers offset in the device memory map
* @index: group index
+ * @num_crtcs: number of CRTCs in this group (1 or 2)
* @use_count: number of users of the group (rcar_du_group_(get|put))
* @used_crtcs: number of CRTCs currently in use
- * @lock: protects the DPTSR register
+ * @lock: protects the dptsr_planes field and the DPTSR register
+ * @dptsr_planes: bitmask of planes driven by dot-clock and timing generator 1
* @planes: planes handled by the group
*/
struct rcar_du_group {
@@ -35,12 +37,14 @@ struct rcar_du_group {
unsigned int mmio_offset;
unsigned int index;
+ unsigned int num_crtcs;
unsigned int use_count;
unsigned int used_crtcs;
struct mutex lock;
+ unsigned int dptsr_planes;
- struct rcar_du_planes planes;
+ struct rcar_du_plane planes[RCAR_DU_NUM_KMS_PLANES];
};
u32 rcar_du_group_read(struct rcar_du_group *rgrp, u32 reg);
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
index 93117f159a3b..20859aae882e 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
@@ -221,7 +221,7 @@ static bool rcar_du_plane_needs_realloc(struct rcar_du_plane *plane,
{
const struct rcar_du_format_info *cur_format;
- cur_format = to_rcar_du_plane_state(plane->plane.state)->format;
+ cur_format = to_rcar_plane_state(plane->plane.state)->format;
/* Lowering the number of planes doesn't strictly require reallocation
* as the extra hardware plane will be freed when committing, but doing
@@ -284,14 +284,19 @@ static int rcar_du_atomic_check(struct drm_device *dev,
continue;
plane = to_rcar_plane(state->planes[i]);
- plane_state = to_rcar_du_plane_state(state->plane_states[i]);
+ plane_state = to_rcar_plane_state(state->plane_states[i]);
+
+ dev_dbg(rcdu->dev, "%s: checking plane (%u,%u)\n", __func__,
+ plane->group->index, plane - plane->group->planes);
/* If the plane is being disabled we don't need to go through
* the full reallocation procedure. Just mark the hardware
* plane(s) as freed.
*/
if (!plane_state->format) {
- index = plane - plane->group->planes.planes;
+ dev_dbg(rcdu->dev, "%s: plane is being disabled\n",
+ __func__);
+ index = plane - plane->group->planes;
group_freed_planes[plane->group->index] |= 1 << index;
plane_state->hwindex = -1;
continue;
@@ -301,10 +306,12 @@ static int rcar_du_atomic_check(struct drm_device *dev,
* mark the hardware plane(s) as free.
*/
if (rcar_du_plane_needs_realloc(plane, plane_state)) {
+ dev_dbg(rcdu->dev, "%s: plane needs reallocation\n",
+ __func__);
groups |= 1 << plane->group->index;
needs_realloc = true;
- index = plane - plane->group->planes.planes;
+ index = plane - plane->group->planes;
group_freed_planes[plane->group->index] |= 1 << index;
plane_state->hwindex = -1;
}
@@ -326,8 +333,11 @@ static int rcar_du_atomic_check(struct drm_device *dev,
struct rcar_du_group *group = &rcdu->groups[index];
unsigned int used_planes = 0;
+ dev_dbg(rcdu->dev, "%s: finding free planes for group %u\n",
+ __func__, index);
+
for (i = 0; i < RCAR_DU_NUM_KMS_PLANES; ++i) {
- struct rcar_du_plane *plane = &group->planes.planes[i];
+ struct rcar_du_plane *plane = &group->planes[i];
struct rcar_du_plane_state *plane_state;
struct drm_plane_state *s;
@@ -342,28 +352,49 @@ static int rcar_du_atomic_check(struct drm_device *dev,
* above. Use the local freed planes list to check for
* that condition instead.
*/
- if (group_freed_planes[index] & (1 << i))
+ if (group_freed_planes[index] & (1 << i)) {
+ dev_dbg(rcdu->dev,
+ "%s: plane (%u,%u) has been freed, skipping\n",
+ __func__, plane->group->index,
+ plane - plane->group->planes);
continue;
+ }
- plane_state = to_rcar_du_plane_state(plane->plane.state);
+ plane_state = to_rcar_plane_state(plane->plane.state);
used_planes |= rcar_du_plane_hwmask(plane_state);
+
+ dev_dbg(rcdu->dev,
+ "%s: plane (%u,%u) uses %u hwplanes (index %d)\n",
+ __func__, plane->group->index,
+ plane - plane->group->planes,
+ plane_state->format ?
+ plane_state->format->planes : 0,
+ plane_state->hwindex);
}
group_free_planes[index] = 0xff & ~used_planes;
groups &= ~(1 << index);
+
+ dev_dbg(rcdu->dev, "%s: group %u free planes mask 0x%02x\n",
+ __func__, index, group_free_planes[index]);
}
/* Reallocate hardware planes for each plane that needs it. */
for (i = 0; i < dev->mode_config.num_total_plane; ++i) {
struct rcar_du_plane_state *plane_state;
struct rcar_du_plane *plane;
+ unsigned int crtc_planes;
+ unsigned int free;
int idx;
if (!state->planes[i])
continue;
plane = to_rcar_plane(state->planes[i]);
- plane_state = to_rcar_du_plane_state(state->plane_states[i]);
+ plane_state = to_rcar_plane_state(state->plane_states[i]);
+
+ dev_dbg(rcdu->dev, "%s: allocating plane (%u,%u)\n", __func__,
+ plane->group->index, plane - plane->group->planes);
/* Skip planes that are being disabled or don't need to be
* reallocated.
@@ -372,18 +403,38 @@ static int rcar_du_atomic_check(struct drm_device *dev,
!rcar_du_plane_needs_realloc(plane, plane_state))
continue;
+ /* Try to allocate the plane from the free planes currently
+ * associated with the target CRTC to avoid restarting the CRTC
+ * group and thus minimize flicker. If it fails fall back to
+ * allocating from all free planes.
+ */
+ crtc_planes = to_rcar_crtc(plane_state->state.crtc)->index % 2
+ ? plane->group->dptsr_planes
+ : ~plane->group->dptsr_planes;
+ free = group_free_planes[plane->group->index];
+
idx = rcar_du_plane_hwalloc(plane_state->format->planes,
- group_free_planes[plane->group->index]);
+ free & crtc_planes);
+ if (idx < 0)
+ idx = rcar_du_plane_hwalloc(plane_state->format->planes,
+ free);
if (idx < 0) {
dev_dbg(rcdu->dev, "%s: no available hardware plane\n",
__func__);
return idx;
}
+ dev_dbg(rcdu->dev, "%s: allocated %u hwplanes (index %u)\n",
+ __func__, plane_state->format->planes, idx);
+
plane_state->hwindex = idx;
group_free_planes[plane->group->index] &=
~rcar_du_plane_hwmask(plane_state);
+
+ dev_dbg(rcdu->dev, "%s: group %u free planes mask 0x%02x\n",
+ __func__, plane->group->index,
+ group_free_planes[plane->group->index]);
}
return 0;
@@ -648,6 +699,31 @@ static int rcar_du_encoders_init(struct rcar_du_device *rcdu)
return num_encoders;
}
+static int rcar_du_properties_init(struct rcar_du_device *rcdu)
+{
+ rcdu->props.alpha =
+ drm_property_create_range(rcdu->ddev, 0, "alpha", 0, 255);
+ if (rcdu->props.alpha == NULL)
+ return -ENOMEM;
+
+ /* The color key is expressed as an RGB888 triplet stored in a 32-bit
+ * integer in XRGB8888 format. Bit 24 is used as a flag to disable (0)
+ * or enable source color keying (1).
+ */
+ rcdu->props.colorkey =
+ drm_property_create_range(rcdu->ddev, 0, "colorkey",
+ 0, 0x01ffffff);
+ if (rcdu->props.colorkey == NULL)
+ return -ENOMEM;
+
+ rcdu->props.zpos =
+ drm_property_create_range(rcdu->ddev, 0, "zpos", 1, 7);
+ if (rcdu->props.zpos == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
int rcar_du_modeset_init(struct rcar_du_device *rcdu)
{
static const unsigned int mmio_offsets[] = {
@@ -672,6 +748,10 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
rcdu->num_crtcs = rcdu->info->num_crtcs;
+ ret = rcar_du_properties_init(rcdu);
+ if (ret < 0)
+ return ret;
+
/* Initialize the groups. */
num_groups = DIV_ROUND_UP(rcdu->num_crtcs, 2);
@@ -683,6 +763,13 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
rgrp->dev = rcdu;
rgrp->mmio_offset = mmio_offsets[i];
rgrp->index = i;
+ rgrp->num_crtcs = min(rcdu->num_crtcs - 2 * i, 2U);
+
+ /* If we have more than one CRTC in this group, pre-associate
+ * planes 0-3 with CRTC 0 and planes 4-7 with CRTC 1 to minimize
+ * flicker occurring when the association is changed.
+ */
+ rgrp->dptsr_planes = rgrp->num_crtcs > 1 ? 0xf0 : 0;
ret = rcar_du_planes_init(rgrp);
if (ret < 0)
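
The atomic check now tries to allocate hardware planes from the set already associated with the target CRTC before falling back to all free planes, so the DPTSR association (and the group restart it implies) is only changed as a last resort. A simplified two-pass allocator over an 8-bit free mask, assuming single-plane formats (hypothetical helpers, not rcar_du_plane_hwalloc()):

#include <stdio.h>

/* Return the index of the first free hardware plane in 'mask', or -1. */
static int first_free(unsigned int mask)
{
    int i;

    for (i = 0; i < 8; i++) {
        if (mask & (1u << i))
            return i;
    }
    return -1;
}

/* Prefer planes already associated with the target CRTC (crtc_planes),
 * fall back to any free plane. */
static int alloc_plane(unsigned int free, unsigned int crtc_planes)
{
    int idx = first_free(free & crtc_planes);

    if (idx < 0)
        idx = first_free(free);
    return idx;
}

int main(void)
{
    /* free planes 2, 3 and 6; the CRTC currently owns planes 4-7 */
    printf("%d\n", alloc_plane(0x4c, 0xf0)); /* 6: stays within the CRTC's set */
    printf("%d\n", alloc_plane(0x0c, 0xf0)); /* 2: fallback, needs a DPTSR update */
    return 0;
}
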
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.c b/drivers/gpu/drm/rcar-du/rcar_du_plane.c
index 210e5c3fd982..3e30d84b798f 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_plane.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.c
@@ -45,7 +45,7 @@ static void rcar_du_plane_write(struct rcar_du_group *rgrp,
static void rcar_du_plane_setup_fb(struct rcar_du_plane *plane)
{
struct rcar_du_plane_state *state =
- to_rcar_du_plane_state(plane->plane.state);
+ to_rcar_plane_state(plane->plane.state);
struct drm_framebuffer *fb = plane->plane.state->fb;
struct rcar_du_group *rgrp = plane->group;
unsigned int src_x = state->state.src_x >> 16;
@@ -109,7 +109,7 @@ static void rcar_du_plane_setup_mode(struct rcar_du_plane *plane,
unsigned int index)
{
struct rcar_du_plane_state *state =
- to_rcar_du_plane_state(plane->plane.state);
+ to_rcar_plane_state(plane->plane.state);
struct rcar_du_group *rgrp = plane->group;
u32 colorkey;
u32 pnmr;
@@ -172,7 +172,7 @@ static void __rcar_du_plane_setup(struct rcar_du_plane *plane,
unsigned int index)
{
struct rcar_du_plane_state *state =
- to_rcar_du_plane_state(plane->plane.state);
+ to_rcar_plane_state(plane->plane.state);
struct rcar_du_group *rgrp = plane->group;
u32 ddcr2 = PnDDCR2_CODE;
u32 ddcr4;
@@ -222,7 +222,7 @@ static void __rcar_du_plane_setup(struct rcar_du_plane *plane,
void rcar_du_plane_setup(struct rcar_du_plane *plane)
{
struct rcar_du_plane_state *state =
- to_rcar_du_plane_state(plane->plane.state);
+ to_rcar_plane_state(plane->plane.state);
__rcar_du_plane_setup(plane, state->hwindex);
if (state->format->planes == 2)
@@ -234,7 +234,7 @@ void rcar_du_plane_setup(struct rcar_du_plane *plane)
static int rcar_du_plane_atomic_check(struct drm_plane *plane,
struct drm_plane_state *state)
{
- struct rcar_du_plane_state *rstate = to_rcar_du_plane_state(state);
+ struct rcar_du_plane_state *rstate = to_rcar_plane_state(state);
struct rcar_du_plane *rplane = to_rcar_plane(plane);
struct rcar_du_device *rcdu = rplane->group->dev;
@@ -302,7 +302,7 @@ rcar_du_plane_atomic_duplicate_state(struct drm_plane *plane)
struct rcar_du_plane_state *state;
struct rcar_du_plane_state *copy;
- state = to_rcar_du_plane_state(plane->state);
+ state = to_rcar_plane_state(plane->state);
copy = kmemdup(state, sizeof(*state), GFP_KERNEL);
if (copy == NULL)
return NULL;
@@ -319,7 +319,7 @@ static void rcar_du_plane_atomic_destroy_state(struct drm_plane *plane,
if (state->fb)
drm_framebuffer_unreference(state->fb);
- kfree(to_rcar_du_plane_state(state));
+ kfree(to_rcar_plane_state(state));
}
static int rcar_du_plane_atomic_set_property(struct drm_plane *plane,
@@ -327,15 +327,14 @@ static int rcar_du_plane_atomic_set_property(struct drm_plane *plane,
struct drm_property *property,
uint64_t val)
{
- struct rcar_du_plane_state *rstate = to_rcar_du_plane_state(state);
- struct rcar_du_plane *rplane = to_rcar_plane(plane);
- struct rcar_du_group *rgrp = rplane->group;
+ struct rcar_du_plane_state *rstate = to_rcar_plane_state(state);
+ struct rcar_du_device *rcdu = to_rcar_plane(plane)->group->dev;
- if (property == rgrp->planes.alpha)
+ if (property == rcdu->props.alpha)
rstate->alpha = val;
- else if (property == rgrp->planes.colorkey)
+ else if (property == rcdu->props.colorkey)
rstate->colorkey = val;
- else if (property == rgrp->planes.zpos)
+ else if (property == rcdu->props.zpos)
rstate->zpos = val;
else
return -EINVAL;
@@ -349,14 +348,13 @@ static int rcar_du_plane_atomic_get_property(struct drm_plane *plane,
{
const struct rcar_du_plane_state *rstate =
container_of(state, const struct rcar_du_plane_state, state);
- struct rcar_du_plane *rplane = to_rcar_plane(plane);
- struct rcar_du_group *rgrp = rplane->group;
+ struct rcar_du_device *rcdu = to_rcar_plane(plane)->group->dev;
- if (property == rgrp->planes.alpha)
+ if (property == rcdu->props.alpha)
*val = rstate->alpha;
- else if (property == rgrp->planes.colorkey)
+ else if (property == rcdu->props.colorkey)
*val = rstate->colorkey;
- else if (property == rgrp->planes.zpos)
+ else if (property == rcdu->props.zpos)
*val = rstate->zpos;
else
return -EINVAL;
@@ -391,47 +389,24 @@ static const uint32_t formats[] = {
int rcar_du_planes_init(struct rcar_du_group *rgrp)
{
- struct rcar_du_planes *planes = &rgrp->planes;
struct rcar_du_device *rcdu = rgrp->dev;
unsigned int num_planes;
- unsigned int num_crtcs;
unsigned int crtcs;
unsigned int i;
int ret;
- planes->alpha =
- drm_property_create_range(rcdu->ddev, 0, "alpha", 0, 255);
- if (planes->alpha == NULL)
- return -ENOMEM;
-
- /* The color key is expressed as an RGB888 triplet stored in a 32-bit
- * integer in XRGB8888 format. Bit 24 is used as a flag to disable (0)
- * or enable source color keying (1).
- */
- planes->colorkey =
- drm_property_create_range(rcdu->ddev, 0, "colorkey",
- 0, 0x01ffffff);
- if (planes->colorkey == NULL)
- return -ENOMEM;
-
- planes->zpos =
- drm_property_create_range(rcdu->ddev, 0, "zpos", 1, 7);
- if (planes->zpos == NULL)
- return -ENOMEM;
-
- /* Create one primary plane per in this group CRTC and seven overlay
+ /* Create one primary plane per CRTC in this group and seven overlay
* planes.
*/
- num_crtcs = min(rcdu->num_crtcs - 2 * rgrp->index, 2U);
- num_planes = num_crtcs + 7;
+ num_planes = rgrp->num_crtcs + 7;
crtcs = ((1 << rcdu->num_crtcs) - 1) & (3 << (2 * rgrp->index));
for (i = 0; i < num_planes; ++i) {
- enum drm_plane_type type = i < num_crtcs
+ enum drm_plane_type type = i < rgrp->num_crtcs
? DRM_PLANE_TYPE_PRIMARY
: DRM_PLANE_TYPE_OVERLAY;
- struct rcar_du_plane *plane = &planes->planes[i];
+ struct rcar_du_plane *plane = &rgrp->planes[i];
plane->group = rgrp;
@@ -448,12 +423,12 @@ int rcar_du_planes_init(struct rcar_du_group *rgrp)
continue;
drm_object_attach_property(&plane->plane.base,
- planes->alpha, 255);
+ rcdu->props.alpha, 255);
drm_object_attach_property(&plane->plane.base,
- planes->colorkey,
+ rcdu->props.colorkey,
RCAR_DU_COLORKEY_NONE);
drm_object_attach_property(&plane->plane.base,
- planes->zpos, 1);
+ rcdu->props.zpos, 1);
}
return 0;
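
The colorkey property that moved to the device level keeps its encoding: an RGB888 triplet in the low 24 bits of the 32-bit value, with bit 24 acting as the source color keying enable flag. A small pack/inspect sketch of that layout (helper names are made up):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define COLORKEY_ENABLE   (1u << 24)   /* bit 24: source color keying on/off */
#define COLORKEY_RGB_MASK 0x00ffffffu

static uint32_t colorkey_pack(uint8_t r, uint8_t g, uint8_t b, bool enable)
{
    uint32_t key = ((uint32_t)r << 16) | ((uint32_t)g << 8) | b;

    return enable ? key | COLORKEY_ENABLE : key;
}

int main(void)
{
    uint32_t key = colorkey_pack(0x12, 0x34, 0x56, true);

    printf("0x%08x rgb=0x%06x enabled=%d\n", (unsigned)key,
           (unsigned)(key & COLORKEY_RGB_MASK), !!(key & COLORKEY_ENABLE));
    return 0;
}
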
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.h b/drivers/gpu/drm/rcar-du/rcar_du_plane.h
index abff0ebeb195..9732bff1911b 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_plane.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.h
@@ -38,19 +38,20 @@ static inline struct rcar_du_plane *to_rcar_plane(struct drm_plane *plane)
return container_of(plane, struct rcar_du_plane, plane);
}
-struct rcar_du_planes {
- struct rcar_du_plane planes[RCAR_DU_NUM_KMS_PLANES];
-
- struct drm_property *alpha;
- struct drm_property *colorkey;
- struct drm_property *zpos;
-};
-
+/**
+ * struct rcar_du_plane_state - Driver-specific plane state
+ * @state: base DRM plane state
+ * @format: information about the pixel format used by the plane
+ * @hwindex: 0-based hardware plane index, -1 means unused
+ * @alpha: value of the plane alpha property
+ * @colorkey: value of the plane colorkey property
+ * @zpos: value of the plane zpos property
+ */
struct rcar_du_plane_state {
struct drm_plane_state state;
const struct rcar_du_format_info *format;
- int hwindex; /* 0-based, -1 means unused */
+ int hwindex;
unsigned int alpha;
unsigned int colorkey;
@@ -58,7 +59,7 @@ struct rcar_du_plane_state {
};
static inline struct rcar_du_plane_state *
-to_rcar_du_plane_state(struct drm_plane_state *state)
+to_rcar_plane_state(struct drm_plane_state *state)
{
return container_of(state, struct rcar_du_plane_state, state);
}
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index 01e1d27eb078..3077f1554099 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -342,9 +342,12 @@ static struct dma_page *__ttm_dma_alloc_page(struct dma_pool *pool)
d_page->vaddr = dma_alloc_coherent(pool->dev, pool->size,
&d_page->dma,
pool->gfp_flags);
- if (d_page->vaddr)
- d_page->p = virt_to_page(d_page->vaddr);
- else {
+ if (d_page->vaddr) {
+ if (is_vmalloc_addr(d_page->vaddr))
+ d_page->p = vmalloc_to_page(d_page->vaddr);
+ else
+ d_page->p = virt_to_page(d_page->vaddr);
+ } else {
kfree(d_page);
d_page = NULL;
}
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index bd0d644d2a03..17c445612e01 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -286,11 +286,9 @@
INTEL_SKL_GT2_IDS(info), \
INTEL_SKL_GT3_IDS(info)
-
#define INTEL_BXT_IDS(info) \
INTEL_VGA_DEVICE(0x0A84, info), \
- INTEL_VGA_DEVICE(0x0A85, info), \
- INTEL_VGA_DEVICE(0x0A86, info), \
- INTEL_VGA_DEVICE(0x0A87, info)
+ INTEL_VGA_DEVICE(0x1A84, info), \
+ INTEL_VGA_DEVICE(0x5A84, info)
#endif /* _I915_PCIIDS_H */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 4851d660243c..6e1a2ed116cb 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -171,8 +171,12 @@ typedef struct _drm_i915_sarea {
#define I915_BOX_TEXTURE_LOAD 0x8
#define I915_BOX_LOST_CONTEXT 0x10
-/* I915 specific ioctls
- * The device specific ioctl range is 0x40 to 0x79.
+/*
+ * i915 specific ioctls.
+ *
+ * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END), i.e.
+ * [0x40, 0xa0) (0xa0 is excluded). The numbers below are defined as offsets
+ * from DRM_COMMAND_BASE and should lie within [0x0, 0x60).
*/
#define DRM_I915_INIT 0x00
#define DRM_I915_FLUSH 0x01
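
The reworded comment states that the device specific range is [DRM_COMMAND_BASE, DRM_COMMAND_END), i.e. [0x40, 0xa0), and that the DRM_I915_* values are offsets from DRM_COMMAND_BASE. A quick check of that arithmetic, with the constants copied from the comment and the offsets from the listing:

#include <assert.h>
#include <stdio.h>

#define DRM_COMMAND_BASE 0x40 /* start of the device specific range */
#define DRM_COMMAND_END  0xa0 /* one past its end */

#define DRM_I915_FLUSH   0x01 /* offset from the listing above */

int main(void)
{
    /* A driver ioctl number is its offset added to DRM_COMMAND_BASE. */
    int nr = DRM_COMMAND_BASE + DRM_I915_FLUSH;

    assert(nr >= DRM_COMMAND_BASE && nr < DRM_COMMAND_END);
    printf("DRM_I915_FLUSH -> ioctl nr 0x%02x\n", (unsigned)nr); /* 0x41 */
    return 0;
}
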
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 871e73f99a4d..573cb86a3d6e 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -1038,6 +1038,7 @@ struct drm_radeon_cs {
#define RADEON_INFO_CURRENT_GPU_SCLK 0x22
#define RADEON_INFO_CURRENT_GPU_MCLK 0x23
#define RADEON_INFO_READ_REG 0x24
+#define RADEON_INFO_GPU_RESET_COUNTER 0x25
struct drm_radeon_info {
uint32_t request;
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 4ca35a8f9891..d6833426fdef 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -128,6 +128,32 @@ struct kfd_ioctl_get_process_apertures_args {
uint32_t pad;
};
+#define MAX_ALLOWED_NUM_POINTS 100
+#define MAX_ALLOWED_AW_BUFF_SIZE 4096
+#define MAX_ALLOWED_WAC_BUFF_SIZE 128
+
+struct kfd_ioctl_dbg_register_args {
+ uint32_t gpu_id; /* to KFD */
+ uint32_t pad;
+};
+
+struct kfd_ioctl_dbg_unregister_args {
+ uint32_t gpu_id; /* to KFD */
+ uint32_t pad;
+};
+
+struct kfd_ioctl_dbg_address_watch_args {
+ uint64_t content_ptr; /* a pointer to the actual content */
+ uint32_t gpu_id; /* to KFD */
+ uint32_t buf_size_in_bytes; /* including gpu_id and buf_size */
+};
+
+struct kfd_ioctl_dbg_wave_control_args {
+ uint64_t content_ptr; /* a pointer to the actual content */
+ uint32_t gpu_id; /* to KFD */
+ uint32_t buf_size_in_bytes; /* including gpu_id and buf_size */
+};
+
/* Matching HSA_EVENTTYPE */
#define KFD_IOC_EVENT_SIGNAL 0
#define KFD_IOC_EVENT_NODECHANGE 1
@@ -198,7 +224,8 @@ struct kfd_event_data {
};
struct kfd_ioctl_wait_events_args {
- uint64_t events_ptr; /* to KFD */
+ uint64_t events_ptr; /* pointed to struct
+ kfd_event_data array, to KFD */
uint32_t num_events; /* to KFD */
uint32_t wait_for_all; /* to KFD */
uint32_t timeout; /* to KFD */
@@ -247,7 +274,19 @@ struct kfd_ioctl_wait_events_args {
#define AMDKFD_IOC_WAIT_EVENTS \
AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
+#define AMDKFD_IOC_DBG_REGISTER \
+ AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args)
+
+#define AMDKFD_IOC_DBG_UNREGISTER \
+ AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args)
+
+#define AMDKFD_IOC_DBG_ADDRESS_WATCH \
+ AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args)
+
+#define AMDKFD_IOC_DBG_WAVE_CONTROL \
+ AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
+
#define AMDKFD_COMMAND_START 0x01
-#define AMDKFD_COMMAND_END 0x0D
+#define AMDKFD_COMMAND_END 0x11
#endif
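
AMDKFD_COMMAND_END moves from 0x0D to 0x11 because four debugger ioctls (0x0D through 0x10) were added, and the last valid command number must stay below the END value. The sketch below checks the new range, assuming the usual half-open [START, END) interpretation:

#include <stdbool.h>
#include <stdio.h>

#define AMDKFD_COMMAND_START 0x01
#define AMDKFD_COMMAND_END   0x11 /* new value from the diff */

/* Assumed dispatcher rule: valid command numbers lie in [START, END). */
static bool kfd_nr_valid(unsigned int nr)
{
    return nr >= AMDKFD_COMMAND_START && nr < AMDKFD_COMMAND_END;
}

int main(void)
{
    printf("%d\n", kfd_nr_valid(0x10)); /* 1: AMDKFD_IOC_DBG_WAVE_CONTROL */
    printf("%d\n", kfd_nr_valid(0x11)); /* 0: one past the last command */
    return 0;
}
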