diff options
96 files changed, 5697 insertions, 1653 deletions
diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index 5d9d851d8623..109fde8b4a28 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -4165,6 +4165,12 @@ int num_ioctls;</synopsis> </tgroup> </table> </sect2> + + <sect2> + <title>CSR firmware support for DMC</title> +!Pdrivers/gpu/drm/i915/intel_csr.c csr support for dmc +!Idrivers/gpu/drm/i915/intel_csr.c + </sect2> </sect1> <sect1> @@ -4216,7 +4222,6 @@ int num_ioctls;</synopsis> !Idrivers/gpu/drm/i915/i915_gem_shrinker.c </sect2> </sect1> - <sect1> <title> Tracing </title> <para> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 652d25478fd5..28551153ec6d 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -12,6 +12,7 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_kernel_queue_vi.o kfd_packet_manager.o \ kfd_process_queue_manager.o kfd_device_queue_manager.o \ kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ - kfd_interrupt.o kfd_events.o cik_event_interrupt.o + kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ + kfd_dbgdev.o kfd_dbgmgr.o obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index b2c6109bd7af..96c904b3acb7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -35,6 +35,7 @@ #include <asm/processor.h> #include "kfd_priv.h" #include "kfd_device_queue_manager.h" +#include "kfd_dbgmgr.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); @@ -432,6 +433,301 @@ out: return err; } +static int kfd_ioctl_dbg_register(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_dbg_register_args *args = data; + struct kfd_dev *dev; + struct kfd_dbgmgr *dbgmgr_ptr; + struct kfd_process_device *pdd; + bool create_ok; + long status = 0; + + dev = kfd_device_by_id(args->gpu_id); + if (dev == NULL) + return -EINVAL; + + if (dev->device_info->asic_family == CHIP_CARRIZO) { + pr_debug("kfd_ioctl_dbg_register not supported on CZ\n"); + return -EINVAL; + } + + mutex_lock(kfd_get_dbgmgr_mutex()); + mutex_lock(&p->mutex); + + /* + * make sure that we have pdd, if this the first queue created for + * this process + */ + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + mutex_unlock(&p->mutex); + mutex_unlock(kfd_get_dbgmgr_mutex()); + return PTR_ERR(pdd); + } + + if (dev->dbgmgr == NULL) { + /* In case of a legal call, we have no dbgmgr yet */ + create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev); + if (create_ok) { + status = kfd_dbgmgr_register(dbgmgr_ptr, p); + if (status != 0) + kfd_dbgmgr_destroy(dbgmgr_ptr); + else + dev->dbgmgr = dbgmgr_ptr; + } + } else { + pr_debug("debugger already registered\n"); + status = -EINVAL; + } + + mutex_unlock(&p->mutex); + mutex_unlock(kfd_get_dbgmgr_mutex()); + + return status; +} + +static int kfd_ioctl_dbg_unrgesiter(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_dbg_unregister_args *args = data; + struct kfd_dev *dev; + long status; + + dev = kfd_device_by_id(args->gpu_id); + if (dev == NULL) + return -EINVAL; + + if (dev->device_info->asic_family == CHIP_CARRIZO) { + pr_debug("kfd_ioctl_dbg_unrgesiter not supported on CZ\n"); + return -EINVAL; + } + + mutex_lock(kfd_get_dbgmgr_mutex()); + + status = kfd_dbgmgr_unregister(dev->dbgmgr, p); + if (status == 0) { + kfd_dbgmgr_destroy(dev->dbgmgr); + dev->dbgmgr = NULL; + } + + mutex_unlock(kfd_get_dbgmgr_mutex()); + + return status; +} + +/* + * Parse and generate variable size data structure for address watch. + * Total size of the buffer and # watch points is limited in order + * to prevent kernel abuse. (no bearing to the much smaller HW limitation + * which is enforced by dbgdev module) + * please also note that the watch address itself are not "copied from user", + * since it be set into the HW in user mode values. + * + */ +static int kfd_ioctl_dbg_address_watch(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_dbg_address_watch_args *args = data; + struct kfd_dev *dev; + struct dbg_address_watch_info aw_info; + unsigned char *args_buff; + long status; + void __user *cmd_from_user; + uint64_t watch_mask_value = 0; + unsigned int args_idx = 0; + + memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info)); + + dev = kfd_device_by_id(args->gpu_id); + if (dev == NULL) + return -EINVAL; + + if (dev->device_info->asic_family == CHIP_CARRIZO) { + pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); + return -EINVAL; + } + + cmd_from_user = (void __user *) args->content_ptr; + + /* Validate arguments */ + + if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) || + (args->buf_size_in_bytes <= sizeof(*args)) || + (cmd_from_user == NULL)) + return -EINVAL; + + /* this is the actual buffer to work with */ + + args_buff = kmalloc(args->buf_size_in_bytes - + sizeof(*args), GFP_KERNEL); + if (args_buff == NULL) + return -ENOMEM; + + status = copy_from_user(args_buff, cmd_from_user, + args->buf_size_in_bytes - sizeof(*args)); + + if (status != 0) { + pr_debug("Failed to copy address watch user data\n"); + kfree(args_buff); + return -EINVAL; + } + + aw_info.process = p; + + aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx])); + args_idx += sizeof(aw_info.num_watch_points); + + aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx]; + args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points; + + /* + * set watch address base pointer to point on the array base + * within args_buff + */ + aw_info.watch_address = (uint64_t *) &args_buff[args_idx]; + + /* skip over the addresses buffer */ + args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points; + + if (args_idx >= args->buf_size_in_bytes) { + kfree(args_buff); + return -EINVAL; + } + + watch_mask_value = (uint64_t) args_buff[args_idx]; + + if (watch_mask_value > 0) { + /* + * There is an array of masks. + * set watch mask base pointer to point on the array base + * within args_buff + */ + aw_info.watch_mask = (uint64_t *) &args_buff[args_idx]; + + /* skip over the masks buffer */ + args_idx += sizeof(aw_info.watch_mask) * + aw_info.num_watch_points; + } else { + /* just the NULL mask, set to NULL and skip over it */ + aw_info.watch_mask = NULL; + args_idx += sizeof(aw_info.watch_mask); + } + + if (args_idx > args->buf_size_in_bytes) { + kfree(args_buff); + return -EINVAL; + } + + /* Currently HSA Event is not supported for DBG */ + aw_info.watch_event = NULL; + + mutex_lock(kfd_get_dbgmgr_mutex()); + + status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info); + + mutex_unlock(kfd_get_dbgmgr_mutex()); + + kfree(args_buff); + + return status; +} + +/* Parse and generate fixed size data structure for wave control */ +static int kfd_ioctl_dbg_wave_control(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_dbg_wave_control_args *args = data; + struct kfd_dev *dev; + struct dbg_wave_control_info wac_info; + unsigned char *args_buff; + uint32_t computed_buff_size; + long status; + void __user *cmd_from_user; + unsigned int args_idx = 0; + + memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info)); + + /* we use compact form, independent of the packing attribute value */ + computed_buff_size = sizeof(*args) + + sizeof(wac_info.mode) + + sizeof(wac_info.operand) + + sizeof(wac_info.dbgWave_msg.DbgWaveMsg) + + sizeof(wac_info.dbgWave_msg.MemoryVA) + + sizeof(wac_info.trapId); + + dev = kfd_device_by_id(args->gpu_id); + if (dev == NULL) + return -EINVAL; + + if (dev->device_info->asic_family == CHIP_CARRIZO) { + pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n"); + return -EINVAL; + } + + /* input size must match the computed "compact" size */ + if (args->buf_size_in_bytes != computed_buff_size) { + pr_debug("size mismatch, computed : actual %u : %u\n", + args->buf_size_in_bytes, computed_buff_size); + return -EINVAL; + } + + cmd_from_user = (void __user *) args->content_ptr; + + if (cmd_from_user == NULL) + return -EINVAL; + + /* this is the actual buffer to work with */ + + args_buff = kmalloc(args->buf_size_in_bytes - sizeof(*args), + GFP_KERNEL); + + if (args_buff == NULL) + return -ENOMEM; + + /* Now copy the entire buffer from user */ + status = copy_from_user(args_buff, cmd_from_user, + args->buf_size_in_bytes - sizeof(*args)); + if (status != 0) { + pr_debug("Failed to copy wave control user data\n"); + kfree(args_buff); + return -EINVAL; + } + + /* move ptr to the start of the "pay-load" area */ + wac_info.process = p; + + wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx])); + args_idx += sizeof(wac_info.operand); + + wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx])); + args_idx += sizeof(wac_info.mode); + + wac_info.trapId = *((uint32_t *)(&args_buff[args_idx])); + args_idx += sizeof(wac_info.trapId); + + wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value = + *((uint32_t *)(&args_buff[args_idx])); + wac_info.dbgWave_msg.MemoryVA = NULL; + + mutex_lock(kfd_get_dbgmgr_mutex()); + + pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n", + wac_info.process, wac_info.operand, + wac_info.mode, wac_info.trapId, + wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); + + status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info); + + pr_debug("Returned status of dbg manager is %ld\n", status); + + mutex_unlock(kfd_get_dbgmgr_mutex()); + + kfree(args_buff); + + return status; +} + static int kfd_ioctl_get_clock_counters(struct file *filep, struct kfd_process *p, void *data) { @@ -612,6 +908,18 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS, kfd_ioctl_wait_events, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER, + kfd_ioctl_dbg_register, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER, + kfd_ioctl_dbg_unrgesiter, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH, + kfd_ioctl_dbg_address_watch, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL, + kfd_ioctl_dbg_wave_control, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c new file mode 100644 index 000000000000..96153f28d73f --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -0,0 +1,886 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/mutex.h> +#include <linux/device.h> + +#include "kfd_pm4_headers.h" +#include "kfd_pm4_headers_diq.h" +#include "kfd_kernel_queue.h" +#include "kfd_priv.h" +#include "kfd_pm4_opcodes.h" +#include "cik_regs.h" +#include "kfd_dbgmgr.h" +#include "kfd_dbgdev.h" +#include "kfd_device_queue_manager.h" +#include "../../radeon/cik_reg.h" + +static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) +{ + BUG_ON(!dev || !dev->kfd2kgd); + + dev->kfd2kgd->address_watch_disable(dev->kgd); +} + +static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, + unsigned int pasid, uint64_t vmid0_address, + uint32_t *packet_buff, size_t size_in_bytes) +{ + struct pm4__release_mem *rm_packet; + struct pm4__indirect_buffer_pasid *ib_packet; + struct kfd_mem_obj *mem_obj; + size_t pq_packets_size_in_bytes; + union ULARGE_INTEGER *largep; + union ULARGE_INTEGER addr; + struct kernel_queue *kq; + uint64_t *rm_state; + unsigned int *ib_packet_buff; + int status; + + BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes); + + kq = dbgdev->kq; + + pq_packets_size_in_bytes = sizeof(struct pm4__release_mem) + + sizeof(struct pm4__indirect_buffer_pasid); + + /* + * We acquire a buffer from DIQ + * The receive packet buff will be sitting on the Indirect Buffer + * and in the PQ we put the IB packet + sync packet(s). + */ + status = kq->ops.acquire_packet_buffer(kq, + pq_packets_size_in_bytes / sizeof(uint32_t), + &ib_packet_buff); + if (status != 0) { + pr_err("amdkfd: acquire_packet_buffer failed\n"); + return status; + } + + memset(ib_packet_buff, 0, pq_packets_size_in_bytes); + + ib_packet = (struct pm4__indirect_buffer_pasid *) (ib_packet_buff); + + ib_packet->header.count = 3; + ib_packet->header.opcode = IT_INDIRECT_BUFFER_PASID; + ib_packet->header.type = PM4_TYPE_3; + + largep = (union ULARGE_INTEGER *) &vmid0_address; + + ib_packet->bitfields2.ib_base_lo = largep->u.low_part >> 2; + ib_packet->bitfields3.ib_base_hi = largep->u.high_part; + + ib_packet->control = (1 << 23) | (1 << 31) | + ((size_in_bytes / sizeof(uint32_t)) & 0xfffff); + + ib_packet->bitfields5.pasid = pasid; + + /* + * for now we use release mem for GPU-CPU synchronization + * Consider WaitRegMem + WriteData as a better alternative + * we get a GART allocations ( gpu/cpu mapping), + * for the sync variable, and wait until: + * (a) Sync with HW + * (b) Sync var is written by CP to mem. + */ + rm_packet = (struct pm4__release_mem *) (ib_packet_buff + + (sizeof(struct pm4__indirect_buffer_pasid) / + sizeof(unsigned int))); + + status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t), + &mem_obj); + + if (status != 0) { + pr_err("amdkfd: Failed to allocate GART memory\n"); + kq->ops.rollback_packet(kq); + return status; + } + + rm_state = (uint64_t *) mem_obj->cpu_ptr; + + *rm_state = QUEUESTATE__ACTIVE_COMPLETION_PENDING; + + rm_packet->header.opcode = IT_RELEASE_MEM; + rm_packet->header.type = PM4_TYPE_3; + rm_packet->header.count = sizeof(struct pm4__release_mem) / + sizeof(unsigned int) - 2; + + rm_packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + rm_packet->bitfields2.event_index = + event_index___release_mem__end_of_pipe; + + rm_packet->bitfields2.cache_policy = cache_policy___release_mem__lru; + rm_packet->bitfields2.atc = 0; + rm_packet->bitfields2.tc_wb_action_ena = 1; + + addr.quad_part = mem_obj->gpu_addr; + + rm_packet->bitfields4.address_lo_32b = addr.u.low_part >> 2; + rm_packet->address_hi = addr.u.high_part; + + rm_packet->bitfields3.data_sel = + data_sel___release_mem__send_64_bit_data; + + rm_packet->bitfields3.int_sel = + int_sel___release_mem__send_data_after_write_confirm; + + rm_packet->bitfields3.dst_sel = + dst_sel___release_mem__memory_controller; + + rm_packet->data_lo = QUEUESTATE__ACTIVE; + + kq->ops.submit_packet(kq); + + /* Wait till CP writes sync code: */ + status = amdkfd_fence_wait_timeout( + (unsigned int *) rm_state, + QUEUESTATE__ACTIVE, 1500); + + kfd_gtt_sa_free(dbgdev->dev, mem_obj); + + return status; +} + +static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev) +{ + BUG_ON(!dbgdev); + + /* + * no action is needed in this case, + * just make sure diq will not be used + */ + + dbgdev->kq = NULL; + + return 0; +} + +static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) +{ + struct queue_properties properties; + unsigned int qid; + struct kernel_queue *kq = NULL; + int status; + + BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev); + + status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, + &properties, 0, KFD_QUEUE_TYPE_DIQ, + &qid); + + if (status) { + pr_err("amdkfd: Failed to create DIQ\n"); + return status; + } + + pr_debug("DIQ Created with queue id: %d\n", qid); + + kq = pqm_get_kernel_queue(dbgdev->pqm, qid); + + if (kq == NULL) { + pr_err("amdkfd: Error getting DIQ\n"); + pqm_destroy_queue(dbgdev->pqm, qid); + return -EFAULT; + } + + dbgdev->kq = kq; + + return status; +} + +static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev) +{ + BUG_ON(!dbgdev || !dbgdev->dev); + + /* disable watch address */ + dbgdev_address_watch_disable_nodiq(dbgdev->dev); + return 0; +} + +static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev) +{ + /* todo - disable address watch */ + int status; + + BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq); + + status = pqm_destroy_queue(dbgdev->pqm, + dbgdev->kq->queue->properties.queue_id); + dbgdev->kq = NULL; + + return status; +} + +static void dbgdev_address_watch_set_registers( + const struct dbg_address_watch_info *adw_info, + union TCP_WATCH_ADDR_H_BITS *addrHi, + union TCP_WATCH_ADDR_L_BITS *addrLo, + union TCP_WATCH_CNTL_BITS *cntl, + unsigned int index, unsigned int vmid) +{ + union ULARGE_INTEGER addr; + + BUG_ON(!adw_info || !addrHi || !addrLo || !cntl); + + addr.quad_part = 0; + addrHi->u32All = 0; + addrLo->u32All = 0; + cntl->u32All = 0; + + if (adw_info->watch_mask != NULL) + cntl->bitfields.mask = + (uint32_t) (adw_info->watch_mask[index] & + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK); + else + cntl->bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; + + addr.quad_part = (unsigned long long) adw_info->watch_address[index]; + + addrHi->bitfields.addr = addr.u.high_part & + ADDRESS_WATCH_REG_ADDHIGH_MASK; + addrLo->bitfields.addr = + (addr.u.low_part >> ADDRESS_WATCH_REG_ADDLOW_SHIFT); + + cntl->bitfields.mode = adw_info->watch_mode[index]; + cntl->bitfields.vmid = (uint32_t) vmid; + /* for now assume it is an ATC address */ + cntl->u32All |= ADDRESS_WATCH_REG_CNTL_ATC_BIT; + + pr_debug("\t\t%20s %08x\n", "set reg mask :", cntl->bitfields.mask); + pr_debug("\t\t%20s %08x\n", "set reg add high :", + addrHi->bitfields.addr); + pr_debug("\t\t%20s %08x\n", "set reg add low :", + addrLo->bitfields.addr); +} + +static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev, + struct dbg_address_watch_info *adw_info) +{ + union TCP_WATCH_ADDR_H_BITS addrHi; + union TCP_WATCH_ADDR_L_BITS addrLo; + union TCP_WATCH_CNTL_BITS cntl; + struct kfd_process_device *pdd; + unsigned int i; + + BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); + + /* taking the vmid for that process on the safe way using pdd */ + pdd = kfd_get_process_device_data(dbgdev->dev, + adw_info->process); + if (!pdd) { + pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); + return -EFAULT; + } + + addrHi.u32All = 0; + addrLo.u32All = 0; + cntl.u32All = 0; + + if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || + (adw_info->num_watch_points == 0)) { + pr_err("amdkfd: num_watch_points is invalid\n"); + return -EINVAL; + } + + if ((adw_info->watch_mode == NULL) || + (adw_info->watch_address == NULL)) { + pr_err("amdkfd: adw_info fields are not valid\n"); + return -EINVAL; + } + + for (i = 0 ; i < adw_info->num_watch_points ; i++) { + dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo, + &cntl, i, pdd->qpd.vmid); + + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); + pr_debug("\t\t%20s %08x\n", "register index :", i); + pr_debug("\t\t%20s %08x\n", "vmid is :", pdd->qpd.vmid); + pr_debug("\t\t%20s %08x\n", "Address Low is :", + addrLo.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Address high is :", + addrHi.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Address high is :", + addrHi.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Control Mask is :", + cntl.bitfields.mask); + pr_debug("\t\t%20s %08x\n", "Control Mode is :", + cntl.bitfields.mode); + pr_debug("\t\t%20s %08x\n", "Control Vmid is :", + cntl.bitfields.vmid); + pr_debug("\t\t%20s %08x\n", "Control atc is :", + cntl.bitfields.atc); + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); + + pdd->dev->kfd2kgd->address_watch_execute( + dbgdev->dev->kgd, + i, + cntl.u32All, + addrHi.u32All, + addrLo.u32All); + } + + return 0; +} + +static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev, + struct dbg_address_watch_info *adw_info) +{ + struct pm4__set_config_reg *packets_vec; + union TCP_WATCH_ADDR_H_BITS addrHi; + union TCP_WATCH_ADDR_L_BITS addrLo; + union TCP_WATCH_CNTL_BITS cntl; + struct kfd_mem_obj *mem_obj; + unsigned int aw_reg_add_dword; + uint32_t *packet_buff_uint; + unsigned int i; + int status; + size_t ib_size = sizeof(struct pm4__set_config_reg) * 4; + /* we do not control the vmid in DIQ mode, just a place holder */ + unsigned int vmid = 0; + + BUG_ON(!dbgdev || !dbgdev->dev || !adw_info); + + addrHi.u32All = 0; + addrLo.u32All = 0; + cntl.u32All = 0; + + if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) || + (adw_info->num_watch_points == 0)) { + pr_err("amdkfd: num_watch_points is invalid\n"); + return -EINVAL; + } + + if ((NULL == adw_info->watch_mode) || + (NULL == adw_info->watch_address)) { + pr_err("amdkfd: adw_info fields are not valid\n"); + return -EINVAL; + } + + status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); + + if (status != 0) { + pr_err("amdkfd: Failed to allocate GART memory\n"); + return status; + } + + packet_buff_uint = mem_obj->cpu_ptr; + + memset(packet_buff_uint, 0, ib_size); + + packets_vec = (struct pm4__set_config_reg *) (packet_buff_uint); + + packets_vec[0].header.count = 1; + packets_vec[0].header.opcode = IT_SET_CONFIG_REG; + packets_vec[0].header.type = PM4_TYPE_3; + packets_vec[0].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; + packets_vec[0].bitfields2.insert_vmid = 1; + packets_vec[1].ordinal1 = packets_vec[0].ordinal1; + packets_vec[1].bitfields2.insert_vmid = 0; + packets_vec[2].ordinal1 = packets_vec[0].ordinal1; + packets_vec[2].bitfields2.insert_vmid = 0; + packets_vec[3].ordinal1 = packets_vec[0].ordinal1; + packets_vec[3].bitfields2.vmid_shift = ADDRESS_WATCH_CNTL_OFFSET; + packets_vec[3].bitfields2.insert_vmid = 1; + + for (i = 0; i < adw_info->num_watch_points; i++) { + dbgdev_address_watch_set_registers(adw_info, + &addrHi, + &addrLo, + &cntl, + i, + vmid); + + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); + pr_debug("\t\t%20s %08x\n", "register index :", i); + pr_debug("\t\t%20s %08x\n", "vmid is :", vmid); + pr_debug("\t\t%20s %p\n", "Add ptr is :", + adw_info->watch_address); + pr_debug("\t\t%20s %08llx\n", "Add is :", + adw_info->watch_address[i]); + pr_debug("\t\t%20s %08x\n", "Address Low is :", + addrLo.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Address high is :", + addrHi.bitfields.addr); + pr_debug("\t\t%20s %08x\n", "Control Mask is :", + cntl.bitfields.mask); + pr_debug("\t\t%20s %08x\n", "Control Mode is :", + cntl.bitfields.mode); + pr_debug("\t\t%20s %08x\n", "Control Vmid is :", + cntl.bitfields.vmid); + pr_debug("\t\t%20s %08x\n", "Control atc is :", + cntl.bitfields.atc); + pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *"); + + aw_reg_add_dword = + dbgdev->dev->kfd2kgd->address_watch_get_offset( + dbgdev->dev->kgd, + i, + ADDRESS_WATCH_REG_CNTL); + + aw_reg_add_dword /= sizeof(uint32_t); + + packets_vec[0].bitfields2.reg_offset = + aw_reg_add_dword - CONFIG_REG_BASE; + + packets_vec[0].reg_data[0] = cntl.u32All; + + aw_reg_add_dword = + dbgdev->dev->kfd2kgd->address_watch_get_offset( + dbgdev->dev->kgd, + i, + ADDRESS_WATCH_REG_ADDR_HI); + + aw_reg_add_dword /= sizeof(uint32_t); + + packets_vec[1].bitfields2.reg_offset = + aw_reg_add_dword - CONFIG_REG_BASE; + packets_vec[1].reg_data[0] = addrHi.u32All; + + aw_reg_add_dword = + dbgdev->dev->kfd2kgd->address_watch_get_offset( + dbgdev->dev->kgd, + i, + ADDRESS_WATCH_REG_ADDR_LO); + + aw_reg_add_dword /= sizeof(uint32_t); + + packets_vec[2].bitfields2.reg_offset = + aw_reg_add_dword - CONFIG_REG_BASE; + packets_vec[2].reg_data[0] = addrLo.u32All; + + /* enable watch flag if address is not zero*/ + if (adw_info->watch_address[i] > 0) + cntl.bitfields.valid = 1; + else + cntl.bitfields.valid = 0; + + aw_reg_add_dword = + dbgdev->dev->kfd2kgd->address_watch_get_offset( + dbgdev->dev->kgd, + i, + ADDRESS_WATCH_REG_CNTL); + + aw_reg_add_dword /= sizeof(uint32_t); + + packets_vec[3].bitfields2.reg_offset = + aw_reg_add_dword - CONFIG_REG_BASE; + packets_vec[3].reg_data[0] = cntl.u32All; + + status = dbgdev_diq_submit_ib( + dbgdev, + adw_info->process->pasid, + mem_obj->gpu_addr, + packet_buff_uint, + ib_size); + + if (status != 0) { + pr_err("amdkfd: Failed to submit IB to DIQ\n"); + break; + } + } + + kfd_gtt_sa_free(dbgdev->dev, mem_obj); + return status; +} + +static int dbgdev_wave_control_set_registers( + struct dbg_wave_control_info *wac_info, + union SQ_CMD_BITS *in_reg_sq_cmd, + union GRBM_GFX_INDEX_BITS *in_reg_gfx_index) +{ + int status; + union SQ_CMD_BITS reg_sq_cmd; + union GRBM_GFX_INDEX_BITS reg_gfx_index; + struct HsaDbgWaveMsgAMDGen2 *pMsg; + + BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index); + + reg_sq_cmd.u32All = 0; + reg_gfx_index.u32All = 0; + pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2; + + switch (wac_info->mode) { + /* Send command to single wave */ + case HSA_DBG_WAVEMODE_SINGLE: + /* + * Limit access to the process waves only, + * by setting vmid check + */ + reg_sq_cmd.bits.check_vmid = 1; + reg_sq_cmd.bits.simd_id = pMsg->ui32.SIMD; + reg_sq_cmd.bits.wave_id = pMsg->ui32.WaveId; + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_SINGLE; + + reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; + reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; + reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; + + break; + + /* Send command to all waves with matching VMID */ + case HSA_DBG_WAVEMODE_BROADCAST_PROCESS: + + reg_gfx_index.bits.sh_broadcast_writes = 1; + reg_gfx_index.bits.se_broadcast_writes = 1; + reg_gfx_index.bits.instance_broadcast_writes = 1; + + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; + + break; + + /* Send command to all CU waves with matching VMID */ + case HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU: + + reg_sq_cmd.bits.check_vmid = 1; + reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; + + reg_gfx_index.bits.sh_index = pMsg->ui32.ShaderArray; + reg_gfx_index.bits.se_index = pMsg->ui32.ShaderEngine; + reg_gfx_index.bits.instance_index = pMsg->ui32.HSACU; + + break; + + default: + return -EINVAL; + } + + switch (wac_info->operand) { + case HSA_DBG_WAVEOP_HALT: + reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_HALT; + break; + + case HSA_DBG_WAVEOP_RESUME: + reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_RESUME; + break; + + case HSA_DBG_WAVEOP_KILL: + reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; + break; + + case HSA_DBG_WAVEOP_DEBUG: + reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_DEBUG; + break; + + case HSA_DBG_WAVEOP_TRAP: + if (wac_info->trapId < MAX_TRAPID) { + reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_TRAP; + reg_sq_cmd.bits.trap_id = wac_info->trapId; + } else { + status = -EINVAL; + } + break; + + default: + status = -EINVAL; + break; + } + + if (status == 0) { + *in_reg_sq_cmd = reg_sq_cmd; + *in_reg_gfx_index = reg_gfx_index; + } + + return status; +} + +static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev, + struct dbg_wave_control_info *wac_info) +{ + + int status; + union SQ_CMD_BITS reg_sq_cmd; + union GRBM_GFX_INDEX_BITS reg_gfx_index; + struct kfd_mem_obj *mem_obj; + uint32_t *packet_buff_uint; + struct pm4__set_config_reg *packets_vec; + size_t ib_size = sizeof(struct pm4__set_config_reg) * 3; + + BUG_ON(!dbgdev || !wac_info); + + reg_sq_cmd.u32All = 0; + + status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, + ®_gfx_index); + if (status) { + pr_err("amdkfd: Failed to set wave control registers\n"); + return status; + } + + /* we do not control the VMID in DIQ,so reset it to a known value */ + reg_sq_cmd.bits.vm_id = 0; + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + + pr_debug("\t\t mode is: %u\n", wac_info->mode); + pr_debug("\t\t operand is: %u\n", wac_info->operand); + pr_debug("\t\t trap id is: %u\n", wac_info->trapId); + pr_debug("\t\t msg value is: %u\n", + wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); + pr_debug("\t\t vmid is: N/A\n"); + + pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); + pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); + pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); + pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); + pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); + pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); + pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); + + pr_debug("\t\t ibw is : %u\n", + reg_gfx_index.bitfields.instance_broadcast_writes); + pr_debug("\t\t ii is : %u\n", + reg_gfx_index.bitfields.instance_index); + pr_debug("\t\t sebw is : %u\n", + reg_gfx_index.bitfields.se_broadcast_writes); + pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); + pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); + pr_debug("\t\t sbw is : %u\n", + reg_gfx_index.bitfields.sh_broadcast_writes); + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + + status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj); + + if (status != 0) { + pr_err("amdkfd: Failed to allocate GART memory\n"); + return status; + } + + packet_buff_uint = mem_obj->cpu_ptr; + + memset(packet_buff_uint, 0, ib_size); + + packets_vec = (struct pm4__set_config_reg *) packet_buff_uint; + packets_vec[0].header.count = 1; + packets_vec[0].header.opcode = IT_SET_UCONFIG_REG; + packets_vec[0].header.type = PM4_TYPE_3; + packets_vec[0].bitfields2.reg_offset = + GRBM_GFX_INDEX / (sizeof(uint32_t)) - + USERCONFIG_REG_BASE; + + packets_vec[0].bitfields2.insert_vmid = 0; + packets_vec[0].reg_data[0] = reg_gfx_index.u32All; + + packets_vec[1].header.count = 1; + packets_vec[1].header.opcode = IT_SET_CONFIG_REG; + packets_vec[1].header.type = PM4_TYPE_3; + packets_vec[1].bitfields2.reg_offset = SQ_CMD / (sizeof(uint32_t)) - + CONFIG_REG_BASE; + + packets_vec[1].bitfields2.vmid_shift = SQ_CMD_VMID_OFFSET; + packets_vec[1].bitfields2.insert_vmid = 1; + packets_vec[1].reg_data[0] = reg_sq_cmd.u32All; + + /* Restore the GRBM_GFX_INDEX register */ + + reg_gfx_index.u32All = 0; + reg_gfx_index.bits.sh_broadcast_writes = 1; + reg_gfx_index.bits.instance_broadcast_writes = 1; + reg_gfx_index.bits.se_broadcast_writes = 1; + + + packets_vec[2].ordinal1 = packets_vec[0].ordinal1; + packets_vec[2].bitfields2.reg_offset = + GRBM_GFX_INDEX / (sizeof(uint32_t)) - + USERCONFIG_REG_BASE; + + packets_vec[2].bitfields2.insert_vmid = 0; + packets_vec[2].reg_data[0] = reg_gfx_index.u32All; + + status = dbgdev_diq_submit_ib( + dbgdev, + wac_info->process->pasid, + mem_obj->gpu_addr, + packet_buff_uint, + ib_size); + + if (status != 0) + pr_err("amdkfd: Failed to submit IB to DIQ\n"); + + kfd_gtt_sa_free(dbgdev->dev, mem_obj); + + return status; +} + +static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev, + struct dbg_wave_control_info *wac_info) +{ + int status; + union SQ_CMD_BITS reg_sq_cmd; + union GRBM_GFX_INDEX_BITS reg_gfx_index; + struct kfd_process_device *pdd; + + BUG_ON(!dbgdev || !dbgdev->dev || !wac_info); + + reg_sq_cmd.u32All = 0; + + /* taking the VMID for that process on the safe way using PDD */ + pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process); + + if (!pdd) { + pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n"); + return -EFAULT; + } + status = dbgdev_wave_control_set_registers(wac_info, ®_sq_cmd, + ®_gfx_index); + if (status) { + pr_err("amdkfd: Failed to set wave control registers\n"); + return status; + } + + /* for non DIQ we need to patch the VMID: */ + + reg_sq_cmd.bits.vm_id = pdd->qpd.vmid; + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + + pr_debug("\t\t mode is: %u\n", wac_info->mode); + pr_debug("\t\t operand is: %u\n", wac_info->operand); + pr_debug("\t\t trap id is: %u\n", wac_info->trapId); + pr_debug("\t\t msg value is: %u\n", + wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value); + pr_debug("\t\t vmid is: %u\n", pdd->qpd.vmid); + + pr_debug("\t\t chk_vmid is : %u\n", reg_sq_cmd.bitfields.check_vmid); + pr_debug("\t\t command is : %u\n", reg_sq_cmd.bitfields.cmd); + pr_debug("\t\t queue id is : %u\n", reg_sq_cmd.bitfields.queue_id); + pr_debug("\t\t simd id is : %u\n", reg_sq_cmd.bitfields.simd_id); + pr_debug("\t\t mode is : %u\n", reg_sq_cmd.bitfields.mode); + pr_debug("\t\t vm_id is : %u\n", reg_sq_cmd.bitfields.vm_id); + pr_debug("\t\t wave_id is : %u\n", reg_sq_cmd.bitfields.wave_id); + + pr_debug("\t\t ibw is : %u\n", + reg_gfx_index.bitfields.instance_broadcast_writes); + pr_debug("\t\t ii is : %u\n", + reg_gfx_index.bitfields.instance_index); + pr_debug("\t\t sebw is : %u\n", + reg_gfx_index.bitfields.se_broadcast_writes); + pr_debug("\t\t se_ind is : %u\n", reg_gfx_index.bitfields.se_index); + pr_debug("\t\t sh_ind is : %u\n", reg_gfx_index.bitfields.sh_index); + pr_debug("\t\t sbw is : %u\n", + reg_gfx_index.bitfields.sh_broadcast_writes); + + pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); + + return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, + reg_gfx_index.u32All, + reg_sq_cmd.u32All); +} + +int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p) +{ + int status = 0; + unsigned int vmid; + union SQ_CMD_BITS reg_sq_cmd; + union GRBM_GFX_INDEX_BITS reg_gfx_index; + struct kfd_process_device *pdd; + struct dbg_wave_control_info wac_info; + int temp; + int first_vmid_to_scan = 8; + int last_vmid_to_scan = 15; + + first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1; + temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan; + last_vmid_to_scan = first_vmid_to_scan + ffz(temp); + + reg_sq_cmd.u32All = 0; + status = 0; + + wac_info.mode = HSA_DBG_WAVEMODE_BROADCAST_PROCESS; + wac_info.operand = HSA_DBG_WAVEOP_KILL; + + pr_debug("Killing all process wavefronts\n"); + + /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. + * ATC_VMID15_PASID_MAPPING + * to check which VMID the current process is mapped to. */ + + for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { + if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid + (dev->kgd, vmid)) { + if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid + (dev->kgd, vmid) == p->pasid) { + pr_debug("Killing wave fronts of vmid %d and pasid %d\n", + vmid, p->pasid); + break; + } + } + } + + if (vmid > last_vmid_to_scan) { + pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid); + return -EFAULT; + } + + /* taking the VMID for that process on the safe way using PDD */ + pdd = kfd_get_process_device_data(dev, p); + if (!pdd) + return -EFAULT; + + status = dbgdev_wave_control_set_registers(&wac_info, ®_sq_cmd, + ®_gfx_index); + if (status != 0) + return -EINVAL; + + /* for non DIQ we need to patch the VMID: */ + reg_sq_cmd.bits.vm_id = vmid; + + dev->kfd2kgd->wave_control_execute(dev->kgd, + reg_gfx_index.u32All, + reg_sq_cmd.u32All); + + return 0; +} + +void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, + enum DBGDEV_TYPE type) +{ + BUG_ON(!pdbgdev || !pdev); + + pdbgdev->dev = pdev; + pdbgdev->kq = NULL; + pdbgdev->type = type; + pdbgdev->pqm = NULL; + + switch (type) { + case DBGDEV_TYPE_NODIQ: + pdbgdev->dbgdev_register = dbgdev_register_nodiq; + pdbgdev->dbgdev_unregister = dbgdev_unregister_nodiq; + pdbgdev->dbgdev_wave_control = dbgdev_wave_control_nodiq; + pdbgdev->dbgdev_address_watch = dbgdev_address_watch_nodiq; + break; + case DBGDEV_TYPE_DIQ: + default: + pdbgdev->dbgdev_register = dbgdev_register_diq; + pdbgdev->dbgdev_unregister = dbgdev_unregister_diq; + pdbgdev->dbgdev_wave_control = dbgdev_wave_control_diq; + pdbgdev->dbgdev_address_watch = dbgdev_address_watch_diq; + break; + } + +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h new file mode 100644 index 000000000000..4b0dd5aa5306 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.h @@ -0,0 +1,193 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef KFD_DBGDEV_H_ +#define KFD_DBGDEV_H_ + +enum { + SQ_CMD_VMID_OFFSET = 28, + ADDRESS_WATCH_CNTL_OFFSET = 24 +}; + +enum { + PRIV_QUEUE_SYNC_TIME_MS = 200 +}; + +/* CONTEXT reg space definition */ +enum { + CONTEXT_REG_BASE = 0xA000, + CONTEXT_REG_END = 0xA400, + CONTEXT_REG_SIZE = CONTEXT_REG_END - CONTEXT_REG_BASE +}; + +/* USER CONFIG reg space definition */ +enum { + USERCONFIG_REG_BASE = 0xC000, + USERCONFIG_REG_END = 0x10000, + USERCONFIG_REG_SIZE = USERCONFIG_REG_END - USERCONFIG_REG_BASE +}; + +/* CONFIG reg space definition */ +enum { + CONFIG_REG_BASE = 0x2000, /* in dwords */ + CONFIG_REG_END = 0x2B00, + CONFIG_REG_SIZE = CONFIG_REG_END - CONFIG_REG_BASE +}; + +/* SH reg space definition */ +enum { + SH_REG_BASE = 0x2C00, + SH_REG_END = 0x3000, + SH_REG_SIZE = SH_REG_END - SH_REG_BASE +}; + +enum SQ_IND_CMD_CMD { + SQ_IND_CMD_CMD_NULL = 0x00000000, + SQ_IND_CMD_CMD_HALT = 0x00000001, + SQ_IND_CMD_CMD_RESUME = 0x00000002, + SQ_IND_CMD_CMD_KILL = 0x00000003, + SQ_IND_CMD_CMD_DEBUG = 0x00000004, + SQ_IND_CMD_CMD_TRAP = 0x00000005, +}; + +enum SQ_IND_CMD_MODE { + SQ_IND_CMD_MODE_SINGLE = 0x00000000, + SQ_IND_CMD_MODE_BROADCAST = 0x00000001, + SQ_IND_CMD_MODE_BROADCAST_QUEUE = 0x00000002, + SQ_IND_CMD_MODE_BROADCAST_PIPE = 0x00000003, + SQ_IND_CMD_MODE_BROADCAST_ME = 0x00000004, +}; + +union SQ_IND_INDEX_BITS { + struct { + uint32_t wave_id:4; + uint32_t simd_id:2; + uint32_t thread_id:6; + uint32_t:1; + uint32_t force_read:1; + uint32_t read_timeout:1; + uint32_t unindexed:1; + uint32_t index:16; + + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union SQ_IND_CMD_BITS { + struct { + uint32_t data:32; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union SQ_CMD_BITS { + struct { + uint32_t cmd:3; + uint32_t:1; + uint32_t mode:3; + uint32_t check_vmid:1; + uint32_t trap_id:3; + uint32_t:5; + uint32_t wave_id:4; + uint32_t simd_id:2; + uint32_t:2; + uint32_t queue_id:3; + uint32_t:1; + uint32_t vm_id:4; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union SQ_IND_DATA_BITS { + struct { + uint32_t data:32; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union GRBM_GFX_INDEX_BITS { + struct { + uint32_t instance_index:8; + uint32_t sh_index:8; + uint32_t se_index:8; + uint32_t:5; + uint32_t sh_broadcast_writes:1; + uint32_t instance_broadcast_writes:1; + uint32_t se_broadcast_writes:1; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union TCP_WATCH_ADDR_H_BITS { + struct { + uint32_t addr:16; + uint32_t:16; + + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +union TCP_WATCH_ADDR_L_BITS { + struct { + uint32_t:6; + uint32_t addr:26; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + +enum { + QUEUESTATE__INVALID = 0, /* so by default we'll get invalid state */ + QUEUESTATE__ACTIVE_COMPLETION_PENDING, + QUEUESTATE__ACTIVE +}; + +union ULARGE_INTEGER { + struct { + uint32_t low_part; + uint32_t high_part; + } u; + unsigned long long quad_part; +}; + + +#define KFD_CIK_VMID_START_OFFSET (8) +#define KFD_CIK_VMID_END_OFFSET (KFD_CIK_VMID_START_OFFSET + (8)) + + +void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev, + enum DBGDEV_TYPE type); + +#endif /* KFD_DBGDEV_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c new file mode 100644 index 000000000000..56d676396342 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c @@ -0,0 +1,168 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/device.h> + +#include "kfd_priv.h" +#include "cik_regs.h" +#include "kfd_pm4_headers.h" +#include "kfd_pm4_headers_diq.h" +#include "kfd_dbgmgr.h" +#include "kfd_dbgdev.h" + +static DEFINE_MUTEX(kfd_dbgmgr_mutex); + +struct mutex *kfd_get_dbgmgr_mutex(void) +{ + return &kfd_dbgmgr_mutex; +} + + +static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr) +{ + BUG_ON(!pmgr); + + kfree(pmgr->dbgdev); + + pmgr->dbgdev = NULL; + pmgr->pasid = 0; + pmgr->dev = NULL; +} + +void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr) +{ + if (pmgr != NULL) { + kfd_dbgmgr_uninitialize(pmgr); + kfree(pmgr); + } +} + +bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) +{ + enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ; + struct kfd_dbgmgr *new_buff; + + BUG_ON(pdev == NULL); + BUG_ON(!pdev->init_complete); + + new_buff = kfd_alloc_struct(new_buff); + if (!new_buff) { + pr_err("amdkfd: Failed to allocate dbgmgr instance\n"); + return false; + } + + new_buff->pasid = 0; + new_buff->dev = pdev; + new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev); + if (!new_buff->dbgdev) { + pr_err("amdkfd: Failed to allocate dbgdev instance\n"); + kfree(new_buff); + return false; + } + + /* get actual type of DBGDevice cpsch or not */ + if (sched_policy == KFD_SCHED_POLICY_NO_HWS) + type = DBGDEV_TYPE_NODIQ; + + kfd_dbgdev_init(new_buff->dbgdev, pdev, type); + *ppmgr = new_buff; + + return true; +} + +long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p) +{ + BUG_ON(!p || !pmgr || !pmgr->dbgdev); + + if (pmgr->pasid != 0) { + pr_debug("H/W debugger is already active using pasid %d\n", + pmgr->pasid); + return -EBUSY; + } + + /* remember pasid */ + pmgr->pasid = p->pasid; + + /* provide the pqm for diq generation */ + pmgr->dbgdev->pqm = &p->pqm; + + /* activate the actual registering */ + pmgr->dbgdev->dbgdev_register(pmgr->dbgdev); + + return 0; +} + +long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p) +{ + BUG_ON(!p || !pmgr || !pmgr->dbgdev); + + /* Is the requests coming from the already registered process? */ + if (pmgr->pasid != p->pasid) { + pr_debug("H/W debugger is not registered by calling pasid %d\n", + p->pasid); + return -EINVAL; + } + + pmgr->dbgdev->dbgdev_unregister(pmgr->dbgdev); + + pmgr->pasid = 0; + + return 0; +} + +long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, + struct dbg_wave_control_info *wac_info) +{ + BUG_ON(!pmgr || !pmgr->dbgdev || !wac_info); + + /* Is the requests coming from the already registered process? */ + if (pmgr->pasid != wac_info->process->pasid) { + pr_debug("H/W debugger support was not registered for requester pasid %d\n", + wac_info->process->pasid); + return -EINVAL; + } + + return (long) pmgr->dbgdev->dbgdev_wave_control(pmgr->dbgdev, wac_info); +} + +long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, + struct dbg_address_watch_info *adw_info) +{ + BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info); + + + /* Is the requests coming from the already registered process? */ + if (pmgr->pasid != adw_info->process->pasid) { + pr_debug("H/W debugger support was not registered for requester pasid %d\n", + adw_info->process->pasid); + return -EINVAL; + } + + return (long) pmgr->dbgdev->dbgdev_address_watch(pmgr->dbgdev, + adw_info); +} + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h new file mode 100644 index 000000000000..257a745ad0b5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h @@ -0,0 +1,294 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef KFD_DBGMGR_H_ +#define KFD_DBGMGR_H_ + +#include "kfd_priv.h" + +/* must align with hsakmttypes definition */ +#pragma pack(push, 4) + +enum HSA_DBG_WAVEOP { + HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */ + HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */ + HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */ + HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter + debug mode */ + HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take + a trap */ + HSA_DBG_NUM_WAVEOP = 5, + HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF +}; + +enum HSA_DBG_WAVEMODE { + /* send command to a single wave */ + HSA_DBG_WAVEMODE_SINGLE = 0, + /* + * Broadcast to all wavefronts of all processes is not + * supported for HSA user mode + */ + + /* send to waves within current process */ + HSA_DBG_WAVEMODE_BROADCAST_PROCESS = 2, + /* send to waves within current process on CU */ + HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3, + HSA_DBG_NUM_WAVEMODE = 3, + HSA_DBG_MAX_WAVEMODE = 0xFFFFFFFF +}; + +enum HSA_DBG_WAVEMSG_TYPE { + HSA_DBG_WAVEMSG_AUTO = 0, + HSA_DBG_WAVEMSG_USER = 1, + HSA_DBG_WAVEMSG_ERROR = 2, + HSA_DBG_NUM_WAVEMSG, + HSA_DBG_MAX_WAVEMSG = 0xFFFFFFFF +}; + +enum HSA_DBG_WATCH_MODE { + HSA_DBG_WATCH_READ = 0, /* Read operations only */ + HSA_DBG_WATCH_NONREAD = 1, /* Write or Atomic operations only */ + HSA_DBG_WATCH_ATOMIC = 2, /* Atomic Operations only */ + HSA_DBG_WATCH_ALL = 3, /* Read, Write or Atomic operations */ + HSA_DBG_WATCH_NUM, + HSA_DBG_WATCH_SIZE = 0xFFFFFFFF +}; + +/* This structure is hardware specific and may change in the future */ +struct HsaDbgWaveMsgAMDGen2 { + union { + struct ui32 { + uint32_t UserData:8; /* user data */ + uint32_t ShaderArray:1; /* Shader array */ + uint32_t Priv:1; /* Privileged */ + uint32_t Reserved0:4; /* This field is reserved, + should be 0 */ + uint32_t WaveId:4; /* wave id */ + uint32_t SIMD:2; /* SIMD id */ + uint32_t HSACU:4; /* Compute unit */ + uint32_t ShaderEngine:2;/* Shader engine */ + uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */ + uint32_t Reserved1:4; /* This field is reserved, + should be 0 */ + } ui32; + uint32_t Value; + }; + uint32_t Reserved2; +}; + +union HsaDbgWaveMessageAMD { + struct HsaDbgWaveMsgAMDGen2 WaveMsgInfoGen2; + /* for future HsaDbgWaveMsgAMDGen3; */ +}; + +struct HsaDbgWaveMessage { + void *MemoryVA; /* ptr to associated host-accessible data */ + union HsaDbgWaveMessageAMD DbgWaveMsg; +}; + +/* + * TODO: This definitions to be MOVED to kfd_event, once it is implemented. + * + * HSA sync primitive, Event and HW Exception notification API definitions. + * The API functions allow the runtime to define a so-called sync-primitive, + * a SW object combining a user-mode provided "syncvar" and a scheduler event + * that can be signaled through a defined GPU interrupt. A syncvar is + * a process virtual memory location of a certain size that can be accessed + * by CPU and GPU shader code within the process to set and query the content + * within that memory. The definition of the content is determined by the HSA + * runtime and potentially GPU shader code interfacing with the HSA runtime. + * The syncvar values may be commonly written through an PM4 WRITE_DATA packet + * in the user mode instruction stream. The OS scheduler event is typically + * associated and signaled by an interrupt issued by the GPU, but other HSA + * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced + * by the KFD by this mechanism, too. */ + +/* these are the new definitions for events */ +enum HSA_EVENTTYPE { + HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */ + HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */ + HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change + (start/stop) */ + HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */ + HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */ + HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */ + HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */ + HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state + (EOP pm4) */ + /* ... */ + HSA_EVENTTYPE_MAXID, + HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF +}; + +/* Sub-definitions for various event types: Syncvar */ +struct HsaSyncVar { + union SyncVar { + void *UserData; /* pointer to user mode data */ + uint64_t UserDataPtrValue; /* 64bit compatibility of value */ + } SyncVar; + uint64_t SyncVarSize; +}; + +/* Sub-definitions for various event types: NodeChange */ + +enum HSA_EVENTTYPE_NODECHANGE_FLAGS { + HSA_EVENTTYPE_NODECHANGE_ADD = 0, + HSA_EVENTTYPE_NODECHANGE_REMOVE = 1, + HSA_EVENTTYPE_NODECHANGE_SIZE = 0xFFFFFFFF +}; + +struct HsaNodeChange { + /* HSA node added/removed on the platform */ + enum HSA_EVENTTYPE_NODECHANGE_FLAGS Flags; +}; + +/* Sub-definitions for various event types: DeviceStateChange */ +enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS { + /* device started (and available) */ + HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, + /* device stopped (i.e. unavailable) */ + HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP = 1, + HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE = 0xFFFFFFFF +}; + +enum HSA_DEVICE { + HSA_DEVICE_CPU = 0, + HSA_DEVICE_GPU = 1, + MAX_HSA_DEVICE = 2 +}; + +struct HsaDeviceStateChange { + uint32_t NodeId; /* F-NUMA node that contains the device */ + enum HSA_DEVICE Device; /* device type: GPU or CPU */ + enum HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags; /* event flags */ +}; + +struct HsaEventData { + enum HSA_EVENTTYPE EventType; /* event type */ + union EventData { + /* + * return data associated with HSA_EVENTTYPE_SIGNAL + * and other events + */ + struct HsaSyncVar SyncVar; + + /* data associated with HSA_EVENTTYPE_NODE_CHANGE */ + struct HsaNodeChange NodeChangeState; + + /* data associated with HSA_EVENTTYPE_DEVICE_STATE_CHANGE */ + struct HsaDeviceStateChange DeviceState; + } EventData; + + /* the following data entries are internal to the KFD & thunk itself */ + + /* internal thunk store for Event data (OsEventHandle) */ + uint64_t HWData1; + /* internal thunk store for Event data (HWAddress) */ + uint64_t HWData2; + /* internal thunk store for Event data (HWData) */ + uint32_t HWData3; +}; + +struct HsaEventDescriptor { + /* event type to allocate */ + enum HSA_EVENTTYPE EventType; + /* H-NUMA node containing GPU device that is event source */ + uint32_t NodeId; + /* pointer to user mode syncvar data, syncvar->UserDataPtrValue + * may be NULL + */ + struct HsaSyncVar SyncVar; +}; + +struct HsaEvent { + uint32_t EventId; + struct HsaEventData EventData; +}; + +#pragma pack(pop) + +enum DBGDEV_TYPE { + DBGDEV_TYPE_ILLEGAL = 0, + DBGDEV_TYPE_NODIQ = 1, + DBGDEV_TYPE_DIQ = 2, + DBGDEV_TYPE_TEST = 3 +}; + +struct dbg_address_watch_info { + struct kfd_process *process; + enum HSA_DBG_WATCH_MODE *watch_mode; + uint64_t *watch_address; + uint64_t *watch_mask; + struct HsaEvent *watch_event; + uint32_t num_watch_points; +}; + +struct dbg_wave_control_info { + struct kfd_process *process; + uint32_t trapId; + enum HSA_DBG_WAVEOP operand; + enum HSA_DBG_WAVEMODE mode; + struct HsaDbgWaveMessage dbgWave_msg; +}; + +struct kfd_dbgdev { + + /* The device that owns this data. */ + struct kfd_dev *dev; + + /* kernel queue for DIQ */ + struct kernel_queue *kq; + + /* a pointer to the pqm of the calling process */ + struct process_queue_manager *pqm; + + /* type of debug device ( DIQ, non DIQ, etc. ) */ + enum DBGDEV_TYPE type; + + /* virtualized function pointers to device dbg */ + int (*dbgdev_register)(struct kfd_dbgdev *dbgdev); + int (*dbgdev_unregister)(struct kfd_dbgdev *dbgdev); + int (*dbgdev_address_watch)(struct kfd_dbgdev *dbgdev, + struct dbg_address_watch_info *adw_info); + int (*dbgdev_wave_control)(struct kfd_dbgdev *dbgdev, + struct dbg_wave_control_info *wac_info); + +}; + +struct kfd_dbgmgr { + unsigned int pasid; + struct kfd_dev *dev; + struct kfd_dbgdev *dbgdev; +}; + +/* prototypes for debug manager functions */ +struct mutex *kfd_get_dbgmgr_mutex(void); +void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr); +bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev); +long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p); +long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p); +long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr, + struct dbg_wave_control_info *wac_info); +long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr, + struct dbg_address_watch_info *adw_info); +#endif /* KFD_DBGMGR_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 52cab0f53ebc..1d1e2e952a79 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -33,8 +33,11 @@ static const struct kfd_device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, .max_pasid_bits = 16, + /* max num of queues for KV.TODO should be a dynamic value */ + .max_no_of_hqd = 24, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED }; @@ -294,6 +297,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto dqm_start_error; } + kfd->dbgmgr = NULL; + kfd->init_complete = true; dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor, kfd->pdev->device); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 4e215bd4d41f..547b0a589693 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -45,7 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock); -static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock); +static int destroy_queues_cpsch(struct device_queue_manager *dqm, + bool preempt_static_queues, bool lock); static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, @@ -775,7 +776,7 @@ static int stop_cpsch(struct device_queue_manager *dqm) BUG_ON(!dqm); - destroy_queues_cpsch(dqm, true); + destroy_queues_cpsch(dqm, true, true); list_for_each_entry(node, &dqm->queues, list) { pdd = qpd_to_pdd(node->qpd); @@ -829,7 +830,8 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, pr_debug("kfd: In %s\n", __func__); mutex_lock(&dqm->lock); - destroy_queues_cpsch(dqm, false); + /* here we actually preempt the DIQ */ + destroy_queues_cpsch(dqm, true, false); list_del(&kq->list); dqm->queue_count--; qpd->is_debug = false; @@ -913,7 +915,7 @@ out: return retval; } -static int amdkfd_fence_wait_timeout(unsigned int *fence_addr, +int amdkfd_fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, unsigned long timeout) { @@ -935,13 +937,16 @@ static int destroy_sdma_queues(struct device_queue_manager *dqm, unsigned int sdma_engine) { return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, - KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, + KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false, sdma_engine); } -static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) +static int destroy_queues_cpsch(struct device_queue_manager *dqm, + bool preempt_static_queues, bool lock) { int retval; + enum kfd_preempt_type_filter preempt_type; + struct kfd_process *p; BUG_ON(!dqm); @@ -960,8 +965,12 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) destroy_sdma_queues(dqm, 1); } + preempt_type = preempt_static_queues ? + KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES : + KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES; + retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, - KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0); + preempt_type, 0, false, 0); if (retval != 0) goto out; @@ -969,8 +978,13 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr, KFD_FENCE_COMPLETED); /* should be timed out */ - amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, + retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); + if (retval != 0) { + p = kfd_get_process(current); + p->reset_wavefronts = true; + goto out; + } pm_release_ib(&dqm->packets); dqm->active_runlist = false; @@ -989,7 +1003,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock) if (lock) mutex_lock(&dqm->lock); - retval = destroy_queues_cpsch(dqm, false); + retval = destroy_queues_cpsch(dqm, false, false); if (retval != 0) { pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption"); goto out; @@ -1024,13 +1038,27 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, { int retval; struct mqd_manager *mqd; + bool preempt_all_queues; BUG_ON(!dqm || !qpd || !q); + preempt_all_queues = false; + retval = 0; /* remove queue from list to prevent rescheduling after preemption */ mutex_lock(&dqm->lock); + + if (qpd->is_debug) { + /* + * error, currently we do not allow to destroy a queue + * of a currently debugged process + */ + retval = -EBUSY; + goto failed_try_destroy_debugged_queue; + + } + mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); if (!mqd) { @@ -1062,6 +1090,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, return 0; failed: +failed_try_destroy_debugged_queue: + mutex_unlock(&dqm->lock); return retval; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 57278e2d72e0..ec4036a09f3e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -88,9 +88,11 @@ struct device_queue_manager_ops { struct queue *q, struct qcm_process_device *qpd, int *allocate_vmid); + int (*destroy_queue)(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q); + int (*update_queue)(struct device_queue_manager *dqm, struct queue *q); @@ -100,8 +102,10 @@ struct device_queue_manager_ops { int (*register_process)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); + int (*unregister_process)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); + int (*initialize)(struct device_queue_manager *dqm); int (*start)(struct device_queue_manager *dqm); int (*stop)(struct device_queue_manager *dqm); @@ -109,9 +113,11 @@ struct device_queue_manager_ops { int (*create_kernel_queue)(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd); + void (*destroy_kernel_queue)(struct device_queue_manager *dqm, struct kernel_queue *kq, struct qcm_process_device *qpd); + bool (*set_cache_memory_policy)(struct device_queue_manager *dqm, struct qcm_process_device *qpd, enum cache_policy default_policy, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index e2533d875f43..99b6d28a11c3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -163,7 +163,7 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, num_queues = 0; list_for_each_entry(cur, &qpd->queues_list, list) num_queues++; - packet->bitfields10.num_queues = num_queues; + packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues; packet->sh_mem_config = qpd->sh_mem_config; packet->sh_mem_bases = qpd->sh_mem_bases; @@ -177,9 +177,10 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, } static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, - struct queue *q) + struct queue *q, bool is_static) { struct pm4_map_queues *packet; + bool use_static = is_static; BUG_ON(!pm || !buffer || !q); @@ -209,6 +210,7 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, case KFD_QUEUE_TYPE_SDMA: packet->bitfields2.engine_sel = engine_sel__mes_map_queues__sdma0; + use_static = false; /* no static queues under SDMA */ break; default: BUG(); @@ -218,6 +220,9 @@ static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset = q->properties.doorbell_off; + packet->mes_map_queues_ordinals[0].bitfields3.is_static = + (use_static == true) ? 1 : 0; + packet->mes_map_queues_ordinals[0].mqd_addr_lo = lower_32_bits(q->gart_mqd_addr); @@ -271,9 +276,11 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pm_release_ib(pm); return -ENOMEM; } + retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd); if (retval != 0) return retval; + proccesses_mapped++; inc_wptr(&rl_wptr, sizeof(struct pm4_map_process), alloc_size_bytes); @@ -281,23 +288,36 @@ static int pm_create_runlist_ib(struct packet_manager *pm, list_for_each_entry(kq, &qpd->priv_queue_list, list) { if (kq->queue->properties.is_active != true) continue; + + pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n", + kq->queue->queue, qpd->is_debug); + retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], - kq->queue); + kq->queue, qpd->is_debug); if (retval != 0) return retval; - inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues), - alloc_size_bytes); + + inc_wptr(&rl_wptr, + sizeof(struct pm4_map_queues), + alloc_size_bytes); } list_for_each_entry(q, &qpd->queues_list, list) { if (q->properties.is_active != true) continue; - retval = pm_create_map_queue(pm, - &rl_buffer[rl_wptr], q); + + pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n", + q->queue, qpd->is_debug); + + retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], + q, qpd->is_debug); + if (retval != 0) return retval; - inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues), - alloc_size_bytes); + + inc_wptr(&rl_wptr, + sizeof(struct pm4_map_queues), + alloc_size_bytes); } } @@ -488,7 +508,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, packet = (struct pm4_unmap_queues *)buffer; memset(buffer, 0, sizeof(struct pm4_unmap_queues)); - + pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n", + mode, reset, type); packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES, sizeof(struct pm4_unmap_queues)); switch (type) { @@ -529,6 +550,11 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, packet->bitfields2.queue_sel = queue_sel__mes_unmap_queues__perform_request_on_all_active_queues; break; + case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES: + /* in this case, we do not preempt static queues */ + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only; + break; default: BUG(); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h index 071ad5724bd2..5b393f3e34a9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h @@ -237,7 +237,8 @@ struct pm4_map_queues { struct { union { struct { - uint32_t reserved5:2; + uint32_t is_static:1; + uint32_t reserved5:1; uint32_t doorbell_offset:21; uint32_t reserved6:3; uint32_t queue:6; @@ -328,7 +329,8 @@ enum unmap_queues_action_enum { enum unmap_queues_queue_sel_enum { queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0, queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1, - queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2 + queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2, + queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3 }; enum unmap_queues_engine_sel_enum { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h new file mode 100644 index 000000000000..a0ff34878163 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h @@ -0,0 +1,290 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef KFD_PM4_HEADERS_DIQ_H_ +#define KFD_PM4_HEADERS_DIQ_H_ + +/*--------------------_INDIRECT_BUFFER-------------------- */ + +#ifndef _PM4__INDIRECT_BUFFER_DEFINED +#define _PM4__INDIRECT_BUFFER_DEFINED +enum _INDIRECT_BUFFER_cache_policy_enum { + cache_policy___indirect_buffer__lru = 0, + cache_policy___indirect_buffer__stream = 1, + cache_policy___indirect_buffer__bypass = 2 +}; + +enum { + IT_INDIRECT_BUFFER_PASID = 0x5C +}; + +struct pm4__indirect_buffer_pasid { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + unsigned int ordinal1; + }; + + union { + struct { + unsigned int reserved1:2; + unsigned int ib_base_lo:30; + } bitfields2; + unsigned int ordinal2; + }; + + union { + struct { + unsigned int ib_base_hi:16; + unsigned int reserved2:16; + } bitfields3; + unsigned int ordinal3; + }; + + union { + unsigned int control; + unsigned int ordinal4; + }; + + union { + struct { + unsigned int pasid:10; + unsigned int reserved4:22; + } bitfields5; + unsigned int ordinal5; + }; + +}; + +#endif + +/*--------------------_RELEASE_MEM-------------------- */ + +#ifndef _PM4__RELEASE_MEM_DEFINED +#define _PM4__RELEASE_MEM_DEFINED +enum _RELEASE_MEM_event_index_enum { + event_index___release_mem__end_of_pipe = 5, + event_index___release_mem__shader_done = 6 +}; + +enum _RELEASE_MEM_cache_policy_enum { + cache_policy___release_mem__lru = 0, + cache_policy___release_mem__stream = 1, + cache_policy___release_mem__bypass = 2 +}; + +enum _RELEASE_MEM_dst_sel_enum { + dst_sel___release_mem__memory_controller = 0, + dst_sel___release_mem__tc_l2 = 1, + dst_sel___release_mem__queue_write_pointer_register = 2, + dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3 +}; + +enum _RELEASE_MEM_int_sel_enum { + int_sel___release_mem__none = 0, + int_sel___release_mem__send_interrupt_only = 1, + int_sel___release_mem__send_interrupt_after_write_confirm = 2, + int_sel___release_mem__send_data_after_write_confirm = 3 +}; + +enum _RELEASE_MEM_data_sel_enum { + data_sel___release_mem__none = 0, + data_sel___release_mem__send_32_bit_low = 1, + data_sel___release_mem__send_64_bit_data = 2, + data_sel___release_mem__send_gpu_clock_counter = 3, + data_sel___release_mem__send_cp_perfcounter_hi_lo = 4, + data_sel___release_mem__store_gds_data_to_memory = 5 +}; + +struct pm4__release_mem { + union { + union PM4_MES_TYPE_3_HEADER header; /*header */ + unsigned int ordinal1; + }; + + union { + struct { + unsigned int event_type:6; + unsigned int reserved1:2; + enum _RELEASE_MEM_event_index_enum event_index:4; + unsigned int tcl1_vol_action_ena:1; + unsigned int tc_vol_action_ena:1; + unsigned int reserved2:1; + unsigned int tc_wb_action_ena:1; + unsigned int tcl1_action_ena:1; + unsigned int tc_action_ena:1; + unsigned int reserved3:6; + unsigned int atc:1; + enum _RELEASE_MEM_cache_policy_enum cache_policy:2; + unsigned int reserved4:5; + } bitfields2; + unsigned int ordinal2; + }; + + union { + struct { + unsigned int reserved5:16; + enum _RELEASE_MEM_dst_sel_enum dst_sel:2; + unsigned int reserved6:6; + enum _RELEASE_MEM_int_sel_enum int_sel:3; + unsigned int reserved7:2; + enum _RELEASE_MEM_data_sel_enum data_sel:3; + } bitfields3; + unsigned int ordinal3; + }; + + union { + struct { + unsigned int reserved8:2; + unsigned int address_lo_32b:30; + } bitfields4; + struct { + unsigned int reserved9:3; + unsigned int address_lo_64b:29; + } bitfields5; + unsigned int ordinal4; + }; + + unsigned int address_hi; + + unsigned int data_lo; + + unsigned int data_hi; + +}; +#endif + + +/*--------------------_SET_CONFIG_REG-------------------- */ + +#ifndef _PM4__SET_CONFIG_REG_DEFINED +#define _PM4__SET_CONFIG_REG_DEFINED + +struct pm4__set_config_reg { + union { + union PM4_MES_TYPE_3_HEADER header; /*header */ + unsigned int ordinal1; + }; + + union { + struct { + unsigned int reg_offset:16; + unsigned int reserved1:7; + unsigned int vmid_shift:5; + unsigned int insert_vmid:1; + unsigned int reserved2:3; + } bitfields2; + unsigned int ordinal2; + }; + + unsigned int reg_data[1]; /*1..N of these fields */ + +}; +#endif + +/*--------------------_WAIT_REG_MEM-------------------- */ + +#ifndef _PM4__WAIT_REG_MEM_DEFINED +#define _PM4__WAIT_REG_MEM_DEFINED +enum _WAIT_REG_MEM_function_enum { + function___wait_reg_mem__always_pass = 0, + function___wait_reg_mem__less_than_ref_value = 1, + function___wait_reg_mem__less_than_equal_to_the_ref_value = 2, + function___wait_reg_mem__equal_to_the_reference_value = 3, + function___wait_reg_mem__not_equal_reference_value = 4, + function___wait_reg_mem__greater_than_or_equal_reference_value = 5, + function___wait_reg_mem__greater_than_reference_value = 6, + function___wait_reg_mem__reserved = 7 +}; + +enum _WAIT_REG_MEM_mem_space_enum { + mem_space___wait_reg_mem__register_space = 0, + mem_space___wait_reg_mem__memory_space = 1 +}; + +enum _WAIT_REG_MEM_operation_enum { + operation___wait_reg_mem__wait_reg_mem = 0, + operation___wait_reg_mem__wr_wait_wr_reg = 1 +}; + +struct pm4__wait_reg_mem { + union { + union PM4_MES_TYPE_3_HEADER header; /*header */ + unsigned int ordinal1; + }; + + union { + struct { + enum _WAIT_REG_MEM_function_enum function:3; + unsigned int reserved1:1; + enum _WAIT_REG_MEM_mem_space_enum mem_space:2; + enum _WAIT_REG_MEM_operation_enum operation:2; + unsigned int reserved2:24; + } bitfields2; + unsigned int ordinal2; + }; + + union { + struct { + unsigned int reserved3:2; + unsigned int memory_poll_addr_lo:30; + } bitfields3; + struct { + unsigned int register_poll_addr:16; + unsigned int reserved4:16; + } bitfields4; + struct { + unsigned int register_write_addr:16; + unsigned int reserved5:16; + } bitfields5; + unsigned int ordinal3; + }; + + union { + struct { + unsigned int poll_address_hi:16; + unsigned int reserved6:16; + } bitfields6; + struct { + unsigned int register_write_addr:16; + unsigned int reserved7:16; + } bitfields7; + unsigned int ordinal4; + }; + + unsigned int reference; + + unsigned int mask; + + union { + struct { + unsigned int poll_interval:16; + unsigned int reserved8:16; + } bitfields8; + unsigned int ordinal7; + }; + +}; +#endif + + +#endif /* KFD_PM4_HEADERS_DIQ_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index b6f838f56589..cb79046e5c80 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -128,6 +128,7 @@ struct kfd_device_info { unsigned int asic_family; const struct kfd_event_interrupt_class *event_interrupt_class; unsigned int max_pasid_bits; + unsigned int max_no_of_hqd; size_t ih_ring_entry_size; uint8_t num_of_watch_points; uint16_t mqd_size_aligned; @@ -167,8 +168,8 @@ struct kfd_dev { const struct kfd2kgd_calls *kfd2kgd; struct mutex doorbell_mutex; - unsigned long doorbell_available_index[DIV_ROUND_UP( - KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)]; + DECLARE_BITMAP(doorbell_available_index, + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); void *gtt_mem; uint64_t gtt_start_gpu_addr; @@ -195,6 +196,9 @@ struct kfd_dev { * from the HW ring into a SW ring. */ bool interrupts_active; + + /* Debug manager */ + struct kfd_dbgmgr *dbgmgr; }; /* KGD2KFD callbacks */ @@ -231,6 +235,7 @@ struct device *kfd_chardev(void); enum kfd_preempt_type_filter { KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE, KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, + KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, KFD_PREEMPT_TYPE_FILTER_BY_PASID }; @@ -503,8 +508,6 @@ struct kfd_process { /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */ struct kfd_queue **queues; - unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)]; - /*Is the user space process 32 bit?*/ bool is_32bit_user_mode; @@ -516,6 +519,11 @@ struct kfd_process { event_pages */ u32 next_nonsignal_event_id; size_t signal_event_count; + /* + * This flag tells if we should reset all wavefronts on + * process termination + */ + bool reset_wavefronts; }; /** @@ -650,6 +658,12 @@ int pqm_create_queue(struct process_queue_manager *pqm, int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); +struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, + unsigned int qid); + +int amdkfd_fence_wait_timeout(unsigned int *fence_addr, + unsigned int fence_value, + unsigned long timeout); /* Packet Manager */ @@ -717,4 +731,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, uint64_t *event_page_offset, uint32_t *event_slot_index); int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); +int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); + #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index dc910af2bb3c..56b904f5bdb1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -31,6 +31,7 @@ struct mm_struct; #include "kfd_priv.h" +#include "kfd_dbgmgr.h" /* * Initial size for the array of queues. @@ -172,6 +173,9 @@ static void kfd_process_wq_release(struct work_struct *work) pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n", pdd->dev->id, p->pasid); + if (p->reset_wavefronts) + dbgdev_wave_reset_wavefronts(pdd->dev, p); + amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); list_del(&pdd->per_device_list); @@ -301,6 +305,8 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (kfd_init_apertures(process) != 0) goto err_init_apretures; + process->reset_wavefronts = false; + return process; err_init_apretures: @@ -399,7 +405,12 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) mutex_lock(&p->mutex); + if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid)) + kfd_dbgmgr_destroy(dev->dbgmgr); + pqm_uninit(&p->pqm); + if (p->reset_wavefronts) + dbgdev_wave_reset_wavefronts(dev, p); pdd = kfd_get_process_device_data(dev, p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 530b82c4e78b..7b69070f7ecc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -158,6 +158,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct queue *q; struct process_queue_node *pqn; struct kernel_queue *kq; + int num_queues = 0; + struct queue *cur; BUG_ON(!pqm || !dev || !properties || !qid); @@ -172,6 +174,20 @@ int pqm_create_queue(struct process_queue_manager *pqm, return -1; } + /* + * for debug process, verify that it is within the static queues limit + * currently limit is set to half of the total avail HQD slots + * If we are just about to create DIQ, the is_debug flag is not set yet + * Hence we also check the type as well + */ + if ((pdd->qpd.is_debug) || + (type == KFD_QUEUE_TYPE_DIQ)) { + list_for_each_entry(cur, &pdd->qpd.queues_list, list) + num_queues++; + if (num_queues >= dev->device_info->max_no_of_hqd/2) + return (-ENOSPC); + } + retval = find_available_queue_slot(pqm, qid); if (retval != 0) return retval; @@ -341,7 +357,7 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, return 0; } -static __attribute__((unused)) struct kernel_queue *pqm_get_kernel_queue( +struct kernel_queue *pqm_get_kernel_queue( struct process_queue_manager *pqm, unsigned int qid) { diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 4ea21ae88b07..9080daa116b6 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -163,6 +163,27 @@ struct kfd2kgd_calls { int (*hqd_sdma_destroy)(struct kgd_dev *kgd, void *mqd, unsigned int timeout); + int (*address_watch_disable)(struct kgd_dev *kgd); + int (*address_watch_execute)(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); + int (*wave_control_execute)(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); + uint32_t (*address_watch_get_offset)(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + bool (*get_atc_vmid_pasid_mapping_valid)( + struct kgd_dev *kgd, + uint8_t vmid); + uint16_t (*get_atc_vmid_pasid_mapping_pasid)( + struct kgd_dev *kgd, + uint8_t vmid); + void (*write_vmid_invalidate_request)(struct kgd_dev *kgd, + uint8_t vmid); + uint16_t (*get_fw_version)(struct kgd_dev *kgd, enum kgd_engine_type type); }; diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 1134526286c8..3427b115e2bb 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -825,7 +825,7 @@ static u64 drm_mm_dump_hole(struct seq_file *m, struct drm_mm_node *entry) hole_start = drm_mm_hole_node_start(entry); hole_end = drm_mm_hole_node_end(entry); hole_size = hole_end - hole_start; - seq_printf(m, "%#llx-%#llx: %llu: free\n", hole_start, + seq_printf(m, "%#018llx-%#018llx: %llu: free\n", hole_start, hole_end, hole_size); return hole_size; } @@ -846,7 +846,7 @@ int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm) total_free += drm_mm_dump_hole(m, &mm->head_node); drm_mm_for_each_node(entry, mm) { - seq_printf(m, "%#016llx-%#016llx: %llu: used\n", entry->start, + seq_printf(m, "%#018llx-%#018llx: %llu: used\n", entry->start, entry->start + entry->size, entry->size); total_used += entry->size; total_free += drm_mm_dump_hole(m, entry); diff --git a/drivers/gpu/drm/i2c/adv7511.c b/drivers/gpu/drm/i2c/adv7511.c index b728523e194f..2aaa3c88999e 100644 --- a/drivers/gpu/drm/i2c/adv7511.c +++ b/drivers/gpu/drm/i2c/adv7511.c @@ -438,7 +438,7 @@ static int adv7511_irq_process(struct adv7511 *adv7511) regmap_write(adv7511->regmap, ADV7511_REG_INT(0), irq0); regmap_write(adv7511->regmap, ADV7511_REG_INT(1), irq1); - if (irq0 & ADV7511_INT0_HDP) + if (irq0 & ADV7511_INT0_HDP && adv7511->encoder) drm_helper_hpd_irq_event(adv7511->encoder->dev); if (irq0 & ADV7511_INT0_EDID_READY || irq1 & ADV7511_INT1_DDC_ERROR) { diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c index 011e1cac3e4c..fe1599d75f14 100644 --- a/drivers/gpu/drm/i2c/tda998x_drv.c +++ b/drivers/gpu/drm/i2c/tda998x_drv.c @@ -644,7 +644,8 @@ tda998x_write_avi(struct tda998x_priv *priv, struct drm_display_mode *mode) len = hdmi_avi_infoframe_pack(&frame, buf, sizeof(buf)); if (len < 0) { - dev_err(&priv->hdmi->dev, "hdmi_avi_infoframe_pack() failed: %d\n", len); + dev_err(&priv->hdmi->dev, + "hdmi_avi_infoframe_pack() failed: %zd\n", len); return; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index adbbddab42c6..88cc793c46d3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -120,10 +120,13 @@ static inline const char *get_global_flag(struct drm_i915_gem_object *obj) static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct intel_engine_cs *ring; struct i915_vma *vma; int pin_count = 0; + int i; - seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x %x %x %x%s%s%s", + seq_printf(m, "%pK: %s%s%s%s %8zdKiB %02x %02x [ ", &obj->base, obj->active ? "*" : " ", get_pin_flag(obj), @@ -131,8 +134,11 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) get_global_flag(obj), obj->base.size / 1024, obj->base.read_domains, - obj->base.write_domain, - i915_gem_request_get_seqno(obj->last_read_req), + obj->base.write_domain); + for_each_ring(ring, dev_priv, i) + seq_printf(m, "%x ", + i915_gem_request_get_seqno(obj->last_read_req[i])); + seq_printf(m, "] %x %x%s%s%s", i915_gem_request_get_seqno(obj->last_write_req), i915_gem_request_get_seqno(obj->last_fenced_req), i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), @@ -169,9 +175,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) *t = '\0'; seq_printf(m, " (%s mappable)", s); } - if (obj->last_read_req != NULL) + if (obj->last_write_req != NULL) seq_printf(m, " (%s)", - i915_gem_request_get_ring(obj->last_read_req)->name); + i915_gem_request_get_ring(obj->last_write_req)->name); if (obj->frontbuffer_bits) seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits); } @@ -665,7 +671,7 @@ static int i915_gem_request_info(struct seq_file *m, void *data) struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_engine_cs *ring; - struct drm_i915_gem_request *rq; + struct drm_i915_gem_request *req; int ret, any, i; ret = mutex_lock_interruptible(&dev->struct_mutex); @@ -677,22 +683,22 @@ static int i915_gem_request_info(struct seq_file *m, void *data) int count; count = 0; - list_for_each_entry(rq, &ring->request_list, list) + list_for_each_entry(req, &ring->request_list, list) count++; if (count == 0) continue; seq_printf(m, "%s requests: %d\n", ring->name, count); - list_for_each_entry(rq, &ring->request_list, list) { + list_for_each_entry(req, &ring->request_list, list) { struct task_struct *task; rcu_read_lock(); task = NULL; - if (rq->pid) - task = pid_task(rq->pid, PIDTYPE_PID); + if (req->pid) + task = pid_task(req->pid, PIDTYPE_PID); seq_printf(m, " %x @ %d: %s [%d]\n", - rq->seqno, - (int) (jiffies - rq->emitted_jiffies), + req->seqno, + (int) (jiffies - req->emitted_jiffies), task ? task->comm : "<unknown>", task ? task->pid : -1); rcu_read_unlock(); @@ -2276,22 +2282,35 @@ static int i915_ppgtt_info(struct seq_file *m, void *data) return 0; } +static int count_irq_waiters(struct drm_i915_private *i915) +{ + struct intel_engine_cs *ring; + int count = 0; + int i; + + for_each_ring(ring, i915, i) + count += ring->irq_refcount; + + return count; +} + static int i915_rps_boost_info(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct drm_file *file; - int ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); - if (ret) - goto unlock; + seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled); + seq_printf(m, "GPU busy? %d\n", dev_priv->mm.busy); + seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); + seq_printf(m, "Frequency requested %d; min hard:%d, soft:%d; max soft:%d, hard:%d\n", + intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), + intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), + intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit), + intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit), + intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + spin_lock(&dev_priv->rps.client_lock); list_for_each_entry_reverse(file, &dev->filelist, lhead) { struct drm_i915_file_private *file_priv = file->driver_priv; struct task_struct *task; @@ -2301,17 +2320,20 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "%s [%d]: %d boosts%s\n", task ? task->comm : "<unknown>", task ? task->pid : -1, - file_priv->rps_boosts, - list_empty(&file_priv->rps_boost) ? "" : ", active"); + file_priv->rps.boosts, + list_empty(&file_priv->rps.link) ? "" : ", active"); rcu_read_unlock(); } + seq_printf(m, "Semaphore boosts: %d%s\n", + dev_priv->rps.semaphores.boosts, + list_empty(&dev_priv->rps.semaphores.link) ? "" : ", active"); + seq_printf(m, "MMIO flip boosts: %d%s\n", + dev_priv->rps.mmioflips.boosts, + list_empty(&dev_priv->rps.mmioflips.link) ? "" : ", active"); seq_printf(m, "Kernel boosts: %d\n", dev_priv->rps.boosts); + spin_unlock(&dev_priv->rps.client_lock); - mutex_unlock(&dev_priv->rps.hw_lock); -unlock: - mutex_unlock(&dev->struct_mutex); - - return ret; + return 0; } static int i915_llc(struct seq_file *m, void *data) @@ -5154,6 +5176,9 @@ static int i915_dpcd_show(struct seq_file *m, void *data) ssize_t err; int i; + if (connector->status != connector_status_connected) + return -ENODEV; + for (i = 0; i < ARRAY_SIZE(i915_dpcd_debug); i++) { const struct dpcd_block *b = &i915_dpcd_debug[i]; size_t size = b->end ? b->end - b->offset + 1 : (b->size ?: 1); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index a238889630d8..d2df321ba634 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -814,7 +814,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) spin_lock_init(&dev_priv->uncore.lock); spin_lock_init(&dev_priv->mm.object_stat_lock); spin_lock_init(&dev_priv->mmio_flip_lock); - mutex_init(&dev_priv->dpio_lock); + mutex_init(&dev_priv->sb_lock); mutex_init(&dev_priv->modeset_restore_lock); mutex_init(&dev_priv->csr_lock); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 51149fb75e96..884b4f9b81c4 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -595,6 +595,7 @@ static int intel_suspend_complete(struct drm_i915_private *dev_priv); static int vlv_resume_prepare(struct drm_i915_private *dev_priv, bool rpm_resume); static int skl_resume_prepare(struct drm_i915_private *dev_priv); +static int bxt_resume_prepare(struct drm_i915_private *dev_priv); static int i915_drm_suspend(struct drm_device *dev) @@ -811,14 +812,17 @@ static int i915_drm_resume_early(struct drm_device *dev) if (IS_VALLEYVIEW(dev_priv)) ret = vlv_resume_prepare(dev_priv, false); if (ret) - DRM_ERROR("Resume prepare failed: %d,Continuing resume\n", ret); + DRM_ERROR("Resume prepare failed: %d, continuing anyway\n", + ret); intel_uncore_early_sanitize(dev, true); - if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - hsw_disable_pc8(dev_priv); + if (IS_BROXTON(dev)) + ret = bxt_resume_prepare(dev_priv); else if (IS_SKYLAKE(dev_priv)) ret = skl_resume_prepare(dev_priv); + else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + hsw_disable_pc8(dev_priv); intel_uncore_sanitize(dev); intel_power_domains_init_hw(dev_priv); @@ -989,7 +993,7 @@ static int i915_pm_suspend_late(struct device *dev) struct drm_device *drm_dev = dev_to_i915(dev)->dev; /* - * We have a suspedn ordering issue with the snd-hda driver also + * We have a suspend ordering issue with the snd-hda driver also * requiring our device to be power up. Due to the lack of a * parent/child relationship we currently solve this with an late * suspend hook. @@ -1043,6 +1047,8 @@ static int skl_suspend_complete(struct drm_i915_private *dev_priv) */ intel_csr_load_status_set(dev_priv, FW_UNINITIALIZED); + skl_uninit_cdclk(dev_priv); + return 0; } @@ -1089,6 +1095,7 @@ static int skl_resume_prepare(struct drm_i915_private *dev_priv) { struct drm_device *dev = dev_priv->dev; + skl_init_cdclk(dev_priv); intel_csr_load_program(dev); return 0; @@ -1586,16 +1593,15 @@ static int intel_runtime_resume(struct device *device) */ static int intel_suspend_complete(struct drm_i915_private *dev_priv) { - struct drm_device *dev = dev_priv->dev; int ret; - if (IS_BROXTON(dev)) + if (IS_BROXTON(dev_priv)) ret = bxt_suspend_complete(dev_priv); - else if (IS_SKYLAKE(dev)) + else if (IS_SKYLAKE(dev_priv)) ret = skl_suspend_complete(dev_priv); - else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) + else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) ret = hsw_suspend_complete(dev_priv); - else if (IS_VALLEYVIEW(dev)) + else if (IS_VALLEYVIEW(dev_priv)) ret = vlv_suspend_complete(dev_priv); else ret = 0; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index acfa4fc93803..72f5a3f9dbf2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -56,7 +56,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20150508" +#define DRIVER_DATE "20150522" #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ @@ -272,6 +272,30 @@ struct drm_i915_private; struct i915_mm_struct; struct i915_mmu_object; +struct drm_i915_file_private { + struct drm_i915_private *dev_priv; + struct drm_file *file; + + struct { + spinlock_t lock; + struct list_head request_list; +/* 20ms is a fairly arbitrary limit (greater than the average frame time) + * chosen to prevent the CPU getting more than a frame ahead of the GPU + * (when using lax throttling for the frontbuffer). We also use it to + * offer free GPU waitboosts for severely congested workloads. + */ +#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20) + } mm; + struct idr context_idr; + + struct intel_rps_client { + struct list_head link; + unsigned boosts; + } rps; + + struct intel_engine_cs *bsd_ring; +}; + enum intel_dpll_id { DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */ /* real shared dpll ids must be >= 0 */ @@ -309,7 +333,7 @@ struct intel_dpll_hw_state { uint32_t cfgcr1, cfgcr2; /* bxt */ - uint32_t ebb0, pll0, pll1, pll2, pll3, pll6, pll8, pcsdw12; + uint32_t ebb0, pll0, pll1, pll2, pll3, pll6, pll8, pll10, pcsdw12; }; struct intel_shared_dpll_config { @@ -508,7 +532,7 @@ struct drm_i915_error_state { struct drm_i915_error_buffer { u32 size; u32 name; - u32 rseqno, wseqno; + u32 rseqno[I915_NUM_RINGS], wseqno; u32 gtt_offset; u32 read_domains; u32 write_domain; @@ -1065,17 +1089,24 @@ struct intel_gen6_power_mgmt { int last_adj; enum { LOW_POWER, BETWEEN, HIGH_POWER } power; + spinlock_t client_lock; + struct list_head clients; + bool client_boost; + bool enabled; struct delayed_work delayed_resume_work; - struct list_head clients; unsigned boosts; + struct intel_rps_client semaphores, mmioflips; + /* manual wa residency calculations */ struct intel_rps_ei up_ei, down_ei; /* * Protects RPS/RC6 register access and PCU communication. - * Must be taken after struct_mutex if nested. + * Must be taken after struct_mutex if nested. Note that + * this lock may be held for long periods of time when + * talking to hw - so only take it when talking to hw! */ struct mutex hw_lock; }; @@ -1468,7 +1499,8 @@ static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1, struct skl_ddb_allocation { struct skl_ddb_entry pipe[I915_MAX_PIPES]; - struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; + struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; /* packed/uv */ + struct skl_ddb_entry y_plane[I915_MAX_PIPES][I915_MAX_PLANES]; /* y-plane */ struct skl_ddb_entry cursor[I915_MAX_PIPES]; }; @@ -1634,8 +1666,8 @@ struct drm_i915_private { /* To control wakeup latency, e.g. for irq-driven dp aux transfers. */ struct pm_qos_request pm_qos; - /* DPIO indirect register protection */ - struct mutex dpio_lock; + /* Sideband mailbox protection */ + struct mutex sb_lock; /** Cached value of IMR to avoid reads in updating the bitfield */ union { @@ -1684,6 +1716,7 @@ struct drm_i915_private { int num_fence_regs; /* 8 on pre-965, 16 otherwise */ unsigned int fsb_freq, mem_freq, is_ddr3; + unsigned int skl_boot_cdclk; unsigned int cdclk_freq; unsigned int hpll_freq; @@ -1938,7 +1971,7 @@ struct drm_i915_gem_object { struct drm_mm_node *stolen; struct list_head global_list; - struct list_head ring_list; + struct list_head ring_list[I915_NUM_RINGS]; /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; @@ -1949,7 +1982,7 @@ struct drm_i915_gem_object { * rendering and so a non-zero seqno), and is not set if it i s on * inactive (ready to be unbound) list. */ - unsigned int active:1; + unsigned int active:I915_NUM_RINGS; /** * This is set if the object has been written to since last bound @@ -2020,8 +2053,17 @@ struct drm_i915_gem_object { void *dma_buf_vmapping; int vmapping_count; - /** Breadcrumb of last rendering to the buffer. */ - struct drm_i915_gem_request *last_read_req; + /** Breadcrumb of last rendering to the buffer. + * There can only be one writer, but we allow for multiple readers. + * If there is a writer that necessarily implies that all other + * read requests are complete - but we may only be lazily clearing + * the read requests. A read request is naturally the most recent + * request on a ring, so we may have two different write and read + * requests on one ring where the write request is older than the + * read request. This allows for the CPU to read from an active + * buffer by only waiting for the write to complete. + * */ + struct drm_i915_gem_request *last_read_req[I915_NUM_RINGS]; struct drm_i915_gem_request *last_write_req; /** Breadcrumb of last fenced GPU access to the buffer. */ struct drm_i915_gem_request *last_fenced_req; @@ -2160,10 +2202,12 @@ i915_gem_request_get_ring(struct drm_i915_gem_request *req) return req ? req->ring : NULL; } -static inline void +static inline struct drm_i915_gem_request * i915_gem_request_reference(struct drm_i915_gem_request *req) { - kref_get(&req->ref); + if (req) + kref_get(&req->ref); + return req; } static inline void @@ -2204,22 +2248,6 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, * a later patch when the call to i915_seqno_passed() is obsoleted... */ -struct drm_i915_file_private { - struct drm_i915_private *dev_priv; - struct drm_file *file; - - struct { - spinlock_t lock; - struct list_head request_list; - } mm; - struct idr context_idr; - - struct list_head rps_boost; - struct intel_engine_cs *bsd_ring; - - unsigned rps_boosts; -}; - /* * A command that requires special handling by the command parser. */ @@ -2375,6 +2403,7 @@ struct drm_i915_cmd_table { #define SKL_REVID_C0 (0x2) #define SKL_REVID_D0 (0x3) #define SKL_REVID_E0 (0x4) +#define SKL_REVID_F0 (0x5) #define BXT_REVID_A0 (0x0) #define BXT_REVID_B0 (0x3) @@ -2445,6 +2474,9 @@ struct drm_i915_cmd_table { #define HAS_IPS(dev) (IS_HSW_ULT(dev) || IS_BROADWELL(dev)) +#define HAS_DP_MST(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev) || \ + INTEL_INFO(dev)->gen >= 9) + #define HAS_DDI(dev) (INTEL_INFO(dev)->has_ddi) #define HAS_FPGA_DBG_UNCLAIMED(dev) (INTEL_INFO(dev)->has_fpga_dbg) #define HAS_PSR(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev) || \ @@ -2820,7 +2852,6 @@ static inline bool i915_stop_ring_allow_warn(struct drm_i915_private *dev_priv) void i915_gem_reset(struct drm_device *dev); bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); -int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj); int __must_check i915_gem_init(struct drm_device *dev); int i915_gem_init_rings(struct drm_device *dev); int __must_check i915_gem_init_hw(struct drm_device *dev); @@ -2838,10 +2869,13 @@ int __i915_wait_request(struct drm_i915_gem_request *req, unsigned reset_counter, bool interruptible, s64 *timeout, - struct drm_i915_file_private *file_priv); + struct intel_rps_client *rps); int __must_check i915_wait_request(struct drm_i915_gem_request *req); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); int __must_check +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, + bool readonly); +int __must_check i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write); int __must_check diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f128ed8d6f65..be35f0486202 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -38,14 +38,14 @@ #include <linux/pci.h> #include <linux/dma-buf.h> +#define RQ_BUG_ON(expr) + static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static __must_check int -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, - bool readonly); static void -i915_gem_object_retire(struct drm_i915_gem_object *obj); - +i915_gem_object_retire__write(struct drm_i915_gem_object *obj); +static void +i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); static void i915_gem_write_fence(struct drm_device *dev, int reg, struct drm_i915_gem_object *obj); static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, @@ -518,8 +518,6 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, ret = i915_gem_object_wait_rendering(obj, true); if (ret) return ret; - - i915_gem_object_retire(obj); } ret = i915_gem_object_get_pages(obj); @@ -939,8 +937,6 @@ i915_gem_shmem_pwrite(struct drm_device *dev, ret = i915_gem_object_wait_rendering(obj, false); if (ret) return ret; - - i915_gem_object_retire(obj); } /* Same trick applies to invalidate partially written cachelines read * before writing. */ @@ -1181,16 +1177,16 @@ static bool missed_irq(struct drm_i915_private *dev_priv, return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings); } -static int __i915_spin_request(struct drm_i915_gem_request *rq) +static int __i915_spin_request(struct drm_i915_gem_request *req) { unsigned long timeout; - if (i915_gem_request_get_ring(rq)->irq_refcount) + if (i915_gem_request_get_ring(req)->irq_refcount) return -EBUSY; timeout = jiffies + 1; while (!need_resched()) { - if (i915_gem_request_completed(rq, true)) + if (i915_gem_request_completed(req, true)) return 0; if (time_after_eq(jiffies, timeout)) @@ -1198,7 +1194,7 @@ static int __i915_spin_request(struct drm_i915_gem_request *rq) cpu_relax_lowlatency(); } - if (i915_gem_request_completed(rq, false)) + if (i915_gem_request_completed(req, false)) return 0; return -EAGAIN; @@ -1225,7 +1221,7 @@ int __i915_wait_request(struct drm_i915_gem_request *req, unsigned reset_counter, bool interruptible, s64 *timeout, - struct drm_i915_file_private *file_priv) + struct intel_rps_client *rps) { struct intel_engine_cs *ring = i915_gem_request_get_ring(req); struct drm_device *dev = ring->dev; @@ -1239,14 +1235,17 @@ int __i915_wait_request(struct drm_i915_gem_request *req, WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled"); + if (list_empty(&req->list)) + return 0; + if (i915_gem_request_completed(req, true)) return 0; timeout_expire = timeout ? jiffies + nsecs_to_jiffies_timeout((u64)*timeout) : 0; - if (INTEL_INFO(dev)->gen >= 6) - gen6_rps_boost(dev_priv, file_priv); + if (INTEL_INFO(dev_priv)->gen >= 6) + gen6_rps_boost(dev_priv, rps, req->emitted_jiffies); /* Record current time in case interrupted by signal, or wedged */ trace_i915_gem_request_wait_begin(req); @@ -1338,6 +1337,63 @@ out: return ret; } +static inline void +i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) +{ + struct drm_i915_file_private *file_priv = request->file_priv; + + if (!file_priv) + return; + + spin_lock(&file_priv->mm.lock); + list_del(&request->client_list); + request->file_priv = NULL; + spin_unlock(&file_priv->mm.lock); +} + +static void i915_gem_request_retire(struct drm_i915_gem_request *request) +{ + trace_i915_gem_request_retire(request); + + /* We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + * + * Note this requires that we are always called in request + * completion order. + */ + request->ringbuf->last_retired_head = request->postfix; + + list_del_init(&request->list); + i915_gem_request_remove_from_client(request); + + put_pid(request->pid); + + i915_gem_request_unreference(request); +} + +static void +__i915_gem_request_retire__upto(struct drm_i915_gem_request *req) +{ + struct intel_engine_cs *engine = req->ring; + struct drm_i915_gem_request *tmp; + + lockdep_assert_held(&engine->dev->struct_mutex); + + if (list_empty(&req->list)) + return; + + do { + tmp = list_first_entry(&engine->request_list, + typeof(*tmp), list); + + i915_gem_request_retire(tmp); + } while (tmp != req); + + WARN_ON(i915_verify_lists(engine->dev)); +} + /** * Waits for a request to be signaled, and cleans up the * request and object lists appropriately for that event. @@ -1348,7 +1404,6 @@ i915_wait_request(struct drm_i915_gem_request *req) struct drm_device *dev; struct drm_i915_private *dev_priv; bool interruptible; - unsigned reset_counter; int ret; BUG_ON(req == NULL); @@ -1367,29 +1422,13 @@ i915_wait_request(struct drm_i915_gem_request *req) if (ret) return ret; - reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); - i915_gem_request_reference(req); - ret = __i915_wait_request(req, reset_counter, + ret = __i915_wait_request(req, + atomic_read(&dev_priv->gpu_error.reset_counter), interruptible, NULL, NULL); - i915_gem_request_unreference(req); - return ret; -} - -static int -i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj) -{ - if (!obj->active) - return 0; - - /* Manually manage the write flush as we may have not yet - * retired the buffer. - * - * Note that the last_write_req is always the earlier of - * the two (read/write) requests, so if we haved successfully waited, - * we know we have passed the last write. - */ - i915_gem_request_assign(&obj->last_write_req, NULL); + if (ret) + return ret; + __i915_gem_request_retire__upto(req); return 0; } @@ -1397,22 +1436,56 @@ i915_gem_object_wait_rendering__tail(struct drm_i915_gem_object *obj) * Ensures that all rendering to the object has completed and the object is * safe to unbind from the GTT or access from the CPU. */ -static __must_check int +int i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, bool readonly) { - struct drm_i915_gem_request *req; - int ret; + int ret, i; - req = readonly ? obj->last_write_req : obj->last_read_req; - if (!req) + if (!obj->active) return 0; - ret = i915_wait_request(req); - if (ret) - return ret; + if (readonly) { + if (obj->last_write_req != NULL) { + ret = i915_wait_request(obj->last_write_req); + if (ret) + return ret; - return i915_gem_object_wait_rendering__tail(obj); + i = obj->last_write_req->ring->id; + if (obj->last_read_req[i] == obj->last_write_req) + i915_gem_object_retire__read(obj, i); + else + i915_gem_object_retire__write(obj); + } + } else { + for (i = 0; i < I915_NUM_RINGS; i++) { + if (obj->last_read_req[i] == NULL) + continue; + + ret = i915_wait_request(obj->last_read_req[i]); + if (ret) + return ret; + + i915_gem_object_retire__read(obj, i); + } + RQ_BUG_ON(obj->active); + } + + return 0; +} + +static void +i915_gem_object_retire_request(struct drm_i915_gem_object *obj, + struct drm_i915_gem_request *req) +{ + int ring = req->ring->id; + + if (obj->last_read_req[ring] == req) + i915_gem_object_retire__read(obj, ring); + else if (obj->last_write_req == req) + i915_gem_object_retire__write(obj); + + __i915_gem_request_retire__upto(req); } /* A nonblocking variant of the above wait. This is a highly dangerous routine @@ -1420,40 +1493,75 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, */ static __must_check int i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, - struct drm_i915_file_private *file_priv, + struct intel_rps_client *rps, bool readonly) { - struct drm_i915_gem_request *req; struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_gem_request *requests[I915_NUM_RINGS]; unsigned reset_counter; - int ret; + int ret, i, n = 0; BUG_ON(!mutex_is_locked(&dev->struct_mutex)); BUG_ON(!dev_priv->mm.interruptible); - req = readonly ? obj->last_write_req : obj->last_read_req; - if (!req) + if (!obj->active) return 0; ret = i915_gem_check_wedge(&dev_priv->gpu_error, true); if (ret) return ret; - ret = i915_gem_check_olr(req); - if (ret) - return ret; - reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); - i915_gem_request_reference(req); + + if (readonly) { + struct drm_i915_gem_request *req; + + req = obj->last_write_req; + if (req == NULL) + return 0; + + ret = i915_gem_check_olr(req); + if (ret) + goto err; + + requests[n++] = i915_gem_request_reference(req); + } else { + for (i = 0; i < I915_NUM_RINGS; i++) { + struct drm_i915_gem_request *req; + + req = obj->last_read_req[i]; + if (req == NULL) + continue; + + ret = i915_gem_check_olr(req); + if (ret) + goto err; + + requests[n++] = i915_gem_request_reference(req); + } + } + mutex_unlock(&dev->struct_mutex); - ret = __i915_wait_request(req, reset_counter, true, NULL, file_priv); + for (i = 0; ret == 0 && i < n; i++) + ret = __i915_wait_request(requests[i], reset_counter, true, + NULL, rps); mutex_lock(&dev->struct_mutex); - i915_gem_request_unreference(req); - if (ret) - return ret; - return i915_gem_object_wait_rendering__tail(obj); +err: + for (i = 0; i < n; i++) { + if (ret == 0) + i915_gem_object_retire_request(obj, requests[i]); + i915_gem_request_unreference(requests[i]); + } + + return ret; +} + +static struct intel_rps_client *to_rps_client(struct drm_file *file) +{ + struct drm_i915_file_private *fpriv = file->driver_priv; + return &fpriv->rps; } /** @@ -1498,7 +1606,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, * to catch cases where we are gazumped. */ ret = i915_gem_object_wait_rendering__nonblocking(obj, - file->driver_priv, + to_rps_client(file), !write_domain); if (ret) goto unref; @@ -1919,7 +2027,6 @@ i915_gem_mmap_gtt(struct drm_file *file, uint32_t handle, uint64_t *offset) { - struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; int ret; @@ -2235,78 +2342,58 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) return 0; } -static void -i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, - struct intel_engine_cs *ring) +void i915_vma_move_to_active(struct i915_vma *vma, + struct intel_engine_cs *ring) { - struct drm_i915_gem_request *req; - struct intel_engine_cs *old_ring; - - BUG_ON(ring == NULL); - - req = intel_ring_get_request(ring); - old_ring = i915_gem_request_get_ring(obj->last_read_req); - - if (old_ring != ring && obj->last_write_req) { - /* Keep the request relative to the current ring */ - i915_gem_request_assign(&obj->last_write_req, req); - } + struct drm_i915_gem_object *obj = vma->obj; /* Add a reference if we're newly entering the active list. */ - if (!obj->active) { + if (obj->active == 0) drm_gem_object_reference(&obj->base); - obj->active = 1; - } + obj->active |= intel_ring_flag(ring); - list_move_tail(&obj->ring_list, &ring->active_list); + list_move_tail(&obj->ring_list[ring->id], &ring->active_list); + i915_gem_request_assign(&obj->last_read_req[ring->id], + intel_ring_get_request(ring)); - i915_gem_request_assign(&obj->last_read_req, req); + list_move_tail(&vma->mm_list, &vma->vm->active_list); } -void i915_vma_move_to_active(struct i915_vma *vma, - struct intel_engine_cs *ring) +static void +i915_gem_object_retire__write(struct drm_i915_gem_object *obj) { - list_move_tail(&vma->mm_list, &vma->vm->active_list); - return i915_gem_object_move_to_active(vma->obj, ring); + RQ_BUG_ON(obj->last_write_req == NULL); + RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring))); + + i915_gem_request_assign(&obj->last_write_req, NULL); + intel_fb_obj_flush(obj, true); } static void -i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) +i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) { struct i915_vma *vma; - BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); - BUG_ON(!obj->active); + RQ_BUG_ON(obj->last_read_req[ring] == NULL); + RQ_BUG_ON(!(obj->active & (1 << ring))); + + list_del_init(&obj->ring_list[ring]); + i915_gem_request_assign(&obj->last_read_req[ring], NULL); + + if (obj->last_write_req && obj->last_write_req->ring->id == ring) + i915_gem_object_retire__write(obj); + + obj->active &= ~(1 << ring); + if (obj->active) + return; list_for_each_entry(vma, &obj->vma_list, vma_link) { if (!list_empty(&vma->mm_list)) list_move_tail(&vma->mm_list, &vma->vm->inactive_list); } - intel_fb_obj_flush(obj, true); - - list_del_init(&obj->ring_list); - - i915_gem_request_assign(&obj->last_read_req, NULL); - i915_gem_request_assign(&obj->last_write_req, NULL); - obj->base.write_domain = 0; - i915_gem_request_assign(&obj->last_fenced_req, NULL); - - obj->active = 0; drm_gem_object_unreference(&obj->base); - - WARN_ON(i915_verify_lists(dev)); -} - -static void -i915_gem_object_retire(struct drm_i915_gem_object *obj) -{ - if (obj->last_read_req == NULL) - return; - - if (i915_gem_request_completed(obj->last_read_req, true)) - i915_gem_object_move_to_inactive(obj); } static int @@ -2483,20 +2570,6 @@ int __i915_add_request(struct intel_engine_cs *ring, return 0; } -static inline void -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) -{ - struct drm_i915_file_private *file_priv = request->file_priv; - - if (!file_priv) - return; - - spin_lock(&file_priv->mm.lock); - list_del(&request->client_list); - request->file_priv = NULL; - spin_unlock(&file_priv->mm.lock); -} - static bool i915_context_is_banned(struct drm_i915_private *dev_priv, const struct intel_context *ctx) { @@ -2542,16 +2615,6 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv, } } -static void i915_gem_free_request(struct drm_i915_gem_request *request) -{ - list_del(&request->list); - i915_gem_request_remove_from_client(request); - - put_pid(request->pid); - - i915_gem_request_unreference(request); -} - void i915_gem_request_free(struct kref *req_ref) { struct drm_i915_gem_request *req = container_of(req_ref, @@ -2576,38 +2639,38 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring, struct intel_context *ctx) { struct drm_i915_private *dev_priv = to_i915(ring->dev); - struct drm_i915_gem_request *rq; + struct drm_i915_gem_request *req; int ret; if (ring->outstanding_lazy_request) return 0; - rq = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); - if (rq == NULL) + req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); + if (req == NULL) return -ENOMEM; - kref_init(&rq->ref); - rq->i915 = dev_priv; + kref_init(&req->ref); + req->i915 = dev_priv; - ret = i915_gem_get_seqno(ring->dev, &rq->seqno); - if (ret) { - kfree(rq); - return ret; - } + ret = i915_gem_get_seqno(ring->dev, &req->seqno); + if (ret) + goto err; - rq->ring = ring; + req->ring = ring; if (i915.enable_execlists) - ret = intel_logical_ring_alloc_request_extras(rq, ctx); + ret = intel_logical_ring_alloc_request_extras(req, ctx); else - ret = intel_ring_alloc_request_extras(rq); - if (ret) { - kfree(rq); - return ret; - } + ret = intel_ring_alloc_request_extras(req); + if (ret) + goto err; - ring->outstanding_lazy_request = rq; + ring->outstanding_lazy_request = req; return 0; + +err: + kmem_cache_free(dev_priv->requests, req); + return ret; } struct drm_i915_gem_request * @@ -2652,9 +2715,9 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, obj = list_first_entry(&ring->active_list, struct drm_i915_gem_object, - ring_list); + ring_list[ring->id]); - i915_gem_object_move_to_inactive(obj); + i915_gem_object_retire__read(obj, ring->id); } /* @@ -2690,7 +2753,7 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv, struct drm_i915_gem_request, list); - i915_gem_free_request(request); + i915_gem_request_retire(request); } /* This may not have been flushed before the reset, so clean it now */ @@ -2738,6 +2801,8 @@ void i915_gem_reset(struct drm_device *dev) i915_gem_context_reset(dev); i915_gem_restore_fences(dev); + + WARN_ON(i915_verify_lists(dev)); } /** @@ -2746,11 +2811,11 @@ void i915_gem_reset(struct drm_device *dev) void i915_gem_retire_requests_ring(struct intel_engine_cs *ring) { - if (list_empty(&ring->request_list)) - return; - WARN_ON(i915_verify_lists(ring->dev)); + if (list_empty(&ring->active_list)) + return; + /* Retire requests first as we use it above for the early return. * If we retire requests last, we may use a later seqno and so clear * the requests lists without clearing the active list, leading to @@ -2766,16 +2831,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) if (!i915_gem_request_completed(request, true)) break; - trace_i915_gem_request_retire(request); - - /* We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - */ - request->ringbuf->last_retired_head = request->postfix; - - i915_gem_free_request(request); + i915_gem_request_retire(request); } /* Move any buffers on the active list that are no longer referenced @@ -2787,12 +2843,12 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) obj = list_first_entry(&ring->active_list, struct drm_i915_gem_object, - ring_list); + ring_list[ring->id]); - if (!i915_gem_request_completed(obj->last_read_req, true)) + if (!list_empty(&obj->last_read_req[ring->id]->list)) break; - i915_gem_object_move_to_inactive(obj); + i915_gem_object_retire__read(obj, ring->id); } if (unlikely(ring->trace_irq_req && @@ -2887,17 +2943,30 @@ i915_gem_idle_work_handler(struct work_struct *work) static int i915_gem_object_flush_active(struct drm_i915_gem_object *obj) { - struct intel_engine_cs *ring; - int ret; + int ret, i; + + if (!obj->active) + return 0; + + for (i = 0; i < I915_NUM_RINGS; i++) { + struct drm_i915_gem_request *req; + + req = obj->last_read_req[i]; + if (req == NULL) + continue; - if (obj->active) { - ring = i915_gem_request_get_ring(obj->last_read_req); + if (list_empty(&req->list)) + goto retire; - ret = i915_gem_check_olr(obj->last_read_req); + ret = i915_gem_check_olr(req); if (ret) return ret; - i915_gem_retire_requests_ring(ring); + if (i915_gem_request_completed(req, true)) { + __i915_gem_request_retire__upto(req); +retire: + i915_gem_object_retire__read(obj, i); + } } return 0; @@ -2931,9 +3000,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_wait *args = data; struct drm_i915_gem_object *obj; - struct drm_i915_gem_request *req; + struct drm_i915_gem_request *req[I915_NUM_RINGS]; unsigned reset_counter; - int ret = 0; + int i, n = 0; + int ret; if (args->flags != 0) return -EINVAL; @@ -2953,11 +3023,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (ret) goto out; - if (!obj->active || !obj->last_read_req) + if (!obj->active) goto out; - req = obj->last_read_req; - /* Do this after OLR check to make sure we make forward progress polling * on this IOCTL with a timeout == 0 (like busy ioctl) */ @@ -2968,13 +3036,23 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) drm_gem_object_unreference(&obj->base); reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter); - i915_gem_request_reference(req); + + for (i = 0; i < I915_NUM_RINGS; i++) { + if (obj->last_read_req[i] == NULL) + continue; + + req[n++] = i915_gem_request_reference(obj->last_read_req[i]); + } + mutex_unlock(&dev->struct_mutex); - ret = __i915_wait_request(req, reset_counter, true, - args->timeout_ns > 0 ? &args->timeout_ns : NULL, - file->driver_priv); - i915_gem_request_unreference__unlocked(req); + for (i = 0; i < n; i++) { + if (ret == 0) + ret = __i915_wait_request(req[i], reset_counter, true, + args->timeout_ns > 0 ? &args->timeout_ns : NULL, + file->driver_priv); + i915_gem_request_unreference__unlocked(req[i]); + } return ret; out: @@ -2983,6 +3061,59 @@ out: return ret; } +static int +__i915_gem_object_sync(struct drm_i915_gem_object *obj, + struct intel_engine_cs *to, + struct drm_i915_gem_request *req) +{ + struct intel_engine_cs *from; + int ret; + + from = i915_gem_request_get_ring(req); + if (to == from) + return 0; + + if (i915_gem_request_completed(req, true)) + return 0; + + ret = i915_gem_check_olr(req); + if (ret) + return ret; + + if (!i915_semaphore_is_enabled(obj->base.dev)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + ret = __i915_wait_request(req, + atomic_read(&i915->gpu_error.reset_counter), + i915->mm.interruptible, + NULL, + &i915->rps.semaphores); + if (ret) + return ret; + + i915_gem_object_retire_request(obj, req); + } else { + int idx = intel_ring_sync_index(from, to); + u32 seqno = i915_gem_request_get_seqno(req); + + if (seqno <= from->semaphore.sync_seqno[idx]) + return 0; + + trace_i915_gem_ring_sync_to(from, to, req); + ret = to->semaphore.sync_to(to, from, seqno); + if (ret) + return ret; + + /* We use last_read_req because sync_to() + * might have just caused seqno wrap under + * the radar. + */ + from->semaphore.sync_seqno[idx] = + i915_gem_request_get_seqno(obj->last_read_req[from->id]); + } + + return 0; +} + /** * i915_gem_object_sync - sync an object to a ring. * @@ -2991,7 +3122,17 @@ out: * * This code is meant to abstract object synchronization with the GPU. * Calling with NULL implies synchronizing the object with the CPU - * rather than a particular GPU ring. + * rather than a particular GPU ring. Conceptually we serialise writes + * between engines inside the GPU. We only allow on engine to write + * into a buffer at any time, but multiple readers. To ensure each has + * a coherent view of memory, we must: + * + * - If there is an outstanding write request to the object, the new + * request must wait for it to complete (either CPU or in hw, requests + * on the same ring will be naturally ordered). + * + * - If we are a write request (pending_write_domain is set), the new + * request must wait for outstanding read requests to complete. * * Returns 0 if successful, else propagates up the lower layer error. */ @@ -2999,41 +3140,32 @@ int i915_gem_object_sync(struct drm_i915_gem_object *obj, struct intel_engine_cs *to) { - struct intel_engine_cs *from; - u32 seqno; - int ret, idx; - - from = i915_gem_request_get_ring(obj->last_read_req); - - if (from == NULL || to == from) - return 0; + const bool readonly = obj->base.pending_write_domain == 0; + struct drm_i915_gem_request *req[I915_NUM_RINGS]; + int ret, i, n; - if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) - return i915_gem_object_wait_rendering(obj, false); - - idx = intel_ring_sync_index(from, to); - - seqno = i915_gem_request_get_seqno(obj->last_read_req); - /* Optimization: Avoid semaphore sync when we are sure we already - * waited for an object with higher seqno */ - if (seqno <= from->semaphore.sync_seqno[idx]) + if (!obj->active) return 0; - ret = i915_gem_check_olr(obj->last_read_req); - if (ret) - return ret; + if (to == NULL) + return i915_gem_object_wait_rendering(obj, readonly); - trace_i915_gem_ring_sync_to(from, to, obj->last_read_req); - ret = to->semaphore.sync_to(to, from, seqno); - if (!ret) - /* We use last_read_req because sync_to() - * might have just caused seqno wrap under - * the radar. - */ - from->semaphore.sync_seqno[idx] = - i915_gem_request_get_seqno(obj->last_read_req); + n = 0; + if (readonly) { + if (obj->last_write_req) + req[n++] = obj->last_write_req; + } else { + for (i = 0; i < I915_NUM_RINGS; i++) + if (obj->last_read_req[i]) + req[n++] = obj->last_read_req[i]; + } + for (i = 0; i < n; i++) { + ret = __i915_gem_object_sync(obj, to, req[i]); + if (ret) + return ret; + } - return ret; + return 0; } static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) @@ -3079,7 +3211,7 @@ int i915_vma_unbind(struct i915_vma *vma) BUG_ON(obj->pages == NULL); - ret = i915_gem_object_finish_gpu(obj); + ret = i915_gem_object_wait_rendering(obj, false); if (ret) return ret; /* Continue on if we fail due to EIO, the GPU is hung so we @@ -3119,10 +3251,6 @@ int i915_vma_unbind(struct i915_vma *vma) /* Since the unbound list is global, only move to that list if * no more VMAs exist. */ if (list_empty(&obj->vma_list)) { - /* Throw away the active reference before - * moving to the unbound list. */ - i915_gem_object_retire(obj); - i915_gem_gtt_finish_object(obj); list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list); } @@ -3155,6 +3283,7 @@ int i915_gpu_idle(struct drm_device *dev) return ret; } + WARN_ON(i915_verify_lists(dev)); return 0; } @@ -3777,8 +3906,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_retire(obj); - /* Flush and acquire obj->pages so that we are coherent through * direct access in memory with previous cached writes through * shmemfs and that our cache domain tracking remains valid. @@ -3854,7 +3981,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, } if (i915_gem_obj_bound_any(obj)) { - ret = i915_gem_object_finish_gpu(obj); + ret = i915_gem_object_wait_rendering(obj, false); if (ret) return ret; @@ -3976,11 +4103,9 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 old_read_domains, old_write_domain; int ret; - if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) { - ret = i915_gem_object_sync(obj, pipelined); - if (ret) - return ret; - } + ret = i915_gem_object_sync(obj, pipelined); + if (ret) + return ret; /* Mark the pin_display early so that we account for the * display coherency whilst setting up the cache domains. @@ -4045,23 +4170,6 @@ i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, obj->pin_display--; } -int -i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) -{ - int ret; - - if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) - return 0; - - ret = i915_gem_object_wait_rendering(obj, false); - if (ret) - return ret; - - /* Ensure that we invalidate the GPU's caches and TLBs. */ - obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; - return 0; -} - /** * Moves a single object to the CPU read, and possibly write domain. * @@ -4081,7 +4189,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) if (ret) return ret; - i915_gem_object_retire(obj); i915_gem_object_flush_gtt_write_domain(obj); old_write_domain = obj->base.write_domain; @@ -4132,7 +4239,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_file_private *file_priv = file->driver_priv; - unsigned long recent_enough = jiffies - msecs_to_jiffies(20); + unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; struct drm_i915_gem_request *request, *target = NULL; unsigned reset_counter; int ret; @@ -4370,15 +4477,15 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * necessary flushes here. */ ret = i915_gem_object_flush_active(obj); + if (ret) + goto unref; - args->busy = obj->active; - if (obj->last_read_req) { - struct intel_engine_cs *ring; - BUILD_BUG_ON(I915_NUM_RINGS > 16); - ring = i915_gem_request_get_ring(obj->last_read_req); - args->busy |= intel_ring_flag(ring) << 16; - } + BUILD_BUG_ON(I915_NUM_RINGS > 16); + args->busy = obj->active << 16; + if (obj->last_write_req) + args->busy |= obj->last_write_req->ring->id; +unref: drm_gem_object_unreference(&obj->base); unlock: mutex_unlock(&dev->struct_mutex); @@ -4452,8 +4559,11 @@ unlock: void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops) { + int i; + INIT_LIST_HEAD(&obj->global_list); - INIT_LIST_HEAD(&obj->ring_list); + for (i = 0; i < I915_NUM_RINGS; i++) + INIT_LIST_HEAD(&obj->ring_list[i]); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -5112,10 +5222,10 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) } spin_unlock(&file_priv->mm.lock); - if (!list_empty(&file_priv->rps_boost)) { - mutex_lock(&to_i915(dev)->rps.hw_lock); - list_del(&file_priv->rps_boost); - mutex_unlock(&to_i915(dev)->rps.hw_lock); + if (!list_empty(&file_priv->rps.link)) { + spin_lock(&to_i915(dev)->rps.client_lock); + list_del(&file_priv->rps.link); + spin_unlock(&to_i915(dev)->rps.client_lock); } } @@ -5133,7 +5243,7 @@ int i915_gem_open(struct drm_device *dev, struct drm_file *file) file->driver_priv = file_priv; file_priv->dev_priv = dev->dev_private; file_priv->file = file; - INIT_LIST_HEAD(&file_priv->rps_boost); + INIT_LIST_HEAD(&file_priv->rps.link); spin_lock_init(&file_priv->mm.lock); INIT_LIST_HEAD(&file_priv->mm.request_list); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 5a47eb5e3c5d..8867818b1401 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -753,8 +753,6 @@ static int do_switch(struct intel_engine_cs *ring, * swapped, but there is no way to do that yet. */ from->legacy_hw_ctx.rcs_state->dirty = 1; - BUG_ON(i915_gem_request_get_ring( - from->legacy_hw_ctx.rcs_state->last_read_req) != ring); /* obj is kept alive until the next request by its active ref */ i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state); diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c index f462d1b51d97..17299d04189f 100644 --- a/drivers/gpu/drm/i915/i915_gem_debug.c +++ b/drivers/gpu/drm/i915/i915_gem_debug.c @@ -34,82 +34,34 @@ int i915_verify_lists(struct drm_device *dev) { static int warned; - struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; + struct intel_engine_cs *ring; int err = 0; + int i; if (warned) return 0; - list_for_each_entry(obj, &dev_priv->render_ring.active_list, list) { - if (obj->base.dev != dev || - !atomic_read(&obj->base.refcount.refcount)) { - DRM_ERROR("freed render active %p\n", obj); - err++; - break; - } else if (!obj->active || - (obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) { - DRM_ERROR("invalid render active %p (a %d r %x)\n", - obj, - obj->active, - obj->base.read_domains); - err++; - } else if (obj->base.write_domain && list_empty(&obj->gpu_write_list)) { - DRM_ERROR("invalid render active %p (w %x, gwl %d)\n", - obj, - obj->base.write_domain, - !list_empty(&obj->gpu_write_list)); - err++; - } - } - - list_for_each_entry(obj, &dev_priv->mm.flushing_list, list) { - if (obj->base.dev != dev || - !atomic_read(&obj->base.refcount.refcount)) { - DRM_ERROR("freed flushing %p\n", obj); - err++; - break; - } else if (!obj->active || - (obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0 || - list_empty(&obj->gpu_write_list)) { - DRM_ERROR("invalid flushing %p (a %d w %x gwl %d)\n", - obj, - obj->active, - obj->base.write_domain, - !list_empty(&obj->gpu_write_list)); - err++; - } - } - - list_for_each_entry(obj, &dev_priv->mm.gpu_write_list, gpu_write_list) { - if (obj->base.dev != dev || - !atomic_read(&obj->base.refcount.refcount)) { - DRM_ERROR("freed gpu write %p\n", obj); - err++; - break; - } else if (!obj->active || - (obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) { - DRM_ERROR("invalid gpu write %p (a %d w %x)\n", - obj, - obj->active, - obj->base.write_domain); - err++; - } - } - - list_for_each_entry(obj, &i915_gtt_vm->inactive_list, list) { - if (obj->base.dev != dev || - !atomic_read(&obj->base.refcount.refcount)) { - DRM_ERROR("freed inactive %p\n", obj); - err++; - break; - } else if (obj->pin_count || obj->active || - (obj->base.write_domain & I915_GEM_GPU_DOMAINS)) { - DRM_ERROR("invalid inactive %p (p %d a %d w %x)\n", - obj, - obj->pin_count, obj->active, - obj->base.write_domain); - err++; + for_each_ring(ring, dev_priv, i) { + list_for_each_entry(obj, &ring->active_list, ring_list[ring->id]) { + if (obj->base.dev != dev || + !atomic_read(&obj->base.refcount.refcount)) { + DRM_ERROR("%s: freed active obj %p\n", + ring->name, obj); + err++; + break; + } else if (!obj->active || + obj->last_read_req[ring->id] == NULL) { + DRM_ERROR("%s: invalid active obj %p\n", + ring->name, obj); + err++; + } else if (obj->base.write_domain) { + DRM_ERROR("%s: invalid write obj %p (w %x)\n", + ring->name, + obj, obj->base.write_domain); + err++; + } } } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 560c79a8a43d..bd0e4bda2c64 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -889,6 +889,7 @@ static int i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, struct list_head *vmas) { + const unsigned other_rings = ~intel_ring_flag(ring); struct i915_vma *vma; uint32_t flush_domains = 0; bool flush_chipset = false; @@ -896,9 +897,12 @@ i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - ret = i915_gem_object_sync(obj, ring); - if (ret) - return ret; + + if (obj->active & other_rings) { + ret = i915_gem_object_sync(obj, ring); + if (ret) + return ret; + } if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) flush_chipset |= i915_gem_clflush_object(obj, false); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e3bcc3ba7e40..619dad1b2386 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -757,7 +757,7 @@ static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt, WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES)); /* FIXME: upper bound must not overflow 32 bits */ - WARN_ON((start + length) >= (1ULL << 32)); + WARN_ON((start + length) > (1ULL << 32)); gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) { if (pd) @@ -952,6 +952,13 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.start = 0; ppgtt->base.total = 1ULL << 32; + if (IS_ENABLED(CONFIG_X86_32)) + /* While we have a proliferation of size_t variables + * we cannot represent the full ppgtt size on 32bit, + * so limit it to the same size as the GGTT (currently + * 2GiB). + */ + ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total; ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.allocate_va_range = gen8_alloc_va_range; ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 4039ede158be..1f4e5a32a16e 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -219,11 +219,14 @@ i915_mmu_notifier_add(struct drm_device *dev, struct i915_mmu_object *mo) { struct interval_tree_node *it; - int ret; + int ret = 0; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; + /* By this point we have already done a lot of expensive setup that + * we do not want to repeat just because the caller (e.g. X) has a + * signal pending (and partly because of that expensive setup, X + * using an interrupt timer is likely to get stuck in an EINTR loop). + */ + mutex_lock(&dev->struct_mutex); /* Make sure we drop the final active reference (and thereby * remove the objects from the interval tree) before we do diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a3e330d2a1d8..6f4256918f76 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -192,15 +192,20 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, struct drm_i915_error_buffer *err, int count) { + int i; + err_printf(m, " %s [%d]:\n", name, count); while (count--) { - err_printf(m, " %08x %8u %02x %02x %x %x", + err_printf(m, " %08x %8u %02x %02x [ ", err->gtt_offset, err->size, err->read_domains, - err->write_domain, - err->rseqno, err->wseqno); + err->write_domain); + for (i = 0; i < I915_NUM_RINGS; i++) + err_printf(m, "%02x ", err->rseqno[i]); + + err_printf(m, "] %02x", err->wseqno); err_puts(m, pin_flag(err->pinned)); err_puts(m, tiling_flag(err->tiling)); err_puts(m, dirty_flag(err->dirty)); @@ -681,10 +686,12 @@ static void capture_bo(struct drm_i915_error_buffer *err, struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; + int i; err->size = obj->base.size; err->name = obj->base.name; - err->rseqno = i915_gem_request_get_seqno(obj->last_read_req); + for (i = 0; i < I915_NUM_RINGS; i++) + err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read_req[i]); err->wseqno = i915_gem_request_get_seqno(obj->last_write_req); err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; @@ -697,8 +704,8 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; err->userptr = obj->userptr.mm != NULL; - err->ring = obj->last_read_req ? - i915_gem_request_get_ring(obj->last_read_req)->id : -1; + err->ring = obj->last_write_req ? + i915_gem_request_get_ring(obj->last_write_req)->id : -1; err->cache_level = obj->cache_level; } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 9da955e4f355..e6bb72dca3ff 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -79,7 +79,7 @@ static const u32 hpd_status_g4x[HPD_NUM_PINS] = { [HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS }; -static const u32 hpd_status_i915[HPD_NUM_PINS] = { /* i915 and valleyview are the same */ +static const u32 hpd_status_i915[HPD_NUM_PINS] = { [HPD_CRT] = CRT_HOTPLUG_INT_STATUS, [HPD_SDVO_B] = SDVOB_HOTPLUG_INT_STATUS_I915, [HPD_SDVO_C] = SDVOC_HOTPLUG_INT_STATUS_I915, @@ -1070,12 +1070,25 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir) return events; } +static bool any_waiters(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *ring; + int i; + + for_each_ring(ring, dev_priv, i) + if (ring->irq_refcount) + return true; + + return false; +} + static void gen6_pm_rps_work(struct work_struct *work) { struct drm_i915_private *dev_priv = container_of(work, struct drm_i915_private, rps.work); + bool client_boost; + int new_delay, adj, min, max; u32 pm_iir; - int new_delay, adj; spin_lock_irq(&dev_priv->irq_lock); /* Speed up work cancelation during disabling rps interrupts. */ @@ -1087,12 +1100,14 @@ static void gen6_pm_rps_work(struct work_struct *work) dev_priv->rps.pm_iir = 0; /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); + client_boost = dev_priv->rps.client_boost; + dev_priv->rps.client_boost = false; spin_unlock_irq(&dev_priv->irq_lock); /* Make sure we didn't queue anything we're not going to process. */ WARN_ON(pm_iir & ~dev_priv->pm_rps_events); - if ((pm_iir & dev_priv->pm_rps_events) == 0) + if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost) return; mutex_lock(&dev_priv->rps.hw_lock); @@ -1101,7 +1116,13 @@ static void gen6_pm_rps_work(struct work_struct *work) adj = dev_priv->rps.last_adj; new_delay = dev_priv->rps.cur_freq; - if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { + min = dev_priv->rps.min_freq_softlimit; + max = dev_priv->rps.max_freq_softlimit; + + if (client_boost) { + new_delay = dev_priv->rps.max_freq_softlimit; + adj = 0; + } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) { if (adj > 0) adj *= 2; else /* CHV needs even encode values */ @@ -1114,6 +1135,8 @@ static void gen6_pm_rps_work(struct work_struct *work) new_delay = dev_priv->rps.efficient_freq; adj = 0; } + } else if (any_waiters(dev_priv)) { + adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) new_delay = dev_priv->rps.efficient_freq; @@ -1135,9 +1158,7 @@ static void gen6_pm_rps_work(struct work_struct *work) * interrupt */ new_delay += adj; - new_delay = clamp_t(int, new_delay, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit); + new_delay = clamp_t(int, new_delay, min, max); intel_set_rps(dev_priv->dev, new_delay); @@ -1386,7 +1407,7 @@ static int i915_port_to_hotplug_shift(enum port port) } } -static inline enum port get_port_from_pin(enum hpd_pin pin) +static enum port get_port_from_pin(enum hpd_pin pin) { switch (pin) { case HPD_PORT_B: @@ -1400,10 +1421,10 @@ static inline enum port get_port_from_pin(enum hpd_pin pin) } } -static inline void intel_hpd_irq_handler(struct drm_device *dev, - u32 hotplug_trigger, - u32 dig_hotplug_reg, - const u32 hpd[HPD_NUM_PINS]) +static void intel_hpd_irq_handler(struct drm_device *dev, + u32 hotplug_trigger, + u32 dig_hotplug_reg, + const u32 hpd[HPD_NUM_PINS]) { struct drm_i915_private *dev_priv = dev->dev_private; int i; @@ -1743,7 +1764,7 @@ static void i9xx_hpd_irq_handler(struct drm_device *dev) */ POSTING_READ(PORT_HOTPLUG_STAT); - if (IS_G4X(dev)) { + if (IS_G4X(dev) || IS_VALLEYVIEW(dev)) { u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X; intel_hpd_irq_handler(dev, hotplug_trigger, 0, hpd_status_g4x); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 58627a319416..6d3fead3a358 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -595,10 +595,6 @@ enum punit_power_well { PUNIT_POWER_WELL_DPIO_RX0 = 10, PUNIT_POWER_WELL_DPIO_RX1 = 11, PUNIT_POWER_WELL_DPIO_CMN_D = 12, - /* FIXME: guesswork below */ - PUNIT_POWER_WELL_DPIO_TX_D_LANES_01 = 13, - PUNIT_POWER_WELL_DPIO_TX_D_LANES_23 = 14, - PUNIT_POWER_WELL_DPIO_RX2 = 15, PUNIT_POWER_WELL_NUM, }; @@ -1204,6 +1200,12 @@ enum skl_disp_power_wells { #define PORT_PLL_GAIN_CTL(x) ((x) << 16) /* PORT_PLL_8_A */ #define PORT_PLL_TARGET_CNT_MASK 0x3FF +/* PORT_PLL_9_A */ +#define PORT_PLL_LOCK_THRESHOLD_MASK 0xe +/* PORT_PLL_10_A */ +#define PORT_PLL_DCO_AMP_OVR_EN_H (1<<27) +#define PORT_PLL_DCO_AMP_MASK 0x3c00 +#define PORT_PLL_DCO_AMP(x) (x<<10) #define _PORT_PLL_BASE(port) _PORT3(port, _PORT_PLL_0_A, \ _PORT_PLL_0_B, \ _PORT_PLL_0_C) @@ -1455,6 +1457,8 @@ enum skl_disp_power_wells { #define RING_HWS_PGA(base) ((base)+0x80) #define RING_HWS_PGA_GEN6(base) ((base)+0x2080) +#define HSW_GTT_CACHE_EN 0x4024 +#define GTT_CACHE_EN_ALL 0xF0007FFF #define GEN7_WR_WATERMARK 0x4028 #define GEN7_GFX_PRIO_CTRL 0x402C #define ARB_MODE 0x4030 @@ -2137,6 +2141,10 @@ enum skl_disp_power_wells { #define DPIO_PHY_STATUS (VLV_DISPLAY_BASE + 0x6240) #define DPLL_PORTD_READY_MASK (0xf) #define DISPLAY_PHY_CONTROL (VLV_DISPLAY_BASE + 0x60100) +#define PHY_LDO_DELAY_0NS 0x0 +#define PHY_LDO_DELAY_200NS 0x1 +#define PHY_LDO_DELAY_600NS 0x2 +#define PHY_LDO_SEQ_DELAY(delay, phy) ((delay) << (2*(phy)+23)) #define PHY_CH_SU_PSR 0x1 #define PHY_CH_DEEP_PSR 0x7 #define PHY_CH_POWER_MODE(mode, phy, ch) ((mode) << (6*(phy)+3*(ch)+2)) @@ -5167,6 +5175,8 @@ enum skl_disp_power_wells { #define _PLANE_KEYMAX_2_A 0x702a0 #define _PLANE_BUF_CFG_1_A 0x7027c #define _PLANE_BUF_CFG_2_A 0x7037c +#define _PLANE_NV12_BUF_CFG_1_A 0x70278 +#define _PLANE_NV12_BUF_CFG_2_A 0x70378 #define _PLANE_CTL_1_B 0x71180 #define _PLANE_CTL_2_B 0x71280 @@ -5253,6 +5263,15 @@ enum skl_disp_power_wells { #define PLANE_BUF_CFG(pipe, plane) \ _PLANE(plane, _PLANE_BUF_CFG_1(pipe), _PLANE_BUF_CFG_2(pipe)) +#define _PLANE_NV12_BUF_CFG_1_B 0x71278 +#define _PLANE_NV12_BUF_CFG_2_B 0x71378 +#define _PLANE_NV12_BUF_CFG_1(pipe) \ + _PIPE(pipe, _PLANE_NV12_BUF_CFG_1_A, _PLANE_NV12_BUF_CFG_1_B) +#define _PLANE_NV12_BUF_CFG_2(pipe) \ + _PIPE(pipe, _PLANE_NV12_BUF_CFG_2_A, _PLANE_NV12_BUF_CFG_2_B) +#define PLANE_NV12_BUF_CFG(pipe, plane) \ + _PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe)) + /* SKL new cursor registers */ #define _CUR_BUF_CFG_A 0x7017c #define _CUR_BUF_CFG_B 0x7117c @@ -5774,6 +5793,7 @@ enum skl_disp_power_wells { /* GEN8 chicken */ #define HDC_CHICKEN0 0x7300 +#define HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE (1<<15) #define HDC_FENCE_DEST_SLM_DISABLE (1<<14) #define HDC_DONOT_FETCH_MEM_WHEN_MASKED (1<<11) #define HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT (1<<5) @@ -6422,6 +6442,7 @@ enum skl_disp_power_wells { #define TRANS_DP_PORT_SEL_D (2<<29) #define TRANS_DP_PORT_SEL_NONE (3<<29) #define TRANS_DP_PORT_SEL_MASK (3<<29) +#define TRANS_DP_PIPE_TO_PORT(val) ((((val) & TRANS_DP_PORT_SEL_MASK) >> 29) + PORT_B) #define TRANS_DP_AUDIO_ONLY (1<<26) #define TRANS_DP_ENH_FRAMING (1<<18) #define TRANS_DP_8BPC (0<<9) @@ -6681,6 +6702,9 @@ enum skl_disp_power_wells { #define GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT 8 #define GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT 16 #define GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT 24 +#define SKL_PCODE_CDCLK_CONTROL 0x7 +#define SKL_CDCLK_PREPARE_FOR_CHANGE 0x3 +#define SKL_CDCLK_READY_FOR_CHANGE 0x1 #define GEN6_PCODE_WRITE_MIN_FREQ_TABLE 0x8 #define GEN6_PCODE_READ_MIN_FREQ_TABLE 0x9 #define GEN6_READ_OC_PARAMS 0xc diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index cee596d0a6a2..198fc3c3291b 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -36,10 +36,11 @@ static int panel_type; -static void * -find_section(struct bdb_header *bdb, int section_id) +static const void * +find_section(const void *_bdb, int section_id) { - u8 *base = (u8 *)bdb; + const struct bdb_header *bdb = _bdb; + const u8 *base = _bdb; int index = 0; u16 total, current_size; u8 current_id; @@ -53,7 +54,7 @@ find_section(struct bdb_header *bdb, int section_id) current_id = *(base + index); index++; - current_size = *((u16 *)(base + index)); + current_size = *((const u16 *)(base + index)); index += 2; if (index + current_size > total) @@ -69,7 +70,7 @@ find_section(struct bdb_header *bdb, int section_id) } static u16 -get_blocksize(void *p) +get_blocksize(const void *p) { u16 *block_ptr, block_size; @@ -204,7 +205,7 @@ get_lvds_fp_timing(const struct bdb_header *bdb, /* Try to find integrated panel data */ static void parse_lfp_panel_data(struct drm_i915_private *dev_priv, - struct bdb_header *bdb) + const struct bdb_header *bdb) { const struct bdb_lvds_options *lvds_options; const struct bdb_lvds_lfp_data *lvds_lfp_data; @@ -310,7 +311,8 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv, } static void -parse_lfp_backlight(struct drm_i915_private *dev_priv, struct bdb_header *bdb) +parse_lfp_backlight(struct drm_i915_private *dev_priv, + const struct bdb_header *bdb) { const struct bdb_lfp_backlight_data *backlight_data; const struct bdb_lfp_backlight_data_entry *entry; @@ -348,9 +350,9 @@ parse_lfp_backlight(struct drm_i915_private *dev_priv, struct bdb_header *bdb) /* Try to find sdvo panel data */ static void parse_sdvo_panel_data(struct drm_i915_private *dev_priv, - struct bdb_header *bdb) + const struct bdb_header *bdb) { - struct lvds_dvo_timing *dvo_timing; + const struct lvds_dvo_timing *dvo_timing; struct drm_display_mode *panel_fixed_mode; int index; @@ -361,7 +363,7 @@ parse_sdvo_panel_data(struct drm_i915_private *dev_priv, } if (index == -1) { - struct bdb_sdvo_lvds_options *sdvo_lvds_options; + const struct bdb_sdvo_lvds_options *sdvo_lvds_options; sdvo_lvds_options = find_section(bdb, BDB_SDVO_LVDS_OPTIONS); if (!sdvo_lvds_options) @@ -402,10 +404,10 @@ static int intel_bios_ssc_frequency(struct drm_device *dev, static void parse_general_features(struct drm_i915_private *dev_priv, - struct bdb_header *bdb) + const struct bdb_header *bdb) { struct drm_device *dev = dev_priv->dev; - struct bdb_general_features *general; + const struct bdb_general_features *general; general = find_section(bdb, BDB_GENERAL_FEATURES); if (general) { @@ -428,9 +430,9 @@ parse_general_features(struct drm_i915_private *dev_priv, static void parse_general_definitions(struct drm_i915_private *dev_priv, - struct bdb_header *bdb) + const struct bdb_header *bdb) { - struct bdb_general_definitions *general; + const struct bdb_general_definitions *general; general = find_section(bdb, BDB_GENERAL_DEFINITIONS); if (general) { @@ -447,19 +449,19 @@ parse_general_definitions(struct drm_i915_private *dev_priv, } } -static union child_device_config * -child_device_ptr(struct bdb_general_definitions *p_defs, int i) +static const union child_device_config * +child_device_ptr(const struct bdb_general_definitions *p_defs, int i) { - return (void *) &p_defs->devices[i * p_defs->child_dev_size]; + return (const void *) &p_defs->devices[i * p_defs->child_dev_size]; } static void parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, - struct bdb_header *bdb) + const struct bdb_header *bdb) { struct sdvo_device_mapping *p_mapping; - struct bdb_general_definitions *p_defs; - union child_device_config *p_child; + const struct bdb_general_definitions *p_defs; + const union child_device_config *p_child; int i, child_device_num, count; u16 block_size; @@ -545,9 +547,9 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv, static void parse_driver_features(struct drm_i915_private *dev_priv, - struct bdb_header *bdb) + const struct bdb_header *bdb) { - struct bdb_driver_features *driver; + const struct bdb_driver_features *driver; driver = find_section(bdb, BDB_DRIVER_FEATURES); if (!driver) @@ -571,11 +573,11 @@ parse_driver_features(struct drm_i915_private *dev_priv, } static void -parse_edp(struct drm_i915_private *dev_priv, struct bdb_header *bdb) +parse_edp(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) { - struct bdb_edp *edp; - struct edp_power_seq *edp_pps; - struct edp_link_params *edp_link_params; + const struct bdb_edp *edp; + const struct edp_power_seq *edp_pps; + const struct edp_link_params *edp_link_params; edp = find_section(bdb, BDB_EDP); if (!edp) { @@ -683,10 +685,10 @@ parse_edp(struct drm_i915_private *dev_priv, struct bdb_header *bdb) } static void -parse_psr(struct drm_i915_private *dev_priv, struct bdb_header *bdb) +parse_psr(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) { - struct bdb_psr *psr; - struct psr_table *psr_table; + const struct bdb_psr *psr; + const struct psr_table *psr_table; psr = find_section(bdb, BDB_PSR); if (!psr) { @@ -794,13 +796,14 @@ static u8 *goto_next_sequence(u8 *data, int *size) } static void -parse_mipi(struct drm_i915_private *dev_priv, struct bdb_header *bdb) +parse_mipi(struct drm_i915_private *dev_priv, const struct bdb_header *bdb) { - struct bdb_mipi_config *start; - struct bdb_mipi_sequence *sequence; - struct mipi_config *config; - struct mipi_pps_data *pps; - u8 *data, *seq_data; + const struct bdb_mipi_config *start; + const struct bdb_mipi_sequence *sequence; + const struct mipi_config *config; + const struct mipi_pps_data *pps; + u8 *data; + const u8 *seq_data; int i, panel_id, seq_size; u16 block_size; @@ -944,7 +947,7 @@ err: } static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, - struct bdb_header *bdb) + const struct bdb_header *bdb) { union child_device_config *it, *child = NULL; struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; @@ -1046,7 +1049,7 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, } static void parse_ddi_ports(struct drm_i915_private *dev_priv, - struct bdb_header *bdb) + const struct bdb_header *bdb) { struct drm_device *dev = dev_priv->dev; enum port port; @@ -1066,10 +1069,11 @@ static void parse_ddi_ports(struct drm_i915_private *dev_priv, static void parse_device_mapping(struct drm_i915_private *dev_priv, - struct bdb_header *bdb) + const struct bdb_header *bdb) { - struct bdb_general_definitions *p_defs; - union child_device_config *p_child, *child_dev_ptr; + const struct bdb_general_definitions *p_defs; + const union child_device_config *p_child; + union child_device_config *child_dev_ptr; int i, child_device_num, count; u16 block_size; @@ -1126,8 +1130,7 @@ parse_device_mapping(struct drm_i915_private *dev_priv, child_dev_ptr = dev_priv->vbt.child_dev + count; count++; - memcpy((void *)child_dev_ptr, (void *)p_child, - sizeof(*p_child)); + memcpy(child_dev_ptr, p_child, sizeof(*p_child)); } return; } @@ -1196,19 +1199,22 @@ static const struct dmi_system_id intel_no_opregion_vbt[] = { { } }; -static struct bdb_header *validate_vbt(char *base, size_t size, - struct vbt_header *vbt, - const char *source) +static const struct bdb_header *validate_vbt(const void __iomem *_base, + size_t size, + const void __iomem *_vbt, + const char *source) { - size_t offset; - struct bdb_header *bdb; - - if (vbt == NULL) { - DRM_DEBUG_DRIVER("VBT signature missing\n"); - return NULL; - } + /* + * This is the one place where we explicitly discard the address space + * (__iomem) of the BIOS/VBT. (And this will cause a sparse complaint.) + * From now on everything is based on 'base', and treated as regular + * memory. + */ + const void *base = (const void *) _base; + size_t offset = _vbt - _base; + const struct vbt_header *vbt = base + offset; + const struct bdb_header *bdb; - offset = (char *)vbt - base; if (offset + sizeof(struct vbt_header) > size) { DRM_DEBUG_DRIVER("VBT header incomplete\n"); return NULL; @@ -1225,7 +1231,7 @@ static struct bdb_header *validate_vbt(char *base, size_t size, return NULL; } - bdb = (struct bdb_header *)(base + offset); + bdb = base + offset; if (offset + bdb->bdb_size > size) { DRM_DEBUG_DRIVER("BDB incomplete\n"); return NULL; @@ -1236,6 +1242,22 @@ static struct bdb_header *validate_vbt(char *base, size_t size, return bdb; } +static const struct bdb_header *find_vbt(void __iomem *bios, size_t size) +{ + const struct bdb_header *bdb = NULL; + size_t i; + + /* Scour memory looking for the VBT signature. */ + for (i = 0; i + 4 < size; i++) { + if (ioread32(bios + i) == *((const u32 *) "$VBT")) { + bdb = validate_vbt(bios, size, bios + i, "PCI ROM"); + break; + } + } + + return bdb; +} + /** * intel_parse_bios - find VBT and initialize settings from the BIOS * @dev: DRM device @@ -1250,7 +1272,7 @@ intel_parse_bios(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct pci_dev *pdev = dev->pdev; - struct bdb_header *bdb = NULL; + const struct bdb_header *bdb = NULL; u8 __iomem *bios = NULL; if (HAS_PCH_NOP(dev)) @@ -1260,27 +1282,17 @@ intel_parse_bios(struct drm_device *dev) /* XXX Should this validation be moved to intel_opregion.c? */ if (!dmi_check_system(intel_no_opregion_vbt) && dev_priv->opregion.vbt) - bdb = validate_vbt((char *)dev_priv->opregion.header, OPREGION_SIZE, - (struct vbt_header *)dev_priv->opregion.vbt, - "OpRegion"); + bdb = validate_vbt(dev_priv->opregion.header, OPREGION_SIZE, + dev_priv->opregion.vbt, "OpRegion"); if (bdb == NULL) { - size_t i, size; + size_t size; bios = pci_map_rom(pdev, &size); if (!bios) return -1; - /* Scour memory looking for the VBT signature */ - for (i = 0; i + 4 < size; i++) { - if (memcmp(bios + i, "$VBT", 4) == 0) { - bdb = validate_vbt(bios, size, - (struct vbt_header *)(bios + i), - "PCI ROM"); - break; - } - } - + bdb = find_vbt(bios, size); if (!bdb) { pci_unmap_rom(pdev, bios); return -1; diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 93bb5159d093..521af2c069cb 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -207,6 +207,14 @@ static void intel_disable_crt(struct intel_encoder *encoder) intel_crt_set_dpms(encoder, DRM_MODE_DPMS_OFF); } +static void pch_disable_crt(struct intel_encoder *encoder) +{ +} + +static void pch_post_disable_crt(struct intel_encoder *encoder) +{ + intel_disable_crt(encoder); +} static void hsw_crt_post_disable(struct intel_encoder *encoder) { @@ -888,7 +896,12 @@ void intel_crt_init(struct drm_device *dev) crt->adpa_reg = ADPA; crt->base.compute_config = intel_crt_compute_config; - crt->base.disable = intel_disable_crt; + if (HAS_PCH_SPLIT(dev) && !HAS_DDI(dev)) { + crt->base.disable = pch_disable_crt; + crt->base.post_disable = pch_post_disable_crt; + } else { + crt->base.disable = intel_disable_crt; + } crt->base.enable = intel_enable_crt; if (I915_HAS_HOTPLUG(dev)) crt->base.hpd_pin = HPD_CRT; diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 9311cddb86e6..5cb8cc18994a 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -25,6 +25,22 @@ #include "i915_drv.h" #include "i915_reg.h" +/** + * DOC: csr support for dmc + * + * Display Context Save and Restore (CSR) firmware support added from gen9 + * onwards to drive newly added DMC (Display microcontroller) in display + * engine to save and restore the state of display engine when it enter into + * low-power state and comes back to normal. + * + * Firmware loading status will be one of the below states: FW_UNINITIALIZED, + * FW_LOADED, FW_FAILED. + * + * Once the firmware is written into the registers status will be moved from + * FW_UNINITIALIZED to FW_LOADED and for any erroneous condition status will + * be moved to FW_FAILED. + */ + #define I915_CSR_SKL "i915/skl_dmc_ver4.bin" MODULE_FIRMWARE(I915_CSR_SKL); @@ -183,6 +199,14 @@ static char intel_get_substepping(struct drm_device *dev) return -ENODATA; } +/** + * intel_csr_load_status_get() - to get firmware loading status. + * @dev_priv: i915 device. + * + * This function helps to get the firmware loading status. + * + * Return: Firmware loading status. + */ enum csr_state intel_csr_load_status_get(struct drm_i915_private *dev_priv) { enum csr_state state; @@ -194,6 +218,13 @@ enum csr_state intel_csr_load_status_get(struct drm_i915_private *dev_priv) return state; } +/** + * intel_csr_load_status_set() - help to set firmware loading status. + * @dev_priv: i915 device. + * @state: enumeration of firmware loading status. + * + * Set the firmware loading status. + */ void intel_csr_load_status_set(struct drm_i915_private *dev_priv, enum csr_state state) { @@ -202,6 +233,14 @@ void intel_csr_load_status_set(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->csr_lock); } +/** + * intel_csr_load_program() - write the firmware from memory to register. + * @dev: drm device. + * + * CSR firmware is read from a .bin file and kept in internal memory one time. + * Everytime display comes back from low power state this function is called to + * copy the firmware from internal memory to registers. + */ void intel_csr_load_program(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -359,6 +398,13 @@ out: release_firmware(fw); } +/** + * intel_csr_ucode_init() - initialize the firmware loading. + * @dev: drm device. + * + * This function is called at the time of loading the display driver to read + * firmware from a .bin file and copied into a internal memory. + */ void intel_csr_ucode_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -393,6 +439,13 @@ void intel_csr_ucode_init(struct drm_device *dev) } } +/** + * intel_csr_ucode_fini() - unload the CSR firmware. + * @dev: drm device. + * + * Firmmware unloading includes freeing the internal momory and reset the + * firmware loading status. + */ void intel_csr_ucode_fini(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 807e15d41a1b..cacb07b7a8f1 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1087,6 +1087,9 @@ hsw_ddi_pll_select(struct intel_crtc *intel_crtc, WRPLL_DIVIDER_REFERENCE(r2) | WRPLL_DIVIDER_FEEDBACK(n2) | WRPLL_DIVIDER_POST(p); + memset(&crtc_state->dpll_hw_state, 0, + sizeof(crtc_state->dpll_hw_state)); + crtc_state->dpll_hw_state.wrpll = val; pll = intel_get_shared_dpll(intel_crtc, crtc_state); @@ -1309,6 +1312,9 @@ skl_ddi_pll_select(struct intel_crtc *intel_crtc, } else /* eDP */ return true; + memset(&crtc_state->dpll_hw_state, 0, + sizeof(crtc_state->dpll_hw_state)); + crtc_state->dpll_hw_state.ctrl1 = ctrl1; crtc_state->dpll_hw_state.cfgcr1 = cfgcr1; crtc_state->dpll_hw_state.cfgcr2 = cfgcr2; @@ -1334,22 +1340,17 @@ struct bxt_clk_div { uint32_t m2_frac; bool m2_frac_en; uint32_t n; - uint32_t prop_coef; - uint32_t int_coef; - uint32_t gain_ctl; - uint32_t targ_cnt; - uint32_t lanestagger; }; /* pre-calculated values for DP linkrates */ static struct bxt_clk_div bxt_dp_clk_val[7] = { - /* 162 */ {4, 2, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd}, - /* 270 */ {4, 1, 27, 0, 0, 1, 3, 8, 1, 9, 0xd}, - /* 540 */ {2, 1, 27, 0, 0, 1, 3, 8, 1, 9, 0x18}, - /* 216 */ {3, 2, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd}, - /* 243 */ {4, 1, 24, 1258291, 1, 1, 5, 11, 2, 9, 0xd}, - /* 324 */ {4, 1, 32, 1677722, 1, 1, 5, 11, 2, 9, 0xd}, - /* 432 */ {3, 1, 32, 1677722, 1, 1, 5, 11, 2, 9, 0x18} + /* 162 */ {4, 2, 32, 1677722, 1, 1}, + /* 270 */ {4, 1, 27, 0, 0, 1}, + /* 540 */ {2, 1, 27, 0, 0, 1}, + /* 216 */ {3, 2, 32, 1677722, 1, 1}, + /* 243 */ {4, 1, 24, 1258291, 1, 1}, + /* 324 */ {4, 1, 32, 1677722, 1, 1}, + /* 432 */ {3, 1, 32, 1677722, 1, 1} }; static bool @@ -1360,6 +1361,9 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc, { struct intel_shared_dpll *pll; struct bxt_clk_div clk_div = {0}; + int vco = 0; + uint32_t prop_coef, int_coef, gain_ctl, targ_cnt; + uint32_t dcoampovr_en_h, dco_amp, lanestagger; if (intel_encoder->type == INTEL_OUTPUT_HDMI) { intel_clock_t best_clock; @@ -1383,21 +1387,7 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc, clk_div.m2_frac = best_clock.m2 & ((1 << 22) - 1); clk_div.m2_frac_en = clk_div.m2_frac != 0; - /* FIXME: set coef, gain, targcnt based on freq band */ - clk_div.prop_coef = 5; - clk_div.int_coef = 11; - clk_div.gain_ctl = 2; - clk_div.targ_cnt = 9; - if (clock > 270000) - clk_div.lanestagger = 0x18; - else if (clock > 135000) - clk_div.lanestagger = 0x0d; - else if (clock > 67000) - clk_div.lanestagger = 0x07; - else if (clock > 33000) - clk_div.lanestagger = 0x04; - else - clk_div.lanestagger = 0x02; + vco = best_clock.vco; } else if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT || intel_encoder->type == INTEL_OUTPUT_EDP) { struct drm_encoder *encoder = &intel_encoder->base; @@ -1417,8 +1407,48 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc, clk_div = bxt_dp_clk_val[0]; DRM_ERROR("Unknown link rate\n"); } + vco = clock * 10 / 2 * clk_div.p1 * clk_div.p2; + } + + dco_amp = 15; + dcoampovr_en_h = 0; + if (vco >= 6200000 && vco <= 6480000) { + prop_coef = 4; + int_coef = 9; + gain_ctl = 3; + targ_cnt = 8; + } else if ((vco > 5400000 && vco < 6200000) || + (vco >= 4800000 && vco < 5400000)) { + prop_coef = 5; + int_coef = 11; + gain_ctl = 3; + targ_cnt = 9; + if (vco >= 4800000 && vco < 5400000) + dcoampovr_en_h = 1; + } else if (vco == 5400000) { + prop_coef = 3; + int_coef = 8; + gain_ctl = 1; + targ_cnt = 9; + } else { + DRM_ERROR("Invalid VCO\n"); + return false; } + memset(&crtc_state->dpll_hw_state, 0, + sizeof(crtc_state->dpll_hw_state)); + + if (clock > 270000) + lanestagger = 0x18; + else if (clock > 135000) + lanestagger = 0x0d; + else if (clock > 67000) + lanestagger = 0x07; + else if (clock > 33000) + lanestagger = 0x04; + else + lanestagger = 0x02; + crtc_state->dpll_hw_state.ebb0 = PORT_PLL_P1(clk_div.p1) | PORT_PLL_P2(clk_div.p2); crtc_state->dpll_hw_state.pll0 = clk_div.m2_int; @@ -1430,14 +1460,19 @@ bxt_ddi_pll_select(struct intel_crtc *intel_crtc, PORT_PLL_M2_FRAC_ENABLE; crtc_state->dpll_hw_state.pll6 = - clk_div.prop_coef | PORT_PLL_INT_COEFF(clk_div.int_coef); + prop_coef | PORT_PLL_INT_COEFF(int_coef); crtc_state->dpll_hw_state.pll6 |= - PORT_PLL_GAIN_CTL(clk_div.gain_ctl); + PORT_PLL_GAIN_CTL(gain_ctl); + + crtc_state->dpll_hw_state.pll8 = targ_cnt; - crtc_state->dpll_hw_state.pll8 = clk_div.targ_cnt; + if (dcoampovr_en_h) + crtc_state->dpll_hw_state.pll10 = PORT_PLL_DCO_AMP_OVR_EN_H; + + crtc_state->dpll_hw_state.pll10 |= PORT_PLL_DCO_AMP(dco_amp); crtc_state->dpll_hw_state.pcsdw12 = - LANESTAGGER_STRAP_OVRD | clk_div.lanestagger; + LANESTAGGER_STRAP_OVRD | lanestagger; pll = intel_get_shared_dpll(intel_crtc, crtc_state); if (pll == NULL) { @@ -2367,10 +2402,16 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv, temp |= pll->config.hw_state.pll8; I915_WRITE(BXT_PORT_PLL(port, 8), temp); - /* - * FIXME: program PORT_PLL_9/i_lockthresh according to the latest - * specification update. - */ + temp = I915_READ(BXT_PORT_PLL(port, 9)); + temp &= ~PORT_PLL_LOCK_THRESHOLD_MASK; + temp |= (5 << 1); + I915_WRITE(BXT_PORT_PLL(port, 9), temp); + + temp = I915_READ(BXT_PORT_PLL(port, 10)); + temp &= ~PORT_PLL_DCO_AMP_OVR_EN_H; + temp &= ~PORT_PLL_DCO_AMP_MASK; + temp |= pll->config.hw_state.pll10; + I915_WRITE(BXT_PORT_PLL(port, 10), temp); /* Recalibrate with new settings */ temp = I915_READ(BXT_PORT_PLL_EBB_4(port)); @@ -2434,6 +2475,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, hw_state->pll3 = I915_READ(BXT_PORT_PLL(port, 3)); hw_state->pll6 = I915_READ(BXT_PORT_PLL(port, 6)); hw_state->pll8 = I915_READ(BXT_PORT_PLL(port, 8)); + hw_state->pll10 = I915_READ(BXT_PORT_PLL(port, 10)); /* * While we write to the group register to program all lanes at once we * can read only lane registers. We configure all lanes the same way, so @@ -2468,6 +2510,7 @@ void intel_ddi_pll_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; uint32_t val = I915_READ(LCPLL_CTL); + int cdclk_freq; if (IS_SKYLAKE(dev)) skl_shared_dplls_init(dev_priv); @@ -2476,12 +2519,15 @@ void intel_ddi_pll_init(struct drm_device *dev) else hsw_shared_dplls_init(dev_priv); - DRM_DEBUG_KMS("CDCLK running at %dKHz\n", - dev_priv->display.get_display_clock_speed(dev)); + cdclk_freq = dev_priv->display.get_display_clock_speed(dev); + DRM_DEBUG_KMS("CDCLK running at %dKHz\n", cdclk_freq); if (IS_SKYLAKE(dev)) { + dev_priv->skl_boot_cdclk = cdclk_freq; if (!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE)) DRM_ERROR("LCPLL1 is disabled\n"); + else + intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS); } else if (IS_BROXTON(dev)) { broxton_init_cdclk(dev); broxton_ddi_phy_init(dev); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 240092afc392..4e3f302d86f7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -45,29 +45,33 @@ #include <drm/drm_rect.h> #include <linux/dma_remapping.h> -/* Primary plane formats supported by all gen */ -#define COMMON_PRIMARY_FORMATS \ - DRM_FORMAT_C8, \ - DRM_FORMAT_RGB565, \ - DRM_FORMAT_XRGB8888, \ - DRM_FORMAT_ARGB8888 - /* Primary plane formats for gen <= 3 */ -static const uint32_t intel_primary_formats_gen2[] = { - COMMON_PRIMARY_FORMATS, +static const uint32_t i8xx_primary_formats[] = { + DRM_FORMAT_C8, + DRM_FORMAT_RGB565, DRM_FORMAT_XRGB1555, - DRM_FORMAT_ARGB1555, + DRM_FORMAT_XRGB8888, }; /* Primary plane formats for gen >= 4 */ -static const uint32_t intel_primary_formats_gen4[] = { - COMMON_PRIMARY_FORMATS, \ +static const uint32_t i965_primary_formats[] = { + DRM_FORMAT_C8, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB8888, DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_XBGR2101010, +}; + +static const uint32_t skl_primary_formats[] = { + DRM_FORMAT_C8, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ARGB8888, DRM_FORMAT_ABGR8888, DRM_FORMAT_XRGB2101010, - DRM_FORMAT_ARGB2101010, DRM_FORMAT_XBGR2101010, - DRM_FORMAT_ABGR2101010, }; /* Cursor formats */ @@ -1136,9 +1140,9 @@ static void assert_dsi_pll(struct drm_i915_private *dev_priv, bool state) u32 val; bool cur_state; - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); val = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); cur_state = val & DSI_PLL_VCO_EN; I915_STATE_WARN(cur_state != state, @@ -1657,13 +1661,15 @@ static void chv_enable_pll(struct intel_crtc *crtc, BUG_ON(!IS_CHERRYVIEW(dev_priv->dev)); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); /* Enable back the 10bit clock to display controller */ tmp = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)); tmp |= DPIO_DCLKP_EN; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW14(port), tmp); + mutex_unlock(&dev_priv->sb_lock); + /* * Need to wait > 100ns between dclkp clock enable bit and PLL enable. */ @@ -1679,8 +1685,6 @@ static void chv_enable_pll(struct intel_crtc *crtc, /* not sure when this should be written */ I915_WRITE(DPLL_MD(pipe), pipe_config->dpll_hw_state.dpll_md); POSTING_READ(DPLL_MD(pipe)); - - mutex_unlock(&dev_priv->dpio_lock); } static int intel_num_dvo_pipes(struct drm_device *dev) @@ -1822,7 +1826,7 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) I915_WRITE(DPLL(pipe), val); POSTING_READ(DPLL(pipe)); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); /* Disable 10bit clock to display controller */ val = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)); @@ -1840,7 +1844,7 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe) vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW1_CH1, val); } - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } void vlv_wait_port_ready(struct drm_i915_private *dev_priv, @@ -2206,20 +2210,6 @@ static void intel_disable_pipe(struct intel_crtc *crtc) intel_wait_for_pipe_off(crtc); } -/* - * Plane regs are double buffered, going from enabled->disabled needs a - * trigger in order to latch. The display address reg provides this. - */ -void intel_flush_primary_plane(struct drm_i915_private *dev_priv, - enum plane plane) -{ - struct drm_device *dev = dev_priv->dev; - u32 reg = INTEL_INFO(dev)->gen >= 4 ? DSPSURF(plane) : DSPADDR(plane); - - I915_WRITE(reg, I915_READ(reg)); - POSTING_READ(reg); -} - /** * intel_enable_primary_hw_plane - enable the primary plane on a given pipe * @plane: plane to be enabled @@ -2702,26 +2692,21 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc, dspcntr |= DISPPLANE_8BPP; break; case DRM_FORMAT_XRGB1555: - case DRM_FORMAT_ARGB1555: dspcntr |= DISPPLANE_BGRX555; break; case DRM_FORMAT_RGB565: dspcntr |= DISPPLANE_BGRX565; break; case DRM_FORMAT_XRGB8888: - case DRM_FORMAT_ARGB8888: dspcntr |= DISPPLANE_BGRX888; break; case DRM_FORMAT_XBGR8888: - case DRM_FORMAT_ABGR8888: dspcntr |= DISPPLANE_RGBX888; break; case DRM_FORMAT_XRGB2101010: - case DRM_FORMAT_ARGB2101010: dspcntr |= DISPPLANE_BGRX101010; break; case DRM_FORMAT_XBGR2101010: - case DRM_FORMAT_ABGR2101010: dspcntr |= DISPPLANE_RGBX101010; break; default: @@ -2817,19 +2802,15 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc, dspcntr |= DISPPLANE_BGRX565; break; case DRM_FORMAT_XRGB8888: - case DRM_FORMAT_ARGB8888: dspcntr |= DISPPLANE_BGRX888; break; case DRM_FORMAT_XBGR8888: - case DRM_FORMAT_ABGR8888: dspcntr |= DISPPLANE_RGBX888; break; case DRM_FORMAT_XRGB2101010: - case DRM_FORMAT_ARGB2101010: dspcntr |= DISPPLANE_BGRX101010; break; case DRM_FORMAT_XBGR2101010: - case DRM_FORMAT_ABGR2101010: dspcntr |= DISPPLANE_RGBX101010; break; default: @@ -2953,95 +2934,83 @@ void skl_detach_scalers(struct intel_crtc *intel_crtc) u32 skl_plane_ctl_format(uint32_t pixel_format) { - u32 plane_ctl_format = 0; switch (pixel_format) { + case DRM_FORMAT_C8: + return PLANE_CTL_FORMAT_INDEXED; case DRM_FORMAT_RGB565: - plane_ctl_format = PLANE_CTL_FORMAT_RGB_565; - break; + return PLANE_CTL_FORMAT_RGB_565; case DRM_FORMAT_XBGR8888: - plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX; - break; + return PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX; case DRM_FORMAT_XRGB8888: - plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888; - break; + return PLANE_CTL_FORMAT_XRGB_8888; /* * XXX: For ARBG/ABGR formats we default to expecting scanout buffers * to be already pre-multiplied. We need to add a knob (or a different * DRM_FORMAT) for user-space to configure that. */ case DRM_FORMAT_ABGR8888: - plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX | + return PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ORDER_RGBX | PLANE_CTL_ALPHA_SW_PREMULTIPLY; - break; case DRM_FORMAT_ARGB8888: - plane_ctl_format = PLANE_CTL_FORMAT_XRGB_8888 | + return PLANE_CTL_FORMAT_XRGB_8888 | PLANE_CTL_ALPHA_SW_PREMULTIPLY; - break; case DRM_FORMAT_XRGB2101010: - plane_ctl_format = PLANE_CTL_FORMAT_XRGB_2101010; - break; + return PLANE_CTL_FORMAT_XRGB_2101010; case DRM_FORMAT_XBGR2101010: - plane_ctl_format = PLANE_CTL_ORDER_RGBX | PLANE_CTL_FORMAT_XRGB_2101010; - break; + return PLANE_CTL_ORDER_RGBX | PLANE_CTL_FORMAT_XRGB_2101010; case DRM_FORMAT_YUYV: - plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YUYV; - break; + return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YUYV; case DRM_FORMAT_YVYU: - plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YVYU; - break; + return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_YVYU; case DRM_FORMAT_UYVY: - plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_UYVY; - break; + return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_UYVY; case DRM_FORMAT_VYUY: - plane_ctl_format = PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_VYUY; - break; + return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_VYUY; default: - BUG(); + MISSING_CASE(pixel_format); } - return plane_ctl_format; + + return 0; } u32 skl_plane_ctl_tiling(uint64_t fb_modifier) { - u32 plane_ctl_tiling = 0; switch (fb_modifier) { case DRM_FORMAT_MOD_NONE: break; case I915_FORMAT_MOD_X_TILED: - plane_ctl_tiling = PLANE_CTL_TILED_X; - break; + return PLANE_CTL_TILED_X; case I915_FORMAT_MOD_Y_TILED: - plane_ctl_tiling = PLANE_CTL_TILED_Y; - break; + return PLANE_CTL_TILED_Y; case I915_FORMAT_MOD_Yf_TILED: - plane_ctl_tiling = PLANE_CTL_TILED_YF; - break; + return PLANE_CTL_TILED_YF; default: MISSING_CASE(fb_modifier); } - return plane_ctl_tiling; + + return 0; } u32 skl_plane_ctl_rotation(unsigned int rotation) { - u32 plane_ctl_rotation = 0; switch (rotation) { case BIT(DRM_ROTATE_0): break; + /* + * DRM_ROTATE_ is counter clockwise to stay compatible with Xrandr + * while i915 HW rotation is clockwise, thats why this swapping. + */ case BIT(DRM_ROTATE_90): - plane_ctl_rotation = PLANE_CTL_ROTATE_90; - break; + return PLANE_CTL_ROTATE_270; case BIT(DRM_ROTATE_180): - plane_ctl_rotation = PLANE_CTL_ROTATE_180; - break; + return PLANE_CTL_ROTATE_180; case BIT(DRM_ROTATE_270): - plane_ctl_rotation = PLANE_CTL_ROTATE_270; - break; + return PLANE_CTL_ROTATE_90; default: MISSING_CASE(rotation); } - return plane_ctl_rotation; + return 0; } static void skylake_update_primary_plane(struct drm_crtc *crtc, @@ -3115,7 +3084,7 @@ static void skylake_update_primary_plane(struct drm_crtc *crtc, if (intel_rotation_90_or_270(rotation)) { /* stride = Surface height in tiles */ - tile_height = intel_tile_height(dev, fb->bits_per_pixel, + tile_height = intel_tile_height(dev, fb->pixel_format, fb->modifier[0]); stride = DIV_ROUND_UP(fb->height, tile_height); x_offset = stride * tile_height - y - src_h; @@ -3295,27 +3264,30 @@ void intel_finish_reset(struct drm_device *dev) drm_modeset_unlock_all(dev); } -static int +static void intel_finish_fb(struct drm_framebuffer *old_fb) { struct drm_i915_gem_object *obj = intel_fb_obj(old_fb); - struct drm_i915_private *dev_priv = obj->base.dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); bool was_interruptible = dev_priv->mm.interruptible; int ret; /* Big Hammer, we also need to ensure that any pending * MI_WAIT_FOR_EVENT inside a user batch buffer on the * current scanout is retired before unpinning the old - * framebuffer. + * framebuffer. Note that we rely on userspace rendering + * into the buffer attached to the pipe they are waiting + * on. If not, userspace generates a GPU hang with IPEHR + * point to the MI_WAIT_FOR_EVENT. * * This should only fail upon a hung GPU, in which case we * can safely continue. */ dev_priv->mm.interruptible = false; - ret = i915_gem_object_finish_gpu(obj); + ret = i915_gem_object_wait_rendering(obj, true); dev_priv->mm.interruptible = was_interruptible; - return ret; + WARN_ON(ret); } static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) @@ -3967,7 +3939,7 @@ static void lpt_program_iclkip(struct drm_crtc *crtc) u32 divsel, phaseinc, auxdiv, phasedir = 0; u32 temp; - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); /* It is necessary to ungate the pixclk gate prior to programming * the divisors, and gate it back when it is done. @@ -4044,7 +4016,7 @@ static void lpt_program_iclkip(struct drm_crtc *crtc) I915_WRITE(PIXCLK_GATE, PIXCLK_GATE_UNGATE); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } static void ironlake_pch_transcoder_set_timings(struct intel_crtc *crtc, @@ -4182,8 +4154,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) temp &= ~(TRANS_DP_PORT_SEL_MASK | TRANS_DP_SYNC_MASK | TRANS_DP_BPC_MASK); - temp |= (TRANS_DP_OUTPUT_ENABLE | - TRANS_DP_ENH_FRAMING); + temp |= TRANS_DP_OUTPUT_ENABLE; temp |= bpc << 9; /* same format but at 11:9 */ if (crtc->mode.flags & DRM_MODE_FLAG_PHSYNC) @@ -4517,9 +4488,10 @@ skl_update_scaler_users( } /* check colorkey */ - if (intel_plane && intel_plane->ckey.flags != I915_SET_COLORKEY_NONE) { - DRM_DEBUG_KMS("PLANE:%d scaling with color key not allowed", - intel_plane->base.base.id); + if (WARN_ON(intel_plane && + intel_plane->ckey.flags != I915_SET_COLORKEY_NONE)) { + DRM_DEBUG_KMS("PLANE:%d scaling %ux%u->%ux%u not allowed with colorkey", + intel_plane->base.base.id, src_w, src_h, dst_w, dst_h); return -EINVAL; } @@ -4532,9 +4504,7 @@ skl_update_scaler_users( case DRM_FORMAT_ABGR8888: case DRM_FORMAT_ARGB8888: case DRM_FORMAT_XRGB2101010: - case DRM_FORMAT_ARGB2101010: case DRM_FORMAT_XBGR2101010: - case DRM_FORMAT_ABGR2101010: case DRM_FORMAT_YUYV: case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: @@ -4858,11 +4828,22 @@ intel_pre_disable_primary(struct drm_crtc *crtc) static void intel_crtc_enable_planes(struct drm_crtc *crtc) { + struct drm_device *dev = crtc->dev; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; + intel_enable_primary_hw_plane(crtc->primary, crtc); intel_enable_sprite_planes(crtc); intel_crtc_update_cursor(crtc, true); intel_post_enable_primary(crtc); + + /* + * FIXME: Once we grow proper nuclear flip support out of this we need + * to compute the mask of flip planes precisely. For the time being + * consider this a flip to a NULL plane. + */ + intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe)); } static void intel_crtc_disable_planes(struct drm_crtc *crtc) @@ -5128,13 +5109,14 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc) ironlake_pfit_disable(intel_crtc); + if (intel_crtc->config->has_pch_encoder) + ironlake_fdi_disable(crtc); + for_each_encoder_on_crtc(dev, crtc, encoder) if (encoder->post_disable) encoder->post_disable(encoder); if (intel_crtc->config->has_pch_encoder) { - ironlake_fdi_disable(crtc); - ironlake_disable_pch_transcoder(dev_priv, pipe); if (HAS_PCH_CPT(dev)) { @@ -5543,16 +5525,224 @@ void broxton_uninit_cdclk(struct drm_device *dev) intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); } +static const struct skl_cdclk_entry { + unsigned int freq; + unsigned int vco; +} skl_cdclk_frequencies[] = { + { .freq = 308570, .vco = 8640 }, + { .freq = 337500, .vco = 8100 }, + { .freq = 432000, .vco = 8640 }, + { .freq = 450000, .vco = 8100 }, + { .freq = 540000, .vco = 8100 }, + { .freq = 617140, .vco = 8640 }, + { .freq = 675000, .vco = 8100 }, +}; + +static unsigned int skl_cdclk_decimal(unsigned int freq) +{ + return (freq - 1000) / 500; +} + +static unsigned int skl_cdclk_get_vco(unsigned int freq) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(skl_cdclk_frequencies); i++) { + const struct skl_cdclk_entry *e = &skl_cdclk_frequencies[i]; + + if (e->freq == freq) + return e->vco; + } + + return 8100; +} + +static void +skl_dpll0_enable(struct drm_i915_private *dev_priv, unsigned int required_vco) +{ + unsigned int min_freq; + u32 val; + + /* select the minimum CDCLK before enabling DPLL 0 */ + val = I915_READ(CDCLK_CTL); + val &= ~CDCLK_FREQ_SEL_MASK | ~CDCLK_FREQ_DECIMAL_MASK; + val |= CDCLK_FREQ_337_308; + + if (required_vco == 8640) + min_freq = 308570; + else + min_freq = 337500; + + val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_freq); + + I915_WRITE(CDCLK_CTL, val); + POSTING_READ(CDCLK_CTL); + + /* + * We always enable DPLL0 with the lowest link rate possible, but still + * taking into account the VCO required to operate the eDP panel at the + * desired frequency. The usual DP link rates operate with a VCO of + * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640. + * The modeset code is responsible for the selection of the exact link + * rate later on, with the constraint of choosing a frequency that + * works with required_vco. + */ + val = I915_READ(DPLL_CTRL1); + + val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) | + DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); + val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0); + if (required_vco == 8640) + val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, + SKL_DPLL0); + else + val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, + SKL_DPLL0); + + I915_WRITE(DPLL_CTRL1, val); + POSTING_READ(DPLL_CTRL1); + + I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE); + + if (wait_for(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK, 5)) + DRM_ERROR("DPLL0 not locked\n"); +} + +static bool skl_cdclk_pcu_ready(struct drm_i915_private *dev_priv) +{ + int ret; + u32 val; + + /* inform PCU we want to change CDCLK */ + val = SKL_CDCLK_PREPARE_FOR_CHANGE; + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_read(dev_priv, SKL_PCODE_CDCLK_CONTROL, &val); + mutex_unlock(&dev_priv->rps.hw_lock); + + return ret == 0 && (val & SKL_CDCLK_READY_FOR_CHANGE); +} + +static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv) +{ + unsigned int i; + + for (i = 0; i < 15; i++) { + if (skl_cdclk_pcu_ready(dev_priv)) + return true; + udelay(10); + } + + return false; +} + +static void skl_set_cdclk(struct drm_i915_private *dev_priv, unsigned int freq) +{ + u32 freq_select, pcu_ack; + + DRM_DEBUG_DRIVER("Changing CDCLK to %dKHz\n", freq); + + if (!skl_cdclk_wait_for_pcu_ready(dev_priv)) { + DRM_ERROR("failed to inform PCU about cdclk change\n"); + return; + } + + /* set CDCLK_CTL */ + switch(freq) { + case 450000: + case 432000: + freq_select = CDCLK_FREQ_450_432; + pcu_ack = 1; + break; + case 540000: + freq_select = CDCLK_FREQ_540; + pcu_ack = 2; + break; + case 308570: + case 337500: + default: + freq_select = CDCLK_FREQ_337_308; + pcu_ack = 0; + break; + case 617140: + case 675000: + freq_select = CDCLK_FREQ_675_617; + pcu_ack = 3; + break; + } + + I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(freq)); + POSTING_READ(CDCLK_CTL); + + /* inform PCU of the change */ + mutex_lock(&dev_priv->rps.hw_lock); + sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); + mutex_unlock(&dev_priv->rps.hw_lock); +} + +void skl_uninit_cdclk(struct drm_i915_private *dev_priv) +{ + /* disable DBUF power */ + I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST); + POSTING_READ(DBUF_CTL); + + udelay(10); + + if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE) + DRM_ERROR("DBuf power disable timeout\n"); + + /* disable DPLL0 */ + I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE); + if (wait_for(!(I915_READ(LCPLL1_CTL) & LCPLL_PLL_LOCK), 1)) + DRM_ERROR("Couldn't disable DPLL0\n"); + + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); +} + +void skl_init_cdclk(struct drm_i915_private *dev_priv) +{ + u32 val; + unsigned int required_vco; + + /* enable PCH reset handshake */ + val = I915_READ(HSW_NDE_RSTWRN_OPT); + I915_WRITE(HSW_NDE_RSTWRN_OPT, val | RESET_PCH_HANDSHAKE_ENABLE); + + /* enable PG1 and Misc I/O */ + intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS); + + /* DPLL0 already enabed !? */ + if (I915_READ(LCPLL1_CTL) & LCPLL_PLL_ENABLE) { + DRM_DEBUG_DRIVER("DPLL0 already running\n"); + return; + } + + /* enable DPLL0 */ + required_vco = skl_cdclk_get_vco(dev_priv->skl_boot_cdclk); + skl_dpll0_enable(dev_priv, required_vco); + + /* set CDCLK to the frequency the BIOS chose */ + skl_set_cdclk(dev_priv, dev_priv->skl_boot_cdclk); + + /* enable DBUF power */ + I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST); + POSTING_READ(DBUF_CTL); + + udelay(10); + + if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE)) + DRM_ERROR("DBuf power enable timeout\n"); +} + /* returns HPLL frequency in kHz */ static int valleyview_get_vco(struct drm_i915_private *dev_priv) { int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; /* Obtain SKU information */ - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); hpll_freq = vlv_cck_read(dev_priv, CCK_FUSE_REG) & CCK_FUSE_HPLL_FREQ_MASK; - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); return vco_freq[hpll_freq] * 1000; } @@ -5601,12 +5791,13 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) } mutex_unlock(&dev_priv->rps.hw_lock); + mutex_lock(&dev_priv->sb_lock); + if (cdclk == 400000) { u32 divider; divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - mutex_lock(&dev_priv->dpio_lock); /* adjust cdclk divider */ val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); val &= ~DISPLAY_FREQUENCY_VALUES; @@ -5617,10 +5808,8 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) DISPLAY_FREQUENCY_STATUS) == (divider << DISPLAY_FREQUENCY_STATUS_SHIFT), 50)) DRM_ERROR("timed out waiting for CDclk change\n"); - mutex_unlock(&dev_priv->dpio_lock); } - mutex_lock(&dev_priv->dpio_lock); /* adjust self-refresh exit latency value */ val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC); val &= ~0x7f; @@ -5634,7 +5823,8 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) else val |= 3000 / 250; /* 3.0 usec */ vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); - mutex_unlock(&dev_priv->dpio_lock); + + mutex_unlock(&dev_priv->sb_lock); vlv_update_cdclk(dev); } @@ -6562,9 +6752,9 @@ static int valleyview_get_display_clock_speed(struct drm_device *dev) if (dev_priv->hpll_freq == 0) dev_priv->hpll_freq = valleyview_get_vco(dev_priv); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); divider = val & DISPLAY_FREQUENCY_VALUES; @@ -6906,7 +7096,7 @@ static void vlv_prepare_pll(struct intel_crtc *crtc, u32 bestn, bestm1, bestm2, bestp1, bestp2; u32 coreclk, reg_val; - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); bestn = pipe_config->dpll.n; bestm1 = pipe_config->dpll.m1; @@ -6984,7 +7174,7 @@ static void vlv_prepare_pll(struct intel_crtc *crtc, vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW7(pipe), coreclk); vlv_dpio_write(dev_priv, pipe, VLV_PLL_DW11(pipe), 0x87871000); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } static void chv_update_pll(struct intel_crtc *crtc, @@ -7029,7 +7219,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc, I915_WRITE(dpll_reg, pipe_config->dpll_hw_state.dpll & ~DPLL_VCO_ENABLE); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); /* p1 and p2 divider */ vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW13(port), @@ -7102,7 +7292,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc, vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW14(port)) | DPIO_AFC_RECAL); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } /** @@ -7477,6 +7667,9 @@ static int i9xx_crtc_compute_clock(struct intel_crtc *crtc, struct drm_connector_state *connector_state; int i; + memset(&crtc_state->dpll_hw_state, 0, + sizeof(crtc_state->dpll_hw_state)); + for_each_connector_in_state(state, connector, connector_state, i) { if (connector_state->crtc != &crtc->base) continue; @@ -7600,9 +7793,9 @@ static void vlv_crtc_clock_get(struct intel_crtc *crtc, if (!(pipe_config->dpll_hw_state.dpll & DPLL_VCO_ENABLE)) return; - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); mdiv = vlv_dpio_read(dev_priv, pipe, VLV_PLL_DW3(pipe)); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); clock.m1 = (mdiv >> DPIO_M1DIV_SHIFT) & 7; clock.m2 = mdiv & DPIO_M2DIV_MASK; @@ -7696,12 +7889,12 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2; int refclk = 100000; - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); cmn_dw13 = vlv_dpio_read(dev_priv, pipe, CHV_CMN_DW13(port)); pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port)); pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port)); pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port)); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0; clock.m2 = ((pll_dw0 & 0xff) << 22) | (pll_dw2 & 0x3fffff); @@ -8067,7 +8260,7 @@ static void lpt_enable_clkout_dp(struct drm_device *dev, bool with_spread, with_fdi, "LP PCH doesn't have FDI\n")) with_fdi = false; - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK); tmp &= ~SBI_SSCCTL_DISABLE; @@ -8093,7 +8286,7 @@ static void lpt_enable_clkout_dp(struct drm_device *dev, bool with_spread, tmp |= SBI_GEN0_CFG_BUFFENABLE_DISABLE; intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } /* Sequence to disable CLKOUT_DP */ @@ -8102,7 +8295,7 @@ static void lpt_disable_clkout_dp(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; uint32_t reg, tmp; - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); reg = (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) ? SBI_GEN0 : SBI_DBUFF0; @@ -8121,7 +8314,7 @@ static void lpt_disable_clkout_dp(struct drm_device *dev) intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK); } - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } static void lpt_init_pch_refclk(struct drm_device *dev) @@ -8518,6 +8711,9 @@ static int ironlake_crtc_compute_clock(struct intel_crtc *crtc, bool is_lvds = false; struct intel_shared_dpll *pll; + memset(&crtc_state->dpll_hw_state, 0, + sizeof(crtc_state->dpll_hw_state)); + is_lvds = intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS); WARN(!(HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)), @@ -9353,6 +9549,12 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, } pfit_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe); + + if (INTEL_INFO(dev)->gen >= 9) { + pipe_config->scaler_state.scaler_id = -1; + pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX); + } + if (intel_display_power_is_enabled(dev_priv, pfit_domain)) { if (INTEL_INFO(dev)->gen == 9) skylake_get_pfit_config(crtc, pipe_config); @@ -9360,10 +9562,6 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, ironlake_get_pfit_config(crtc, pipe_config); else MISSING_CASE(INTEL_INFO(dev)->gen); - - } else { - pipe_config->scaler_state.scaler_id = -1; - pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX); } if (IS_HASWELL(dev)) @@ -9868,7 +10066,7 @@ retry: goto fail; } - crtc_state->base.enable = true; + crtc_state->base.active = crtc_state->base.enable = true; if (!mode) mode = &load_detect_mode; @@ -9965,7 +10163,7 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, connector_state->best_encoder = NULL; connector_state->crtc = NULL; - crtc_state->base.enable = false; + crtc_state->base.enable = crtc_state->base.active = false; ret = intel_modeset_setup_plane_state(state, crtc, NULL, NULL, 0, 0); @@ -10690,7 +10888,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring, else if (i915.enable_execlists) return true; else - return ring != i915_gem_request_get_ring(obj->last_read_req); + return ring != i915_gem_request_get_ring(obj->last_write_req); } static void skl_do_mmio_flip(struct intel_crtc *intel_crtc) @@ -10790,22 +10988,19 @@ static void intel_do_mmio_flip(struct intel_crtc *intel_crtc) static void intel_mmio_flip_work_func(struct work_struct *work) { - struct intel_crtc *crtc = - container_of(work, struct intel_crtc, mmio_flip.work); - struct intel_mmio_flip *mmio_flip; + struct intel_mmio_flip *mmio_flip = + container_of(work, struct intel_mmio_flip, work); - mmio_flip = &crtc->mmio_flip; if (mmio_flip->req) WARN_ON(__i915_wait_request(mmio_flip->req, - crtc->reset_counter, - false, NULL, NULL) != 0); + mmio_flip->crtc->reset_counter, + false, NULL, + &mmio_flip->i915->rps.mmioflips)); - intel_do_mmio_flip(crtc); - if (mmio_flip->req) { - mutex_lock(&crtc->base.dev->struct_mutex); - i915_gem_request_assign(&mmio_flip->req, NULL); - mutex_unlock(&crtc->base.dev->struct_mutex); - } + intel_do_mmio_flip(mmio_flip->crtc); + + i915_gem_request_unreference__unlocked(mmio_flip->req); + kfree(mmio_flip); } static int intel_queue_mmio_flip(struct drm_device *dev, @@ -10815,12 +11010,18 @@ static int intel_queue_mmio_flip(struct drm_device *dev, struct intel_engine_cs *ring, uint32_t flags) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_mmio_flip *mmio_flip; + + mmio_flip = kmalloc(sizeof(*mmio_flip), GFP_KERNEL); + if (mmio_flip == NULL) + return -ENOMEM; - i915_gem_request_assign(&intel_crtc->mmio_flip.req, - obj->last_write_req); + mmio_flip->i915 = to_i915(dev); + mmio_flip->req = i915_gem_request_reference(obj->last_write_req); + mmio_flip->crtc = to_intel_crtc(crtc); - schedule_work(&intel_crtc->mmio_flip.work); + INIT_WORK(&mmio_flip->work, intel_mmio_flip_work_func); + schedule_work(&mmio_flip->work); return 0; } @@ -11005,7 +11206,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { ring = &dev_priv->ring[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { - ring = i915_gem_request_get_ring(obj->last_read_req); + ring = i915_gem_request_get_ring(obj->last_write_req); if (ring == NULL || ring->id != RCS) ring = &dev_priv->ring[BCS]; } else { @@ -11021,7 +11222,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, */ ret = intel_pin_and_fence_fb_obj(crtc->primary, fb, crtc->primary->state, - mmio_flip ? i915_gem_request_get_ring(obj->last_read_req) : ring); + mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring); if (ret) goto cleanup_pending; @@ -11037,6 +11238,12 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, i915_gem_request_assign(&work->flip_queued_req, obj->last_write_req); } else { + if (obj->last_write_req) { + ret = i915_gem_check_olr(obj->last_write_req); + if (ret) + goto cleanup_unpin; + } + ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, ring, page_flip_flags); if (ret) @@ -11303,9 +11510,10 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, DRM_DEBUG_KMS("port clock: %d\n", pipe_config->port_clock); DRM_DEBUG_KMS("pipe src size: %dx%d\n", pipe_config->pipe_src_w, pipe_config->pipe_src_h); - DRM_DEBUG_KMS("num_scalers: %d\n", crtc->num_scalers); - DRM_DEBUG_KMS("scaler_users: 0x%x\n", pipe_config->scaler_state.scaler_users); - DRM_DEBUG_KMS("scaler id: %d\n", pipe_config->scaler_state.scaler_id); + DRM_DEBUG_KMS("num_scalers: %d, scaler_users: 0x%x, scaler_id: %d\n", + crtc->num_scalers, + pipe_config->scaler_state.scaler_users, + pipe_config->scaler_state.scaler_id); DRM_DEBUG_KMS("gmch pfit: control: 0x%08x, ratios: 0x%08x, lvds border: 0x%08x\n", pipe_config->gmch_pfit.control, pipe_config->gmch_pfit.pgm_ratios, @@ -11317,6 +11525,39 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, DRM_DEBUG_KMS("ips: %i\n", pipe_config->ips_enabled); DRM_DEBUG_KMS("double wide: %i\n", pipe_config->double_wide); + if (IS_BROXTON(dev)) { + DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: ebb0: 0x%x, " + "pll0: 0x%x, pll1: 0x%x, pll2: 0x%x, pll3: 0x%x, " + "pll6: 0x%x, pll8: 0x%x, pcsdw12: 0x%x\n", + pipe_config->ddi_pll_sel, + pipe_config->dpll_hw_state.ebb0, + pipe_config->dpll_hw_state.pll0, + pipe_config->dpll_hw_state.pll1, + pipe_config->dpll_hw_state.pll2, + pipe_config->dpll_hw_state.pll3, + pipe_config->dpll_hw_state.pll6, + pipe_config->dpll_hw_state.pll8, + pipe_config->dpll_hw_state.pcsdw12); + } else if (IS_SKYLAKE(dev)) { + DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: " + "ctrl1: 0x%x, cfgcr1: 0x%x, cfgcr2: 0x%x\n", + pipe_config->ddi_pll_sel, + pipe_config->dpll_hw_state.ctrl1, + pipe_config->dpll_hw_state.cfgcr1, + pipe_config->dpll_hw_state.cfgcr2); + } else if (HAS_DDI(dev)) { + DRM_DEBUG_KMS("ddi_pll_sel: %u; dpll_hw_state: wrpll: 0x%x\n", + pipe_config->ddi_pll_sel, + pipe_config->dpll_hw_state.wrpll); + } else { + DRM_DEBUG_KMS("dpll_hw_state: dpll: 0x%x, dpll_md: 0x%x, " + "fp0: 0x%x, fp1: 0x%x\n", + pipe_config->dpll_hw_state.dpll, + pipe_config->dpll_hw_state.dpll_md, + pipe_config->dpll_hw_state.fp0, + pipe_config->dpll_hw_state.fp1); + } + DRM_DEBUG_KMS("planes on this crtc\n"); list_for_each_entry(plane, &dev->mode_config.plane_list, head) { intel_plane = to_intel_plane(plane); @@ -11454,12 +11695,18 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) struct intel_crtc_scaler_state scaler_state; struct intel_dpll_hw_state dpll_hw_state; enum intel_dpll_id shared_dpll; + uint32_t ddi_pll_sel; + + /* FIXME: before the switch to atomic started, a new pipe_config was + * kzalloc'd. Code that depends on any field being zero should be + * fixed, so that the crtc_state can be safely duplicated. For now, + * only fields that are know to not cause problems are preserved. */ - /* Clear only the intel specific part of the crtc state excluding scalers */ tmp_state = crtc_state->base; scaler_state = crtc_state->scaler_state; shared_dpll = crtc_state->shared_dpll; dpll_hw_state = crtc_state->dpll_hw_state; + ddi_pll_sel = crtc_state->ddi_pll_sel; memset(crtc_state, 0, sizeof *crtc_state); @@ -11467,6 +11714,7 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) crtc_state->scaler_state = scaler_state; crtc_state->shared_dpll = shared_dpll; crtc_state->dpll_hw_state = dpll_hw_state; + crtc_state->ddi_pll_sel = ddi_pll_sel; } static int @@ -12264,8 +12512,6 @@ static int __intel_set_mode_setup_plls(struct drm_atomic_state *state) if (needs_modeset(crtc_state)) { clear_pipes |= 1 << intel_crtc->pipe; intel_crtc_state->shared_dpll = DPLL_ID_PRIVATE; - memset(&intel_crtc_state->dpll_hw_state, 0, - sizeof(intel_crtc_state->dpll_hw_state)); } } @@ -12342,7 +12588,6 @@ static int __intel_set_mode(struct drm_crtc *modeset_crtc, continue; if (!crtc_state->enable) { - crtc_state->active = false; intel_crtc_disable(crtc); } else if (crtc->state->enable) { intel_crtc_disable_planes(crtc); @@ -12492,7 +12737,8 @@ void intel_crtc_restore_mode(struct drm_crtc *crtc) continue; } - crtc_state->base.enable = intel_crtc->new_enabled; + crtc_state->base.active = crtc_state->base.enable = + intel_crtc->new_enabled; if (&intel_crtc->base == crtc) drm_mode_copy(&crtc_state->base.mode, &crtc->mode); @@ -12617,11 +12863,16 @@ intel_modeset_stage_output_state(struct drm_device *dev, } for_each_crtc_in_state(state, crtc, crtc_state, i) { + bool has_connectors; + ret = drm_atomic_add_affected_connectors(state, crtc); if (ret) return ret; - crtc_state->enable = drm_atomic_connectors_for_crtc(state, crtc); + has_connectors = !!drm_atomic_connectors_for_crtc(state, crtc); + if (has_connectors != crtc_state->enable) + crtc_state->enable = + crtc_state->active = has_connectors; } ret = intel_modeset_setup_plane_state(state, set->crtc, set->mode, @@ -13008,8 +13259,11 @@ intel_check_primary_plane(struct drm_plane *plane, intel_atomic_get_crtc_state(state->base.state, intel_crtc) : NULL; if (INTEL_INFO(dev)->gen >= 9) { - min_scale = 1; - max_scale = skl_max_scale(intel_crtc, crtc_state); + /* use scaler when colorkey is not required */ + if (to_intel_plane(plane)->ckey.flags == I915_SET_COLORKEY_NONE) { + min_scale = 1; + max_scale = skl_max_scale(intel_crtc, crtc_state); + } can_position = true; } @@ -13251,8 +13505,8 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev, primary->max_downscale = 1; if (INTEL_INFO(dev)->gen >= 9) { primary->can_scale = true; + state->scaler_id = -1; } - state->scaler_id = -1; primary->pipe = pipe; primary->plane = pipe; primary->check_plane = intel_check_primary_plane; @@ -13262,12 +13516,15 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev, if (HAS_FBC(dev) && INTEL_INFO(dev)->gen < 4) primary->plane = !pipe; - if (INTEL_INFO(dev)->gen <= 3) { - intel_primary_formats = intel_primary_formats_gen2; - num_formats = ARRAY_SIZE(intel_primary_formats_gen2); + if (INTEL_INFO(dev)->gen >= 9) { + intel_primary_formats = skl_primary_formats; + num_formats = ARRAY_SIZE(skl_primary_formats); + } else if (INTEL_INFO(dev)->gen >= 4) { + intel_primary_formats = i965_primary_formats; + num_formats = ARRAY_SIZE(i965_primary_formats); } else { - intel_primary_formats = intel_primary_formats_gen4; - num_formats = ARRAY_SIZE(intel_primary_formats_gen4); + intel_primary_formats = i8xx_primary_formats; + num_formats = ARRAY_SIZE(i8xx_primary_formats); } drm_universal_plane_init(dev, &primary->base, 0, @@ -13434,7 +13691,6 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev, cursor->max_downscale = 1; cursor->pipe = pipe; cursor->plane = pipe; - state->scaler_id = -1; cursor->check_plane = intel_check_cursor_plane; cursor->commit_plane = intel_commit_cursor_plane; cursor->disable_plane = intel_disable_cursor_plane; @@ -13457,6 +13713,9 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev, state->base.rotation); } + if (INTEL_INFO(dev)->gen >=9) + state->scaler_id = -1; + drm_plane_helper_add(&cursor->base, &intel_plane_helper_funcs); return &cursor->base; @@ -13550,8 +13809,6 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = &intel_crtc->base; dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = &intel_crtc->base; - INIT_WORK(&intel_crtc->mmio_flip.work, intel_mmio_flip_work_func); - drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); WARN_ON(drm_crtc_index(&intel_crtc->base) != intel_crtc->pipe); @@ -13948,25 +14205,35 @@ static int intel_framebuffer_init(struct drm_device *dev, case DRM_FORMAT_ARGB8888: break; case DRM_FORMAT_XRGB1555: - case DRM_FORMAT_ARGB1555: if (INTEL_INFO(dev)->gen > 3) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format)); return -EINVAL; } break; - case DRM_FORMAT_XBGR8888: case DRM_FORMAT_ABGR8888: + if (!IS_VALLEYVIEW(dev) && INTEL_INFO(dev)->gen < 9) { + DRM_DEBUG("unsupported pixel format: %s\n", + drm_get_format_name(mode_cmd->pixel_format)); + return -EINVAL; + } + break; + case DRM_FORMAT_XBGR8888: case DRM_FORMAT_XRGB2101010: - case DRM_FORMAT_ARGB2101010: case DRM_FORMAT_XBGR2101010: - case DRM_FORMAT_ABGR2101010: if (INTEL_INFO(dev)->gen < 4) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format)); return -EINVAL; } break; + case DRM_FORMAT_ABGR2101010: + if (!IS_VALLEYVIEW(dev)) { + DRM_DEBUG("unsupported pixel format: %s\n", + drm_get_format_name(mode_cmd->pixel_format)); + return -EINVAL; + } + break; case DRM_FORMAT_YUYV: case DRM_FORMAT_UYVY: case DRM_FORMAT_YVYU: @@ -14595,6 +14862,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) WARN_ON(crtc->active); crtc->base.state->enable = false; + crtc->base.state->active = false; crtc->base.enabled = false; } @@ -14623,6 +14891,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) crtc->active ? "enabled" : "disabled"); crtc->base.state->enable = crtc->active; + crtc->base.state->active = crtc->active; crtc->base.enabled = crtc->active; /* Because we only establish the connector -> encoder -> @@ -14761,6 +15030,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) crtc->config); crtc->base.state->enable = crtc->active; + crtc->base.state->active = crtc->active; crtc->base.enabled = crtc->active; plane_state = to_intel_plane_state(primary->state); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index eca82cff40b9..280c282da9bd 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1097,6 +1097,9 @@ skl_edp_set_pll_config(struct intel_crtc_state *pipe_config, int link_clock) { u32 ctrl1; + memset(&pipe_config->dpll_hw_state, 0, + sizeof(pipe_config->dpll_hw_state)); + pipe_config->ddi_pll_sel = SKL_DPLL0; pipe_config->dpll_hw_state.cfgcr1 = 0; pipe_config->dpll_hw_state.cfgcr2 = 0; @@ -1266,7 +1269,7 @@ static void snprintf_int_array(char *str, size_t len, str[0] = '\0'; for (i = 0; i < nelem; i++) { - int r = snprintf(str, len, "%d,", array[i]); + int r = snprintf(str, len, "%s%d", i ? ", " : "", array[i]); if (r >= len) return; str += r; @@ -1567,7 +1570,7 @@ static void intel_dp_prepare(struct intel_encoder *encoder) /* Split out the IBX/CPU vs CPT settings */ - if (port == PORT_A && IS_GEN7(dev) && !IS_VALLEYVIEW(dev)) { + if (IS_GEN7(dev) && port == PORT_A) { if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC) intel_dp->DP |= DP_SYNC_HS_HIGH; if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC) @@ -1578,7 +1581,18 @@ static void intel_dp_prepare(struct intel_encoder *encoder) intel_dp->DP |= DP_ENHANCED_FRAMING; intel_dp->DP |= crtc->pipe << 29; - } else if (!HAS_PCH_CPT(dev) || port == PORT_A) { + } else if (HAS_PCH_CPT(dev) && port != PORT_A) { + u32 trans_dp; + + intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT; + + trans_dp = I915_READ(TRANS_DP_CTL(crtc->pipe)); + if (drm_dp_enhanced_frame_cap(intel_dp->dpcd)) + trans_dp |= TRANS_DP_ENH_FRAMING; + else + trans_dp &= ~TRANS_DP_ENH_FRAMING; + I915_WRITE(TRANS_DP_CTL(crtc->pipe), trans_dp); + } else { if (!HAS_PCH_SPLIT(dev) && !IS_VALLEYVIEW(dev)) intel_dp->DP |= intel_dp->color_range; @@ -1591,14 +1605,10 @@ static void intel_dp_prepare(struct intel_encoder *encoder) if (drm_dp_enhanced_frame_cap(intel_dp->dpcd)) intel_dp->DP |= DP_ENHANCED_FRAMING; - if (!IS_CHERRYVIEW(dev)) { - if (crtc->pipe == 1) - intel_dp->DP |= DP_PIPEB_SELECT; - } else { + if (IS_CHERRYVIEW(dev)) intel_dp->DP |= DP_PIPE_SELECT_CHV(crtc->pipe); - } - } else { - intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT; + else if (crtc->pipe == PIPE_B) + intel_dp->DP |= DP_PIPEB_SELECT; } } @@ -2182,41 +2192,25 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, if (!(tmp & DP_PORT_EN)) return false; - if (port == PORT_A && IS_GEN7(dev) && !IS_VALLEYVIEW(dev)) { + if (IS_GEN7(dev) && port == PORT_A) { *pipe = PORT_TO_PIPE_CPT(tmp); - } else if (IS_CHERRYVIEW(dev)) { - *pipe = DP_PORT_TO_PIPE_CHV(tmp); - } else if (!HAS_PCH_CPT(dev) || port == PORT_A) { - *pipe = PORT_TO_PIPE(tmp); - } else { - u32 trans_sel; - u32 trans_dp; - int i; - - switch (intel_dp->output_reg) { - case PCH_DP_B: - trans_sel = TRANS_DP_PORT_SEL_B; - break; - case PCH_DP_C: - trans_sel = TRANS_DP_PORT_SEL_C; - break; - case PCH_DP_D: - trans_sel = TRANS_DP_PORT_SEL_D; - break; - default: - return true; - } + } else if (HAS_PCH_CPT(dev) && port != PORT_A) { + enum pipe p; - for_each_pipe(dev_priv, i) { - trans_dp = I915_READ(TRANS_DP_CTL(i)); - if ((trans_dp & TRANS_DP_PORT_SEL_MASK) == trans_sel) { - *pipe = i; + for_each_pipe(dev_priv, p) { + u32 trans_dp = I915_READ(TRANS_DP_CTL(p)); + if (TRANS_DP_PIPE_TO_PORT(trans_dp) == port) { + *pipe = p; return true; } } DRM_DEBUG_KMS("No pipe for dp port 0x%x found\n", intel_dp->output_reg); + } else if (IS_CHERRYVIEW(dev)) { + *pipe = DP_PORT_TO_PIPE_CHV(tmp); + } else { + *pipe = PORT_TO_PIPE(tmp); } return true; @@ -2237,24 +2231,24 @@ static void intel_dp_get_config(struct intel_encoder *encoder, pipe_config->has_audio = tmp & DP_AUDIO_OUTPUT_ENABLE && port != PORT_A; - if ((port == PORT_A) || !HAS_PCH_CPT(dev)) { - if (tmp & DP_SYNC_HS_HIGH) + if (HAS_PCH_CPT(dev) && port != PORT_A) { + tmp = I915_READ(TRANS_DP_CTL(crtc->pipe)); + if (tmp & TRANS_DP_HSYNC_ACTIVE_HIGH) flags |= DRM_MODE_FLAG_PHSYNC; else flags |= DRM_MODE_FLAG_NHSYNC; - if (tmp & DP_SYNC_VS_HIGH) + if (tmp & TRANS_DP_VSYNC_ACTIVE_HIGH) flags |= DRM_MODE_FLAG_PVSYNC; else flags |= DRM_MODE_FLAG_NVSYNC; } else { - tmp = I915_READ(TRANS_DP_CTL(crtc->pipe)); - if (tmp & TRANS_DP_HSYNC_ACTIVE_HIGH) + if (tmp & DP_SYNC_HS_HIGH) flags |= DRM_MODE_FLAG_PHSYNC; else flags |= DRM_MODE_FLAG_NHSYNC; - if (tmp & TRANS_DP_VSYNC_ACTIVE_HIGH) + if (tmp & DP_SYNC_VS_HIGH) flags |= DRM_MODE_FLAG_PVSYNC; else flags |= DRM_MODE_FLAG_NVSYNC; @@ -2361,7 +2355,7 @@ static void chv_post_disable_dp(struct intel_encoder *encoder) intel_dp_link_down(intel_dp); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); /* Propagate soft reset to data lane reset */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch)); @@ -2380,7 +2374,7 @@ static void chv_post_disable_dp(struct intel_encoder *encoder) val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } static void @@ -2419,7 +2413,8 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, } I915_WRITE(DP_TP_CTL(port), temp); - } else if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || port != PORT_A)) { + } else if ((IS_GEN7(dev) && port == PORT_A) || + (HAS_PCH_CPT(dev) && port != PORT_A)) { *DP &= ~DP_LINK_TRAIN_MASK_CPT; switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) { @@ -2676,7 +2671,7 @@ static void vlv_pre_enable_dp(struct intel_encoder *encoder) int pipe = intel_crtc->pipe; u32 val; - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port)); val = 0; @@ -2689,7 +2684,7 @@ static void vlv_pre_enable_dp(struct intel_encoder *encoder) vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); intel_enable_dp(encoder); } @@ -2707,7 +2702,7 @@ static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder) intel_dp_prepare(encoder); /* Program Tx lane resets to default */ - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); @@ -2721,7 +2716,7 @@ static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder) vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW12(port), 0x00750f00); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW11(port), 0x00001500); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW14(port), 0x40400000); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } static void chv_pre_enable_dp(struct intel_encoder *encoder) @@ -2737,7 +2732,7 @@ static void chv_pre_enable_dp(struct intel_encoder *encoder) int data, i, stagger; u32 val; - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); /* allow hardware to manage TX FIFO reset source */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch)); @@ -2807,7 +2802,7 @@ static void chv_pre_enable_dp(struct intel_encoder *encoder) DPIO_TX1_STAGGER_MULT(7) | DPIO_TX2_STAGGER_MULT(5)); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); intel_enable_dp(encoder); } @@ -2825,7 +2820,7 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder) intel_dp_prepare(encoder); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); /* program left/right clock distribution */ if (pipe != PIPE_B) { @@ -2875,7 +2870,7 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder) val |= CHV_CMN_USEDCLKCHANNEL; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } /* @@ -3100,7 +3095,7 @@ static uint32_t vlv_signal_levels(struct intel_dp *intel_dp) return 0; } - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x00000000); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW4(port), demph_reg_value); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW2(port), @@ -3109,7 +3104,7 @@ static uint32_t vlv_signal_levels(struct intel_dp *intel_dp) vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW11(port), 0x00030000); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), preemph_reg_value); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), 0x80000000); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); return 0; } @@ -3196,7 +3191,7 @@ static uint32_t chv_signal_levels(struct intel_dp *intel_dp) return 0; } - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); /* Clear calc init */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW10(ch)); @@ -3283,7 +3278,7 @@ static uint32_t chv_signal_levels(struct intel_dp *intel_dp) val |= DPIO_LRC_BYPASS; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, val); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); return 0; } @@ -3848,6 +3843,7 @@ static void intel_dp_link_down(struct intel_dp *intel_dp) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct intel_crtc *crtc = to_intel_crtc(intel_dig_port->base.base.crtc); enum port port = intel_dig_port->port; struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -3861,36 +3857,41 @@ intel_dp_link_down(struct intel_dp *intel_dp) DRM_DEBUG_KMS("\n"); - if (HAS_PCH_CPT(dev) && (IS_GEN7(dev) || port != PORT_A)) { + if ((IS_GEN7(dev) && port == PORT_A) || + (HAS_PCH_CPT(dev) && port != PORT_A)) { DP &= ~DP_LINK_TRAIN_MASK_CPT; - I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE_CPT); + DP |= DP_LINK_TRAIN_PAT_IDLE_CPT; } else { if (IS_CHERRYVIEW(dev)) DP &= ~DP_LINK_TRAIN_MASK_CHV; else DP &= ~DP_LINK_TRAIN_MASK; - I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE); + DP |= DP_LINK_TRAIN_PAT_IDLE; } + I915_WRITE(intel_dp->output_reg, DP); POSTING_READ(intel_dp->output_reg); - if (HAS_PCH_IBX(dev) && - I915_READ(intel_dp->output_reg) & DP_PIPEB_SELECT) { - /* Hardware workaround: leaving our transcoder select - * set to transcoder B while it's off will prevent the - * corresponding HDMI output on transcoder A. - * - * Combine this with another hardware workaround: - * transcoder select bit can only be cleared while the - * port is enabled. - */ - DP &= ~DP_PIPEB_SELECT; + DP &= ~(DP_PORT_EN | DP_AUDIO_OUTPUT_ENABLE); + I915_WRITE(intel_dp->output_reg, DP); + POSTING_READ(intel_dp->output_reg); + + /* + * HW workaround for IBX, we need to move the port + * to transcoder A after disabling it to allow the + * matching HDMI port to be enabled on transcoder A. + */ + if (HAS_PCH_IBX(dev) && crtc->pipe == PIPE_B && port != PORT_A) { + /* always enable with pattern 1 (as per spec) */ + DP &= ~(DP_PIPEB_SELECT | DP_LINK_TRAIN_MASK); + DP |= DP_PORT_EN | DP_LINK_TRAIN_PAT_1; + I915_WRITE(intel_dp->output_reg, DP); + POSTING_READ(intel_dp->output_reg); + + DP &= ~DP_PORT_EN; I915_WRITE(intel_dp->output_reg, DP); POSTING_READ(intel_dp->output_reg); } - DP &= ~DP_AUDIO_OUTPUT_ENABLE; - I915_WRITE(intel_dp->output_reg, DP & ~DP_PORT_EN); - POSTING_READ(intel_dp->output_reg); msleep(intel_dp->panel_power_down_delay); } @@ -4040,46 +4041,70 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc) u8 buf; int test_crc_count; int attempts = 6; + int ret = 0; - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) - return -EIO; + hsw_disable_ips(intel_crtc); + + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) { + ret = -EIO; + goto out; + } - if (!(buf & DP_TEST_CRC_SUPPORTED)) - return -ENOTTY; + if (!(buf & DP_TEST_CRC_SUPPORTED)) { + ret = -ENOTTY; + goto out; + } - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) - return -EIO; + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) { + ret = -EIO; + goto out; + } if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK, - buf | DP_TEST_SINK_START) < 0) - return -EIO; + buf | DP_TEST_SINK_START) < 0) { + ret = -EIO; + goto out; + } + + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) { + ret = -EIO; + goto out; + } - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) - return -EIO; test_crc_count = buf & DP_TEST_COUNT_MASK; do { if (drm_dp_dpcd_readb(&intel_dp->aux, - DP_TEST_SINK_MISC, &buf) < 0) - return -EIO; + DP_TEST_SINK_MISC, &buf) < 0) { + ret = -EIO; + goto out; + } intel_wait_for_vblank(dev, intel_crtc->pipe); } while (--attempts && (buf & DP_TEST_COUNT_MASK) == test_crc_count); if (attempts == 0) { DRM_DEBUG_KMS("Panel is unable to calculate CRC after 6 vblanks\n"); - return -ETIMEDOUT; + ret = -ETIMEDOUT; + goto out; } - if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) - return -EIO; + if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0) { + ret = -EIO; + goto out; + } - if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) - return -EIO; + if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK, &buf) < 0) { + ret = -EIO; + goto out; + } if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK, - buf & ~DP_TEST_SINK_START) < 0) - return -EIO; - - return 0; + buf & ~DP_TEST_SINK_START) < 0) { + ret = -EIO; + goto out; + } +out: + hsw_enable_ips(intel_crtc); + return ret; } static bool @@ -4142,7 +4167,7 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) if (!drm_dp_dpcd_write(&intel_dp->aux, DP_TEST_EDID_CHECKSUM, &intel_connector->detect_edid->checksum, - 1)); + 1)) DRM_DEBUG_KMS("Failed to write EDID checksum\n"); test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE; @@ -5814,12 +5839,10 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dp_aux_init(intel_dp, intel_connector); /* init MST on ports that can support it */ - if (IS_HASWELL(dev) || IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9) { - if (port == PORT_B || port == PORT_C || port == PORT_D) { - intel_dp_mst_encoder_init(intel_dig_port, - intel_connector->base.base.id); - } - } + if (HAS_DP_MST(dev) && + (port == PORT_B || port == PORT_C || port == PORT_D)) + intel_dp_mst_encoder_init(intel_dig_port, + intel_connector->base.base.id); if (!intel_edp_init_connector(intel_dp, intel_connector)) { drm_dp_aux_unregister(&intel_dp->aux); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index ea3368e83626..2afb31a46275 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -459,8 +459,10 @@ struct intel_pipe_wm { }; struct intel_mmio_flip { - struct drm_i915_gem_request *req; struct work_struct work; + struct drm_i915_private *i915; + struct drm_i915_gem_request *req; + struct intel_crtc *crtc; }; struct skl_pipe_wm { @@ -544,7 +546,6 @@ struct intel_crtc { } wm; int scanline_offset; - struct intel_mmio_flip mmio_flip; struct intel_crtc_atomic_commit atomic; @@ -555,7 +556,15 @@ struct intel_crtc { struct intel_plane_wm_parameters { uint32_t horiz_pixels; uint32_t vert_pixels; + /* + * For packed pixel formats: + * bytes_per_pixel - holds bytes per pixel + * For planar pixel formats: + * bytes_per_pixel - holds bytes per pixel for uv-plane + * y_bytes_per_pixel - holds bytes per pixel for y-plane + */ uint8_t bytes_per_pixel; + uint8_t y_bytes_per_pixel; bool enabled; bool scaled; u64 tiling; @@ -1059,9 +1068,6 @@ intel_rotation_90_or_270(unsigned int rotation) return rotation & (BIT(DRM_ROTATE_90) | BIT(DRM_ROTATE_270)); } -unsigned int -intel_tile_height(struct drm_device *dev, uint32_t bits_per_pixel, - uint64_t fb_modifier); void intel_create_rotation_property(struct drm_device *dev, struct intel_plane *plane); @@ -1112,6 +1118,8 @@ void broxton_ddi_phy_init(struct drm_device *dev); void broxton_ddi_phy_uninit(struct drm_device *dev); void bxt_enable_dc9(struct drm_i915_private *dev_priv); void bxt_disable_dc9(struct drm_i915_private *dev_priv); +void skl_init_cdclk(struct drm_i915_private *dev_priv); +void skl_uninit_cdclk(struct drm_i915_private *dev_priv); void intel_dp_get_m_n(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); void intel_dp_set_m_n(struct intel_crtc *crtc, enum link_m_n_set m_n); @@ -1359,9 +1367,10 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); void gen6_rps_boost(struct drm_i915_private *dev_priv, - struct drm_i915_file_private *file_priv); + struct intel_rps_client *rps, + unsigned long submitted); void intel_queue_rps_boost_for_request(struct drm_device *dev, - struct drm_i915_gem_request *rq); + struct drm_i915_gem_request *req); void ilk_wm_get_hw_state(struct drm_device *dev); void skl_wm_get_hw_state(struct drm_device *dev); void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, @@ -1374,8 +1383,6 @@ bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob); /* intel_sprite.c */ int intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane); -void intel_flush_primary_plane(struct drm_i915_private *dev_priv, - enum plane plane); int intel_plane_restore(struct drm_plane *plane); int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 51966426addf..b5a5558ecd63 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -239,7 +239,7 @@ static int dpi_send_cmd(struct intel_dsi *intel_dsi, u32 cmd, bool hs, static void band_gap_reset(struct drm_i915_private *dev_priv) { - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); vlv_flisdsi_write(dev_priv, 0x08, 0x0001); vlv_flisdsi_write(dev_priv, 0x0F, 0x0005); @@ -248,7 +248,7 @@ static void band_gap_reset(struct drm_i915_private *dev_priv) vlv_flisdsi_write(dev_priv, 0x0F, 0x0000); vlv_flisdsi_write(dev_priv, 0x08, 0x0000); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } static inline bool is_vid_mode(struct intel_dsi *intel_dsi) @@ -346,11 +346,11 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder) DRM_DEBUG_KMS("\n"); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); /* program rcomp for compliance, reduce from 50 ohms to 45 ohms * needed everytime after power gate */ vlv_flisdsi_write(dev_priv, 0x04, 0x0004); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); /* bandgap reset is needed after everytime we do power gate */ band_gap_reset(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index d2cd8d5b27a1..a5e99ac305da 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -212,7 +212,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) function = gtable[gpio].function_reg; pad = gtable[gpio].pad_reg; - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); if (!gtable[gpio].init) { /* program the function */ /* FIXME: remove constant below */ @@ -224,7 +224,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) /* pull up/down */ vlv_gpio_nc_write(dev_priv, pad, val); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); return data; } diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 3622d0bafdf8..d20cf37b6901 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -162,59 +162,41 @@ static u32 dsi_clk_from_pclk(u32 pclk, int pixel_format, int lane_count) #endif -static int dsi_calc_mnp(u32 dsi_clk, struct dsi_mnp *dsi_mnp) +static int dsi_calc_mnp(int target_dsi_clk, struct dsi_mnp *dsi_mnp) { - u32 m, n, p; - u32 ref_clk; - u32 error; - u32 tmp_error; - int target_dsi_clk; - int calc_dsi_clk; - u32 calc_m; - u32 calc_p; + unsigned int calc_m = 0, calc_p = 0; + unsigned int m, n = 1, p; + int ref_clk = 25000; + int delta = target_dsi_clk; u32 m_seed; - /* dsi_clk is expected in KHZ */ - if (dsi_clk < 300000 || dsi_clk > 1150000) { + /* target_dsi_clk is expected in kHz */ + if (target_dsi_clk < 300000 || target_dsi_clk > 1150000) { DRM_ERROR("DSI CLK Out of Range\n"); return -ECHRNG; } - ref_clk = 25000; - target_dsi_clk = dsi_clk; - error = 0xFFFFFFFF; - tmp_error = 0xFFFFFFFF; - calc_m = 0; - calc_p = 0; - - for (m = 62; m <= 92; m++) { - for (p = 2; p <= 6; p++) { - /* Find the optimal m and p divisors - with minimal error +/- the required clock */ - calc_dsi_clk = (m * ref_clk) / p; - if (calc_dsi_clk == target_dsi_clk) { - calc_m = m; - calc_p = p; - error = 0; - break; - } else - tmp_error = abs(target_dsi_clk - calc_dsi_clk); - - if (tmp_error < error) { - error = tmp_error; + for (m = 62; m <= 92 && delta; m++) { + for (p = 2; p <= 6 && delta; p++) { + /* + * Find the optimal m and p divisors with minimal delta + * +/- the required clock + */ + int calc_dsi_clk = (m * ref_clk) / (p * n); + int d = abs(target_dsi_clk - calc_dsi_clk); + if (d < delta) { + delta = d; calc_m = m; calc_p = p; } } - - if (error == 0) - break; } + /* register has log2(N1), this works fine for powers of two */ + n = ffs(n) - 1; m_seed = lfsr_converts[calc_m - 62]; - n = 1; dsi_mnp->dsi_pll_ctrl = 1 << (DSI_PLL_P1_POST_DIV_SHIFT + calc_p - 2); - dsi_mnp->dsi_pll_div = (n - 1) << DSI_PLL_N1_DIV_SHIFT | + dsi_mnp->dsi_pll_div = n << DSI_PLL_N1_DIV_SHIFT | m_seed << DSI_PLL_M1_DIV_SHIFT; return 0; @@ -262,7 +244,7 @@ void vlv_enable_dsi_pll(struct intel_encoder *encoder) DRM_DEBUG_KMS("\n"); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); vlv_configure_dsi_pll(encoder); @@ -276,11 +258,11 @@ void vlv_enable_dsi_pll(struct intel_encoder *encoder) if (wait_for(vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL) & DSI_PLL_LOCK, 20)) { - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); DRM_ERROR("DSI PLL lock failed\n"); return; } - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); DRM_DEBUG_KMS("DSI PLL locked\n"); } @@ -292,14 +274,14 @@ void vlv_disable_dsi_pll(struct intel_encoder *encoder) DRM_DEBUG_KMS("\n"); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); tmp = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL); tmp &= ~DSI_PLL_VCO_EN; tmp |= DSI_PLL_LDO_GATE; vlv_cck_write(dev_priv, CCK_REG_DSI_PLL_CONTROL, tmp); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } static void assert_bpp_mismatch(int pixel_format, int pipe_bpp) @@ -331,21 +313,25 @@ u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp) struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); u32 dsi_clock, pclk; u32 pll_ctl, pll_div; - u32 m = 0, p = 0; + u32 m = 0, p = 0, n; int refclk = 25000; int i; DRM_DEBUG_KMS("\n"); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); pll_ctl = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_CONTROL); pll_div = vlv_cck_read(dev_priv, CCK_REG_DSI_PLL_DIVIDER); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); /* mask out other bits and extract the P1 divisor */ pll_ctl &= DSI_PLL_P1_POST_DIV_MASK; pll_ctl = pll_ctl >> (DSI_PLL_P1_POST_DIV_SHIFT - 2); + /* N1 divisor */ + n = (pll_div & DSI_PLL_N1_DIV_MASK) >> DSI_PLL_N1_DIV_SHIFT; + n = 1 << n; /* register has log2(N1) */ + /* mask out the other bits and extract the M1 divisor */ pll_div &= DSI_PLL_M1_DIV_MASK; pll_div = pll_div >> DSI_PLL_M1_DIV_SHIFT; @@ -373,7 +359,7 @@ u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp) m = i + 62; - dsi_clock = (m * refclk) / p; + dsi_clock = (m * refclk) / (p * n); /* pixel_format and pipe_bpp should agree */ assert_bpp_mismatch(intel_dsi->pixel_format, pipe_bpp); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 4e7e7da2e03b..6372cfc7d053 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -96,6 +96,32 @@ static int intel_fbdev_blank(int blank, struct fb_info *info) return ret; } +static int intel_fbdev_pan_display(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct intel_fbdev *ifbdev = + container_of(fb_helper, struct intel_fbdev, helper); + + int ret; + ret = drm_fb_helper_pan_display(var, info); + + if (ret == 0) { + /* + * FIXME: fbdev presumes that all callbacks also work from + * atomic contexts and relies on that for emergency oops + * printing. KMS totally doesn't do that and the locking here is + * by far not the only place this goes wrong. Ignore this for + * now until we solve this for real. + */ + mutex_lock(&fb_helper->dev->struct_mutex); + intel_fb_obj_invalidate(ifbdev->fb->obj, NULL, ORIGIN_GTT); + mutex_unlock(&fb_helper->dev->struct_mutex); + } + + return ret; +} + static struct fb_ops intelfb_ops = { .owner = THIS_MODULE, .fb_check_var = drm_fb_helper_check_var, @@ -103,7 +129,7 @@ static struct fb_ops intelfb_ops = { .fb_fillrect = cfb_fillrect, .fb_copyarea = cfb_copyarea, .fb_imageblit = cfb_imageblit, - .fb_pan_display = drm_fb_helper_pan_display, + .fb_pan_display = intel_fbdev_pan_display, .fb_blank = intel_fbdev_blank, .fb_setcmap = drm_fb_helper_setcmap, .fb_debug_enter = drm_fb_helper_debug_enter, diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index d04e6dc97fe5..e97731aab6dc 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -873,59 +873,59 @@ static void intel_disable_hdmi(struct intel_encoder *encoder) struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); u32 temp; - u32 enable_bits = SDVO_ENABLE | SDVO_AUDIO_ENABLE; - - if (crtc->config->has_audio) - intel_audio_codec_disable(encoder); temp = I915_READ(intel_hdmi->hdmi_reg); - /* HW workaround for IBX, we need to move the port to transcoder A - * before disabling it. */ - if (HAS_PCH_IBX(dev)) { - struct drm_crtc *crtc = encoder->base.crtc; - int pipe = crtc ? to_intel_crtc(crtc)->pipe : -1; - - if (temp & SDVO_PIPE_B_SELECT) { - temp &= ~SDVO_PIPE_B_SELECT; - I915_WRITE(intel_hdmi->hdmi_reg, temp); - POSTING_READ(intel_hdmi->hdmi_reg); - - /* Again we need to write this twice. */ - I915_WRITE(intel_hdmi->hdmi_reg, temp); - POSTING_READ(intel_hdmi->hdmi_reg); - - /* Transcoder selection bits only update - * effectively on vblank. */ - if (crtc) - intel_wait_for_vblank(dev, pipe); - else - msleep(50); - } - } - - /* HW workaround, need to toggle enable bit off and on for 12bpc, but - * we do this anyway which shows more stable in testing. - */ - if (HAS_PCH_SPLIT(dev)) { - I915_WRITE(intel_hdmi->hdmi_reg, temp & ~SDVO_ENABLE); - POSTING_READ(intel_hdmi->hdmi_reg); - } - - temp &= ~enable_bits; - + temp &= ~(SDVO_ENABLE | SDVO_AUDIO_ENABLE); I915_WRITE(intel_hdmi->hdmi_reg, temp); POSTING_READ(intel_hdmi->hdmi_reg); - /* HW workaround, need to write this twice for issue that may result - * in first write getting masked. + /* + * HW workaround for IBX, we need to move the port + * to transcoder A after disabling it to allow the + * matching DP port to be enabled on transcoder A. */ - if (HAS_PCH_SPLIT(dev)) { + if (HAS_PCH_IBX(dev) && crtc->pipe == PIPE_B) { + temp &= ~SDVO_PIPE_B_SELECT; + temp |= SDVO_ENABLE; + /* + * HW workaround, need to write this twice for issue + * that may result in first write getting masked. + */ + I915_WRITE(intel_hdmi->hdmi_reg, temp); + POSTING_READ(intel_hdmi->hdmi_reg); + I915_WRITE(intel_hdmi->hdmi_reg, temp); + POSTING_READ(intel_hdmi->hdmi_reg); + + temp &= ~SDVO_ENABLE; I915_WRITE(intel_hdmi->hdmi_reg, temp); POSTING_READ(intel_hdmi->hdmi_reg); } } +static void g4x_disable_hdmi(struct intel_encoder *encoder) +{ + struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + + if (crtc->config->has_audio) + intel_audio_codec_disable(encoder); + + intel_disable_hdmi(encoder); +} + +static void pch_disable_hdmi(struct intel_encoder *encoder) +{ + struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); + + if (crtc->config->has_audio) + intel_audio_codec_disable(encoder); +} + +static void pch_post_disable_hdmi(struct intel_encoder *encoder) +{ + intel_disable_hdmi(encoder); +} + static int hdmi_portclock_limit(struct intel_hdmi *hdmi, bool respect_dvi_limit) { struct drm_device *dev = intel_hdmi_to_dev(hdmi); @@ -1036,7 +1036,8 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, */ if (pipe_config->pipe_bpp > 8*3 && pipe_config->has_hdmi_sink && clock_12bpc <= portclock_limit && - hdmi_12bpc_possible(pipe_config)) { + hdmi_12bpc_possible(pipe_config) && + 0 /* FIXME 12bpc support totally broken */) { DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n"); desired_bpp = 12*3; @@ -1293,7 +1294,7 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder) u32 val; /* Enable clock channels for this port */ - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW8(port)); val = 0; if (pipe) @@ -1316,7 +1317,7 @@ static void vlv_hdmi_pre_enable(struct intel_encoder *encoder) /* Program lane clock */ vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW14(port), 0x00760018); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW23(port), 0x00400888); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); intel_hdmi->set_infoframes(&encoder->base, intel_crtc->config->has_hdmi_sink, @@ -1340,7 +1341,7 @@ static void vlv_hdmi_pre_pll_enable(struct intel_encoder *encoder) intel_hdmi_prepare(encoder); /* Program Tx lane resets to default */ - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); @@ -1357,7 +1358,7 @@ static void vlv_hdmi_pre_pll_enable(struct intel_encoder *encoder) vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW9(port), 0x00002000); vlv_dpio_write(dev_priv, pipe, VLV_TX_DW5(port), DPIO_TX_OCALINIT_EN); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder) @@ -1373,7 +1374,7 @@ static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder) intel_hdmi_prepare(encoder); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); /* program left/right clock distribution */ if (pipe != PIPE_B) { @@ -1423,7 +1424,7 @@ static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder) val |= CHV_CMN_USEDCLKCHANNEL; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW19(ch), val); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } static void vlv_hdmi_post_disable(struct intel_encoder *encoder) @@ -1436,10 +1437,10 @@ static void vlv_hdmi_post_disable(struct intel_encoder *encoder) int pipe = intel_crtc->pipe; /* Reset lanes to avoid HDMI flicker (VLV w/a) */ - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW0(port), 0x00000000); vlv_dpio_write(dev_priv, pipe, VLV_PCS_DW1(port), 0x00e00060); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } static void chv_hdmi_post_disable(struct intel_encoder *encoder) @@ -1453,7 +1454,7 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder) enum pipe pipe = intel_crtc->pipe; u32 val; - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); /* Propagate soft reset to data lane reset */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW1(ch)); @@ -1472,7 +1473,7 @@ static void chv_hdmi_post_disable(struct intel_encoder *encoder) val &= ~(DPIO_PCS_TX_LANE2_RESET | DPIO_PCS_TX_LANE1_RESET); vlv_dpio_write(dev_priv, pipe, VLV_PCS23_DW0(ch), val); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } static void chv_hdmi_pre_enable(struct intel_encoder *encoder) @@ -1490,7 +1491,7 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder) int data, i, stagger; u32 val; - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); /* allow hardware to manage TX FIFO reset source */ val = vlv_dpio_read(dev_priv, pipe, VLV_PCS01_DW11(ch)); @@ -1633,7 +1634,7 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder) val |= DPIO_LRC_BYPASS; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW30, val); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); intel_hdmi->set_infoframes(&encoder->base, intel_crtc->config->has_hdmi_sink, @@ -1806,7 +1807,12 @@ void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port) DRM_MODE_ENCODER_TMDS); intel_encoder->compute_config = intel_hdmi_compute_config; - intel_encoder->disable = intel_disable_hdmi; + if (HAS_PCH_SPLIT(dev)) { + intel_encoder->disable = pch_disable_hdmi; + intel_encoder->post_disable = pch_post_disable_hdmi; + } else { + intel_encoder->disable = g4x_disable_hdmi; + } intel_encoder->get_hw_state = intel_hdmi_get_hw_state; intel_encoder->get_config = intel_hdmi_get_config; if (IS_CHERRYVIEW(dev)) { diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 3daa7e322326..92072f56e418 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -49,6 +49,19 @@ static const struct gmbus_pin gmbus_pins[] = { [GMBUS_PIN_DPD] = { "dpd", GPIOF }, }; +static const struct gmbus_pin gmbus_pins_bdw[] = { + [GMBUS_PIN_VGADDC] = { "vga", GPIOA }, + [GMBUS_PIN_DPC] = { "dpc", GPIOD }, + [GMBUS_PIN_DPB] = { "dpb", GPIOE }, + [GMBUS_PIN_DPD] = { "dpd", GPIOF }, +}; + +static const struct gmbus_pin gmbus_pins_skl[] = { + [GMBUS_PIN_DPC] = { "dpc", GPIOD }, + [GMBUS_PIN_DPB] = { "dpb", GPIOE }, + [GMBUS_PIN_DPD] = { "dpd", GPIOF }, +}; + static const struct gmbus_pin gmbus_pins_bxt[] = { [GMBUS_PIN_1_BXT] = { "dpb", PCH_GPIOB }, [GMBUS_PIN_2_BXT] = { "dpc", PCH_GPIOC }, @@ -61,6 +74,10 @@ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv, { if (IS_BROXTON(dev_priv)) return &gmbus_pins_bxt[pin]; + else if (IS_SKYLAKE(dev_priv)) + return &gmbus_pins_skl[pin]; + else if (IS_BROADWELL(dev_priv)) + return &gmbus_pins_bdw[pin]; else return &gmbus_pins[pin]; } @@ -72,6 +89,10 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, if (IS_BROXTON(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bxt); + else if (IS_SKYLAKE(dev_priv)) + size = ARRAY_SIZE(gmbus_pins_skl); + else if (IS_BROADWELL(dev_priv)) + size = ARRAY_SIZE(gmbus_pins_bdw); else size = ARRAY_SIZE(gmbus_pins); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0fa9209ff556..9f5485ddcbe6 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -394,6 +394,12 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) assert_spin_locked(&ring->execlist_lock); + /* + * If irqs are not active generate a warning as batches that finish + * without the irqs may get lost and a GPU Hang may occur. + */ + WARN_ON(!intel_irqs_enabled(ring->dev->dev_private)); + if (list_empty(&ring->execlist_queue)) return; @@ -421,7 +427,7 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring) * WaIdleLiteRestore: make sure we never cause a lite * restore with HEAD==TAIL */ - if (req0 && req0->elsp_submitted) { + if (req0->elsp_submitted) { /* * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL * as we resubmit the request. See gen8_emit_request() @@ -622,6 +628,7 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, struct list_head *vmas) { struct intel_engine_cs *ring = ringbuf->ring; + const unsigned other_rings = ~intel_ring_flag(ring); struct i915_vma *vma; uint32_t flush_domains = 0; bool flush_chipset = false; @@ -630,9 +637,11 @@ static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - ret = i915_gem_object_sync(obj, ring); - if (ret) - return ret; + if (obj->active & other_rings) { + ret = i915_gem_object_sync(obj, ring); + if (ret) + return ret; + } if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) flush_chipset |= i915_gem_clflush_object(obj, false); @@ -673,7 +682,8 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, { struct intel_engine_cs *ring = ringbuf->ring; struct drm_i915_gem_request *request; - int ret, new_space; + unsigned space; + int ret; if (intel_ring_space(ringbuf) >= bytes) return 0; @@ -684,14 +694,13 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, * from multiple ringbuffers. Here, we must ignore any that * aren't from the ringbuffer we're considering. */ - struct intel_context *ctx = request->ctx; - if (ctx->engine[ring->id].ringbuf != ringbuf) + if (request->ringbuf != ringbuf) continue; /* Would completion of this request free enough space? */ - new_space = __intel_ring_space(request->postfix, ringbuf->tail, - ringbuf->size); - if (new_space >= bytes) + space = __intel_ring_space(request->postfix, ringbuf->tail, + ringbuf->size); + if (space >= bytes) break; } @@ -702,11 +711,8 @@ static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf, if (ret) return ret; - i915_gem_retire_requests_ring(ring); - - WARN_ON(intel_ring_space(ringbuf) < new_space); - - return intel_ring_space(ringbuf) >= bytes ? 0 : -ENOSPC; + ringbuf->space = space; + return 0; } /* diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 5fd2d5ac02e2..25c8ec697da1 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -228,7 +228,6 @@ static int intel_overlay_do_wait_request(struct intel_overlay *overlay, ret = i915_wait_request(overlay->last_flip_req); if (ret) return ret; - i915_gem_retire_requests(dev); i915_gem_request_assign(&overlay->last_flip_req, NULL); return 0; @@ -376,7 +375,6 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) ret = i915_wait_request(overlay->last_flip_req); if (ret) return ret; - i915_gem_retire_requests(overlay->dev); if (overlay->flip_tail) overlay->flip_tail(overlay); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7006f94b94c1..c5914564939c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1946,7 +1946,7 @@ static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5]) int ilk_wm_max_level(const struct drm_device *dev) { /* how many WM levels are we expecting */ - if (IS_GEN9(dev)) + if (INTEL_INFO(dev)->gen >= 9) return 7; else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) return 4; @@ -2639,8 +2639,18 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, } static unsigned int -skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p) +skl_plane_relative_data_rate(const struct intel_plane_wm_parameters *p, int y) { + + /* for planar format */ + if (p->y_bytes_per_pixel) { + if (y) /* y-plane data rate */ + return p->horiz_pixels * p->vert_pixels * p->y_bytes_per_pixel; + else /* uv-plane data rate */ + return (p->horiz_pixels/2) * (p->vert_pixels/2) * p->bytes_per_pixel; + } + + /* for packed formats */ return p->horiz_pixels * p->vert_pixels * p->bytes_per_pixel; } @@ -2663,7 +2673,10 @@ skl_get_total_relative_data_rate(struct intel_crtc *intel_crtc, if (!p->enabled) continue; - total_data_rate += skl_plane_relative_data_rate(p); + total_data_rate += skl_plane_relative_data_rate(p, 0); /* packed/uv */ + if (p->y_bytes_per_pixel) { + total_data_rate += skl_plane_relative_data_rate(p, 1); /* y-plane */ + } } return total_data_rate; @@ -2682,6 +2695,7 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc, struct skl_ddb_entry *alloc = &ddb->pipe[pipe]; uint16_t alloc_size, start, cursor_blocks; uint16_t minimum[I915_MAX_PLANES]; + uint16_t y_minimum[I915_MAX_PLANES]; unsigned int total_data_rate; int plane; @@ -2710,6 +2724,8 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc, minimum[plane] = 8; alloc_size -= minimum[plane]; + y_minimum[plane] = p->y_bytes_per_pixel ? 8 : 0; + alloc_size -= y_minimum[plane]; } /* @@ -2723,16 +2739,17 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc, start = alloc->start; for (plane = 0; plane < intel_num_planes(intel_crtc); plane++) { const struct intel_plane_wm_parameters *p; - unsigned int data_rate; - uint16_t plane_blocks; + unsigned int data_rate, y_data_rate; + uint16_t plane_blocks, y_plane_blocks = 0; p = ¶ms->plane[plane]; if (!p->enabled) continue; - data_rate = skl_plane_relative_data_rate(p); + data_rate = skl_plane_relative_data_rate(p, 0); /* + * allocation for (packed formats) or (uv-plane part of planar format): * promote the expression to 64 bits to avoid overflowing, the * result is < available as data_rate / total_data_rate < 1 */ @@ -2744,6 +2761,22 @@ skl_allocate_pipe_ddb(struct drm_crtc *crtc, ddb->plane[pipe][plane].end = start + plane_blocks; start += plane_blocks; + + /* + * allocation for y_plane part of planar format: + */ + if (p->y_bytes_per_pixel) { + y_data_rate = skl_plane_relative_data_rate(p, 1); + y_plane_blocks = y_minimum[plane]; + y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, + total_data_rate); + + ddb->y_plane[pipe][plane].start = start; + ddb->y_plane[pipe][plane].end = start + y_plane_blocks; + + start += y_plane_blocks; + } + } } @@ -2856,13 +2889,18 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc, p->pixel_rate = skl_pipe_pixel_rate(intel_crtc->config); fb = crtc->primary->state->fb; + /* For planar: Bpp is for uv plane, y_Bpp is for y plane */ if (fb) { p->plane[0].enabled = true; - p->plane[0].bytes_per_pixel = fb->bits_per_pixel / 8; + p->plane[0].bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ? + drm_format_plane_cpp(fb->pixel_format, 1) : fb->bits_per_pixel / 8; + p->plane[0].y_bytes_per_pixel = fb->pixel_format == DRM_FORMAT_NV12 ? + drm_format_plane_cpp(fb->pixel_format, 0) : 0; p->plane[0].tiling = fb->modifier[0]; } else { p->plane[0].enabled = false; p->plane[0].bytes_per_pixel = 0; + p->plane[0].y_bytes_per_pixel = 0; p->plane[0].tiling = DRM_FORMAT_MOD_NONE; } p->plane[0].horiz_pixels = intel_crtc->config->pipe_src_w; @@ -2870,6 +2908,7 @@ static void skl_compute_wm_pipe_parameters(struct drm_crtc *crtc, p->plane[0].rotation = crtc->primary->state->rotation; fb = crtc->cursor->state->fb; + p->cursor.y_bytes_per_pixel = 0; if (fb) { p->cursor.enabled = true; p->cursor.bytes_per_pixel = fb->bits_per_pixel / 8; @@ -2905,22 +2944,25 @@ static bool skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint32_t plane_bytes_per_line, plane_blocks_per_line; uint32_t res_blocks, res_lines; uint32_t selected_result; + uint8_t bytes_per_pixel; if (latency == 0 || !p->active || !p_params->enabled) return false; + bytes_per_pixel = p_params->y_bytes_per_pixel ? + p_params->y_bytes_per_pixel : + p_params->bytes_per_pixel; method1 = skl_wm_method1(p->pixel_rate, - p_params->bytes_per_pixel, + bytes_per_pixel, latency); method2 = skl_wm_method2(p->pixel_rate, p->pipe_htotal, p_params->horiz_pixels, - p_params->bytes_per_pixel, + bytes_per_pixel, p_params->tiling, latency); - plane_bytes_per_line = p_params->horiz_pixels * - p_params->bytes_per_pixel; + plane_bytes_per_line = p_params->horiz_pixels * bytes_per_pixel; plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); if (p_params->tiling == I915_FORMAT_MOD_Y_TILED || @@ -3137,10 +3179,14 @@ static void skl_write_wm_values(struct drm_i915_private *dev_priv, new->plane_trans[pipe][i]); I915_WRITE(CUR_WM_TRANS(pipe), new->cursor_trans[pipe]); - for (i = 0; i < intel_num_planes(crtc); i++) + for (i = 0; i < intel_num_planes(crtc); i++) { skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, i), &new->ddb.plane[pipe][i]); + skl_ddb_entry_write(dev_priv, + PLANE_NV12_BUF_CFG(pipe, i), + &new->ddb.y_plane[pipe][i]); + } skl_ddb_entry_write(dev_priv, CUR_BUF_CFG(pipe), &new->ddb.cursor[pipe]); @@ -3298,6 +3344,7 @@ static bool skl_update_pipe_wm(struct drm_crtc *crtc, return false; intel_crtc->wm.skl_active = *pipe_wm; + return true; } @@ -3391,8 +3438,16 @@ skl_update_sprite_wm(struct drm_plane *plane, struct drm_crtc *crtc, intel_plane->wm.scaled = scaled; intel_plane->wm.horiz_pixels = sprite_width; intel_plane->wm.vert_pixels = sprite_height; - intel_plane->wm.bytes_per_pixel = pixel_size; intel_plane->wm.tiling = DRM_FORMAT_MOD_NONE; + + /* For planar: Bpp is for UV plane, y_Bpp is for Y plane */ + intel_plane->wm.bytes_per_pixel = + (fb && fb->pixel_format == DRM_FORMAT_NV12) ? + drm_format_plane_cpp(plane->state->fb->pixel_format, 1) : pixel_size; + intel_plane->wm.y_bytes_per_pixel = + (fb && fb->pixel_format == DRM_FORMAT_NV12) ? + drm_format_plane_cpp(plane->state->fb->pixel_format, 0) : 0; + /* * Framebuffer can be NULL on plane disable, but it does not * matter for watermarks if we assume no tiling in that case. @@ -4042,51 +4097,25 @@ static void valleyview_set_rps(struct drm_device *dev, u8 val) trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); } -/* vlv_set_rps_idle: Set the frequency to Rpn if Gfx clocks are down +/* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down * * * If Gfx is Idle, then - * 1. Mask Turbo interrupts - * 2. Bring up Gfx clock - * 3. Change the freq to Rpn and wait till P-Unit updates freq - * 4. Clear the Force GFX CLK ON bit so that Gfx can down - * 5. Unmask Turbo interrupts + * 1. Forcewake Media well. + * 2. Request idle freq. + * 3. Release Forcewake of Media well. */ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) { - struct drm_device *dev = dev_priv->dev; u32 val = dev_priv->rps.idle_freq; - /* CHV and latest VLV don't need to force the gfx clock */ - if (IS_CHERRYVIEW(dev) || dev->pdev->revision >= 0xd) { - valleyview_set_rps(dev_priv->dev, val); - return; - } - - /* - * When we are idle. Drop to min voltage state. - */ - if (dev_priv->rps.cur_freq <= val) return; - /* Mask turbo interrupt so that they will not come in between */ - I915_WRITE(GEN6_PMINTRMSK, - gen6_sanitize_rps_pm_mask(dev_priv, ~0)); - - vlv_force_gfx_clock(dev_priv, true); - - dev_priv->rps.cur_freq = val; - - vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); - - if (wait_for(((vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS)) - & GENFREQSTATUS) == 0, 100)) - DRM_ERROR("timed out waiting for Punit\n"); - - gen6_set_rps_thresholds(dev_priv, val); - vlv_force_gfx_clock(dev_priv, false); - - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); + /* Wake up the media well, as that takes a lot less + * power than the Render well. */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); + valleyview_set_rps(dev_priv->dev, val); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); } void gen6_rps_busy(struct drm_i915_private *dev_priv) @@ -4114,33 +4143,48 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) dev_priv->rps.last_adj = 0; I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); } + mutex_unlock(&dev_priv->rps.hw_lock); + spin_lock(&dev_priv->rps.client_lock); while (!list_empty(&dev_priv->rps.clients)) list_del_init(dev_priv->rps.clients.next); - mutex_unlock(&dev_priv->rps.hw_lock); + spin_unlock(&dev_priv->rps.client_lock); } void gen6_rps_boost(struct drm_i915_private *dev_priv, - struct drm_i915_file_private *file_priv) + struct intel_rps_client *rps, + unsigned long submitted) { - u32 val; + /* This is intentionally racy! We peek at the state here, then + * validate inside the RPS worker. + */ + if (!(dev_priv->mm.busy && + dev_priv->rps.enabled && + dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)) + return; - mutex_lock(&dev_priv->rps.hw_lock); - val = dev_priv->rps.max_freq_softlimit; - if (dev_priv->rps.enabled && - dev_priv->mm.busy && - dev_priv->rps.cur_freq < val && - (file_priv == NULL || list_empty(&file_priv->rps_boost))) { - intel_set_rps(dev_priv->dev, val); - dev_priv->rps.last_adj = 0; + /* Force a RPS boost (and don't count it against the client) if + * the GPU is severely congested. + */ + if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES)) + rps = NULL; + + spin_lock(&dev_priv->rps.client_lock); + if (rps == NULL || list_empty(&rps->link)) { + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->rps.interrupts_enabled) { + dev_priv->rps.client_boost = true; + queue_work(dev_priv->wq, &dev_priv->rps.work); + } + spin_unlock_irq(&dev_priv->irq_lock); - if (file_priv != NULL) { - list_add(&file_priv->rps_boost, &dev_priv->rps.clients); - file_priv->rps_boosts++; + if (rps != NULL) { + list_add(&rps->link, &dev_priv->rps.clients); + rps->boosts++; } else dev_priv->rps.boosts++; } - mutex_unlock(&dev_priv->rps.hw_lock); + spin_unlock(&dev_priv->rps.client_lock); } void intel_set_rps(struct drm_device *dev, u8 val) @@ -4714,24 +4758,6 @@ static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) return rp1; } -static int cherryview_rps_min_freq(struct drm_i915_private *dev_priv) -{ - struct drm_device *dev = dev_priv->dev; - u32 val, rpn; - - if (dev->pdev->revision >= 0x20) { - val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); - rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & - FB_GFX_FREQ_FUSE_MASK); - } else { /* For pre-production hardware */ - val = vlv_punit_read(dev_priv, PUNIT_GPU_STATUS_REG); - rpn = ((val >> PUNIT_GPU_STATIS_GFX_MIN_FREQ_SHIFT) & - PUNIT_GPU_STATUS_GFX_MIN_FREQ_MASK); - } - - return rpn; -} - static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) { u32 val, rp1; @@ -4938,9 +4964,9 @@ static void cherryview_init_gt_powersave(struct drm_device *dev) mutex_lock(&dev_priv->rps.hw_lock); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); val = vlv_cck_read(dev_priv, CCK_FUSE_REG); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); switch ((val >> 2) & 0x7) { case 0: @@ -4983,7 +5009,8 @@ static void cherryview_init_gt_powersave(struct drm_device *dev) intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), dev_priv->rps.rp1_freq); - dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); + /* PUnit validated range is only [RPe, RP0] */ + dev_priv->rps.min_freq = dev_priv->rps.efficient_freq; DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), dev_priv->rps.min_freq); @@ -5859,13 +5886,15 @@ static void ibx_init_clock_gating(struct drm_device *dev) static void g4x_disable_trickle_feed(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; - int pipe; + enum pipe pipe; for_each_pipe(dev_priv, pipe) { I915_WRITE(DSPCNTR(pipe), I915_READ(DSPCNTR(pipe)) | DISPPLANE_TRICKLE_FEED_DISABLE); - intel_flush_primary_plane(dev_priv, pipe); + + I915_WRITE(DSPSURF(pipe), I915_READ(DSPSURF(pipe))); + POSTING_READ(DSPSURF(pipe)); } } @@ -6155,10 +6184,9 @@ static void broadwell_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; enum pipe pipe; + uint32_t misccpctl; - I915_WRITE(WM3_LP_ILK, 0); - I915_WRITE(WM2_LP_ILK, 0); - I915_WRITE(WM1_LP_ILK, 0); + ilk_init_lp_watermarks(dev); /* WaSwitchSolVfFArbitrationPriority:bdw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -6187,6 +6215,22 @@ static void broadwell_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + /* + * WaProgramL3SqcReg1Default:bdw + * WaTempDisableDOPClkGating:bdw + */ + misccpctl = I915_READ(GEN7_MISCCPCTL); + I915_WRITE(GEN7_MISCCPCTL, misccpctl & ~GEN7_DOP_CLOCK_GATE_ENABLE); + I915_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT); + I915_WRITE(GEN7_MISCCPCTL, misccpctl); + + /* + * WaGttCachingOffByDefault:bdw + * GTT cache may not work with big pages, so if those + * are ever enabled GTT cache may need to be disabled. + */ + I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); + lpt_init_clock_gating(dev); } @@ -6462,6 +6506,12 @@ static void cherryview_init_clock_gating(struct drm_device *dev) /* WaDisableSDEUnitClockGating:chv */ I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | GEN8_SDEUNIT_CLOCK_GATE_DISABLE); + + /* + * GTT cache may not work with big pages, so if those + * are ever enabled GTT cache may need to be disabled. + */ + I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); } static void g4x_init_clock_gating(struct drm_device *dev) @@ -6830,34 +6880,39 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) struct request_boost { struct work_struct work; - struct drm_i915_gem_request *rq; + struct drm_i915_gem_request *req; }; static void __intel_rps_boost_work(struct work_struct *work) { struct request_boost *boost = container_of(work, struct request_boost, work); + struct drm_i915_gem_request *req = boost->req; - if (!i915_gem_request_completed(boost->rq, true)) - gen6_rps_boost(to_i915(boost->rq->ring->dev), NULL); + if (!i915_gem_request_completed(req, true)) + gen6_rps_boost(to_i915(req->ring->dev), NULL, + req->emitted_jiffies); - i915_gem_request_unreference__unlocked(boost->rq); + i915_gem_request_unreference__unlocked(req); kfree(boost); } void intel_queue_rps_boost_for_request(struct drm_device *dev, - struct drm_i915_gem_request *rq) + struct drm_i915_gem_request *req) { struct request_boost *boost; - if (rq == NULL || INTEL_INFO(dev)->gen < 6) + if (req == NULL || INTEL_INFO(dev)->gen < 6) + return; + + if (i915_gem_request_completed(req, true)) return; boost = kmalloc(sizeof(*boost), GFP_ATOMIC); if (boost == NULL) return; - i915_gem_request_reference(rq); - boost->rq = rq; + i915_gem_request_reference(req); + boost->req = req; INIT_WORK(&boost->work, __intel_rps_boost_work); queue_work(to_i915(dev)->wq, &boost->work); @@ -6868,10 +6923,13 @@ void intel_pm_setup(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; mutex_init(&dev_priv->rps.hw_lock); + spin_lock_init(&dev_priv->rps.client_lock); INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, intel_gen6_powersave_work); INIT_LIST_HEAD(&dev_priv->rps.clients); + INIT_LIST_HEAD(&dev_priv->rps.semaphores.link); + INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link); dev_priv->pm.suspended = false; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9b96ed7de9bb..d934f857394d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -853,9 +853,6 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring) GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4); - /* WaProgramL3SqcReg1Default:bdw */ - WA_WRITE(GEN8_L3SQCREG1, BDW_WA_L3SQCREG1_DEFAULT); - return 0; } @@ -918,6 +915,7 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) { struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t tmp; /* WaDisablePartialInstShootdown:skl,bxt */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, @@ -961,15 +959,19 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring) WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, GEN9_CCS_TLB_PREFETCH_ENABLE); - /* - * FIXME: don't apply the following on BXT for stepping C. On BXT A0 - * the flag reads back as 0. - */ - /* WaDisableMaskBasedCammingInRCC:sklC,bxtA */ - if (INTEL_REVID(dev) == SKL_REVID_C0 || IS_BROXTON(dev)) + /* WaDisableMaskBasedCammingInRCC:skl,bxt */ + if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) || + (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, PIXEL_MASK_CAMMING_DISABLE); + /* WaForceContextSaveRestoreNonCoherent:skl,bxt */ + tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT; + if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) || + (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0)) + tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE; + WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp); + return 0; } @@ -1060,10 +1062,6 @@ static int bxt_init_workarounds(struct intel_engine_cs *ring) GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } - /* WaForceContextSaveRestoreNonCoherent:bxt */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); - return 0; } @@ -2102,15 +2100,16 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n) { struct intel_ringbuffer *ringbuf = ring->buffer; struct drm_i915_gem_request *request; - int ret, new_space; + unsigned space; + int ret; if (intel_ring_space(ringbuf) >= n) return 0; list_for_each_entry(request, &ring->request_list, list) { - new_space = __intel_ring_space(request->postfix, ringbuf->tail, - ringbuf->size); - if (new_space >= n) + space = __intel_ring_space(request->postfix, ringbuf->tail, + ringbuf->size); + if (space >= n) break; } @@ -2121,10 +2120,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n) if (ret) return ret; - i915_gem_retire_requests_ring(ring); - - WARN_ON(intel_ring_space(ringbuf) < new_space); - + ringbuf->space = space; return 0; } @@ -2168,10 +2164,14 @@ int intel_ring_idle(struct intel_engine_cs *ring) return 0; req = list_entry(ring->request_list.prev, - struct drm_i915_gem_request, - list); - - return i915_wait_request(req); + struct drm_i915_gem_request, + list); + + /* Make sure we do not trigger any retires */ + return __i915_wait_request(req, + atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter), + to_i915(ring->dev)->mm.interruptible, + NULL, NULL); } int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 317b9b43d1c1..1a45385f4d66 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -771,7 +771,7 @@ static void vlv_set_power_well(struct drm_i915_private *dev_priv, vlv_punit_write(dev_priv, PUNIT_REG_PWRGT_CTRL, ctrl); if (wait_for(COND, 100)) - DRM_ERROR("timout setting power well state %08x (%08x)\n", + DRM_ERROR("timeout setting power well state %08x (%08x)\n", state, vlv_punit_read(dev_priv, PUNIT_REG_PWRGT_CTRL)); @@ -1029,7 +1029,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, ctrl); if (wait_for(COND, 100)) - DRM_ERROR("timout setting power well state %08x (%08x)\n", + DRM_ERROR("timeout setting power well state %08x (%08x)\n", state, vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ)); @@ -1233,18 +1233,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT(POWER_DOMAIN_AUX_C) | \ BIT(POWER_DOMAIN_INIT)) -#define CHV_PIPE_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_PIPE_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_PIPE_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_INIT)) - #define CHV_DPIO_CMN_BC_POWER_DOMAINS ( \ BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) | \ BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) | \ @@ -1260,17 +1248,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT(POWER_DOMAIN_AUX_D) | \ BIT(POWER_DOMAIN_INIT)) -#define CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_INIT)) - -#define CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_INIT)) - static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_always_on_power_well_noop, .enable = i9xx_always_on_power_well_noop, @@ -1428,40 +1405,17 @@ static struct i915_power_well chv_power_wells[] = { .domains = VLV_ALWAYS_ON_POWER_DOMAINS, .ops = &i9xx_always_on_power_well_ops, }, -#if 0 { .name = "display", - .domains = VLV_DISPLAY_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DISP2D, - .ops = &vlv_display_power_well_ops, - }, -#endif - { - .name = "pipe-a", /* - * FIXME: pipe A power well seems to be the new disp2d well. - * At least all registers seem to be housed there. Figure - * out if this a a temporary situation in pre-production - * hardware or a permanent state of affairs. + * Pipe A power well is the new disp2d well. Pipe B and C + * power wells don't actually exist. Pipe A power well is + * required for any pipe to work. */ - .domains = CHV_PIPE_A_POWER_DOMAINS | VLV_DISPLAY_POWER_DOMAINS, + .domains = VLV_DISPLAY_POWER_DOMAINS, .data = PIPE_A, .ops = &chv_pipe_power_well_ops, }, -#if 0 - { - .name = "pipe-b", - .domains = CHV_PIPE_B_POWER_DOMAINS, - .data = PIPE_B, - .ops = &chv_pipe_power_well_ops, - }, - { - .name = "pipe-c", - .domains = CHV_PIPE_C_POWER_DOMAINS, - .data = PIPE_C, - .ops = &chv_pipe_power_well_ops, - }, -#endif { .name = "dpio-common-bc", .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS, @@ -1474,50 +1428,6 @@ static struct i915_power_well chv_power_wells[] = { .data = PUNIT_POWER_WELL_DPIO_CMN_D, .ops = &chv_dpio_cmn_power_well_ops, }, -#if 0 - { - .name = "dpio-tx-b-01", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01, - }, - { - .name = "dpio-tx-b-23", - .domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23, - }, - { - .name = "dpio-tx-c-01", - .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01, - }, - { - .name = "dpio-tx-c-23", - .domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | - VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23, - }, - { - .name = "dpio-tx-d-01", - .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS | - CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_01, - }, - { - .name = "dpio-tx-d-23", - .domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS | - CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS, - .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_23, - }, -#endif }; static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, @@ -1724,6 +1634,8 @@ static void chv_phy_control_init(struct drm_i915_private *dev_priv) * value. */ dev_priv->chv_phy_control = + PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY0) | + PHY_LDO_SEQ_DELAY(PHY_LDO_DELAY_600NS, DPIO_PHY1) | PHY_CH_POWER_MODE(PHY_CH_SU_PSR, DPIO_PHY0, DPIO_CH0) | PHY_CH_POWER_MODE(PHY_CH_SU_PSR, DPIO_PHY0, DPIO_CH1) | PHY_CH_POWER_MODE(PHY_CH_SU_PSR, DPIO_PHY1, DPIO_CH0); diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 0a0625761f42..d24ef75596a1 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -243,6 +243,14 @@ static void intel_sdvo_write_sdvox(struct intel_sdvo *intel_sdvo, u32 val) if (intel_sdvo->sdvo_reg == PCH_SDVOB) { I915_WRITE(intel_sdvo->sdvo_reg, val); POSTING_READ(intel_sdvo->sdvo_reg); + /* + * HW workaround, need to write this twice for issue + * that may result in first write getting masked. + */ + if (HAS_PCH_IBX(dev)) { + I915_WRITE(intel_sdvo->sdvo_reg, val); + POSTING_READ(intel_sdvo->sdvo_reg); + } return; } @@ -1429,6 +1437,7 @@ static void intel_disable_sdvo(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = encoder->base.dev->dev_private; struct intel_sdvo *intel_sdvo = to_sdvo(encoder); + struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc); u32 temp; intel_sdvo_set_active_outputs(intel_sdvo, 0); @@ -1437,35 +1446,34 @@ static void intel_disable_sdvo(struct intel_encoder *encoder) DRM_MODE_DPMS_OFF); temp = I915_READ(intel_sdvo->sdvo_reg); - if ((temp & SDVO_ENABLE) != 0) { - /* HW workaround for IBX, we need to move the port to - * transcoder A before disabling it. */ - if (HAS_PCH_IBX(encoder->base.dev)) { - struct drm_crtc *crtc = encoder->base.crtc; - int pipe = crtc ? to_intel_crtc(crtc)->pipe : -1; - - if (temp & SDVO_PIPE_B_SELECT) { - temp &= ~SDVO_PIPE_B_SELECT; - I915_WRITE(intel_sdvo->sdvo_reg, temp); - POSTING_READ(intel_sdvo->sdvo_reg); - - /* Again we need to write this twice. */ - I915_WRITE(intel_sdvo->sdvo_reg, temp); - POSTING_READ(intel_sdvo->sdvo_reg); - - /* Transcoder selection bits only update - * effectively on vblank. */ - if (crtc) - intel_wait_for_vblank(encoder->base.dev, pipe); - else - msleep(50); - } - } - intel_sdvo_write_sdvox(intel_sdvo, temp & ~SDVO_ENABLE); + temp &= ~SDVO_ENABLE; + intel_sdvo_write_sdvox(intel_sdvo, temp); + + /* + * HW workaround for IBX, we need to move the port + * to transcoder A after disabling it to allow the + * matching DP port to be enabled on transcoder A. + */ + if (HAS_PCH_IBX(dev_priv) && crtc->pipe == PIPE_B) { + temp &= ~SDVO_PIPE_B_SELECT; + temp |= SDVO_ENABLE; + intel_sdvo_write_sdvox(intel_sdvo, temp); + + temp &= ~SDVO_ENABLE; + intel_sdvo_write_sdvox(intel_sdvo, temp); } } +static void pch_disable_sdvo(struct intel_encoder *encoder) +{ +} + +static void pch_post_disable_sdvo(struct intel_encoder *encoder) +{ + intel_disable_sdvo(encoder); +} + static void intel_enable_sdvo(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; @@ -1478,14 +1486,9 @@ static void intel_enable_sdvo(struct intel_encoder *encoder) bool success; temp = I915_READ(intel_sdvo->sdvo_reg); - if ((temp & SDVO_ENABLE) == 0) { - /* HW workaround for IBX, we need to move the port - * to transcoder A before disabling it, so restore it here. */ - if (HAS_PCH_IBX(dev)) - temp |= SDVO_PIPE_SEL(intel_crtc->pipe); + temp |= SDVO_ENABLE; + intel_sdvo_write_sdvox(intel_sdvo, temp); - intel_sdvo_write_sdvox(intel_sdvo, temp | SDVO_ENABLE); - } for (i = 0; i < 2; i++) intel_wait_for_vblank(dev, intel_crtc->pipe); @@ -2988,7 +2991,12 @@ bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob) } intel_encoder->compute_config = intel_sdvo_compute_config; - intel_encoder->disable = intel_disable_sdvo; + if (HAS_PCH_SPLIT(dev)) { + intel_encoder->disable = pch_disable_sdvo; + intel_encoder->post_disable = pch_post_disable_sdvo; + } else { + intel_encoder->disable = intel_disable_sdvo; + } intel_encoder->pre_enable = intel_sdvo_pre_enable; intel_encoder->enable = intel_enable_sdvo; intel_encoder->get_hw_state = intel_sdvo_get_hw_state; diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 693ce8281970..8831fc579ade 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -49,7 +49,7 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, (port << IOSF_PORT_SHIFT) | (be << IOSF_BYTE_ENABLES_SHIFT) | (bar << IOSF_BAR_SHIFT); - WARN_ON(!mutex_is_locked(&dev_priv->dpio_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->sb_lock)); if (wait_for((I915_READ(VLV_IOSF_DOORBELL_REQ) & IOSF_SB_BUSY) == 0, 5)) { DRM_DEBUG_DRIVER("IOSF sideband idle wait (%s) timed out\n", @@ -81,10 +81,10 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr) WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRRDDA_NP, addr, &val); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); return val; } @@ -93,10 +93,10 @@ void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) { WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, SB_CRWRDA_NP, addr, &val); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); } u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg) @@ -121,10 +121,10 @@ u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr) WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - mutex_lock(&dev_priv->dpio_lock); + mutex_lock(&dev_priv->sb_lock); vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_NC, SB_CRRDDA_NP, addr, &val); - mutex_unlock(&dev_priv->dpio_lock); + mutex_unlock(&dev_priv->sb_lock); return val; } @@ -213,7 +213,7 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, enum intel_sbi_destination destination) { u32 value = 0; - WARN_ON(!mutex_is_locked(&dev_priv->dpio_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->sb_lock)); if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0, 100)) { @@ -243,7 +243,7 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, { u32 tmp; - WARN_ON(!mutex_is_locked(&dev_priv->dpio_lock)); + WARN_ON(!mutex_is_locked(&dev_priv->sb_lock)); if (wait_for((I915_READ(SBI_CTL_STAT) & SBI_BUSY) == 0, 100)) { diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index f215e223aa4a..8193a35388d7 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -229,8 +229,8 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_crtc *crtc, if (intel_rotation_90_or_270(rotation)) { /* stride: Surface height in tiles */ - tile_height = intel_tile_height(dev, fb->bits_per_pixel, - fb->modifier[0]); + tile_height = intel_tile_height(dev, fb->pixel_format, + fb->modifier[0]); stride = DIV_ROUND_UP(fb->height, tile_height); plane_size = (src_w << 16) | src_h; x_offset = stride * tile_height - y - (src_h + 1); @@ -282,7 +282,6 @@ skl_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc, bool force) I915_WRITE(PLANE_CTL(pipe, plane), 0); - /* Activate double buffered register update */ I915_WRITE(PLANE_SURF(pipe, plane), 0); POSTING_READ(PLANE_SURF(pipe, plane)); @@ -339,7 +338,6 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc, struct drm_device *dev = dplane->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_plane *intel_plane = to_intel_plane(dplane); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_gem_object *obj = intel_fb_obj(fb); int pipe = intel_plane->pipe; int plane = intel_plane->plane; @@ -453,8 +451,7 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc, I915_WRITE(SPCNTR(pipe, plane), sprctl); I915_WRITE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) + sprsurf_offset); - - intel_flush_primary_plane(dev_priv, intel_crtc->plane); + POSTING_READ(SPSURF(pipe, plane)); } static void @@ -463,21 +460,17 @@ vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc, bool force) struct drm_device *dev = dplane->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_plane *intel_plane = to_intel_plane(dplane); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_plane->pipe; int plane = intel_plane->plane; I915_WRITE(SPCNTR(pipe, plane), 0); - /* Activate double buffered register update */ I915_WRITE(SPSURF(pipe, plane), 0); - - intel_flush_primary_plane(dev_priv, intel_crtc->plane); + POSTING_READ(SPSURF(pipe, plane)); intel_update_sprite_watermarks(dplane, crtc, 0, 0, 0, false, false); } - static void ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, @@ -489,7 +482,6 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_device *dev = plane->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_plane *intel_plane = to_intel_plane(plane); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_gem_object *obj = intel_fb_obj(fb); enum pipe pipe = intel_plane->pipe; u32 sprctl, sprscale = 0; @@ -599,8 +591,7 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, I915_WRITE(SPRCTL(pipe), sprctl); I915_WRITE(SPRSURF(pipe), i915_gem_obj_ggtt_offset(obj) + sprsurf_offset); - - intel_flush_primary_plane(dev_priv, intel_crtc->plane); + POSTING_READ(SPRSURF(pipe)); } static void @@ -609,17 +600,15 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc, bool force) struct drm_device *dev = plane->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_plane *intel_plane = to_intel_plane(plane); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_plane->pipe; I915_WRITE(SPRCTL(pipe), I915_READ(SPRCTL(pipe)) & ~SPRITE_ENABLE); /* Can't leave the scaler enabled... */ if (intel_plane->can_scale) I915_WRITE(SPRSCALE(pipe), 0); - /* Activate double buffered register update */ - I915_WRITE(SPRSURF(pipe), 0); - intel_flush_primary_plane(dev_priv, intel_crtc->plane); + I915_WRITE(SPRSURF(pipe), 0); + POSTING_READ(SPRSURF(pipe)); } static void @@ -633,7 +622,6 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_device *dev = plane->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_plane *intel_plane = to_intel_plane(plane); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_gem_object *obj = intel_fb_obj(fb); int pipe = intel_plane->pipe; unsigned long dvssurf_offset, linear_offset; @@ -730,8 +718,7 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, I915_WRITE(DVSCNTR(pipe), dvscntr); I915_WRITE(DVSSURF(pipe), i915_gem_obj_ggtt_offset(obj) + dvssurf_offset); - - intel_flush_primary_plane(dev_priv, intel_crtc->plane); + POSTING_READ(DVSSURF(pipe)); } static void @@ -740,17 +727,14 @@ ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc, bool force) struct drm_device *dev = plane->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_plane *intel_plane = to_intel_plane(plane); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int pipe = intel_plane->pipe; I915_WRITE(DVSCNTR(pipe), 0); /* Disable the scaler */ I915_WRITE(DVSSCALE(pipe), 0); - /* Flush double buffered register updates */ I915_WRITE(DVSSURF(pipe), 0); - - intel_flush_primary_plane(dev_priv, intel_crtc->plane); + POSTING_READ(DVSSURF(pipe)); } static int @@ -770,6 +754,7 @@ intel_check_sprite_plane(struct drm_plane *plane, const struct drm_rect *clip = &state->clip; int hscale, vscale; int max_scale, min_scale; + bool can_scale; int pixel_size; int ret; @@ -794,18 +779,29 @@ intel_check_sprite_plane(struct drm_plane *plane, return -EINVAL; } + /* setup can_scale, min_scale, max_scale */ + if (INTEL_INFO(dev)->gen >= 9) { + /* use scaler when colorkey is not required */ + if (intel_plane->ckey.flags == I915_SET_COLORKEY_NONE) { + can_scale = 1; + min_scale = 1; + max_scale = skl_max_scale(intel_crtc, crtc_state); + } else { + can_scale = 0; + min_scale = DRM_PLANE_HELPER_NO_SCALING; + max_scale = DRM_PLANE_HELPER_NO_SCALING; + } + } else { + can_scale = intel_plane->can_scale; + max_scale = intel_plane->max_downscale << 16; + min_scale = intel_plane->can_scale ? 1 : (1 << 16); + } + /* * FIXME the following code does a bunch of fuzzy adjustments to the * coordinates and sizes. We probably need some way to decide whether * more strict checking should be done instead. */ - max_scale = intel_plane->max_downscale << 16; - min_scale = intel_plane->can_scale ? 1 : (1 << 16); - - if (INTEL_INFO(dev)->gen >= 9) { - min_scale = 1; - max_scale = skl_max_scale(intel_crtc, crtc_state); - } drm_rect_rotate(src, fb->width << 16, fb->height << 16, state->base.rotation); @@ -876,7 +872,7 @@ intel_check_sprite_plane(struct drm_plane *plane, * Must keep src and dst the * same if we can't scale. */ - if (!intel_plane->can_scale) + if (!can_scale) crtc_w &= ~1; if (crtc_w == 0) @@ -888,7 +884,7 @@ intel_check_sprite_plane(struct drm_plane *plane, if (state->visible && (src_w != crtc_w || src_h != crtc_h)) { unsigned int width_bytes; - WARN_ON(!intel_plane->can_scale); + WARN_ON(!can_scale); /* FIXME interlacing min height is 6 */ @@ -1052,7 +1048,7 @@ int intel_plane_restore(struct drm_plane *plane) plane->state->src_w, plane->state->src_h); } -static uint32_t ilk_plane_formats[] = { +static const uint32_t ilk_plane_formats[] = { DRM_FORMAT_XRGB8888, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, @@ -1060,7 +1056,7 @@ static uint32_t ilk_plane_formats[] = { DRM_FORMAT_VYUY, }; -static uint32_t snb_plane_formats[] = { +static const uint32_t snb_plane_formats[] = { DRM_FORMAT_XBGR8888, DRM_FORMAT_XRGB8888, DRM_FORMAT_YUYV, @@ -1069,7 +1065,7 @@ static uint32_t snb_plane_formats[] = { DRM_FORMAT_VYUY, }; -static uint32_t vlv_plane_formats[] = { +static const uint32_t vlv_plane_formats[] = { DRM_FORMAT_RGB565, DRM_FORMAT_ABGR8888, DRM_FORMAT_ARGB8888, diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 3e3290c203c6..ed173d30f1c0 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -253,7 +253,7 @@ void radeon_dp_aux_init(struct radeon_connector *radeon_connector) #define DP_VOLTAGE_MAX DP_TRAIN_VOLTAGE_SWING_LEVEL_3 #define DP_PRE_EMPHASIS_MAX DP_TRAIN_PRE_EMPH_LEVEL_3 -static void dp_get_adjust_train(u8 link_status[DP_LINK_STATUS_SIZE], +static void dp_get_adjust_train(const u8 link_status[DP_LINK_STATUS_SIZE], int lane_count, u8 train_set[4]) { @@ -311,7 +311,7 @@ static int dp_get_max_dp_pix_clock(int link_rate, /***** radeon specific DP functions *****/ int radeon_dp_get_max_link_rate(struct drm_connector *connector, - u8 dpcd[DP_DPCD_SIZE]) + const u8 dpcd[DP_DPCD_SIZE]) { int max_link_rate; @@ -328,7 +328,7 @@ int radeon_dp_get_max_link_rate(struct drm_connector *connector, * if the max lane# < low rate lane# then use max lane# instead. */ static int radeon_dp_get_dp_lane_number(struct drm_connector *connector, - u8 dpcd[DP_DPCD_SIZE], + const u8 dpcd[DP_DPCD_SIZE], int pix_clock) { int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector)); @@ -347,7 +347,7 @@ static int radeon_dp_get_dp_lane_number(struct drm_connector *connector, } static int radeon_dp_get_dp_link_clock(struct drm_connector *connector, - u8 dpcd[DP_DPCD_SIZE], + const u8 dpcd[DP_DPCD_SIZE], int pix_clock) { int bpp = convert_bpc_to_bpp(radeon_get_monitor_bpc(connector)); diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index a0c35bbc8546..d3a22f212948 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -174,6 +174,31 @@ int cik_get_allowed_info_register(struct radeon_device *rdev, } } +/* + * Indirect registers accessor + */ +u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->didt_idx_lock, flags); + WREG32(CIK_DIDT_IND_INDEX, (reg)); + r = RREG32(CIK_DIDT_IND_DATA); + spin_unlock_irqrestore(&rdev->didt_idx_lock, flags); + return r; +} + +void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->didt_idx_lock, flags); + WREG32(CIK_DIDT_IND_INDEX, (reg)); + WREG32(CIK_DIDT_IND_DATA, (v)); + spin_unlock_irqrestore(&rdev->didt_idx_lock, flags); +} + /* get temperature in millidegrees */ int ci_get_temp(struct radeon_device *rdev) { diff --git a/drivers/gpu/drm/radeon/cik_reg.h b/drivers/gpu/drm/radeon/cik_reg.h index 0ec5d53eb6fb..4e883fdc59d8 100644 --- a/drivers/gpu/drm/radeon/cik_reg.h +++ b/drivers/gpu/drm/radeon/cik_reg.h @@ -149,10 +149,30 @@ #define KFD_CIK_SDMA_QUEUE_OFFSET 0x200 +#define SQ_IND_INDEX 0x8DE0 +#define SQ_CMD 0x8DEC +#define SQ_IND_DATA 0x8DE4 + +/* + * The TCP_WATCHx_xxxx addresses that are shown here are in dwords, + * and that's why they are multiplied by 4 + */ +#define TCP_WATCH0_ADDR_H (0x32A0*4) +#define TCP_WATCH1_ADDR_H (0x32A3*4) +#define TCP_WATCH2_ADDR_H (0x32A6*4) +#define TCP_WATCH3_ADDR_H (0x32A9*4) +#define TCP_WATCH0_ADDR_L (0x32A1*4) +#define TCP_WATCH1_ADDR_L (0x32A4*4) +#define TCP_WATCH2_ADDR_L (0x32A7*4) +#define TCP_WATCH3_ADDR_L (0x32AA*4) +#define TCP_WATCH0_CNTL (0x32A2*4) +#define TCP_WATCH1_CNTL (0x32A5*4) +#define TCP_WATCH2_CNTL (0x32A8*4) +#define TCP_WATCH3_CNTL (0x32AB*4) + #define CPC_INT_CNTL 0xC2D0 #define CP_HQD_IQ_RPTR 0xC970u -#define AQL_ENABLE (1U << 0) #define SDMA0_RLC0_RB_CNTL 0xD400u #define SDMA_RB_VMID(x) (x << 24) #define SDMA0_RLC0_RB_BASE 0xD404u @@ -186,4 +206,38 @@ #define SDMA0_CNTL 0xD010 #define SDMA1_CNTL 0xD810 +enum { + MAX_TRAPID = 8, /* 3 bits in the bitfield. */ + MAX_WATCH_ADDRESSES = 4 +}; + +enum { + ADDRESS_WATCH_REG_ADDR_HI = 0, + ADDRESS_WATCH_REG_ADDR_LO, + ADDRESS_WATCH_REG_CNTL, + ADDRESS_WATCH_REG_MAX +}; + +enum { /* not defined in the CI/KV reg file */ + ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, + ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, + ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000, + /* extend the mask to 26 bits in order to match the low address field */ + ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, + ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF +}; + +union TCP_WATCH_CNTL_BITS { + struct { + uint32_t mask:24; + uint32_t vmid:4; + uint32_t atc:1; + uint32_t mode:2; + uint32_t valid:1; + } bitfields, bits; + uint32_t u32All; + signed int i32All; + float f32All; +}; + #endif diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index b33ba3b0808b..391ff9d5d706 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -2148,9 +2148,12 @@ #define VCE_CMD_IB_AUTO 0x00000005 #define VCE_CMD_SEMAPHORE 0x00000006 -#define ATC_VMID0_PASID_MAPPING 0x339Cu -#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u -#define ATC_VMID_PASID_MAPPING_VALID (1U << 31) +#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u +#define ATC_VMID0_PASID_MAPPING 0x339Cu +#define ATC_VMID_PASID_MAPPING_PASID_MASK (0xFFFF) +#define ATC_VMID_PASID_MAPPING_PASID_SHIFT 0 +#define ATC_VMID_PASID_MAPPING_VALID_MASK (0x1 << 31) +#define ATC_VMID_PASID_MAPPING_VALID_SHIFT 31 #define ATC_VM_APERTURE0_CNTL 0x3310u #define ATS_ACCESS_MODE_NEVER 0 diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 05e6d6ef5963..5397bed26b86 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -35,6 +35,75 @@ #include "evergreen_blit_shaders.h" #include "radeon_ucode.h" +/* + * Indirect registers accessor + */ +u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->cg_idx_lock, flags); + WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff)); + r = RREG32(EVERGREEN_CG_IND_DATA); + spin_unlock_irqrestore(&rdev->cg_idx_lock, flags); + return r; +} + +void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->cg_idx_lock, flags); + WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff)); + WREG32(EVERGREEN_CG_IND_DATA, (v)); + spin_unlock_irqrestore(&rdev->cg_idx_lock, flags); +} + +u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->pif_idx_lock, flags); + WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff)); + r = RREG32(EVERGREEN_PIF_PHY0_DATA); + spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); + return r; +} + +void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->pif_idx_lock, flags); + WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff)); + WREG32(EVERGREEN_PIF_PHY0_DATA, (v)); + spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); +} + +u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->pif_idx_lock, flags); + WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff)); + r = RREG32(EVERGREEN_PIF_PHY1_DATA); + spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); + return r; +} + +void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->pif_idx_lock, flags); + WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff)); + WREG32(EVERGREEN_PIF_PHY1_DATA, (v)); + spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); +} + static const u32 crtc_offsets[6] = { EVERGREEN_CRTC0_REGISTER_OFFSET, diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index aba2f428c0a8..75977d7e177e 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -36,6 +36,31 @@ #include "radeon_ucode.h" #include "clearstate_cayman.h" +/* + * Indirect registers accessor + */ +u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->smc_idx_lock, flags); + WREG32(TN_SMC_IND_INDEX_0, (reg)); + r = RREG32(TN_SMC_IND_DATA_0); + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); + return r; +} + +void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->smc_idx_lock, flags); + WREG32(TN_SMC_IND_INDEX_0, (reg)); + WREG32(TN_SMC_IND_DATA_0, (v)); + spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); +} + static const u32 tn_rlc_save_restore_register_list[] = { 0x98fc, @@ -2041,6 +2066,25 @@ static int cayman_startup(struct radeon_device *rdev) if (r) rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; + if (rdev->family == CHIP_ARUBA) { + r = radeon_vce_resume(rdev); + if (!r) + r = vce_v1_0_resume(rdev); + + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE1_INDEX); + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE2_INDEX); + + if (r) { + dev_err(rdev->dev, "VCE init error (%d).\n", r); + rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0; + rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0; + } + } + r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX); if (r) { dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r); @@ -2118,6 +2162,19 @@ static int cayman_startup(struct radeon_device *rdev) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); + + if (!r) + r = vce_v1_0_init(rdev); + else if (r != -ENOENT) + DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -2273,6 +2330,19 @@ int cayman_init(struct radeon_device *rdev) r600_ring_init(rdev, ring, 4096); } + if (rdev->family == CHIP_ARUBA) { + r = radeon_vce_init(rdev); + if (!r) { + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + } + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -2326,6 +2396,7 @@ void cayman_fini(struct radeon_device *rdev) radeon_irq_kms_fini(rdev); uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); + radeon_vce_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); @@ -2554,3 +2625,34 @@ void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, 0x0); } + +int tn_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk) +{ + struct atom_clock_dividers dividers; + int r, i; + + r = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM, + ecclk, false, ÷rs); + if (r) + return r; + + for (i = 0; i < 100; i++) { + if (RREG32(CG_ECLK_STATUS) & ECLK_STATUS) + break; + mdelay(10); + } + if (i == 100) + return -ETIMEDOUT; + + WREG32_P(CG_ECLK_CNTL, dividers.post_div, ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK)); + + for (i = 0; i < 100; i++) { + if (RREG32(CG_ECLK_STATUS) & ECLK_STATUS) + break; + mdelay(10); + } + if (i == 100) + return -ETIMEDOUT; + + return 0; +} diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 3b290838918c..47eb49b77d32 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -46,6 +46,13 @@ #define DMIF_ADDR_CONFIG 0xBD4 +/* fusion vce clocks */ +#define CG_ECLK_CNTL 0x620 +# define ECLK_DIVIDER_MASK 0x7f +# define ECLK_DIR_CNTL_EN (1 << 8) +#define CG_ECLK_STATUS 0x624 +# define ECLK_STATUS (1 << 0) + /* DCE6 only */ #define DMIF_ADDR_CALC 0xC00 diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 04f2514f7564..238b13f045c1 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -4090,6 +4090,28 @@ int r100_init(struct radeon_device *rdev) return 0; } +uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg) +{ + unsigned long flags; + uint32_t ret; + + spin_lock_irqsave(&rdev->mmio_idx_lock, flags); + writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); + ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); + return ret; +} + +void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->mmio_idx_lock, flags); + writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); + writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); + spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); +} + u32 r100_io_rreg(struct radeon_device *rdev, u32 reg) { if (reg < rdev->rio_mem_size) diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 08d68f3e13e9..718b12b03b57 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -50,6 +50,31 @@ */ /* + * Indirect registers accessor + */ +uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg) +{ + unsigned long flags; + uint32_t r; + + spin_lock_irqsave(&rdev->pcie_idx_lock, flags); + WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask)); + r = RREG32(RADEON_PCIE_DATA); + spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags); + return r; +} + +void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->pcie_idx_lock, flags); + WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask)); + WREG32(RADEON_PCIE_DATA, (v)); + spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags); +} + +/* * rv370,rv380 PCIE GART */ static int rv370_debugfs_pcie_gart_info_init(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 25b4ac967742..d0ff93256bb0 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -108,6 +108,53 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev); extern int evergreen_rlc_resume(struct radeon_device *rdev); extern void rv770_set_clk_bypass_mode(struct radeon_device *rdev); +/* + * Indirect registers accessor + */ +u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->rcu_idx_lock, flags); + WREG32(R600_RCU_INDEX, ((reg) & 0x1fff)); + r = RREG32(R600_RCU_DATA); + spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags); + return r; +} + +void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->rcu_idx_lock, flags); + WREG32(R600_RCU_INDEX, ((reg) & 0x1fff)); + WREG32(R600_RCU_DATA, (v)); + spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags); +} + +u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg) +{ + unsigned long flags; + u32 r; + + spin_lock_irqsave(&rdev->uvd_idx_lock, flags); + WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff)); + r = RREG32(R600_UVD_CTX_DATA); + spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags); + return r; +} + +void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v) +{ + unsigned long flags; + + spin_lock_irqsave(&rdev->uvd_idx_lock, flags); + WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff)); + WREG32(R600_UVD_CTX_DATA, (v)); + spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags); +} + /** * r600_get_allowed_info_register - fetch the register for the info ioctl * diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 46eb0fa75a61..4d2d0579fd49 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -467,7 +467,6 @@ struct radeon_bo_va { /* protected by bo being reserved */ struct list_head bo_list; uint32_t flags; - uint64_t addr; struct radeon_fence *last_pt_update; unsigned ref_count; @@ -719,7 +718,7 @@ struct radeon_doorbell { resource_size_t size; u32 __iomem *ptr; u32 num_doorbells; /* Number of doorbells actually reserved for radeon. */ - unsigned long used[DIV_ROUND_UP(RADEON_MAX_DOORBELLS, BITS_PER_LONG)]; + DECLARE_BITMAP(used, RADEON_MAX_DOORBELLS); }; int radeon_doorbell_get(struct radeon_device *rdev, u32 *page); @@ -941,6 +940,9 @@ struct radeon_vm { /* BOs freed, but not yet updated in the PT */ struct list_head freed; + /* BOs cleared in the PT */ + struct list_head cleared; + /* contains the page directory */ struct radeon_bo *page_directory; unsigned max_pde_used; @@ -1709,8 +1711,6 @@ int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev, * VCE */ #define RADEON_MAX_VCE_HANDLES 16 -#define RADEON_VCE_STACK_SIZE (1024*1024) -#define RADEON_VCE_HEAP_SIZE (4*1024*1024) struct radeon_vce { struct radeon_bo *vcpu_bo; @@ -1721,6 +1721,7 @@ struct radeon_vce { struct drm_file *filp[RADEON_MAX_VCE_HANDLES]; unsigned img_size[RADEON_MAX_VCE_HANDLES]; struct delayed_work idle_work; + uint32_t keyselect; }; int radeon_vce_init(struct radeon_device *rdev); @@ -2435,6 +2436,7 @@ struct radeon_device { atomic64_t vram_usage; atomic64_t gtt_usage; atomic64_t num_bytes_moved; + atomic_t gpu_reset_counter; /* ACPI interface */ struct radeon_atif atif; struct radeon_atcs atcs; @@ -2472,38 +2474,24 @@ int radeon_gpu_wait_for_idle(struct radeon_device *rdev); #define RADEON_MIN_MMIO_SIZE 0x10000 +uint32_t r100_mm_rreg_slow(struct radeon_device *rdev, uint32_t reg); +void r100_mm_wreg_slow(struct radeon_device *rdev, uint32_t reg, uint32_t v); static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg, bool always_indirect) { /* The mmio size is 64kb at minimum. Allows the if to be optimized out. */ if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect) return readl(((void __iomem *)rdev->rmmio) + reg); - else { - unsigned long flags; - uint32_t ret; - - spin_lock_irqsave(&rdev->mmio_idx_lock, flags); - writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); - ret = readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); - spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); - - return ret; - } + else + return r100_mm_rreg_slow(rdev, reg); } - static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v, bool always_indirect) { if ((reg < rdev->rmmio_size || reg < RADEON_MIN_MMIO_SIZE) && !always_indirect) writel(v, ((void __iomem *)rdev->rmmio) + reg); - else { - unsigned long flags; - - spin_lock_irqsave(&rdev->mmio_idx_lock, flags); - writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX); - writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA); - spin_unlock_irqrestore(&rdev->mmio_idx_lock, flags); - } + else + r100_mm_wreg_slow(rdev, reg, v); } u32 r100_io_rreg(struct radeon_device *rdev, u32 reg); @@ -2579,6 +2567,13 @@ static inline struct radeon_fence *to_radeon_fence(struct fence *f) tmp_ |= ((val) & ~(mask)); \ WREG32_PLL(reg, tmp_); \ } while (0) +#define WREG32_SMC_P(reg, val, mask) \ + do { \ + uint32_t tmp_ = RREG32_SMC(reg); \ + tmp_ &= (mask); \ + tmp_ |= ((val) & ~(mask)); \ + WREG32_SMC(reg, tmp_); \ + } while (0) #define DREG32_SYS(sqf, rdev, reg) seq_printf((sqf), #reg " : 0x%08X\n", r100_mm_rreg((rdev), (reg), false)) #define RREG32_IO(reg) r100_io_rreg(rdev, (reg)) #define WREG32_IO(reg, v) r100_io_wreg(rdev, (reg), (v)) @@ -2587,184 +2582,29 @@ static inline struct radeon_fence *to_radeon_fence(struct fence *f) #define WDOORBELL32(index, v) cik_mm_wdoorbell(rdev, (index), (v)) /* - * Indirect registers accessor + * Indirect registers accessors. + * They used to be inlined, but this increases code size by ~65 kbytes. + * Since each performs a pair of MMIO ops + * within a spin_lock_irqsave/spin_unlock_irqrestore region, + * the cost of call+ret is almost negligible. MMIO and locking + * costs several dozens of cycles each at best, call+ret is ~5 cycles. */ -static inline uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg) -{ - unsigned long flags; - uint32_t r; - - spin_lock_irqsave(&rdev->pcie_idx_lock, flags); - WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask)); - r = RREG32(RADEON_PCIE_DATA); - spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags); - return r; -} - -static inline void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->pcie_idx_lock, flags); - WREG32(RADEON_PCIE_INDEX, ((reg) & rdev->pcie_reg_mask)); - WREG32(RADEON_PCIE_DATA, (v)); - spin_unlock_irqrestore(&rdev->pcie_idx_lock, flags); -} - -static inline u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->smc_idx_lock, flags); - WREG32(TN_SMC_IND_INDEX_0, (reg)); - r = RREG32(TN_SMC_IND_DATA_0); - spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); - return r; -} - -static inline void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->smc_idx_lock, flags); - WREG32(TN_SMC_IND_INDEX_0, (reg)); - WREG32(TN_SMC_IND_DATA_0, (v)); - spin_unlock_irqrestore(&rdev->smc_idx_lock, flags); -} - -static inline u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->rcu_idx_lock, flags); - WREG32(R600_RCU_INDEX, ((reg) & 0x1fff)); - r = RREG32(R600_RCU_DATA); - spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags); - return r; -} - -static inline void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->rcu_idx_lock, flags); - WREG32(R600_RCU_INDEX, ((reg) & 0x1fff)); - WREG32(R600_RCU_DATA, (v)); - spin_unlock_irqrestore(&rdev->rcu_idx_lock, flags); -} - -static inline u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->cg_idx_lock, flags); - WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff)); - r = RREG32(EVERGREEN_CG_IND_DATA); - spin_unlock_irqrestore(&rdev->cg_idx_lock, flags); - return r; -} - -static inline void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->cg_idx_lock, flags); - WREG32(EVERGREEN_CG_IND_ADDR, ((reg) & 0xffff)); - WREG32(EVERGREEN_CG_IND_DATA, (v)); - spin_unlock_irqrestore(&rdev->cg_idx_lock, flags); -} - -static inline u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->pif_idx_lock, flags); - WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff)); - r = RREG32(EVERGREEN_PIF_PHY0_DATA); - spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); - return r; -} - -static inline void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->pif_idx_lock, flags); - WREG32(EVERGREEN_PIF_PHY0_INDEX, ((reg) & 0xffff)); - WREG32(EVERGREEN_PIF_PHY0_DATA, (v)); - spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); -} - -static inline u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->pif_idx_lock, flags); - WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff)); - r = RREG32(EVERGREEN_PIF_PHY1_DATA); - spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); - return r; -} - -static inline void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->pif_idx_lock, flags); - WREG32(EVERGREEN_PIF_PHY1_INDEX, ((reg) & 0xffff)); - WREG32(EVERGREEN_PIF_PHY1_DATA, (v)); - spin_unlock_irqrestore(&rdev->pif_idx_lock, flags); -} - -static inline u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->uvd_idx_lock, flags); - WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff)); - r = RREG32(R600_UVD_CTX_DATA); - spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags); - return r; -} - -static inline void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->uvd_idx_lock, flags); - WREG32(R600_UVD_CTX_INDEX, ((reg) & 0x1ff)); - WREG32(R600_UVD_CTX_DATA, (v)); - spin_unlock_irqrestore(&rdev->uvd_idx_lock, flags); -} - - -static inline u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg) -{ - unsigned long flags; - u32 r; - - spin_lock_irqsave(&rdev->didt_idx_lock, flags); - WREG32(CIK_DIDT_IND_INDEX, (reg)); - r = RREG32(CIK_DIDT_IND_DATA); - spin_unlock_irqrestore(&rdev->didt_idx_lock, flags); - return r; -} - -static inline void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v) -{ - unsigned long flags; - - spin_lock_irqsave(&rdev->didt_idx_lock, flags); - WREG32(CIK_DIDT_IND_INDEX, (reg)); - WREG32(CIK_DIDT_IND_DATA, (v)); - spin_unlock_irqrestore(&rdev->didt_idx_lock, flags); -} +uint32_t rv370_pcie_rreg(struct radeon_device *rdev, uint32_t reg); +void rv370_pcie_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v); +u32 tn_smc_rreg(struct radeon_device *rdev, u32 reg); +void tn_smc_wreg(struct radeon_device *rdev, u32 reg, u32 v); +u32 r600_rcu_rreg(struct radeon_device *rdev, u32 reg); +void r600_rcu_wreg(struct radeon_device *rdev, u32 reg, u32 v); +u32 eg_cg_rreg(struct radeon_device *rdev, u32 reg); +void eg_cg_wreg(struct radeon_device *rdev, u32 reg, u32 v); +u32 eg_pif_phy0_rreg(struct radeon_device *rdev, u32 reg); +void eg_pif_phy0_wreg(struct radeon_device *rdev, u32 reg, u32 v); +u32 eg_pif_phy1_rreg(struct radeon_device *rdev, u32 reg); +void eg_pif_phy1_wreg(struct radeon_device *rdev, u32 reg, u32 v); +u32 r600_uvd_ctx_rreg(struct radeon_device *rdev, u32 reg); +void r600_uvd_ctx_wreg(struct radeon_device *rdev, u32 reg, u32 v); +u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg); +void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v); void r100_pll_errata_after_index(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 8dbf5083c4ff..f2421bc3e901 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -1761,6 +1761,19 @@ static struct radeon_asic cayman_asic = { }, }; +static struct radeon_asic_ring trinity_vce_ring = { + .ib_execute = &radeon_vce_ib_execute, + .emit_fence = &radeon_vce_fence_emit, + .emit_semaphore = &radeon_vce_semaphore_emit, + .cs_parse = &radeon_vce_cs_parse, + .ring_test = &radeon_vce_ring_test, + .ib_test = &radeon_vce_ib_test, + .is_lockup = &radeon_ring_test_lockup, + .get_rptr = &vce_v1_0_get_rptr, + .get_wptr = &vce_v1_0_get_wptr, + .set_wptr = &vce_v1_0_set_wptr, +}; + static struct radeon_asic trinity_asic = { .init = &cayman_init, .fini = &cayman_fini, @@ -1794,6 +1807,8 @@ static struct radeon_asic trinity_asic = { [R600_RING_TYPE_DMA_INDEX] = &cayman_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &cayman_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, + [TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring, + [TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring, }, .irq = { .set = &evergreen_irq_set, @@ -1838,6 +1853,7 @@ static struct radeon_asic trinity_asic = { .set_pcie_lanes = NULL, .set_clock_gating = NULL, .set_uvd_clocks = &sumo_set_uvd_clocks, + .set_vce_clocks = &tn_set_vce_clocks, .get_temperature = &tn_get_temp, }, .dpm = { @@ -1929,6 +1945,8 @@ static struct radeon_asic si_asic = { [R600_RING_TYPE_DMA_INDEX] = &si_dma_ring, [CAYMAN_RING_TYPE_DMA1_INDEX] = &si_dma_ring, [R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring, + [TN_RING_TYPE_VCE1_INDEX] = &trinity_vce_ring, + [TN_RING_TYPE_VCE2_INDEX] = &trinity_vce_ring, }, .irq = { .set = &si_irq_set, @@ -1973,6 +1991,7 @@ static struct radeon_asic si_asic = { .set_pcie_lanes = &r600_set_pcie_lanes, .set_clock_gating = NULL, .set_uvd_clocks = &si_set_uvd_clocks, + .set_vce_clocks = &si_set_vce_clocks, .get_temperature = &si_get_temp, }, .dpm = { @@ -2436,6 +2455,8 @@ int radeon_asic_init(struct radeon_device *rdev) /* set num crtcs */ rdev->num_crtc = 4; rdev->has_uvd = true; + rdev->cg_flags = + RADEON_CG_SUPPORT_VCE_MGCG; break; case CHIP_TAHITI: case CHIP_PITCAIRN: diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index a3ca8cd305c5..e0aa33262eac 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -694,6 +694,7 @@ int trinity_dpm_force_performance_level(struct radeon_device *rdev, void trinity_dpm_enable_bapm(struct radeon_device *rdev, bool enable); u32 trinity_dpm_get_current_sclk(struct radeon_device *rdev); u32 trinity_dpm_get_current_mclk(struct radeon_device *rdev); +int tn_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk); /* DCE6 - SI */ void dce6_bandwidth_update(struct radeon_device *rdev); @@ -745,6 +746,7 @@ void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, u32 si_get_xclk(struct radeon_device *rdev); uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev); int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk); +int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk); int si_get_temp(struct radeon_device *rdev); int si_get_allowed_info_register(struct radeon_device *rdev, u32 reg, u32 *val); @@ -970,10 +972,14 @@ uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev, struct radeon_ring *ring); void vce_v1_0_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring); +int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data); +unsigned vce_v1_0_bo_size(struct radeon_device *rdev); +int vce_v1_0_resume(struct radeon_device *rdev); int vce_v1_0_init(struct radeon_device *rdev); int vce_v1_0_start(struct radeon_device *rdev); /* vce v2.0 */ +unsigned vce_v2_0_bo_size(struct radeon_device *rdev); int vce_v2_0_resume(struct radeon_device *rdev); #endif diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index dcb779647c57..abbc154b1bff 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -242,6 +242,13 @@ static struct radeon_audio_funcs dce6_dp_funcs = { .dpms = evergreen_dp_enable, }; +static void radeon_audio_enable(struct radeon_device *rdev, + struct r600_audio_pin *pin, u8 enable_mask) +{ + if (rdev->audio.funcs->enable) + rdev->audio.funcs->enable(rdev, pin, enable_mask); +} + static void radeon_audio_interface_init(struct radeon_device *rdev) { if (ASIC_IS_DCE6(rdev)) { @@ -307,7 +314,7 @@ int radeon_audio_init(struct radeon_device *rdev) /* disable audio. it will be set up later */ for (i = 0; i < rdev->audio.num_pins; i++) - radeon_audio_enable(rdev, &rdev->audio.pin[i], false); + radeon_audio_enable(rdev, &rdev->audio.pin[i], 0); return 0; } @@ -443,13 +450,6 @@ static void radeon_audio_select_pin(struct drm_encoder *encoder) radeon_encoder->audio->select_pin(encoder); } -void radeon_audio_enable(struct radeon_device *rdev, - struct r600_audio_pin *pin, u8 enable_mask) -{ - if (rdev->audio.funcs->enable) - rdev->audio.funcs->enable(rdev, pin, enable_mask); -} - void radeon_audio_detect(struct drm_connector *connector, enum drm_connector_status status) { @@ -505,7 +505,7 @@ void radeon_audio_fini(struct radeon_device *rdev) return; for (i = 0; i < rdev->audio.num_pins; i++) - radeon_audio_enable(rdev, &rdev->audio.pin[i], false); + radeon_audio_enable(rdev, &rdev->audio.pin[i], 0); rdev->audio.enabled = false; } diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h index c92d059ab204..8438304f7139 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.h +++ b/drivers/gpu/drm/radeon/radeon_audio.h @@ -74,8 +74,6 @@ u32 radeon_audio_endpoint_rreg(struct radeon_device *rdev, void radeon_audio_endpoint_wreg(struct radeon_device *rdev, u32 offset, u32 reg, u32 v); struct r600_audio_pin *radeon_audio_get_pin(struct drm_encoder *encoder); -void radeon_audio_enable(struct radeon_device *rdev, - struct r600_audio_pin *pin, u8 enable_mask); void radeon_audio_fini(struct radeon_device *rdev); void radeon_audio_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index b7ca4c514621..13e207e0dff0 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1725,6 +1725,8 @@ int radeon_gpu_reset(struct radeon_device *rdev) return 0; } + atomic_inc(&rdev->gpu_reset_counter); + radeon_save_bios_scratch_regs(rdev); /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 7d620d4b3f31..5751446677d3 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -90,9 +90,10 @@ * CS to GPU on >= r600 * 2.41.0 - evergreen/cayman: Add SET_BASE/DRAW_INDIRECT command parsing support * 2.42.0 - Add VCE/VUI (Video Usability Information) support + * 2.43.0 - RADEON_INFO_GPU_RESET_COUNTER */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 42 +#define KMS_DRIVER_MINOR 43 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 7162c935371c..1162bfa464f3 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -79,10 +79,12 @@ static void radeon_hotplug_work_func(struct work_struct *work) struct drm_mode_config *mode_config = &dev->mode_config; struct drm_connector *connector; + mutex_lock(&mode_config->mutex); if (mode_config->num_connector) { list_for_each_entry(connector, &mode_config->connector_list, head) radeon_connector_hotplug(connector); } + mutex_unlock(&mode_config->mutex); /* Just fire off a uevent and let userspace tell us what to do */ drm_helper_hpd_irq_event(dev); } @@ -143,7 +145,13 @@ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) */ int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { - dev->max_vblank_count = 0x001fffff; + struct radeon_device *rdev = dev->dev_private; + + if (ASIC_IS_AVIVO(rdev)) + dev->max_vblank_count = 0x00ffffff; + else + dev->max_vblank_count = 0x001fffff; + return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c index 813a416db538..e476c331f3fa 100644 --- a/drivers/gpu/drm/radeon/radeon_kfd.c +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -34,6 +34,13 @@ #define CIK_PIPE_PER_MEC (4) +static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { + TCP_WATCH0_ADDR_H, TCP_WATCH0_ADDR_L, TCP_WATCH0_CNTL, + TCP_WATCH1_ADDR_H, TCP_WATCH1_ADDR_L, TCP_WATCH1_CNTL, + TCP_WATCH2_ADDR_H, TCP_WATCH2_ADDR_L, TCP_WATCH2_CNTL, + TCP_WATCH3_ADDR_H, TCP_WATCH3_ADDR_L, TCP_WATCH3_CNTL +}; + struct kgd_mem { struct radeon_bo *bo; uint64_t gpu_addr; @@ -79,6 +86,23 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, unsigned int timeout); +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static const struct kfd2kgd_calls kfd2kgd = { .init_gtt_mem_allocation = alloc_gtt_mem, @@ -96,6 +120,13 @@ static const struct kfd2kgd_calls kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, + .write_vmid_invalidate_request = write_vmid_invalidate_request, .get_fw_version = get_fw_version }; @@ -372,8 +403,8 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, * the SW cleared it. * So the protocol is to always wait & clear. */ - uint32_t pasid_mapping = (pasid == 0) ? 0 : - (uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID; + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID_PASID_MAPPING_VALID_MASK; write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t), pasid_mapping); @@ -665,6 +696,122 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } +static int kgd_address_watch_disable(struct kgd_dev *kgd) +{ + union TCP_WATCH_CNTL_BITS cntl; + unsigned int i; + + cntl.u32All = 0; + + cntl.bitfields.valid = 0; + cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; + cntl.bitfields.atc = 1; + + /* Turning off this address until we set all the registers */ + for (i = 0; i < MAX_WATCH_ADDRESSES; i++) + write_register(kgd, + watchRegs[i * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], + cntl.u32All); + + return 0; +} + +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + union TCP_WATCH_CNTL_BITS cntl; + + cntl.u32All = cntl_val; + + /* Turning off this watch point until we set all the registers */ + cntl.bitfields.valid = 0; + write_register(kgd, + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], + cntl.u32All); + + write_register(kgd, + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_ADDR_HI], + addr_hi); + + write_register(kgd, + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_ADDR_LO], + addr_lo); + + /* Enable the watch point */ + cntl.bitfields.valid = 1; + + write_register(kgd, + watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + + ADDRESS_WATCH_REG_CNTL], + cntl.u32All); + + return 0; +} + +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct radeon_device *rdev = get_radeon_device(kgd); + uint32_t data; + + mutex_lock(&rdev->grbm_idx_mutex); + + write_register(kgd, GRBM_GFX_INDEX, gfx_index_val); + write_register(kgd, SQ_CMD, sq_cmd); + + /* Restore the GRBM_GFX_INDEX register */ + + data = INSTANCE_BROADCAST_WRITES | SH_BROADCAST_WRITES | + SE_BROADCAST_WRITES; + + write_register(kgd, GRBM_GFX_INDEX, data); + + mutex_unlock(&rdev->grbm_idx_mutex); + + return 0; +} + +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; +} + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid) +{ + uint32_t reg; + struct radeon_device *rdev = (struct radeon_device *) kgd; + + reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); + return reg & ATC_VMID_PASID_MAPPING_VALID_MASK; +} + +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct radeon_device *rdev = (struct radeon_device *) kgd; + + reg = RREG32(ATC_VMID0_PASID_MAPPING + vmid*4); + return reg & ATC_VMID_PASID_MAPPING_PASID_MASK; +} + +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) +{ + struct radeon_device *rdev = (struct radeon_device *) kgd; + + return WREG32(VM_INVALIDATE_REQUEST, 1 << vmid); +} + static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) { struct radeon_device *rdev = (struct radeon_device *) kgd; diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 7b2a7335cc5d..9632e886ddc3 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -576,6 +576,9 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (radeon_get_allowed_info_register(rdev, *value, value)) return -EINVAL; break; + case RADEON_INFO_GPU_RESET_COUNTER: + *value = atomic_read(&rdev->gpu_reset_counter); + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index fa91a17b81b6..6de5459316b5 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -754,7 +754,7 @@ extern bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector); extern int radeon_dp_get_panel_mode(struct drm_encoder *encoder, struct drm_connector *connector); int radeon_dp_get_max_link_rate(struct drm_connector *connector, - u8 *dpcd); + const u8 *dpcd); extern void radeon_dp_set_rx_power_state(struct drm_connector *connector, u8 power_state); extern void radeon_dp_aux_init(struct radeon_connector *radeon_connector); diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 0de5711ac508..574f62bbd215 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -38,8 +38,10 @@ #define VCE_IDLE_TIMEOUT_MS 1000 /* Firmware Names */ +#define FIRMWARE_TAHITI "radeon/TAHITI_vce.bin" #define FIRMWARE_BONAIRE "radeon/BONAIRE_vce.bin" +MODULE_FIRMWARE(FIRMWARE_TAHITI); MODULE_FIRMWARE(FIRMWARE_BONAIRE); static void radeon_vce_idle_work_handler(struct work_struct *work); @@ -63,6 +65,14 @@ int radeon_vce_init(struct radeon_device *rdev) INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler); switch (rdev->family) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_ARUBA: + fw_name = FIRMWARE_TAHITI; + break; + case CHIP_BONAIRE: case CHIP_KAVERI: case CHIP_KABINI: @@ -118,13 +128,17 @@ int radeon_vce_init(struct radeon_device *rdev) rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8); /* we can only work with this fw version for now */ - if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) + if ((rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8))) && + (rdev->vce.fw_version != ((50 << 24) | (0 << 16) | (1 << 8))) && + (rdev->vce.fw_version != ((50 << 24) | (1 << 16) | (2 << 8)))) return -EINVAL; /* allocate firmware, stack and heap BO */ - size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) + - RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE; + if (rdev->family < CHIP_BONAIRE) + size = vce_v1_0_bo_size(rdev); + else + size = vce_v2_0_bo_size(rdev); r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &rdev->vce.vcpu_bo); @@ -225,13 +239,17 @@ int radeon_vce_resume(struct radeon_device *rdev) return r; } - memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); + memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo)); + if (rdev->family < CHIP_BONAIRE) + r = vce_v1_0_load_fw(rdev, cpu_addr); + else + memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size); radeon_bo_kunmap(rdev->vce.vcpu_bo); radeon_bo_unreserve(rdev->vce.vcpu_bo); - return 0; + return r; } /** diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index de42fc4a22b8..9739ded91b7a 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -331,7 +331,6 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, bo_va->it.start = 0; bo_va->it.last = 0; bo_va->flags = 0; - bo_va->addr = 0; bo_va->ref_count = 1; INIT_LIST_HEAD(&bo_va->bo_list); INIT_LIST_HEAD(&bo_va->vm_status); @@ -491,9 +490,11 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, } if (bo_va->it.start || bo_va->it.last) { - if (bo_va->addr) { + spin_lock(&vm->status_lock); + if (list_empty(&bo_va->vm_status)) { /* add a clone of the bo_va to clear the old address */ struct radeon_bo_va *tmp; + spin_unlock(&vm->status_lock); tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); if (!tmp) { mutex_unlock(&vm->mutex); @@ -502,14 +503,11 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, tmp->it.start = bo_va->it.start; tmp->it.last = bo_va->it.last; tmp->vm = vm; - tmp->addr = bo_va->addr; tmp->bo = radeon_bo_ref(bo_va->bo); spin_lock(&vm->status_lock); list_add(&tmp->vm_status, &vm->freed); - spin_unlock(&vm->status_lock); - - bo_va->addr = 0; } + spin_unlock(&vm->status_lock); interval_tree_remove(&bo_va->it, &vm->va); bo_va->it.start = 0; @@ -520,10 +518,12 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, bo_va->it.start = soffset; bo_va->it.last = eoffset - 1; interval_tree_insert(&bo_va->it, &vm->va); + spin_lock(&vm->status_lock); + list_add(&bo_va->vm_status, &vm->cleared); + spin_unlock(&vm->status_lock); } bo_va->flags = flags; - bo_va->addr = 0; soffset >>= radeon_vm_block_size; eoffset >>= radeon_vm_block_size; @@ -921,7 +921,16 @@ int radeon_vm_bo_update(struct radeon_device *rdev, } spin_lock(&vm->status_lock); - list_del_init(&bo_va->vm_status); + if (mem) { + if (list_empty(&bo_va->vm_status)) { + spin_unlock(&vm->status_lock); + return 0; + } + list_del_init(&bo_va->vm_status); + } else { + list_del(&bo_va->vm_status); + list_add(&bo_va->vm_status, &vm->cleared); + } spin_unlock(&vm->status_lock); bo_va->flags &= ~RADEON_VM_PAGE_VALID; @@ -947,10 +956,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev, addr = 0; } - if (addr == bo_va->addr) - return 0; - bo_va->addr = addr; - trace_radeon_vm_bo_update(bo_va); nptes = bo_va->it.last - bo_va->it.start + 1; @@ -1038,7 +1043,7 @@ int radeon_vm_clear_freed(struct radeon_device *rdev, struct radeon_vm *vm) { struct radeon_bo_va *bo_va; - int r; + int r = 0; spin_lock(&vm->status_lock); while (!list_empty(&vm->freed)) { @@ -1049,14 +1054,15 @@ int radeon_vm_clear_freed(struct radeon_device *rdev, r = radeon_vm_bo_update(rdev, bo_va, NULL); radeon_bo_unref(&bo_va->bo); radeon_fence_unref(&bo_va->last_pt_update); + spin_lock(&vm->status_lock); + list_del(&bo_va->vm_status); kfree(bo_va); if (r) - return r; + break; - spin_lock(&vm->status_lock); } spin_unlock(&vm->status_lock); - return 0; + return r; } @@ -1114,14 +1120,14 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev, mutex_lock(&vm->mutex); if (bo_va->it.start || bo_va->it.last) interval_tree_remove(&bo_va->it, &vm->va); - spin_lock(&vm->status_lock); - list_del(&bo_va->vm_status); - if (bo_va->addr) { + spin_lock(&vm->status_lock); + if (list_empty(&bo_va->vm_status)) { bo_va->bo = radeon_bo_ref(bo_va->bo); list_add(&bo_va->vm_status, &vm->freed); } else { radeon_fence_unref(&bo_va->last_pt_update); + list_del(&bo_va->vm_status); kfree(bo_va); } spin_unlock(&vm->status_lock); @@ -1144,12 +1150,10 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, struct radeon_bo_va *bo_va; list_for_each_entry(bo_va, &bo->va, bo_list) { - if (bo_va->addr) { - spin_lock(&bo_va->vm->status_lock); - list_del(&bo_va->vm_status); + spin_lock(&bo_va->vm->status_lock); + if (list_empty(&bo_va->vm_status)) list_add(&bo_va->vm_status, &bo_va->vm->invalidated); - spin_unlock(&bo_va->vm->status_lock); - } + spin_unlock(&bo_va->vm->status_lock); } } @@ -1179,6 +1183,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->freed); + INIT_LIST_HEAD(&vm->cleared); pd_size = radeon_vm_directory_size(rdev); pd_entries = radeon_vm_num_pdes(rdev); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 5326f753e107..34c3739c87cf 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6907,6 +6907,22 @@ static int si_startup(struct radeon_device *rdev) rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0; } + r = radeon_vce_resume(rdev); + if (!r) { + r = vce_v1_0_resume(rdev); + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE1_INDEX); + if (!r) + r = radeon_fence_driver_start_ring(rdev, + TN_RING_TYPE_VCE2_INDEX); + } + if (r) { + dev_err(rdev->dev, "VCE init error (%d).\n", r); + rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0; + rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0; + } + /* Enable IRQ */ if (!rdev->irq.installed) { r = radeon_irq_kms_init(rdev); @@ -6975,6 +6991,23 @@ static int si_startup(struct radeon_device *rdev) } } + r = -ENOENT; + + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + VCE_CMD_NO_OP); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, + VCE_CMD_NO_OP); + + if (!r) + r = vce_v1_0_init(rdev); + else if (r != -ENOENT) + DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + r = radeon_ib_pool_init(rdev); if (r) { dev_err(rdev->dev, "IB initialization failed (%d).\n", r); @@ -7033,6 +7066,7 @@ int si_suspend(struct radeon_device *rdev) if (rdev->has_uvd) { uvd_v1_0_fini(rdev); radeon_uvd_suspend(rdev); + radeon_vce_suspend(rdev); } si_fini_pg(rdev); si_fini_cg(rdev); @@ -7140,6 +7174,17 @@ int si_init(struct radeon_device *rdev) } } + r = radeon_vce_init(rdev); + if (!r) { + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + ring->ring_obj = NULL; + r600_ring_init(rdev, ring, 4096); + } + rdev->ih.ring_obj = NULL; r600_ih_ring_init(rdev, 64 * 1024); @@ -7191,6 +7236,7 @@ void si_fini(struct radeon_device *rdev) if (rdev->has_uvd) { uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); + radeon_vce_fini(rdev); } si_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); @@ -7675,3 +7721,124 @@ static void si_program_aspm(struct radeon_device *rdev) } } } + +int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev) +{ + unsigned i; + + /* make sure VCEPLL_CTLREQ is deasserted */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK); + + mdelay(10); + + /* assert UPLL_CTLREQ */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK); + + /* wait for CTLACK and CTLACK2 to get asserted */ + for (i = 0; i < 100; ++i) { + uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK; + if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask) + break; + mdelay(10); + } + + /* deassert UPLL_CTLREQ */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK); + + if (i == 100) { + DRM_ERROR("Timeout setting UVD clocks!\n"); + return -ETIMEDOUT; + } + + return 0; +} + +int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk) +{ + unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0; + int r; + + /* bypass evclk and ecclk with bclk */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2, + EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1), + ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK)); + + /* put PLL in bypass mode */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK, + ~VCEPLL_BYPASS_EN_MASK); + + if (!evclk || !ecclk) { + /* keep the Bypass mode, put PLL to sleep */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK, + ~VCEPLL_SLEEP_MASK); + return 0; + } + + r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000, + 16384, 0x03FFFFFF, 0, 128, 5, + &fb_div, &evclk_div, &ecclk_div); + if (r) + return r; + + /* set RESET_ANTI_MUX to 0 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK); + + /* set VCO_MODE to 1 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK, + ~VCEPLL_VCO_MODE_MASK); + + /* toggle VCEPLL_SLEEP to 1 then back to 0 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK, + ~VCEPLL_SLEEP_MASK); + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK); + + /* deassert VCEPLL_RESET */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK); + + mdelay(1); + + r = si_vce_send_vcepll_ctlreq(rdev); + if (r) + return r; + + /* assert VCEPLL_RESET again */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK); + + /* disable spread spectrum. */ + WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK); + + /* set feedback divider */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK); + + /* set ref divider to 0 */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK); + + /* set PDIV_A and PDIV_B */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2, + VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div), + ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK)); + + /* give the PLL some time to settle */ + mdelay(15); + + /* deassert PLL_RESET */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK); + + mdelay(15); + + /* switch from bypass mode to normal mode */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK); + + r = si_vce_send_vcepll_ctlreq(rdev); + if (r) + return r; + + /* switch VCLK and DCLK selection */ + WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2, + EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16), + ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK)); + + mdelay(100); + + return 0; +} diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index ff8b83f5e929..1dbdf3230dae 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -1740,6 +1740,7 @@ struct ni_power_info *ni_get_pi(struct radeon_device *rdev); struct ni_ps *ni_get_ps(struct radeon_ps *rps); extern int si_mc_load_microcode(struct radeon_device *rdev); +extern void vce_v1_0_enable_mgcg(struct radeon_device *rdev, bool enable); static int si_populate_voltage_value(struct radeon_device *rdev, const struct atom_voltage_table *table, @@ -2928,6 +2929,56 @@ static struct si_dpm_quirk si_dpm_quirk_list[] = { { 0, 0, 0, 0 }, }; +static u16 si_get_lower_of_leakage_and_vce_voltage(struct radeon_device *rdev, + u16 vce_voltage) +{ + u16 highest_leakage = 0; + struct si_power_info *si_pi = si_get_pi(rdev); + int i; + + for (i = 0; i < si_pi->leakage_voltage.count; i++){ + if (highest_leakage < si_pi->leakage_voltage.entries[i].voltage) + highest_leakage = si_pi->leakage_voltage.entries[i].voltage; + } + + if (si_pi->leakage_voltage.count && (highest_leakage < vce_voltage)) + return highest_leakage; + + return vce_voltage; +} + +static int si_get_vce_clock_voltage(struct radeon_device *rdev, + u32 evclk, u32 ecclk, u16 *voltage) +{ + u32 i; + int ret = -EINVAL; + struct radeon_vce_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; + + if (((evclk == 0) && (ecclk == 0)) || + (table && (table->count == 0))) { + *voltage = 0; + return 0; + } + + for (i = 0; i < table->count; i++) { + if ((evclk <= table->entries[i].evclk) && + (ecclk <= table->entries[i].ecclk)) { + *voltage = table->entries[i].v; + ret = 0; + break; + } + } + + /* if no match return the highest voltage */ + if (ret) + *voltage = table->entries[table->count - 1].v; + + *voltage = si_get_lower_of_leakage_and_vce_voltage(rdev, *voltage); + + return ret; +} + static void si_apply_state_adjust_rules(struct radeon_device *rdev, struct radeon_ps *rps) { @@ -2936,7 +2987,7 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, bool disable_mclk_switching = false; bool disable_sclk_switching = false; u32 mclk, sclk; - u16 vddc, vddci; + u16 vddc, vddci, min_vce_voltage = 0; u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; u32 max_sclk = 0, max_mclk = 0; int i; @@ -2955,6 +3006,16 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, ++p; } + if (rps->vce_active) { + rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; + rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; + si_get_vce_clock_voltage(rdev, rps->evclk, rps->ecclk, + &min_vce_voltage); + } else { + rps->evclk = 0; + rps->ecclk = 0; + } + if ((rdev->pm.dpm.new_active_crtc_count > 1) || ni_dpm_vblank_too_short(rdev)) disable_mclk_switching = true; @@ -3035,6 +3096,13 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, vddc = ps->performance_levels[0].vddc; } + if (rps->vce_active) { + if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) + sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; + if (mclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk) + mclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk; + } + /* adjusted low state */ ps->performance_levels[0].sclk = sclk; ps->performance_levels[0].mclk = mclk; @@ -3084,6 +3152,8 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, &ps->performance_levels[i]); for (i = 0; i < ps->performance_level_count; i++) { + if (ps->performance_levels[i].vddc < min_vce_voltage) + ps->performance_levels[i].vddc = min_vce_voltage; btc_apply_voltage_dependency_rules(&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk, ps->performance_levels[i].sclk, max_limits->vddc, &ps->performance_levels[i].vddc); @@ -3110,7 +3180,6 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (ps->performance_levels[i].vddc > rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.vddc) ps->dc_compatible = false; } - } #if 0 @@ -5859,6 +5928,21 @@ static void si_set_pcie_lane_width_in_smc(struct radeon_device *rdev, } } +static void si_set_vce_clock(struct radeon_device *rdev, + struct radeon_ps *new_rps, + struct radeon_ps *old_rps) +{ + if ((old_rps->evclk != new_rps->evclk) || + (old_rps->ecclk != new_rps->ecclk)) { + /* turn the clocks on when encoding, off otherwise */ + if (new_rps->evclk || new_rps->ecclk) + vce_v1_0_enable_mgcg(rdev, false); + else + vce_v1_0_enable_mgcg(rdev, true); + radeon_set_vce_clocks(rdev, new_rps->evclk, new_rps->ecclk); + } +} + void si_dpm_setup_asic(struct radeon_device *rdev) { int r; @@ -6547,6 +6631,7 @@ int si_dpm_set_power_state(struct radeon_device *rdev) return ret; } ni_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); + si_set_vce_clock(rdev, new_ps, old_ps); if (eg_pi->pcie_performance_request) si_notify_link_speed_change_after_state_change(rdev, new_ps, old_ps); ret = si_set_power_state_conditionally_enable_ulv(rdev, new_ps); @@ -6793,6 +6878,21 @@ static int si_parse_power_table(struct radeon_device *rdev) power_state_offset += 2 + power_state->v2.ucNumDPMLevels; } rdev->pm.dpm.num_ps = state_array->ucNumEntries; + + /* fill in the vce power states */ + for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { + u32 sclk, mclk; + clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; + clock_info = (union pplib_clock_info *) + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + sclk = le16_to_cpu(clock_info->si.usEngineClockLow); + sclk |= clock_info->si.ucEngineClockHigh << 16; + mclk = le16_to_cpu(clock_info->si.usMemoryClockLow); + mclk |= clock_info->si.ucMemoryClockHigh << 16; + rdev->pm.dpm.vce_states[i].sclk = sclk; + rdev->pm.dpm.vce_states[i].mclk = mclk; + } + return 0; } @@ -6837,10 +6937,11 @@ int si_dpm_init(struct radeon_device *rdev) if (ret) return ret; - ret = si_parse_power_table(rdev); + ret = r600_parse_extended_power_table(rdev); if (ret) return ret; - ret = r600_parse_extended_power_table(rdev); + + ret = si_parse_power_table(rdev); if (ret) return ret; diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 3afac3013983..4c4a7218a3bd 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1879,6 +1879,7 @@ #define VCE_VCPU_CACHE_SIZE1 0x20030 #define VCE_VCPU_CACHE_OFFSET2 0x20034 #define VCE_VCPU_CACHE_SIZE2 0x20038 +#define VCE_VCPU_SCRATCH7 0x200dc #define VCE_SOFT_RESET 0x20120 #define VCE_ECPU_SOFT_RESET (1 << 0) #define VCE_FME_SOFT_RESET (1 << 2) @@ -1893,6 +1894,7 @@ #define VCE_RB_RPTR 0x2018c #define VCE_RB_WPTR 0x20190 #define VCE_CLOCK_GATING_A 0x202f8 +# define CGC_DYN_CLOCK_MODE (1 << 16) #define VCE_CLOCK_GATING_B 0x202fc #define VCE_UENC_CLOCK_GATING 0x205bc #define VCE_UENC_REG_CLOCK_GATING 0x205c0 @@ -1917,4 +1919,31 @@ #define VCE_CMD_IB_AUTO 0x00000005 #define VCE_CMD_SEMAPHORE 0x00000006 +/* discrete vce clocks */ +#define CG_VCEPLL_FUNC_CNTL 0xc0030600 +# define VCEPLL_RESET_MASK 0x00000001 +# define VCEPLL_SLEEP_MASK 0x00000002 +# define VCEPLL_BYPASS_EN_MASK 0x00000004 +# define VCEPLL_CTLREQ_MASK 0x00000008 +# define VCEPLL_VCO_MODE_MASK 0x00000600 +# define VCEPLL_REF_DIV_MASK 0x003F0000 +# define VCEPLL_CTLACK_MASK 0x40000000 +# define VCEPLL_CTLACK2_MASK 0x80000000 +#define CG_VCEPLL_FUNC_CNTL_2 0xc0030601 +# define VCEPLL_PDIV_A(x) ((x) << 0) +# define VCEPLL_PDIV_A_MASK 0x0000007F +# define VCEPLL_PDIV_B(x) ((x) << 8) +# define VCEPLL_PDIV_B_MASK 0x00007F00 +# define EVCLK_SRC_SEL(x) ((x) << 20) +# define EVCLK_SRC_SEL_MASK 0x01F00000 +# define ECCLK_SRC_SEL(x) ((x) << 25) +# define ECCLK_SRC_SEL_MASK 0x3E000000 +#define CG_VCEPLL_FUNC_CNTL_3 0xc0030602 +# define VCEPLL_FB_DIV(x) ((x) << 0) +# define VCEPLL_FB_DIV_MASK 0x01FFFFFF +#define CG_VCEPLL_FUNC_CNTL_4 0xc0030603 +#define CG_VCEPLL_FUNC_CNTL_5 0xc0030604 +#define CG_VCEPLL_SPREAD_SPECTRUM 0xc0030606 +# define VCEPLL_SSEN_MASK 0x00000001 + #endif diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c index a5b02c575d77..d34bfcdab9be 100644 --- a/drivers/gpu/drm/radeon/trinity_dpm.c +++ b/drivers/gpu/drm/radeon/trinity_dpm.c @@ -336,6 +336,7 @@ static const u32 trinity_override_mgpg_sequences[] = 0x00000204, 0x00000000, }; +extern void vce_v1_0_enable_mgcg(struct radeon_device *rdev, bool enable); static void trinity_program_clk_gating_hw_sequence(struct radeon_device *rdev, const u32 *seq, u32 count); static void trinity_override_dynamic_mg_powergating(struct radeon_device *rdev); @@ -985,6 +986,21 @@ static void trinity_set_uvd_clock_after_set_eng_clock(struct radeon_device *rdev trinity_setup_uvd_clocks(rdev, new_rps, old_rps); } +static void trinity_set_vce_clock(struct radeon_device *rdev, + struct radeon_ps *new_rps, + struct radeon_ps *old_rps) +{ + if ((old_rps->evclk != new_rps->evclk) || + (old_rps->ecclk != new_rps->ecclk)) { + /* turn the clocks on when encoding, off otherwise */ + if (new_rps->evclk || new_rps->ecclk) + vce_v1_0_enable_mgcg(rdev, false); + else + vce_v1_0_enable_mgcg(rdev, true); + radeon_set_vce_clocks(rdev, new_rps->evclk, new_rps->ecclk); + } +} + static void trinity_program_ttt(struct radeon_device *rdev) { struct trinity_power_info *pi = trinity_get_pi(rdev); @@ -1246,6 +1262,7 @@ int trinity_dpm_set_power_state(struct radeon_device *rdev) trinity_force_level_0(rdev); trinity_unforce_levels(rdev); trinity_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps); + trinity_set_vce_clock(rdev, new_ps, old_ps); } trinity_release_mutex(rdev); @@ -1483,7 +1500,35 @@ static void trinity_adjust_uvd_state(struct radeon_device *rdev, } } +static int trinity_get_vce_clock_voltage(struct radeon_device *rdev, + u32 evclk, u32 ecclk, u16 *voltage) +{ + u32 i; + int ret = -EINVAL; + struct radeon_vce_clock_voltage_dependency_table *table = + &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table; + + if (((evclk == 0) && (ecclk == 0)) || + (table && (table->count == 0))) { + *voltage = 0; + return 0; + } + + for (i = 0; i < table->count; i++) { + if ((evclk <= table->entries[i].evclk) && + (ecclk <= table->entries[i].ecclk)) { + *voltage = table->entries[i].v; + ret = 0; + break; + } + } + + /* if no match return the highest voltage */ + if (ret) + *voltage = table->entries[table->count - 1].v; + return ret; +} static void trinity_apply_state_adjust_rules(struct radeon_device *rdev, struct radeon_ps *new_rps, @@ -1496,6 +1541,7 @@ static void trinity_apply_state_adjust_rules(struct radeon_device *rdev, u32 min_sclk = pi->sys_info.min_sclk; /* XXX check against disp reqs */ u32 sclk_in_sr = pi->sys_info.min_sclk; /* ??? */ u32 i; + u16 min_vce_voltage; bool force_high; u32 num_active_displays = rdev->pm.dpm.new_active_crtc_count; @@ -1504,6 +1550,14 @@ static void trinity_apply_state_adjust_rules(struct radeon_device *rdev, trinity_adjust_uvd_state(rdev, new_rps); + if (new_rps->vce_active) { + new_rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; + new_rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk; + } else { + new_rps->evclk = 0; + new_rps->ecclk = 0; + } + for (i = 0; i < ps->num_levels; i++) { if (ps->levels[i].vddc_index < min_voltage) ps->levels[i].vddc_index = min_voltage; @@ -1512,6 +1566,17 @@ static void trinity_apply_state_adjust_rules(struct radeon_device *rdev, ps->levels[i].sclk = trinity_get_valid_engine_clock(rdev, min_sclk); + /* patch in vce limits */ + if (new_rps->vce_active) { + /* sclk */ + if (ps->levels[i].sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk) + ps->levels[i].sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk; + /* vddc */ + trinity_get_vce_clock_voltage(rdev, new_rps->evclk, new_rps->ecclk, &min_vce_voltage); + if (ps->levels[i].vddc_index < min_vce_voltage) + ps->levels[i].vddc_index = min_vce_voltage; + } + ps->levels[i].ds_divider_index = sumo_get_sleep_divider_id_from_clock(rdev, ps->levels[i].sclk, sclk_in_sr); @@ -1733,6 +1798,19 @@ static int trinity_parse_power_table(struct radeon_device *rdev) power_state_offset += 2 + power_state->v2.ucNumDPMLevels; } rdev->pm.dpm.num_ps = state_array->ucNumEntries; + + /* fill in the vce power states */ + for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) { + u32 sclk; + clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx; + clock_info = (union pplib_clock_info *) + &clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize]; + sclk = le16_to_cpu(clock_info->sumo.usEngineClockLow); + sclk |= clock_info->sumo.ucEngineClockHigh << 16; + rdev->pm.dpm.vce_states[i].sclk = sclk; + rdev->pm.dpm.vce_states[i].mclk = 0; + } + return 0; } @@ -1914,6 +1992,10 @@ int trinity_dpm_init(struct radeon_device *rdev) if (ret) return ret; + ret = r600_parse_extended_power_table(rdev); + if (ret) + return ret; + ret = trinity_parse_power_table(rdev); if (ret) return ret; @@ -2000,6 +2082,7 @@ void trinity_dpm_fini(struct radeon_device *rdev) } kfree(rdev->pm.dpm.ps); kfree(rdev->pm.dpm.priv); + r600_free_extended_power_table(rdev); } u32 trinity_dpm_get_sclk(struct radeon_device *rdev, bool low) diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c index b44d9c842f7b..07a0d378e122 100644 --- a/drivers/gpu/drm/radeon/vce_v1_0.c +++ b/drivers/gpu/drm/radeon/vce_v1_0.c @@ -31,6 +31,23 @@ #include "radeon_asic.h" #include "sid.h" +#define VCE_V1_0_FW_SIZE (256 * 1024) +#define VCE_V1_0_STACK_SIZE (64 * 1024) +#define VCE_V1_0_DATA_SIZE (7808 * (RADEON_MAX_VCE_HANDLES + 1)) + +struct vce_v1_0_fw_signature +{ + int32_t off; + uint32_t len; + int32_t num; + struct { + uint32_t chip_id; + uint32_t keyselect; + uint32_t nonce[4]; + uint32_t sigval[4]; + } val[8]; +}; + /** * vce_v1_0_get_rptr - get read pointer * @@ -82,6 +99,186 @@ void vce_v1_0_set_wptr(struct radeon_device *rdev, WREG32(VCE_RB_WPTR2, ring->wptr); } +void vce_v1_0_enable_mgcg(struct radeon_device *rdev, bool enable) +{ + u32 tmp; + + if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_VCE_MGCG)) { + tmp = RREG32(VCE_CLOCK_GATING_A); + tmp |= CGC_DYN_CLOCK_MODE; + WREG32(VCE_CLOCK_GATING_A, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp &= ~0x1ff000; + tmp |= 0xff800000; + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp &= ~0x3ff; + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); + } else { + tmp = RREG32(VCE_CLOCK_GATING_A); + tmp &= ~CGC_DYN_CLOCK_MODE; + WREG32(VCE_CLOCK_GATING_A, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp |= 0x1ff000; + tmp &= ~0xff800000; + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp |= 0x3ff; + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); + } +} + +static void vce_v1_0_init_cg(struct radeon_device *rdev) +{ + u32 tmp; + + tmp = RREG32(VCE_CLOCK_GATING_A); + tmp |= CGC_DYN_CLOCK_MODE; + WREG32(VCE_CLOCK_GATING_A, tmp); + + tmp = RREG32(VCE_CLOCK_GATING_B); + tmp |= 0x1e; + tmp &= ~0xe100e1; + WREG32(VCE_CLOCK_GATING_B, tmp); + + tmp = RREG32(VCE_UENC_CLOCK_GATING); + tmp &= ~0xff9ff000; + WREG32(VCE_UENC_CLOCK_GATING, tmp); + + tmp = RREG32(VCE_UENC_REG_CLOCK_GATING); + tmp &= ~0x3ff; + WREG32(VCE_UENC_REG_CLOCK_GATING, tmp); +} + +int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data) +{ + struct vce_v1_0_fw_signature *sign = (void*)rdev->vce_fw->data; + uint32_t chip_id; + int i; + + switch (rdev->family) { + case CHIP_TAHITI: + chip_id = 0x01000014; + break; + case CHIP_VERDE: + chip_id = 0x01000015; + break; + case CHIP_PITCAIRN: + case CHIP_OLAND: + chip_id = 0x01000016; + break; + case CHIP_ARUBA: + chip_id = 0x01000017; + break; + default: + return -EINVAL; + } + + for (i = 0; i < sign->num; ++i) { + if (sign->val[i].chip_id == chip_id) + break; + } + + if (i == sign->num) + return -EINVAL; + + data += (256 - 64) / 4; + data[0] = sign->val[i].nonce[0]; + data[1] = sign->val[i].nonce[1]; + data[2] = sign->val[i].nonce[2]; + data[3] = sign->val[i].nonce[3]; + data[4] = sign->len + 64; + + memset(&data[5], 0, 44); + memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign)); + + data += data[4] / 4; + data[0] = sign->val[i].sigval[0]; + data[1] = sign->val[i].sigval[1]; + data[2] = sign->val[i].sigval[2]; + data[3] = sign->val[i].sigval[3]; + + rdev->vce.keyselect = sign->val[i].keyselect; + + return 0; +} + +unsigned vce_v1_0_bo_size(struct radeon_device *rdev) +{ + WARN_ON(VCE_V1_0_FW_SIZE < rdev->vce_fw->size); + return VCE_V1_0_FW_SIZE + VCE_V1_0_STACK_SIZE + VCE_V1_0_DATA_SIZE; +} + +int vce_v1_0_resume(struct radeon_device *rdev) +{ + uint64_t addr = rdev->vce.gpu_addr; + uint32_t size; + int i; + + WREG32_P(VCE_CLOCK_GATING_A, 0, ~(1 << 16)); + WREG32_P(VCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000); + WREG32_P(VCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F); + WREG32(VCE_CLOCK_GATING_B, 0); + + WREG32_P(VCE_LMI_FW_PERIODIC_CTRL, 0x4, ~0x4); + + WREG32(VCE_LMI_CTRL, 0x00398000); + WREG32_P(VCE_LMI_CACHE_CTRL, 0x0, ~0x1); + WREG32(VCE_LMI_SWAP_CNTL, 0); + WREG32(VCE_LMI_SWAP_CNTL1, 0); + WREG32(VCE_LMI_VM_CTRL, 0); + + WREG32(VCE_VCPU_SCRATCH7, RADEON_MAX_VCE_HANDLES); + + addr += 256; + size = VCE_V1_0_FW_SIZE; + WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE0, size); + + addr += size; + size = VCE_V1_0_STACK_SIZE; + WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE1, size); + + addr += size; + size = VCE_V1_0_DATA_SIZE; + WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff); + WREG32(VCE_VCPU_CACHE_SIZE2, size); + + WREG32_P(VCE_LMI_CTRL2, 0x0, ~0x100); + + WREG32(VCE_LMI_FW_START_KEYSEL, rdev->vce.keyselect); + + for (i = 0; i < 10; ++i) { + mdelay(10); + if (RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_DONE) + break; + } + + if (i == 10) + return -ETIMEDOUT; + + if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_PASS)) + return -EINVAL; + + for (i = 0; i < 10; ++i) { + mdelay(10); + if (!(RREG32(VCE_FW_REG_STATUS) & VCE_FW_REG_STATUS_BUSY)) + break; + } + + if (i == 10) + return -ETIMEDOUT; + + vce_v1_0_init_cg(rdev); + + return 0; +} + /** * vce_v1_0_start - start VCE block * diff --git a/drivers/gpu/drm/radeon/vce_v2_0.c b/drivers/gpu/drm/radeon/vce_v2_0.c index fbbe78fbd087..cdeaab7c7b1e 100644 --- a/drivers/gpu/drm/radeon/vce_v2_0.c +++ b/drivers/gpu/drm/radeon/vce_v2_0.c @@ -31,6 +31,10 @@ #include "radeon_asic.h" #include "cikd.h" +#define VCE_V2_0_FW_SIZE (256 * 1024) +#define VCE_V2_0_STACK_SIZE (64 * 1024) +#define VCE_V2_0_DATA_SIZE (23552 * RADEON_MAX_VCE_HANDLES) + static void vce_v2_0_set_sw_cg(struct radeon_device *rdev, bool gated) { u32 tmp; @@ -140,6 +144,12 @@ static void vce_v2_0_init_cg(struct radeon_device *rdev) WREG32(VCE_CLOCK_GATING_B, tmp); } +unsigned vce_v2_0_bo_size(struct radeon_device *rdev) +{ + WARN_ON(rdev->vce_fw->size > VCE_V2_0_FW_SIZE); + return VCE_V2_0_FW_SIZE + VCE_V2_0_STACK_SIZE + VCE_V2_0_DATA_SIZE; +} + int vce_v2_0_resume(struct radeon_device *rdev) { uint64_t addr = rdev->vce.gpu_addr; @@ -159,17 +169,17 @@ int vce_v2_0_resume(struct radeon_device *rdev) WREG32(VCE_LMI_VCPU_CACHE_40BIT_BAR, addr >> 8); addr &= 0xff; - size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size); + size = VCE_V2_0_FW_SIZE; WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff); WREG32(VCE_VCPU_CACHE_SIZE0, size); addr += size; - size = RADEON_VCE_STACK_SIZE; + size = VCE_V2_0_STACK_SIZE; WREG32(VCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff); WREG32(VCE_VCPU_CACHE_SIZE1, size); addr += size; - size = RADEON_VCE_HEAP_SIZE; + size = VCE_V2_0_DATA_SIZE; WREG32(VCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff); WREG32(VCE_VCPU_CACHE_SIZE2, size); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index 7d0b8ef9bea2..e6a32c4e4040 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -195,26 +195,27 @@ void rcar_du_crtc_route_output(struct drm_crtc *crtc, static unsigned int plane_zpos(struct rcar_du_plane *plane) { - return to_rcar_du_plane_state(plane->plane.state)->zpos; + return to_rcar_plane_state(plane->plane.state)->zpos; } static const struct rcar_du_format_info * plane_format(struct rcar_du_plane *plane) { - return to_rcar_du_plane_state(plane->plane.state)->format; + return to_rcar_plane_state(plane->plane.state)->format; } static void rcar_du_crtc_update_planes(struct rcar_du_crtc *rcrtc) { struct rcar_du_plane *planes[RCAR_DU_NUM_HW_PLANES]; unsigned int num_planes = 0; + unsigned int dptsr_planes; + unsigned int hwplanes = 0; unsigned int prio = 0; unsigned int i; - u32 dptsr = 0; u32 dspr = 0; - for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes.planes); ++i) { - struct rcar_du_plane *plane = &rcrtc->group->planes.planes[i]; + for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes); ++i) { + struct rcar_du_plane *plane = &rcrtc->group->planes[i]; unsigned int j; if (plane->plane.state->crtc != &rcrtc->crtc) @@ -234,41 +235,45 @@ static void rcar_du_crtc_update_planes(struct rcar_du_crtc *rcrtc) for (i = 0; i < num_planes; ++i) { struct rcar_du_plane *plane = planes[i]; struct drm_plane_state *state = plane->plane.state; - unsigned int index = to_rcar_du_plane_state(state)->hwindex; + unsigned int index = to_rcar_plane_state(state)->hwindex; prio -= 4; dspr |= (index + 1) << prio; - dptsr |= DPTSR_PnDK(index) | DPTSR_PnTS(index); + hwplanes |= 1 << index; if (plane_format(plane)->planes == 2) { index = (index + 1) % 8; prio -= 4; dspr |= (index + 1) << prio; - dptsr |= DPTSR_PnDK(index) | DPTSR_PnTS(index); + hwplanes |= 1 << index; } } - /* Select display timing and dot clock generator 2 for planes associated - * with superposition controller 2. + /* Update the planes to display timing and dot clock generator + * associations. + * + * Updating the DPTSR register requires restarting the CRTC group, + * resulting in visible flicker. To mitigate the issue only update the + * association if needed by enabled planes. Planes being disabled will + * keep their current association. */ - if (rcrtc->index % 2) { - /* The DPTSR register is updated when the display controller is - * stopped. We thus need to restart the DU. Once again, sorry - * for the flicker. One way to mitigate the issue would be to - * pre-associate planes with CRTCs (either with a fixed 4/4 - * split, or through a module parameter). Flicker would then - * occur only if we need to break the pre-association. - */ - mutex_lock(&rcrtc->group->lock); - if (rcar_du_group_read(rcrtc->group, DPTSR) != dptsr) { - rcar_du_group_write(rcrtc->group, DPTSR, dptsr); - if (rcrtc->group->used_crtcs) - rcar_du_group_restart(rcrtc->group); - } - mutex_unlock(&rcrtc->group->lock); + mutex_lock(&rcrtc->group->lock); + + dptsr_planes = rcrtc->index % 2 ? rcrtc->group->dptsr_planes | hwplanes + : rcrtc->group->dptsr_planes & ~hwplanes; + + if (dptsr_planes != rcrtc->group->dptsr_planes) { + rcar_du_group_write(rcrtc->group, DPTSR, + (dptsr_planes << 16) | dptsr_planes); + rcrtc->group->dptsr_planes = dptsr_planes; + + if (rcrtc->group->used_crtcs) + rcar_du_group_restart(rcrtc->group); } + mutex_unlock(&rcrtc->group->lock); + rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR, dspr); } @@ -427,8 +432,8 @@ void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc) rcar_du_crtc_start(rcrtc); /* Commit the planes state. */ - for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes.planes); ++i) { - struct rcar_du_plane *plane = &rcrtc->group->planes.planes[i]; + for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes); ++i) { + struct rcar_du_plane *plane = &rcrtc->group->planes[i]; if (plane->plane.state->crtc != &rcrtc->crtc) continue; @@ -592,7 +597,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) rcrtc->enabled = false; ret = drm_crtc_init_with_planes(rcdu->ddev, crtc, - &rgrp->planes.planes[index % 2].plane, + &rgrp->planes[index % 2].plane, NULL, &crtc_funcs); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h index 5d9aa9b33769..4b95d9d08c49 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h @@ -22,6 +22,20 @@ struct rcar_du_group; +/** + * struct rcar_du_crtc - the CRTC, representing a DU superposition processor + * @crtc: base DRM CRTC + * @clock: the CRTC functional clock + * @extclock: external pixel dot clock (optional) + * @mmio_offset: offset of the CRTC registers in the DU MMIO block + * @index: CRTC software and hardware index + * @started: whether the CRTC has been started and is running + * @event: event to post when the pending page flip completes + * @flip_wait: wait queue used to signal page flip completion + * @outputs: bitmask of the outputs (enum rcar_du_output) driven by this CRTC + * @enabled: whether the CRTC is enabled, used to control system resume + * @group: CRTC group this CRTC belongs to + */ struct rcar_du_crtc { struct drm_crtc crtc; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h index c7c538dd2e68..9f34fc86436a 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h @@ -83,6 +83,12 @@ struct rcar_du_device { struct rcar_du_group groups[RCAR_DU_MAX_GROUPS]; + struct { + struct drm_property *alpha; + struct drm_property *colorkey; + struct drm_property *zpos; + } props; + unsigned int dpad0_source; struct rcar_du_lvdsenc *lvds[RCAR_DU_MAX_LVDS]; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.c b/drivers/gpu/drm/rcar-du/rcar_du_group.c index 1bdc0ee0c248..7fd39a7d91c8 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_group.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_group.c @@ -85,6 +85,12 @@ static void rcar_du_group_setup(struct rcar_du_group *rgrp) * superposition 0 to DU0 pins. DU1 pins will be configured dynamically. */ rcar_du_group_write(rgrp, DORCR, DORCR_PG1D_DS1 | DORCR_DPRS); + + /* Apply planes to CRTCs association. */ + mutex_lock(&rgrp->lock); + rcar_du_group_write(rgrp, DPTSR, (rgrp->dptsr_planes << 16) | + rgrp->dptsr_planes); + mutex_unlock(&rgrp->lock); } /* diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.h b/drivers/gpu/drm/rcar-du/rcar_du_group.h index ed36433fbe84..7b414b31c3be 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_group.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_group.h @@ -25,9 +25,11 @@ struct rcar_du_device; * @dev: the DU device * @mmio_offset: registers offset in the device memory map * @index: group index + * @num_crtcs: number of CRTCs in this group (1 or 2) * @use_count: number of users of the group (rcar_du_group_(get|put)) * @used_crtcs: number of CRTCs currently in use - * @lock: protects the DPTSR register + * @lock: protects the dptsr_planes field and the DPTSR register + * @dptsr_planes: bitmask of planes driven by dot-clock and timing generator 1 * @planes: planes handled by the group */ struct rcar_du_group { @@ -35,12 +37,14 @@ struct rcar_du_group { unsigned int mmio_offset; unsigned int index; + unsigned int num_crtcs; unsigned int use_count; unsigned int used_crtcs; struct mutex lock; + unsigned int dptsr_planes; - struct rcar_du_planes planes; + struct rcar_du_plane planes[RCAR_DU_NUM_KMS_PLANES]; }; u32 rcar_du_group_read(struct rcar_du_group *rgrp, u32 reg); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index 93117f159a3b..20859aae882e 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -221,7 +221,7 @@ static bool rcar_du_plane_needs_realloc(struct rcar_du_plane *plane, { const struct rcar_du_format_info *cur_format; - cur_format = to_rcar_du_plane_state(plane->plane.state)->format; + cur_format = to_rcar_plane_state(plane->plane.state)->format; /* Lowering the number of planes doesn't strictly require reallocation * as the extra hardware plane will be freed when committing, but doing @@ -284,14 +284,19 @@ static int rcar_du_atomic_check(struct drm_device *dev, continue; plane = to_rcar_plane(state->planes[i]); - plane_state = to_rcar_du_plane_state(state->plane_states[i]); + plane_state = to_rcar_plane_state(state->plane_states[i]); + + dev_dbg(rcdu->dev, "%s: checking plane (%u,%u)\n", __func__, + plane->group->index, plane - plane->group->planes); /* If the plane is being disabled we don't need to go through * the full reallocation procedure. Just mark the hardware * plane(s) as freed. */ if (!plane_state->format) { - index = plane - plane->group->planes.planes; + dev_dbg(rcdu->dev, "%s: plane is being disabled\n", + __func__); + index = plane - plane->group->planes; group_freed_planes[plane->group->index] |= 1 << index; plane_state->hwindex = -1; continue; @@ -301,10 +306,12 @@ static int rcar_du_atomic_check(struct drm_device *dev, * mark the hardware plane(s) as free. */ if (rcar_du_plane_needs_realloc(plane, plane_state)) { + dev_dbg(rcdu->dev, "%s: plane needs reallocation\n", + __func__); groups |= 1 << plane->group->index; needs_realloc = true; - index = plane - plane->group->planes.planes; + index = plane - plane->group->planes; group_freed_planes[plane->group->index] |= 1 << index; plane_state->hwindex = -1; } @@ -326,8 +333,11 @@ static int rcar_du_atomic_check(struct drm_device *dev, struct rcar_du_group *group = &rcdu->groups[index]; unsigned int used_planes = 0; + dev_dbg(rcdu->dev, "%s: finding free planes for group %u\n", + __func__, index); + for (i = 0; i < RCAR_DU_NUM_KMS_PLANES; ++i) { - struct rcar_du_plane *plane = &group->planes.planes[i]; + struct rcar_du_plane *plane = &group->planes[i]; struct rcar_du_plane_state *plane_state; struct drm_plane_state *s; @@ -342,28 +352,49 @@ static int rcar_du_atomic_check(struct drm_device *dev, * above. Use the local freed planes list to check for * that condition instead. */ - if (group_freed_planes[index] & (1 << i)) + if (group_freed_planes[index] & (1 << i)) { + dev_dbg(rcdu->dev, + "%s: plane (%u,%u) has been freed, skipping\n", + __func__, plane->group->index, + plane - plane->group->planes); continue; + } - plane_state = to_rcar_du_plane_state(plane->plane.state); + plane_state = to_rcar_plane_state(plane->plane.state); used_planes |= rcar_du_plane_hwmask(plane_state); + + dev_dbg(rcdu->dev, + "%s: plane (%u,%u) uses %u hwplanes (index %d)\n", + __func__, plane->group->index, + plane - plane->group->planes, + plane_state->format ? + plane_state->format->planes : 0, + plane_state->hwindex); } group_free_planes[index] = 0xff & ~used_planes; groups &= ~(1 << index); + + dev_dbg(rcdu->dev, "%s: group %u free planes mask 0x%02x\n", + __func__, index, group_free_planes[index]); } /* Reallocate hardware planes for each plane that needs it. */ for (i = 0; i < dev->mode_config.num_total_plane; ++i) { struct rcar_du_plane_state *plane_state; struct rcar_du_plane *plane; + unsigned int crtc_planes; + unsigned int free; int idx; if (!state->planes[i]) continue; plane = to_rcar_plane(state->planes[i]); - plane_state = to_rcar_du_plane_state(state->plane_states[i]); + plane_state = to_rcar_plane_state(state->plane_states[i]); + + dev_dbg(rcdu->dev, "%s: allocating plane (%u,%u)\n", __func__, + plane->group->index, plane - plane->group->planes); /* Skip planes that are being disabled or don't need to be * reallocated. @@ -372,18 +403,38 @@ static int rcar_du_atomic_check(struct drm_device *dev, !rcar_du_plane_needs_realloc(plane, plane_state)) continue; + /* Try to allocate the plane from the free planes currently + * associated with the target CRTC to avoid restarting the CRTC + * group and thus minimize flicker. If it fails fall back to + * allocating from all free planes. + */ + crtc_planes = to_rcar_crtc(plane_state->state.crtc)->index % 2 + ? plane->group->dptsr_planes + : ~plane->group->dptsr_planes; + free = group_free_planes[plane->group->index]; + idx = rcar_du_plane_hwalloc(plane_state->format->planes, - group_free_planes[plane->group->index]); + free & crtc_planes); + if (idx < 0) + idx = rcar_du_plane_hwalloc(plane_state->format->planes, + free); if (idx < 0) { dev_dbg(rcdu->dev, "%s: no available hardware plane\n", __func__); return idx; } + dev_dbg(rcdu->dev, "%s: allocated %u hwplanes (index %u)\n", + __func__, plane_state->format->planes, idx); + plane_state->hwindex = idx; group_free_planes[plane->group->index] &= ~rcar_du_plane_hwmask(plane_state); + + dev_dbg(rcdu->dev, "%s: group %u free planes mask 0x%02x\n", + __func__, plane->group->index, + group_free_planes[plane->group->index]); } return 0; @@ -648,6 +699,31 @@ static int rcar_du_encoders_init(struct rcar_du_device *rcdu) return num_encoders; } +static int rcar_du_properties_init(struct rcar_du_device *rcdu) +{ + rcdu->props.alpha = + drm_property_create_range(rcdu->ddev, 0, "alpha", 0, 255); + if (rcdu->props.alpha == NULL) + return -ENOMEM; + + /* The color key is expressed as an RGB888 triplet stored in a 32-bit + * integer in XRGB8888 format. Bit 24 is used as a flag to disable (0) + * or enable source color keying (1). + */ + rcdu->props.colorkey = + drm_property_create_range(rcdu->ddev, 0, "colorkey", + 0, 0x01ffffff); + if (rcdu->props.colorkey == NULL) + return -ENOMEM; + + rcdu->props.zpos = + drm_property_create_range(rcdu->ddev, 0, "zpos", 1, 7); + if (rcdu->props.zpos == NULL) + return -ENOMEM; + + return 0; +} + int rcar_du_modeset_init(struct rcar_du_device *rcdu) { static const unsigned int mmio_offsets[] = { @@ -672,6 +748,10 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) rcdu->num_crtcs = rcdu->info->num_crtcs; + ret = rcar_du_properties_init(rcdu); + if (ret < 0) + return ret; + /* Initialize the groups. */ num_groups = DIV_ROUND_UP(rcdu->num_crtcs, 2); @@ -683,6 +763,13 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) rgrp->dev = rcdu; rgrp->mmio_offset = mmio_offsets[i]; rgrp->index = i; + rgrp->num_crtcs = min(rcdu->num_crtcs - 2 * i, 2U); + + /* If we have more than one CRTCs in this group pre-associate + * planes 0-3 with CRTC 0 and planes 4-7 with CRTC 1 to minimize + * flicker occurring when the association is changed. + */ + rgrp->dptsr_planes = rgrp->num_crtcs > 1 ? 0xf0 : 0; ret = rcar_du_planes_init(rgrp); if (ret < 0) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.c b/drivers/gpu/drm/rcar-du/rcar_du_plane.c index 210e5c3fd982..3e30d84b798f 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_plane.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.c @@ -45,7 +45,7 @@ static void rcar_du_plane_write(struct rcar_du_group *rgrp, static void rcar_du_plane_setup_fb(struct rcar_du_plane *plane) { struct rcar_du_plane_state *state = - to_rcar_du_plane_state(plane->plane.state); + to_rcar_plane_state(plane->plane.state); struct drm_framebuffer *fb = plane->plane.state->fb; struct rcar_du_group *rgrp = plane->group; unsigned int src_x = state->state.src_x >> 16; @@ -109,7 +109,7 @@ static void rcar_du_plane_setup_mode(struct rcar_du_plane *plane, unsigned int index) { struct rcar_du_plane_state *state = - to_rcar_du_plane_state(plane->plane.state); + to_rcar_plane_state(plane->plane.state); struct rcar_du_group *rgrp = plane->group; u32 colorkey; u32 pnmr; @@ -172,7 +172,7 @@ static void __rcar_du_plane_setup(struct rcar_du_plane *plane, unsigned int index) { struct rcar_du_plane_state *state = - to_rcar_du_plane_state(plane->plane.state); + to_rcar_plane_state(plane->plane.state); struct rcar_du_group *rgrp = plane->group; u32 ddcr2 = PnDDCR2_CODE; u32 ddcr4; @@ -222,7 +222,7 @@ static void __rcar_du_plane_setup(struct rcar_du_plane *plane, void rcar_du_plane_setup(struct rcar_du_plane *plane) { struct rcar_du_plane_state *state = - to_rcar_du_plane_state(plane->plane.state); + to_rcar_plane_state(plane->plane.state); __rcar_du_plane_setup(plane, state->hwindex); if (state->format->planes == 2) @@ -234,7 +234,7 @@ void rcar_du_plane_setup(struct rcar_du_plane *plane) static int rcar_du_plane_atomic_check(struct drm_plane *plane, struct drm_plane_state *state) { - struct rcar_du_plane_state *rstate = to_rcar_du_plane_state(state); + struct rcar_du_plane_state *rstate = to_rcar_plane_state(state); struct rcar_du_plane *rplane = to_rcar_plane(plane); struct rcar_du_device *rcdu = rplane->group->dev; @@ -302,7 +302,7 @@ rcar_du_plane_atomic_duplicate_state(struct drm_plane *plane) struct rcar_du_plane_state *state; struct rcar_du_plane_state *copy; - state = to_rcar_du_plane_state(plane->state); + state = to_rcar_plane_state(plane->state); copy = kmemdup(state, sizeof(*state), GFP_KERNEL); if (copy == NULL) return NULL; @@ -319,7 +319,7 @@ static void rcar_du_plane_atomic_destroy_state(struct drm_plane *plane, if (state->fb) drm_framebuffer_unreference(state->fb); - kfree(to_rcar_du_plane_state(state)); + kfree(to_rcar_plane_state(state)); } static int rcar_du_plane_atomic_set_property(struct drm_plane *plane, @@ -327,15 +327,14 @@ static int rcar_du_plane_atomic_set_property(struct drm_plane *plane, struct drm_property *property, uint64_t val) { - struct rcar_du_plane_state *rstate = to_rcar_du_plane_state(state); - struct rcar_du_plane *rplane = to_rcar_plane(plane); - struct rcar_du_group *rgrp = rplane->group; + struct rcar_du_plane_state *rstate = to_rcar_plane_state(state); + struct rcar_du_device *rcdu = to_rcar_plane(plane)->group->dev; - if (property == rgrp->planes.alpha) + if (property == rcdu->props.alpha) rstate->alpha = val; - else if (property == rgrp->planes.colorkey) + else if (property == rcdu->props.colorkey) rstate->colorkey = val; - else if (property == rgrp->planes.zpos) + else if (property == rcdu->props.zpos) rstate->zpos = val; else return -EINVAL; @@ -349,14 +348,13 @@ static int rcar_du_plane_atomic_get_property(struct drm_plane *plane, { const struct rcar_du_plane_state *rstate = container_of(state, const struct rcar_du_plane_state, state); - struct rcar_du_plane *rplane = to_rcar_plane(plane); - struct rcar_du_group *rgrp = rplane->group; + struct rcar_du_device *rcdu = to_rcar_plane(plane)->group->dev; - if (property == rgrp->planes.alpha) + if (property == rcdu->props.alpha) *val = rstate->alpha; - else if (property == rgrp->planes.colorkey) + else if (property == rcdu->props.colorkey) *val = rstate->colorkey; - else if (property == rgrp->planes.zpos) + else if (property == rcdu->props.zpos) *val = rstate->zpos; else return -EINVAL; @@ -391,47 +389,24 @@ static const uint32_t formats[] = { int rcar_du_planes_init(struct rcar_du_group *rgrp) { - struct rcar_du_planes *planes = &rgrp->planes; struct rcar_du_device *rcdu = rgrp->dev; unsigned int num_planes; - unsigned int num_crtcs; unsigned int crtcs; unsigned int i; int ret; - planes->alpha = - drm_property_create_range(rcdu->ddev, 0, "alpha", 0, 255); - if (planes->alpha == NULL) - return -ENOMEM; - - /* The color key is expressed as an RGB888 triplet stored in a 32-bit - * integer in XRGB8888 format. Bit 24 is used as a flag to disable (0) - * or enable source color keying (1). - */ - planes->colorkey = - drm_property_create_range(rcdu->ddev, 0, "colorkey", - 0, 0x01ffffff); - if (planes->colorkey == NULL) - return -ENOMEM; - - planes->zpos = - drm_property_create_range(rcdu->ddev, 0, "zpos", 1, 7); - if (planes->zpos == NULL) - return -ENOMEM; - - /* Create one primary plane per in this group CRTC and seven overlay + /* Create one primary plane per CRTC in this group and seven overlay * planes. */ - num_crtcs = min(rcdu->num_crtcs - 2 * rgrp->index, 2U); - num_planes = num_crtcs + 7; + num_planes = rgrp->num_crtcs + 7; crtcs = ((1 << rcdu->num_crtcs) - 1) & (3 << (2 * rgrp->index)); for (i = 0; i < num_planes; ++i) { - enum drm_plane_type type = i < num_crtcs + enum drm_plane_type type = i < rgrp->num_crtcs ? DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY; - struct rcar_du_plane *plane = &planes->planes[i]; + struct rcar_du_plane *plane = &rgrp->planes[i]; plane->group = rgrp; @@ -448,12 +423,12 @@ int rcar_du_planes_init(struct rcar_du_group *rgrp) continue; drm_object_attach_property(&plane->plane.base, - planes->alpha, 255); + rcdu->props.alpha, 255); drm_object_attach_property(&plane->plane.base, - planes->colorkey, + rcdu->props.colorkey, RCAR_DU_COLORKEY_NONE); drm_object_attach_property(&plane->plane.base, - planes->zpos, 1); + rcdu->props.zpos, 1); } return 0; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.h b/drivers/gpu/drm/rcar-du/rcar_du_plane.h index abff0ebeb195..9732bff1911b 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_plane.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.h @@ -38,19 +38,20 @@ static inline struct rcar_du_plane *to_rcar_plane(struct drm_plane *plane) return container_of(plane, struct rcar_du_plane, plane); } -struct rcar_du_planes { - struct rcar_du_plane planes[RCAR_DU_NUM_KMS_PLANES]; - - struct drm_property *alpha; - struct drm_property *colorkey; - struct drm_property *zpos; -}; - +/** + * struct rcar_du_plane_state - Driver-specific plane state + * @state: base DRM plane state + * @format: information about the pixel format used by the plane + * @hwindex: 0-based hardware plane index, -1 means unused + * @alpha: value of the plane alpha property + * @colorkey: value of the plane colorkey property + * @zpos: value of the plane zpos property + */ struct rcar_du_plane_state { struct drm_plane_state state; const struct rcar_du_format_info *format; - int hwindex; /* 0-based, -1 means unused */ + int hwindex; unsigned int alpha; unsigned int colorkey; @@ -58,7 +59,7 @@ struct rcar_du_plane_state { }; static inline struct rcar_du_plane_state * -to_rcar_du_plane_state(struct drm_plane_state *state) +to_rcar_plane_state(struct drm_plane_state *state) { return container_of(state, struct rcar_du_plane_state, state); } diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index 01e1d27eb078..3077f1554099 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -342,9 +342,12 @@ static struct dma_page *__ttm_dma_alloc_page(struct dma_pool *pool) d_page->vaddr = dma_alloc_coherent(pool->dev, pool->size, &d_page->dma, pool->gfp_flags); - if (d_page->vaddr) - d_page->p = virt_to_page(d_page->vaddr); - else { + if (d_page->vaddr) { + if (is_vmalloc_addr(d_page->vaddr)) + d_page->p = vmalloc_to_page(d_page->vaddr); + else + d_page->p = virt_to_page(d_page->vaddr); + } else { kfree(d_page); d_page = NULL; } diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index bd0d644d2a03..17c445612e01 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -286,11 +286,9 @@ INTEL_SKL_GT2_IDS(info), \ INTEL_SKL_GT3_IDS(info) - #define INTEL_BXT_IDS(info) \ INTEL_VGA_DEVICE(0x0A84, info), \ - INTEL_VGA_DEVICE(0x0A85, info), \ - INTEL_VGA_DEVICE(0x0A86, info), \ - INTEL_VGA_DEVICE(0x0A87, info) + INTEL_VGA_DEVICE(0x1A84, info), \ + INTEL_VGA_DEVICE(0x5A84, info) #endif /* _I915_PCIIDS_H */ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 4851d660243c..6e1a2ed116cb 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -171,8 +171,12 @@ typedef struct _drm_i915_sarea { #define I915_BOX_TEXTURE_LOAD 0x8 #define I915_BOX_LOST_CONTEXT 0x10 -/* I915 specific ioctls - * The device specific ioctl range is 0x40 to 0x79. +/* + * i915 specific ioctls. + * + * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END) ie + * [0x40, 0xa0) (a0 is excluded). The numbers below are defined as offset + * against DRM_COMMAND_BASE and should be between [0x0, 0x60). */ #define DRM_I915_INIT 0x00 #define DRM_I915_FLUSH 0x01 diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 871e73f99a4d..573cb86a3d6e 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -1038,6 +1038,7 @@ struct drm_radeon_cs { #define RADEON_INFO_CURRENT_GPU_SCLK 0x22 #define RADEON_INFO_CURRENT_GPU_MCLK 0x23 #define RADEON_INFO_READ_REG 0x24 +#define RADEON_INFO_GPU_RESET_COUNTER 0x25 struct drm_radeon_info { uint32_t request; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 4ca35a8f9891..d6833426fdef 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -128,6 +128,32 @@ struct kfd_ioctl_get_process_apertures_args { uint32_t pad; }; +#define MAX_ALLOWED_NUM_POINTS 100 +#define MAX_ALLOWED_AW_BUFF_SIZE 4096 +#define MAX_ALLOWED_WAC_BUFF_SIZE 128 + +struct kfd_ioctl_dbg_register_args { + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_dbg_unregister_args { + uint32_t gpu_id; /* to KFD */ + uint32_t pad; +}; + +struct kfd_ioctl_dbg_address_watch_args { + uint64_t content_ptr; /* a pointer to the actual content */ + uint32_t gpu_id; /* to KFD */ + uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */ +}; + +struct kfd_ioctl_dbg_wave_control_args { + uint64_t content_ptr; /* a pointer to the actual content */ + uint32_t gpu_id; /* to KFD */ + uint32_t buf_size_in_bytes; /*including gpu_id and buf_size */ +}; + /* Matching HSA_EVENTTYPE */ #define KFD_IOC_EVENT_SIGNAL 0 #define KFD_IOC_EVENT_NODECHANGE 1 @@ -198,7 +224,8 @@ struct kfd_event_data { }; struct kfd_ioctl_wait_events_args { - uint64_t events_ptr; /* to KFD */ + uint64_t events_ptr; /* pointed to struct + kfd_event_data array, to KFD */ uint32_t num_events; /* to KFD */ uint32_t wait_for_all; /* to KFD */ uint32_t timeout; /* to KFD */ @@ -247,7 +274,19 @@ struct kfd_ioctl_wait_events_args { #define AMDKFD_IOC_WAIT_EVENTS \ AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args) +#define AMDKFD_IOC_DBG_REGISTER \ + AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args) + +#define AMDKFD_IOC_DBG_UNREGISTER \ + AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args) + +#define AMDKFD_IOC_DBG_ADDRESS_WATCH \ + AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args) + +#define AMDKFD_IOC_DBG_WAVE_CONTROL \ + AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x0D +#define AMDKFD_COMMAND_END 0x11 #endif |